diff --git a/api/delegation/cinder/messages.go b/api/delegation/cinder/messages.go index b1e9bebbf..cc4a5b5dc 100644 --- a/api/delegation/cinder/messages.go +++ b/api/delegation/cinder/messages.go @@ -3,7 +3,11 @@ package api -import "log/slog" +import ( + "log/slog" + + "github.com/cobaltcore-dev/cortex/internal/scheduling/lib" +) // Host object from the Cinder scheduler pipeline. type ExternalSchedulerHost struct { @@ -46,6 +50,16 @@ func (r ExternalSchedulerRequest) GetTraceLogArgs() []slog.Attr { slog.String("project", r.Context.ProjectID), } } +func (r ExternalSchedulerRequest) FilterSubjects(includedSubjects map[string]float64) lib.FilterWeigherPipelineRequest { + filteredHosts := make([]ExternalSchedulerHost, 0, len(includedSubjects)) + for _, host := range r.Hosts { + if _, exists := includedSubjects[host.VolumeHost]; exists { + filteredHosts = append(filteredHosts, host) + } + } + r.Hosts = filteredHosts + return r +} // Response generated by cortex for the Cinder scheduler. // Cortex returns an ordered list of hosts that the share should be scheduled on. diff --git a/api/delegation/ironcore/messages.go b/api/delegation/ironcore/messages.go index 37c1bf8c7..c346f20e8 100644 --- a/api/delegation/ironcore/messages.go +++ b/api/delegation/ironcore/messages.go @@ -7,6 +7,7 @@ import ( "log/slog" ironcorev1alpha1 "github.com/cobaltcore-dev/cortex/api/delegation/ironcore/v1alpha1" + "github.com/cobaltcore-dev/cortex/internal/scheduling/lib" ) type MachinePipelineRequest struct { @@ -31,3 +32,13 @@ func (r MachinePipelineRequest) GetWeights() map[string]float64 { func (r MachinePipelineRequest) GetTraceLogArgs() []slog.Attr { return []slog.Attr{} } +func (r MachinePipelineRequest) FilterSubjects(includedSubjects map[string]float64) lib.FilterWeigherPipelineRequest { + filteredPools := make([]ironcorev1alpha1.MachinePool, 0, len(includedSubjects)) + for _, pool := range r.Pools { + if _, exists := includedSubjects[pool.Name]; exists { + filteredPools = append(filteredPools, pool) + } + } + r.Pools = filteredPools + return r +} diff --git a/api/delegation/manila/messages.go b/api/delegation/manila/messages.go index b1b4eb26e..c21a701ab 100644 --- a/api/delegation/manila/messages.go +++ b/api/delegation/manila/messages.go @@ -3,7 +3,11 @@ package api -import "log/slog" +import ( + "log/slog" + + "github.com/cobaltcore-dev/cortex/internal/scheduling/lib" +) // Host object from the Manila scheduler pipeline. type ExternalSchedulerHost struct { @@ -46,6 +50,16 @@ func (r ExternalSchedulerRequest) GetTraceLogArgs() []slog.Attr { slog.String("project", r.Context.ProjectID), } } +func (r ExternalSchedulerRequest) FilterSubjects(includedSubjects map[string]float64) lib.FilterWeigherPipelineRequest { + filteredHosts := make([]ExternalSchedulerHost, 0, len(includedSubjects)) + for _, host := range r.Hosts { + if _, exists := includedSubjects[host.ShareHost]; exists { + filteredHosts = append(filteredHosts, host) + } + } + r.Hosts = filteredHosts + return r +} // Response generated by cortex for the Manila scheduler. // Cortex returns an ordered list of hosts that the share should be scheduled on. diff --git a/api/delegation/nova/messages.go b/api/delegation/nova/messages.go index ff69ca2e9..8e9a097e2 100644 --- a/api/delegation/nova/messages.go +++ b/api/delegation/nova/messages.go @@ -7,6 +7,8 @@ import ( "errors" "fmt" "log/slog" + + "github.com/cobaltcore-dev/cortex/internal/scheduling/lib" ) // Host object from the Nova scheduler pipeline. 
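Note on the FilterSubjects additions above (Cinder, Ironcore, Manila) and in the Nova and Pods hunks below: each delegation request type now implements FilterSubjects so the shared filter-weigher pipeline can prune candidate subjects without knowing the concrete request type. The definition of lib.FilterWeigherPipelineRequest is not part of this diff; the standalone sketch below only illustrates the shape it plausibly has, plus the value-receiver semantics the implementations share (the receiver is copied, its subject slice is replaced, and the copy is returned, so the caller's request stays untouched). Every name not visible in the diff is a stand-in.

package main

import (
	"fmt"
	"log/slog"
)

// Plausible shape of the interface, inferred only from the methods visible in this diff.
type FilterWeigherPipelineRequest interface {
	GetWeights() map[string]float64
	GetTraceLogArgs() []slog.Attr
	// Returns a copy of the request containing only the listed subjects.
	FilterSubjects(includedSubjects map[string]float64) FilterWeigherPipelineRequest
}

// Toy request type mirroring the pattern used by the real implementations.
type toyRequest struct {
	Hosts []string
}

func (r toyRequest) GetWeights() map[string]float64 { return map[string]float64{} }
func (r toyRequest) GetTraceLogArgs() []slog.Attr   { return nil }

func (r toyRequest) FilterSubjects(includedSubjects map[string]float64) FilterWeigherPipelineRequest {
	filtered := make([]string, 0, len(includedSubjects))
	for _, host := range r.Hosts {
		if _, exists := includedSubjects[host]; exists {
			filtered = append(filtered, host)
		}
	}
	r.Hosts = filtered // value receiver: only the copy is modified
	return r
}

func main() {
	req := toyRequest{Hosts: []string{"host-a", "host-b", "host-c"}}
	out := req.FilterSubjects(map[string]float64{"host-a": 1.0, "host-c": 0.5})
	fmt.Println(out.(toyRequest).Hosts) // [host-a host-c]: input order preserved
	fmt.Println(req.Hosts)              // [host-a host-b host-c]: caller's request unchanged
}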
@@ -69,6 +71,16 @@ func (r ExternalSchedulerRequest) GetTraceLogArgs() []slog.Attr { slog.String("project", r.Context.ProjectID), } } +func (r ExternalSchedulerRequest) FilterSubjects(includedSubjects map[string]float64) lib.FilterWeigherPipelineRequest { + filteredHosts := make([]ExternalSchedulerHost, 0, len(includedSubjects)) + for _, host := range r.Hosts { + if _, exists := includedSubjects[host.ComputeHost]; exists { + filteredHosts = append(filteredHosts, host) + } + } + r.Hosts = filteredHosts + return r +} // Response generated by cortex for the Nova scheduler. // Cortex returns an ordered list of hosts that the VM should be scheduled on. diff --git a/api/delegation/pods/messages.go b/api/delegation/pods/messages.go index a3c9fd956..862aa7a40 100644 --- a/api/delegation/pods/messages.go +++ b/api/delegation/pods/messages.go @@ -6,6 +6,7 @@ package pods import ( "log/slog" + "github.com/cobaltcore-dev/cortex/internal/scheduling/lib" corev1 "k8s.io/api/core/v1" ) @@ -33,3 +34,13 @@ func (r PodPipelineRequest) GetWeights() map[string]float64 { func (r PodPipelineRequest) GetTraceLogArgs() []slog.Attr { return []slog.Attr{} } +func (r PodPipelineRequest) FilterSubjects(includedSubjects map[string]float64) lib.FilterWeigherPipelineRequest { + filteredNodes := make([]corev1.Node, 0, len(includedSubjects)) + for _, node := range r.Nodes { + if _, exists := includedSubjects[node.Name]; exists { + filteredNodes = append(filteredNodes, node) + } + } + r.Nodes = filteredNodes + return r +} diff --git a/api/v1alpha1/pipeline_types.go b/api/v1alpha1/pipeline_types.go index f64ed2008..cb2dfbf71 100644 --- a/api/v1alpha1/pipeline_types.go +++ b/api/v1alpha1/pipeline_types.go @@ -4,63 +4,59 @@ package v1alpha1 import ( - corev1 "k8s.io/api/core/v1" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" runtime "k8s.io/apimachinery/pkg/runtime" ) -type DisabledValidationsSpec struct { - // Whether to validate that no subjects are removed or added from the scheduler - // step. This should only be disabled for scheduler steps that remove subjects. - // Thus, if no value is provided, the default is false. - SameSubjectNumberInOut bool `json:"sameSubjectNumberInOut,omitempty"` - // Whether to validate that, after running the step, there are remaining subjects. - // This should only be disabled for scheduler steps that are expected to - // remove all subjects. - SomeSubjectsRemain bool `json:"someSubjectsRemain,omitempty"` -} +type FilterSpec struct { + // The name of the scheduler step in the cortex implementation. + // Must match to a step implemented by the pipeline controller. + Name string `json:"name"` -type StepType string + // Additional configuration for the step that can be used + // +kubebuilder:validation:Optional + Params runtime.RawExtension `json:"params,omitempty"` -const ( - // Step for assigning weights to hosts. - StepTypeWeigher StepType = "weigher" - // Step for filtering hosts. - StepTypeFilter StepType = "filter" - // Step for generating descheduling recommendations. - StepTypeDescheduler StepType = "descheduler" -) + // Additional description of the step which helps understand its purpose + // and decisions made by it. + // +kubebuilder:validation:Optional + Description string `json:"description,omitempty"` +} type WeigherSpec struct { - // The validations to disable for this step. If none are provided, all - // applied validations are enabled. + // The name of the scheduler step in the cortex implementation. + // Must match to a step implemented by the pipeline controller. 
+ Name string `json:"name"` + + // Additional configuration for the step that can be used // +kubebuilder:validation:Optional - DisabledValidations DisabledValidationsSpec `json:"disabledValidations,omitempty"` -} + Params runtime.RawExtension `json:"params,omitempty"` + + // Additional description of the step which helps understand its purpose + // and decisions made by it. + // +kubebuilder:validation:Optional + Description string `json:"description,omitempty"` -type StepSpec struct { - // The type of the scheduler step. - Type StepType `json:"type"` - // If the type is "weigher", this contains additional configuration for it. + // Optional multiplier to apply to the step's output. + // This can be used to increase or decrease the weight of a step + // relative to other steps in the same pipeline. // +kubebuilder:validation:Optional - Weigher *WeigherSpec `json:"weigher,omitempty"` + Multiplier *float64 `json:"multiplier,omitempty"` +} +type DetectorSpec struct { // The name of the scheduler step in the cortex implementation. - Impl string `json:"impl"` - // Additional configuration for the extractor that can be used - // +kubebuilder:validation:Optional - Opts runtime.RawExtension `json:"opts,omitempty"` - // Knowledges this step depends on to be ready. + // Must match to a step implemented by the pipeline controller. + Name string `json:"name"` + + // Additional configuration for the step that can be used // +kubebuilder:validation:Optional - Knowledges []corev1.ObjectReference `json:"knowledges,omitempty"` + Params runtime.RawExtension `json:"params,omitempty"` + // Additional description of the step which helps understand its purpose // and decisions made by it. // +kubebuilder:validation:Optional Description string `json:"description,omitempty"` - - // Whether this step is mandatory for the pipeline to be runnable. - // +kubebuilder:default=true - Mandatory bool `json:"mandatory"` } type PipelineType string @@ -69,41 +65,113 @@ const ( // Pipeline containing filter-weigher steps for initial placement, // migration, etc. of instances. PipelineTypeFilterWeigher PipelineType = "filter-weigher" - // Pipeline containing descheduler steps for generating descheduling + // Pipeline containing detector steps, e.g. for generating descheduling // recommendations. - PipelineTypeDescheduler PipelineType = "descheduler" + PipelineTypeDetector PipelineType = "detector" ) type PipelineSpec struct { // SchedulingDomain defines in which scheduling domain this pipeline // is used (e.g., nova, cinder, manila). SchedulingDomain SchedulingDomain `json:"schedulingDomain"` - // An optional description of the pipeline. + + // An optional description of the pipeline, helping understand its purpose. // +kubebuilder:validation:Optional Description string `json:"description,omitempty"` + // If this pipeline should create decision objects. // When this is false, the pipeline will still process requests. // +kubebuilder:default=false CreateDecisions bool `json:"createDecisions,omitempty"` - // The type of the pipeline. + + // The type of the pipeline, used to differentiate between + // filter-weigher and detector pipelines within the same + // scheduling domain. + // + // If the type is filter-weigher, the filter and weigher attributes + // must be set. If the type is detector, the detectors attribute + // must be set. + // + // +kubebuilder:validation:Enum=filter-weigher;detector Type PipelineType `json:"type"` - // The ordered list of steps that make up this pipeline. 
- Steps []StepSpec `json:"steps,omitempty"` + + // Ordered list of filters to apply in a scheduling pipeline. + // + // This attribute is set only if the pipeline type is filter-weigher. + // Filters remove host candidates from an initial set, leaving + // valid candidates. Filters are run before weighers are applied. + // +kubebuilder:validation:Optional + Filters []FilterSpec `json:"filters,omitempty"` + + // Ordered list of weighers to apply in a scheduling pipeline. + // + // This attribute is set only if the pipeline type is filter-weigher. + // These weighers are run after filters are applied. + // +kubebuilder:validation:Optional + Weighers []WeigherSpec `json:"weighers,omitempty"` + + // Ordered list of detectors to apply in a descheduling pipeline. + // + // This attribute is set only if the pipeline type is detector. + // Detectors find candidates for descheduling (migration off current host). + // These detectors are run after weighers are applied. + // +kubebuilder:validation:Optional + Detectors []DetectorSpec `json:"detectors,omitempty"` +} + +const ( + FilterConditionReady = "Ready" + WeigherConditionReady = "Ready" + DetectorConditionReady = "Ready" +) + +type FilterStatus struct { + // The name of the filter. + Name string `json:"name"` + + // The current status conditions of the filter. + // +kubebuilder:validation:Optional + Conditions []metav1.Condition `json:"conditions,omitempty" patchStrategy:"merge" patchMergeKey:"type"` +} + +type WeigherStatus struct { + // The name of the weigher. + Name string `json:"name"` + + // The current status conditions of the weigher. + // +kubebuilder:validation:Optional + Conditions []metav1.Condition `json:"conditions,omitempty" patchStrategy:"merge" patchMergeKey:"type"` +} + +type DetectorStatus struct { + // The name of the detector. + Name string `json:"name"` + + // The current status conditions of the detector. + // +kubebuilder:validation:Optional + Conditions []metav1.Condition `json:"conditions,omitempty" patchStrategy:"merge" patchMergeKey:"type"` } const ( // The pipeline is ready to be used. PipelineConditionReady = "Ready" + // All steps in the pipeline are ready. + PipelineConditionAllStepsReady = "AllStepsReady" ) type PipelineStatus struct { - // The total number of steps configured in the pipeline. - TotalSteps int `json:"totalSteps"` - // The number of steps that are ready. - ReadySteps int `json:"readySteps"` - // An overview of the readiness of the steps in the pipeline. - // Format: "ReadySteps / TotalSteps steps ready". - StepsReadyFrac string `json:"stepsReadyFrac,omitempty"` + // List of statuses for each filter in the pipeline. + // +kubebuilder:validation:Optional + Filters []FilterStatus `json:"filters,omitempty"` + + // List of statuses for each weigher in the pipeline. + // +kubebuilder:validation:Optional + Weighers []WeigherStatus `json:"weighers,omitempty"` + + // List of statuses for each detector in the pipeline. + // +kubebuilder:validation:Optional + Detectors []DetectorStatus `json:"detectors,omitempty"` + // The current status conditions of the pipeline. 
// +kubebuilder:validation:Optional Conditions []metav1.Condition `json:"conditions,omitempty" patchStrategy:"merge" patchMergeKey:"type"` @@ -115,8 +183,8 @@ type PipelineStatus struct { // +kubebuilder:printcolumn:name="Created",type="date",JSONPath=".metadata.creationTimestamp" // +kubebuilder:printcolumn:name="Domain",type="string",JSONPath=".spec.schedulingDomain" // +kubebuilder:printcolumn:name="Type",type="string",JSONPath=".spec.type" -// +kubebuilder:printcolumn:name="Steps",type="string",JSONPath=".status.stepsReadyFrac" -// +kubebuilder:printcolumn:name="Ready",type="string",JSONPath=".status.conditions[?(@.type=='Ready')].status" +// +kubebuilder:printcolumn:name="All Steps Ready",type="string",JSONPath=".status.conditions[?(@.type=='AllStepsReady')].status" +// +kubebuilder:printcolumn:name="Pipeline Ready",type="string",JSONPath=".status.conditions[?(@.type=='Ready')].status" // Pipeline is the Schema for the decisions API type Pipeline struct { diff --git a/api/v1alpha1/zz_generated.deepcopy.go b/api/v1alpha1/zz_generated.deepcopy.go index a9ac69e2f..2551ef3ea 100644 --- a/api/v1alpha1/zz_generated.deepcopy.go +++ b/api/v1alpha1/zz_generated.deepcopy.go @@ -426,16 +426,77 @@ func (in *DeschedulingStatus) DeepCopy() *DeschedulingStatus { } // DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. -func (in *DisabledValidationsSpec) DeepCopyInto(out *DisabledValidationsSpec) { +func (in *DetectorSpec) DeepCopyInto(out *DetectorSpec) { *out = *in + in.Params.DeepCopyInto(&out.Params) } -// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new DisabledValidationsSpec. -func (in *DisabledValidationsSpec) DeepCopy() *DisabledValidationsSpec { +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new DetectorSpec. +func (in *DetectorSpec) DeepCopy() *DetectorSpec { if in == nil { return nil } - out := new(DisabledValidationsSpec) + out := new(DetectorSpec) + in.DeepCopyInto(out) + return out +} + +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. +func (in *DetectorStatus) DeepCopyInto(out *DetectorStatus) { + *out = *in + if in.Conditions != nil { + in, out := &in.Conditions, &out.Conditions + *out = make([]metav1.Condition, len(*in)) + for i := range *in { + (*in)[i].DeepCopyInto(&(*out)[i]) + } + } +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new DetectorStatus. +func (in *DetectorStatus) DeepCopy() *DetectorStatus { + if in == nil { + return nil + } + out := new(DetectorStatus) + in.DeepCopyInto(out) + return out +} + +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. +func (in *FilterSpec) DeepCopyInto(out *FilterSpec) { + *out = *in + in.Params.DeepCopyInto(&out.Params) +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new FilterSpec. +func (in *FilterSpec) DeepCopy() *FilterSpec { + if in == nil { + return nil + } + out := new(FilterSpec) + in.DeepCopyInto(out) + return out +} + +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. 
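Note on the pipeline_types.go changes above: the previous StepSpec/DisabledValidations model is replaced by dedicated FilterSpec, WeigherSpec, and DetectorSpec lists, and the status now carries per-step conditions instead of a ready-steps fraction. The sketch below only shows how a spec is assembled under the new layout; the step names and params are made up for illustration, and SchedulingDomain is assumed to be a string-based type (its definition is not part of this diff). Detector pipelines would set Detectors instead of Filters/Weighers.

package main

import (
	"fmt"

	v1alpha1 "github.com/cobaltcore-dev/cortex/api/v1alpha1"
	"k8s.io/apimachinery/pkg/runtime"
)

func main() {
	multiplier := 2.0
	spec := v1alpha1.PipelineSpec{
		SchedulingDomain: "nova", // assumes SchedulingDomain is a string type
		Description:      "Initial placement pipeline for Nova.",
		CreateDecisions:  true,
		Type:             v1alpha1.PipelineTypeFilterWeigher,
		// Filters run first and drop invalid candidates.
		Filters: []v1alpha1.FilterSpec{{
			Name:        "example-capacity-filter", // hypothetical step name
			Description: "Drops hosts without enough free capacity.",
			Params:      runtime.RawExtension{Raw: []byte(`{"reservedMemoryMB": 1024}`)},
		}},
		// Weighers then rank the remaining candidates; Multiplier scales one
		// weigher's output relative to the others in the same pipeline.
		Weighers: []v1alpha1.WeigherSpec{{
			Name:       "example-cpu-balance-weigher", // hypothetical step name
			Multiplier: &multiplier,
		}},
	}
	fmt.Println(spec.Type, len(spec.Filters), len(spec.Weighers))
}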
+func (in *FilterStatus) DeepCopyInto(out *FilterStatus) { + *out = *in + if in.Conditions != nil { + in, out := &in.Conditions, &out.Conditions + *out = make([]metav1.Condition, len(*in)) + for i := range *in { + (*in)[i].DeepCopyInto(&(*out)[i]) + } + } +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new FilterStatus. +func (in *FilterStatus) DeepCopy() *FilterStatus { + if in == nil { + return nil + } + out := new(FilterStatus) in.DeepCopyInto(out) return out } @@ -855,9 +916,23 @@ func (in *PipelineList) DeepCopyObject() runtime.Object { // DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. func (in *PipelineSpec) DeepCopyInto(out *PipelineSpec) { *out = *in - if in.Steps != nil { - in, out := &in.Steps, &out.Steps - *out = make([]StepSpec, len(*in)) + if in.Filters != nil { + in, out := &in.Filters, &out.Filters + *out = make([]FilterSpec, len(*in)) + for i := range *in { + (*in)[i].DeepCopyInto(&(*out)[i]) + } + } + if in.Weighers != nil { + in, out := &in.Weighers, &out.Weighers + *out = make([]WeigherSpec, len(*in)) + for i := range *in { + (*in)[i].DeepCopyInto(&(*out)[i]) + } + } + if in.Detectors != nil { + in, out := &in.Detectors, &out.Detectors + *out = make([]DetectorSpec, len(*in)) for i := range *in { (*in)[i].DeepCopyInto(&(*out)[i]) } @@ -877,6 +952,27 @@ func (in *PipelineSpec) DeepCopy() *PipelineSpec { // DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. func (in *PipelineStatus) DeepCopyInto(out *PipelineStatus) { *out = *in + if in.Filters != nil { + in, out := &in.Filters, &out.Filters + *out = make([]FilterStatus, len(*in)) + for i := range *in { + (*in)[i].DeepCopyInto(&(*out)[i]) + } + } + if in.Weighers != nil { + in, out := &in.Weighers, &out.Weighers + *out = make([]WeigherStatus, len(*in)) + for i := range *in { + (*in)[i].DeepCopyInto(&(*out)[i]) + } + } + if in.Detectors != nil { + in, out := &in.Detectors, &out.Detectors + *out = make([]DetectorStatus, len(*in)) + for i := range *in { + (*in)[i].DeepCopyInto(&(*out)[i]) + } + } if in.Conditions != nil { in, out := &in.Conditions, &out.Conditions *out = make([]metav1.Condition, len(*in)) @@ -1099,43 +1195,44 @@ func (in *StepResult) DeepCopy() *StepResult { } // DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. -func (in *StepSpec) DeepCopyInto(out *StepSpec) { +func (in *WeigherSpec) DeepCopyInto(out *WeigherSpec) { *out = *in - if in.Weigher != nil { - in, out := &in.Weigher, &out.Weigher - *out = new(WeigherSpec) + in.Params.DeepCopyInto(&out.Params) + if in.Multiplier != nil { + in, out := &in.Multiplier, &out.Multiplier + *out = new(float64) **out = **in } - in.Opts.DeepCopyInto(&out.Opts) - if in.Knowledges != nil { - in, out := &in.Knowledges, &out.Knowledges - *out = make([]v1.ObjectReference, len(*in)) - copy(*out, *in) - } } -// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new StepSpec. -func (in *StepSpec) DeepCopy() *StepSpec { +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new WeigherSpec. +func (in *WeigherSpec) DeepCopy() *WeigherSpec { if in == nil { return nil } - out := new(StepSpec) + out := new(WeigherSpec) in.DeepCopyInto(out) return out } // DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. 
-func (in *WeigherSpec) DeepCopyInto(out *WeigherSpec) { +func (in *WeigherStatus) DeepCopyInto(out *WeigherStatus) { *out = *in - out.DisabledValidations = in.DisabledValidations + if in.Conditions != nil { + in, out := &in.Conditions, &out.Conditions + *out = make([]metav1.Condition, len(*in)) + for i := range *in { + (*in)[i].DeepCopyInto(&(*out)[i]) + } + } } -// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new WeigherSpec. -func (in *WeigherSpec) DeepCopy() *WeigherSpec { +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new WeigherStatus. +func (in *WeigherStatus) DeepCopy() *WeigherStatus { if in == nil { return nil } - out := new(WeigherSpec) + out := new(WeigherStatus) in.DeepCopyInto(out) return out } diff --git a/cmd/main.go b/cmd/main.go index 61930393d..0bb89168a 100644 --- a/cmd/main.go +++ b/cmd/main.go @@ -38,20 +38,13 @@ import ( "github.com/cobaltcore-dev/cortex/internal/knowledge/datasources/prometheus" "github.com/cobaltcore-dev/cortex/internal/knowledge/extractor" "github.com/cobaltcore-dev/cortex/internal/knowledge/kpis" - decisionscinder "github.com/cobaltcore-dev/cortex/internal/scheduling/decisions/cinder" - "github.com/cobaltcore-dev/cortex/internal/scheduling/decisions/explanation" - decisionsmachines "github.com/cobaltcore-dev/cortex/internal/scheduling/decisions/machines" - decisionsmanila "github.com/cobaltcore-dev/cortex/internal/scheduling/decisions/manila" - decisionsnova "github.com/cobaltcore-dev/cortex/internal/scheduling/decisions/nova" - decisionpods "github.com/cobaltcore-dev/cortex/internal/scheduling/decisions/pods" - deschedulingnova "github.com/cobaltcore-dev/cortex/internal/scheduling/descheduling/nova" - cindere2e "github.com/cobaltcore-dev/cortex/internal/scheduling/e2e/cinder" - manilae2e "github.com/cobaltcore-dev/cortex/internal/scheduling/e2e/manila" - novae2e "github.com/cobaltcore-dev/cortex/internal/scheduling/e2e/nova" - cinderexternal "github.com/cobaltcore-dev/cortex/internal/scheduling/external/cinder" - manilaexternal "github.com/cobaltcore-dev/cortex/internal/scheduling/external/manila" - novaexternal "github.com/cobaltcore-dev/cortex/internal/scheduling/external/nova" + "github.com/cobaltcore-dev/cortex/internal/scheduling/cinder" + "github.com/cobaltcore-dev/cortex/internal/scheduling/explanation" schedulinglib "github.com/cobaltcore-dev/cortex/internal/scheduling/lib" + "github.com/cobaltcore-dev/cortex/internal/scheduling/machines" + "github.com/cobaltcore-dev/cortex/internal/scheduling/manila" + "github.com/cobaltcore-dev/cortex/internal/scheduling/nova" + "github.com/cobaltcore-dev/cortex/internal/scheduling/pods" "github.com/cobaltcore-dev/cortex/internal/scheduling/reservations/commitments" reservationscontroller "github.com/cobaltcore-dev/cortex/internal/scheduling/reservations/controller" "github.com/cobaltcore-dev/cortex/pkg/conf" @@ -93,13 +86,13 @@ func main() { client := must.Return(client.New(restConfig, copts)) switch os.Args[1] { case "e2e-nova": - novae2e.RunChecks(ctx, client, config) + nova.RunChecks(ctx, client, config) return case "e2e-cinder": - cindere2e.RunChecks(ctx, client, config) + cinder.RunChecks(ctx, client, config) return case "e2e-manila": - manilae2e.RunChecks(ctx, client, config) + manila.RunChecks(ctx, client, config) return } } @@ -295,7 +288,7 @@ func main() { metrics.Registry.MustRegister(&pipelineMonitor) if slices.Contains(config.EnabledControllers, "nova-decisions-pipeline-controller") { - decisionController := 
&decisionsnova.DecisionPipelineController{ + decisionController := &nova.FilterWeigherPipelineController{ Monitor: pipelineMonitor, Conf: config, } @@ -305,16 +298,16 @@ func main() { setupLog.Error(err, "unable to create controller", "controller", "DecisionReconciler") os.Exit(1) } - novaexternal.NewAPI(config, decisionController).Init(mux) + nova.NewAPI(config, decisionController).Init(mux) } if slices.Contains(config.EnabledControllers, "nova-deschedulings-pipeline-controller") { // Deschedulings controller - monitor := deschedulingnova.NewPipelineMonitor() + monitor := schedulinglib.NewDetectorPipelineMonitor() metrics.Registry.MustRegister(&monitor) - deschedulingsController := &deschedulingnova.DeschedulingsPipelineController{ - Monitor: monitor, - Conf: config, - CycleDetector: deschedulingnova.NewCycleDetector(), + deschedulingsController := &nova.DetectorPipelineController{ + Monitor: monitor, + Conf: config, + DetectorCycleBreaker: nova.NewDetectorCycleBreaker(), } // Inferred through the base controller. deschedulingsController.Client = multiclusterClient @@ -324,7 +317,7 @@ func main() { } go deschedulingsController.CreateDeschedulingsPeriodically(ctx) // Deschedulings cleanup on startup - if err := (&deschedulingnova.Cleanup{ + if err := (&nova.DeschedulingsCleanup{ Client: multiclusterClient, Scheme: mgr.GetScheme(), }).SetupWithManager(mgr, multiclusterClient); err != nil { @@ -333,7 +326,7 @@ func main() { } } if slices.Contains(config.EnabledControllers, "manila-decisions-pipeline-controller") { - controller := &decisionsmanila.DecisionPipelineController{ + controller := &manila.FilterWeigherPipelineController{ Monitor: pipelineMonitor, Conf: config, } @@ -343,10 +336,10 @@ func main() { setupLog.Error(err, "unable to create controller", "controller", "DecisionReconciler") os.Exit(1) } - manilaexternal.NewAPI(config, controller).Init(mux) + manila.NewAPI(config, controller).Init(mux) } if slices.Contains(config.EnabledControllers, "cinder-decisions-pipeline-controller") { - controller := &decisionscinder.DecisionPipelineController{ + controller := &cinder.FilterWeigherPipelineController{ Monitor: pipelineMonitor, Conf: config, } @@ -356,10 +349,10 @@ func main() { setupLog.Error(err, "unable to create controller", "controller", "DecisionReconciler") os.Exit(1) } - cinderexternal.NewAPI(config, controller).Init(mux) + cinder.NewAPI(config, controller).Init(mux) } if slices.Contains(config.EnabledControllers, "ironcore-decisions-pipeline-controller") { - controller := &decisionsmachines.DecisionPipelineController{ + controller := &machines.FilterWeigherPipelineController{ Monitor: pipelineMonitor, Conf: config, } @@ -371,7 +364,7 @@ func main() { } } if slices.Contains(config.EnabledControllers, "pods-decisions-pipeline-controller") { - controller := &decisionpods.DecisionPipelineController{ + controller := &pods.FilterWeigherPipelineController{ Monitor: pipelineMonitor, Conf: config, } @@ -512,7 +505,7 @@ func main() { Interval: time.Hour, Name: "nova-decisions-cleanup-task", Run: func(ctx context.Context) error { - return decisionsnova.Cleanup(ctx, multiclusterClient, config) + return nova.DecisionsCleanup(ctx, multiclusterClient, config) }, }).SetupWithManager(mgr); err != nil { setupLog.Error(err, "unable to add nova decisions cleanup task to manager") @@ -526,7 +519,7 @@ func main() { Interval: time.Hour, Name: "manila-decisions-cleanup-task", Run: func(ctx context.Context) error { - return decisionsmanila.Cleanup(ctx, multiclusterClient, config) + return 
manila.DecisionsCleanup(ctx, multiclusterClient, config) }, }).SetupWithManager(mgr); err != nil { setupLog.Error(err, "unable to add manila decisions cleanup task to manager") @@ -540,7 +533,7 @@ func main() { Interval: time.Hour, Name: "cinder-decisions-cleanup-task", Run: func(ctx context.Context) error { - return decisionscinder.Cleanup(ctx, multiclusterClient, config) + return cinder.DecisionsCleanup(ctx, multiclusterClient, config) }, }).SetupWithManager(mgr); err != nil { setupLog.Error(err, "unable to add cinder decisions cleanup task to manager") diff --git a/config/crd/bases/cortex.cloud_pipelines.yaml b/config/crd/bases/cortex.cloud_pipelines.yaml index c6039f0dc..c86d2864d 100644 --- a/config/crd/bases/cortex.cloud_pipelines.yaml +++ b/config/crd/bases/cortex.cloud_pipelines.yaml @@ -24,11 +24,11 @@ spec: - jsonPath: .spec.type name: Type type: string - - jsonPath: .status.stepsReadyFrac - name: Steps + - jsonPath: .status.conditions[?(@.type=='AllStepsReady')].status + name: All Steps Ready type: string - jsonPath: .status.conditions[?(@.type=='Ready')].status - name: Ready + name: Pipeline Ready type: string name: v1alpha1 schema: @@ -62,15 +62,16 @@ spec: When this is false, the pipeline will still process requests. type: boolean description: - description: An optional description of the pipeline. + description: An optional description of the pipeline, helping understand + its purpose. type: string - schedulingDomain: + detectors: description: |- - SchedulingDomain defines in which scheduling domain this pipeline - is used (e.g., nova, cinder, manila). - type: string - steps: - description: The ordered list of steps that make up this pipeline. + Ordered list of detectors to apply in a descheduling pipeline. + + This attribute is set only if the pipeline type is detector. + Detectors find candidates for descheduling (migration off current host). + These detectors are run after weighers are applied. items: properties: description: @@ -78,101 +79,99 @@ spec: Additional description of the step which helps understand its purpose and decisions made by it. type: string - impl: - description: The name of the scheduler step in the cortex implementation. + name: + description: |- + The name of the scheduler step in the cortex implementation. + Must match to a step implemented by the pipeline controller. type: string - knowledges: - description: Knowledges this step depends on to be ready. - items: - description: ObjectReference contains enough information to - let you inspect or modify the referred object. - properties: - apiVersion: - description: API version of the referent. - type: string - fieldPath: - description: |- - If referring to a piece of an object instead of an entire object, this string - should contain a valid JSON/Go field access statement, such as desiredState.manifest.containers[2]. - For example, if the object reference is to a container within a pod, this would take on a value like: - "spec.containers{name}" (where "name" refers to the name of the container that triggered - the event) or if no container name is specified "spec.containers[2]" (container with - index 2 in this pod). This syntax is chosen only to have some well-defined way of - referencing a part of an object. - type: string - kind: - description: |- - Kind of the referent. - More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds - type: string - name: - description: |- - Name of the referent. 
- More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names - type: string - namespace: - description: |- - Namespace of the referent. - More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/namespaces/ - type: string - resourceVersion: - description: |- - Specific resourceVersion to which this reference is made, if any. - More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#concurrency-control-and-consistency - type: string - uid: - description: |- - UID of the referent. - More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#uids - type: string - type: object - x-kubernetes-map-type: atomic - type: array - mandatory: - default: true - description: Whether this step is mandatory for the pipeline - to be runnable. - type: boolean - opts: - description: Additional configuration for the extractor that - can be used + params: + description: Additional configuration for the step that can + be used type: object x-kubernetes-preserve-unknown-fields: true - type: - description: The type of the scheduler step. + required: + - name + type: object + type: array + filters: + description: |- + Ordered list of filters to apply in a scheduling pipeline. + + This attribute is set only if the pipeline type is filter-weigher. + Filters remove host candidates from an initial set, leaving + valid candidates. Filters are run before weighers are applied. + items: + properties: + description: + description: |- + Additional description of the step which helps understand its purpose + and decisions made by it. + type: string + name: + description: |- + The name of the scheduler step in the cortex implementation. + Must match to a step implemented by the pipeline controller. type: string - weigher: - description: If the type is "weigher", this contains additional - configuration for it. - properties: - disabledValidations: - description: |- - The validations to disable for this step. If none are provided, all - applied validations are enabled. - properties: - sameSubjectNumberInOut: - description: |- - Whether to validate that no subjects are removed or added from the scheduler - step. This should only be disabled for scheduler steps that remove subjects. - Thus, if no value is provided, the default is false. - type: boolean - someSubjectsRemain: - description: |- - Whether to validate that, after running the step, there are remaining subjects. - This should only be disabled for scheduler steps that are expected to - remove all subjects. - type: boolean - type: object + params: + description: Additional configuration for the step that can + be used type: object + x-kubernetes-preserve-unknown-fields: true required: - - impl - - mandatory - - type + - name type: object type: array + schedulingDomain: + description: |- + SchedulingDomain defines in which scheduling domain this pipeline + is used (e.g., nova, cinder, manila). + type: string type: - description: The type of the pipeline. + description: |- + The type of the pipeline, used to differentiate between + filter-weigher and detector pipelines within the same + scheduling domain. + + If the type is filter-weigher, the filter and weigher attributes + must be set. If the type is detector, the detectors attribute + must be set. + enum: + - filter-weigher + - detector type: string + weighers: + description: |- + Ordered list of weighers to apply in a scheduling pipeline. 
+ + This attribute is set only if the pipeline type is filter-weigher. + These weighers are run after filters are applied. + items: + properties: + description: + description: |- + Additional description of the step which helps understand its purpose + and decisions made by it. + type: string + multiplier: + description: |- + Optional multiplier to apply to the step's output. + This can be used to increase or decrease the weight of a step + relative to other steps in the same pipeline. + type: number + name: + description: |- + The name of the scheduler step in the cortex implementation. + Must match to a step implemented by the pipeline controller. + type: string + params: + description: Additional configuration for the step that can + be used + type: object + x-kubernetes-preserve-unknown-fields: true + required: + - name + type: object + type: array required: - schedulingDomain - type @@ -237,20 +236,213 @@ spec: - type type: object type: array - readySteps: - description: The number of steps that are ready. - type: integer - stepsReadyFrac: - description: |- - An overview of the readiness of the steps in the pipeline. - Format: "ReadySteps / TotalSteps steps ready". - type: string - totalSteps: - description: The total number of steps configured in the pipeline. - type: integer - required: - - readySteps - - totalSteps + detectors: + description: List of statuses for each detector in the pipeline. + items: + properties: + conditions: + description: The current status conditions of the detector. + items: + description: Condition contains details for one aspect of + the current state of this API Resource. + properties: + lastTransitionTime: + description: |- + lastTransitionTime is the last time the condition transitioned from one status to another. + This should be when the underlying condition changed. If that is not known, then using the time when the API field changed is acceptable. + format: date-time + type: string + message: + description: |- + message is a human readable message indicating details about the transition. + This may be an empty string. + maxLength: 32768 + type: string + observedGeneration: + description: |- + observedGeneration represents the .metadata.generation that the condition was set based upon. + For instance, if .metadata.generation is currently 12, but the .status.conditions[x].observedGeneration is 9, the condition is out of date + with respect to the current state of the instance. + format: int64 + minimum: 0 + type: integer + reason: + description: |- + reason contains a programmatic identifier indicating the reason for the condition's last transition. + Producers of specific condition types may define expected values and meanings for this field, + and whether the values are considered a guaranteed API. + The value should be a CamelCase string. + This field may not be empty. + maxLength: 1024 + minLength: 1 + pattern: ^[A-Za-z]([A-Za-z0-9_,:]*[A-Za-z0-9_])?$ + type: string + status: + description: status of the condition, one of True, False, + Unknown. + enum: + - "True" + - "False" + - Unknown + type: string + type: + description: type of condition in CamelCase or in foo.example.com/CamelCase. + maxLength: 316 + pattern: ^([a-z0-9]([-a-z0-9]*[a-z0-9])?(\.[a-z0-9]([-a-z0-9]*[a-z0-9])?)*/)?(([A-Za-z0-9][-A-Za-z0-9_.]*)?[A-Za-z0-9])$ + type: string + required: + - lastTransitionTime + - message + - reason + - status + - type + type: object + type: array + name: + description: The name of the detector. 
+ type: string + required: + - name + type: object + type: array + filters: + description: List of statuses for each filter in the pipeline. + items: + properties: + conditions: + description: The current status conditions of the filter. + items: + description: Condition contains details for one aspect of + the current state of this API Resource. + properties: + lastTransitionTime: + description: |- + lastTransitionTime is the last time the condition transitioned from one status to another. + This should be when the underlying condition changed. If that is not known, then using the time when the API field changed is acceptable. + format: date-time + type: string + message: + description: |- + message is a human readable message indicating details about the transition. + This may be an empty string. + maxLength: 32768 + type: string + observedGeneration: + description: |- + observedGeneration represents the .metadata.generation that the condition was set based upon. + For instance, if .metadata.generation is currently 12, but the .status.conditions[x].observedGeneration is 9, the condition is out of date + with respect to the current state of the instance. + format: int64 + minimum: 0 + type: integer + reason: + description: |- + reason contains a programmatic identifier indicating the reason for the condition's last transition. + Producers of specific condition types may define expected values and meanings for this field, + and whether the values are considered a guaranteed API. + The value should be a CamelCase string. + This field may not be empty. + maxLength: 1024 + minLength: 1 + pattern: ^[A-Za-z]([A-Za-z0-9_,:]*[A-Za-z0-9_])?$ + type: string + status: + description: status of the condition, one of True, False, + Unknown. + enum: + - "True" + - "False" + - Unknown + type: string + type: + description: type of condition in CamelCase or in foo.example.com/CamelCase. + maxLength: 316 + pattern: ^([a-z0-9]([-a-z0-9]*[a-z0-9])?(\.[a-z0-9]([-a-z0-9]*[a-z0-9])?)*/)?(([A-Za-z0-9][-A-Za-z0-9_.]*)?[A-Za-z0-9])$ + type: string + required: + - lastTransitionTime + - message + - reason + - status + - type + type: object + type: array + name: + description: The name of the filter. + type: string + required: + - name + type: object + type: array + weighers: + description: List of statuses for each weigher in the pipeline. + items: + properties: + conditions: + description: The current status conditions of the weigher. + items: + description: Condition contains details for one aspect of + the current state of this API Resource. + properties: + lastTransitionTime: + description: |- + lastTransitionTime is the last time the condition transitioned from one status to another. + This should be when the underlying condition changed. If that is not known, then using the time when the API field changed is acceptable. + format: date-time + type: string + message: + description: |- + message is a human readable message indicating details about the transition. + This may be an empty string. + maxLength: 32768 + type: string + observedGeneration: + description: |- + observedGeneration represents the .metadata.generation that the condition was set based upon. + For instance, if .metadata.generation is currently 12, but the .status.conditions[x].observedGeneration is 9, the condition is out of date + with respect to the current state of the instance. 
+ format: int64 + minimum: 0 + type: integer + reason: + description: |- + reason contains a programmatic identifier indicating the reason for the condition's last transition. + Producers of specific condition types may define expected values and meanings for this field, + and whether the values are considered a guaranteed API. + The value should be a CamelCase string. + This field may not be empty. + maxLength: 1024 + minLength: 1 + pattern: ^[A-Za-z]([A-Za-z0-9_,:]*[A-Za-z0-9_])?$ + type: string + status: + description: status of the condition, one of True, False, + Unknown. + enum: + - "True" + - "False" + - Unknown + type: string + type: + description: type of condition in CamelCase or in foo.example.com/CamelCase. + maxLength: 316 + pattern: ^([a-z0-9]([-a-z0-9]*[a-z0-9])?(\.[a-z0-9]([-a-z0-9]*[a-z0-9])?)*/)?(([A-Za-z0-9][-A-Za-z0-9_.]*)?[A-Za-z0-9])$ + type: string + required: + - lastTransitionTime + - message + - reason + - status + - type + type: object + type: array + name: + description: The name of the weigher. + type: string + required: + - name + type: object + type: array type: object required: - spec diff --git a/config/crd/cortex.cloud_pipelines.yaml b/config/crd/cortex.cloud_pipelines.yaml index c6039f0dc..c86d2864d 100644 --- a/config/crd/cortex.cloud_pipelines.yaml +++ b/config/crd/cortex.cloud_pipelines.yaml @@ -24,11 +24,11 @@ spec: - jsonPath: .spec.type name: Type type: string - - jsonPath: .status.stepsReadyFrac - name: Steps + - jsonPath: .status.conditions[?(@.type=='AllStepsReady')].status + name: All Steps Ready type: string - jsonPath: .status.conditions[?(@.type=='Ready')].status - name: Ready + name: Pipeline Ready type: string name: v1alpha1 schema: @@ -62,15 +62,16 @@ spec: When this is false, the pipeline will still process requests. type: boolean description: - description: An optional description of the pipeline. + description: An optional description of the pipeline, helping understand + its purpose. type: string - schedulingDomain: + detectors: description: |- - SchedulingDomain defines in which scheduling domain this pipeline - is used (e.g., nova, cinder, manila). - type: string - steps: - description: The ordered list of steps that make up this pipeline. + Ordered list of detectors to apply in a descheduling pipeline. + + This attribute is set only if the pipeline type is detector. + Detectors find candidates for descheduling (migration off current host). + These detectors are run after weighers are applied. items: properties: description: @@ -78,101 +79,99 @@ spec: Additional description of the step which helps understand its purpose and decisions made by it. type: string - impl: - description: The name of the scheduler step in the cortex implementation. + name: + description: |- + The name of the scheduler step in the cortex implementation. + Must match to a step implemented by the pipeline controller. type: string - knowledges: - description: Knowledges this step depends on to be ready. - items: - description: ObjectReference contains enough information to - let you inspect or modify the referred object. - properties: - apiVersion: - description: API version of the referent. - type: string - fieldPath: - description: |- - If referring to a piece of an object instead of an entire object, this string - should contain a valid JSON/Go field access statement, such as desiredState.manifest.containers[2]. 
- For example, if the object reference is to a container within a pod, this would take on a value like: - "spec.containers{name}" (where "name" refers to the name of the container that triggered - the event) or if no container name is specified "spec.containers[2]" (container with - index 2 in this pod). This syntax is chosen only to have some well-defined way of - referencing a part of an object. - type: string - kind: - description: |- - Kind of the referent. - More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds - type: string - name: - description: |- - Name of the referent. - More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names - type: string - namespace: - description: |- - Namespace of the referent. - More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/namespaces/ - type: string - resourceVersion: - description: |- - Specific resourceVersion to which this reference is made, if any. - More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#concurrency-control-and-consistency - type: string - uid: - description: |- - UID of the referent. - More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#uids - type: string - type: object - x-kubernetes-map-type: atomic - type: array - mandatory: - default: true - description: Whether this step is mandatory for the pipeline - to be runnable. - type: boolean - opts: - description: Additional configuration for the extractor that - can be used + params: + description: Additional configuration for the step that can + be used type: object x-kubernetes-preserve-unknown-fields: true - type: - description: The type of the scheduler step. + required: + - name + type: object + type: array + filters: + description: |- + Ordered list of filters to apply in a scheduling pipeline. + + This attribute is set only if the pipeline type is filter-weigher. + Filters remove host candidates from an initial set, leaving + valid candidates. Filters are run before weighers are applied. + items: + properties: + description: + description: |- + Additional description of the step which helps understand its purpose + and decisions made by it. + type: string + name: + description: |- + The name of the scheduler step in the cortex implementation. + Must match to a step implemented by the pipeline controller. type: string - weigher: - description: If the type is "weigher", this contains additional - configuration for it. - properties: - disabledValidations: - description: |- - The validations to disable for this step. If none are provided, all - applied validations are enabled. - properties: - sameSubjectNumberInOut: - description: |- - Whether to validate that no subjects are removed or added from the scheduler - step. This should only be disabled for scheduler steps that remove subjects. - Thus, if no value is provided, the default is false. - type: boolean - someSubjectsRemain: - description: |- - Whether to validate that, after running the step, there are remaining subjects. - This should only be disabled for scheduler steps that are expected to - remove all subjects. 
- type: boolean - type: object + params: + description: Additional configuration for the step that can + be used type: object + x-kubernetes-preserve-unknown-fields: true required: - - impl - - mandatory - - type + - name type: object type: array + schedulingDomain: + description: |- + SchedulingDomain defines in which scheduling domain this pipeline + is used (e.g., nova, cinder, manila). + type: string type: - description: The type of the pipeline. + description: |- + The type of the pipeline, used to differentiate between + filter-weigher and detector pipelines within the same + scheduling domain. + + If the type is filter-weigher, the filter and weigher attributes + must be set. If the type is detector, the detectors attribute + must be set. + enum: + - filter-weigher + - detector type: string + weighers: + description: |- + Ordered list of weighers to apply in a scheduling pipeline. + + This attribute is set only if the pipeline type is filter-weigher. + These weighers are run after filters are applied. + items: + properties: + description: + description: |- + Additional description of the step which helps understand its purpose + and decisions made by it. + type: string + multiplier: + description: |- + Optional multiplier to apply to the step's output. + This can be used to increase or decrease the weight of a step + relative to other steps in the same pipeline. + type: number + name: + description: |- + The name of the scheduler step in the cortex implementation. + Must match to a step implemented by the pipeline controller. + type: string + params: + description: Additional configuration for the step that can + be used + type: object + x-kubernetes-preserve-unknown-fields: true + required: + - name + type: object + type: array required: - schedulingDomain - type @@ -237,20 +236,213 @@ spec: - type type: object type: array - readySteps: - description: The number of steps that are ready. - type: integer - stepsReadyFrac: - description: |- - An overview of the readiness of the steps in the pipeline. - Format: "ReadySteps / TotalSteps steps ready". - type: string - totalSteps: - description: The total number of steps configured in the pipeline. - type: integer - required: - - readySteps - - totalSteps + detectors: + description: List of statuses for each detector in the pipeline. + items: + properties: + conditions: + description: The current status conditions of the detector. + items: + description: Condition contains details for one aspect of + the current state of this API Resource. + properties: + lastTransitionTime: + description: |- + lastTransitionTime is the last time the condition transitioned from one status to another. + This should be when the underlying condition changed. If that is not known, then using the time when the API field changed is acceptable. + format: date-time + type: string + message: + description: |- + message is a human readable message indicating details about the transition. + This may be an empty string. + maxLength: 32768 + type: string + observedGeneration: + description: |- + observedGeneration represents the .metadata.generation that the condition was set based upon. + For instance, if .metadata.generation is currently 12, but the .status.conditions[x].observedGeneration is 9, the condition is out of date + with respect to the current state of the instance. + format: int64 + minimum: 0 + type: integer + reason: + description: |- + reason contains a programmatic identifier indicating the reason for the condition's last transition. 
+ Producers of specific condition types may define expected values and meanings for this field, + and whether the values are considered a guaranteed API. + The value should be a CamelCase string. + This field may not be empty. + maxLength: 1024 + minLength: 1 + pattern: ^[A-Za-z]([A-Za-z0-9_,:]*[A-Za-z0-9_])?$ + type: string + status: + description: status of the condition, one of True, False, + Unknown. + enum: + - "True" + - "False" + - Unknown + type: string + type: + description: type of condition in CamelCase or in foo.example.com/CamelCase. + maxLength: 316 + pattern: ^([a-z0-9]([-a-z0-9]*[a-z0-9])?(\.[a-z0-9]([-a-z0-9]*[a-z0-9])?)*/)?(([A-Za-z0-9][-A-Za-z0-9_.]*)?[A-Za-z0-9])$ + type: string + required: + - lastTransitionTime + - message + - reason + - status + - type + type: object + type: array + name: + description: The name of the detector. + type: string + required: + - name + type: object + type: array + filters: + description: List of statuses for each filter in the pipeline. + items: + properties: + conditions: + description: The current status conditions of the filter. + items: + description: Condition contains details for one aspect of + the current state of this API Resource. + properties: + lastTransitionTime: + description: |- + lastTransitionTime is the last time the condition transitioned from one status to another. + This should be when the underlying condition changed. If that is not known, then using the time when the API field changed is acceptable. + format: date-time + type: string + message: + description: |- + message is a human readable message indicating details about the transition. + This may be an empty string. + maxLength: 32768 + type: string + observedGeneration: + description: |- + observedGeneration represents the .metadata.generation that the condition was set based upon. + For instance, if .metadata.generation is currently 12, but the .status.conditions[x].observedGeneration is 9, the condition is out of date + with respect to the current state of the instance. + format: int64 + minimum: 0 + type: integer + reason: + description: |- + reason contains a programmatic identifier indicating the reason for the condition's last transition. + Producers of specific condition types may define expected values and meanings for this field, + and whether the values are considered a guaranteed API. + The value should be a CamelCase string. + This field may not be empty. + maxLength: 1024 + minLength: 1 + pattern: ^[A-Za-z]([A-Za-z0-9_,:]*[A-Za-z0-9_])?$ + type: string + status: + description: status of the condition, one of True, False, + Unknown. + enum: + - "True" + - "False" + - Unknown + type: string + type: + description: type of condition in CamelCase or in foo.example.com/CamelCase. + maxLength: 316 + pattern: ^([a-z0-9]([-a-z0-9]*[a-z0-9])?(\.[a-z0-9]([-a-z0-9]*[a-z0-9])?)*/)?(([A-Za-z0-9][-A-Za-z0-9_.]*)?[A-Za-z0-9])$ + type: string + required: + - lastTransitionTime + - message + - reason + - status + - type + type: object + type: array + name: + description: The name of the filter. + type: string + required: + - name + type: object + type: array + weighers: + description: List of statuses for each weigher in the pipeline. + items: + properties: + conditions: + description: The current status conditions of the weigher. + items: + description: Condition contains details for one aspect of + the current state of this API Resource. 
+ properties: + lastTransitionTime: + description: |- + lastTransitionTime is the last time the condition transitioned from one status to another. + This should be when the underlying condition changed. If that is not known, then using the time when the API field changed is acceptable. + format: date-time + type: string + message: + description: |- + message is a human readable message indicating details about the transition. + This may be an empty string. + maxLength: 32768 + type: string + observedGeneration: + description: |- + observedGeneration represents the .metadata.generation that the condition was set based upon. + For instance, if .metadata.generation is currently 12, but the .status.conditions[x].observedGeneration is 9, the condition is out of date + with respect to the current state of the instance. + format: int64 + minimum: 0 + type: integer + reason: + description: |- + reason contains a programmatic identifier indicating the reason for the condition's last transition. + Producers of specific condition types may define expected values and meanings for this field, + and whether the values are considered a guaranteed API. + The value should be a CamelCase string. + This field may not be empty. + maxLength: 1024 + minLength: 1 + pattern: ^[A-Za-z]([A-Za-z0-9_,:]*[A-Za-z0-9_])?$ + type: string + status: + description: status of the condition, one of True, False, + Unknown. + enum: + - "True" + - "False" + - Unknown + type: string + type: + description: type of condition in CamelCase or in foo.example.com/CamelCase. + maxLength: 316 + pattern: ^([a-z0-9]([-a-z0-9]*[a-z0-9])?(\.[a-z0-9]([-a-z0-9]*[a-z0-9])?)*/)?(([A-Za-z0-9][-A-Za-z0-9_.]*)?[A-Za-z0-9])$ + type: string + required: + - lastTransitionTime + - message + - reason + - status + - type + type: object + type: array + name: + description: The name of the weigher. + type: string + required: + - name + type: object + type: array type: object required: - spec diff --git a/cortex.secrets.example.yaml b/cortex.secrets.example.yaml index 0870f7208..daab61cc9 100644 --- a/cortex.secrets.example.yaml +++ b/cortex.secrets.example.yaml @@ -20,6 +20,10 @@ sharedSSOCert: &sharedSSOCert # If true, the certificate is not verified. selfSigned: "false" +# Enable kvm pipelines and scheduling support. +kvm: + enabled: true + prometheus: url: "https://path-to-your-prometheus" sso: diff --git a/dist/chart/templates/crd/cortex.cloud_pipelines.yaml b/dist/chart/templates/crd/cortex.cloud_pipelines.yaml index 83075a59c..2f68972d1 100644 --- a/dist/chart/templates/crd/cortex.cloud_pipelines.yaml +++ b/dist/chart/templates/crd/cortex.cloud_pipelines.yaml @@ -30,11 +30,11 @@ spec: - jsonPath: .spec.type name: Type type: string - - jsonPath: .status.stepsReadyFrac - name: Steps + - jsonPath: .status.conditions[?(@.type=='AllStepsReady')].status + name: All Steps Ready type: string - jsonPath: .status.conditions[?(@.type=='Ready')].status - name: Ready + name: Pipeline Ready type: string name: v1alpha1 schema: @@ -68,15 +68,16 @@ spec: When this is false, the pipeline will still process requests. type: boolean description: - description: An optional description of the pipeline. + description: An optional description of the pipeline, helping understand + its purpose. type: string - schedulingDomain: + detectors: description: |- - SchedulingDomain defines in which scheduling domain this pipeline - is used (e.g., nova, cinder, manila). - type: string - steps: - description: The ordered list of steps that make up this pipeline. 
+ Ordered list of detectors to apply in a descheduling pipeline. + + This attribute is set only if the pipeline type is detector. + Detectors find candidates for descheduling (migration off current host). + These detectors are run after weighers are applied. items: properties: description: @@ -84,101 +85,99 @@ spec: Additional description of the step which helps understand its purpose and decisions made by it. type: string - impl: - description: The name of the scheduler step in the cortex implementation. + name: + description: |- + The name of the scheduler step in the cortex implementation. + Must match to a step implemented by the pipeline controller. type: string - knowledges: - description: Knowledges this step depends on to be ready. - items: - description: ObjectReference contains enough information to - let you inspect or modify the referred object. - properties: - apiVersion: - description: API version of the referent. - type: string - fieldPath: - description: |- - If referring to a piece of an object instead of an entire object, this string - should contain a valid JSON/Go field access statement, such as desiredState.manifest.containers[2]. - For example, if the object reference is to a container within a pod, this would take on a value like: - "spec.containers{name}" (where "name" refers to the name of the container that triggered - the event) or if no container name is specified "spec.containers[2]" (container with - index 2 in this pod). This syntax is chosen only to have some well-defined way of - referencing a part of an object. - type: string - kind: - description: |- - Kind of the referent. - More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds - type: string - name: - description: |- - Name of the referent. - More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names - type: string - namespace: - description: |- - Namespace of the referent. - More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/namespaces/ - type: string - resourceVersion: - description: |- - Specific resourceVersion to which this reference is made, if any. - More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#concurrency-control-and-consistency - type: string - uid: - description: |- - UID of the referent. - More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#uids - type: string - type: object - x-kubernetes-map-type: atomic - type: array - mandatory: - default: true - description: Whether this step is mandatory for the pipeline - to be runnable. - type: boolean - opts: - description: Additional configuration for the extractor that - can be used + params: + description: Additional configuration for the step that can + be used type: object x-kubernetes-preserve-unknown-fields: true - type: - description: The type of the scheduler step. + required: + - name + type: object + type: array + filters: + description: |- + Ordered list of filters to apply in a scheduling pipeline. + + This attribute is set only if the pipeline type is filter-weigher. + Filters remove host candidates from an initial set, leaving + valid candidates. Filters are run before weighers are applied. + items: + properties: + description: + description: |- + Additional description of the step which helps understand its purpose + and decisions made by it. + type: string + name: + description: |- + The name of the scheduler step in the cortex implementation. 
+ Must match to a step implemented by the pipeline controller. type: string - weigher: - description: If the type is "weigher", this contains additional - configuration for it. - properties: - disabledValidations: - description: |- - The validations to disable for this step. If none are provided, all - applied validations are enabled. - properties: - sameSubjectNumberInOut: - description: |- - Whether to validate that no subjects are removed or added from the scheduler - step. This should only be disabled for scheduler steps that remove subjects. - Thus, if no value is provided, the default is false. - type: boolean - someSubjectsRemain: - description: |- - Whether to validate that, after running the step, there are remaining subjects. - This should only be disabled for scheduler steps that are expected to - remove all subjects. - type: boolean - type: object + params: + description: Additional configuration for the step that can + be used type: object + x-kubernetes-preserve-unknown-fields: true required: - - impl - - mandatory - - type + - name type: object type: array + schedulingDomain: + description: |- + SchedulingDomain defines in which scheduling domain this pipeline + is used (e.g., nova, cinder, manila). + type: string type: - description: The type of the pipeline. + description: |- + The type of the pipeline, used to differentiate between + filter-weigher and detector pipelines within the same + scheduling domain. + + If the type is filter-weigher, the filter and weigher attributes + must be set. If the type is detector, the detectors attribute + must be set. + enum: + - filter-weigher + - detector type: string + weighers: + description: |- + Ordered list of weighers to apply in a scheduling pipeline. + + This attribute is set only if the pipeline type is filter-weigher. + These weighers are run after filters are applied. + items: + properties: + description: + description: |- + Additional description of the step which helps understand its purpose + and decisions made by it. + type: string + multiplier: + description: |- + Optional multiplier to apply to the step's output. + This can be used to increase or decrease the weight of a step + relative to other steps in the same pipeline. + type: number + name: + description: |- + The name of the scheduler step in the cortex implementation. + Must match to a step implemented by the pipeline controller. + type: string + params: + description: Additional configuration for the step that can + be used + type: object + x-kubernetes-preserve-unknown-fields: true + required: + - name + type: object + type: array required: - schedulingDomain - type @@ -243,20 +242,213 @@ spec: - type type: object type: array - readySteps: - description: The number of steps that are ready. - type: integer - stepsReadyFrac: - description: |- - An overview of the readiness of the steps in the pipeline. - Format: "ReadySteps / TotalSteps steps ready". - type: string - totalSteps: - description: The total number of steps configured in the pipeline. - type: integer - required: - - readySteps - - totalSteps + detectors: + description: List of statuses for each detector in the pipeline. + items: + properties: + conditions: + description: The current status conditions of the detector. + items: + description: Condition contains details for one aspect of + the current state of this API Resource. + properties: + lastTransitionTime: + description: |- + lastTransitionTime is the last time the condition transitioned from one status to another. 
+ This should be when the underlying condition changed. If that is not known, then using the time when the API field changed is acceptable. + format: date-time + type: string + message: + description: |- + message is a human readable message indicating details about the transition. + This may be an empty string. + maxLength: 32768 + type: string + observedGeneration: + description: |- + observedGeneration represents the .metadata.generation that the condition was set based upon. + For instance, if .metadata.generation is currently 12, but the .status.conditions[x].observedGeneration is 9, the condition is out of date + with respect to the current state of the instance. + format: int64 + minimum: 0 + type: integer + reason: + description: |- + reason contains a programmatic identifier indicating the reason for the condition's last transition. + Producers of specific condition types may define expected values and meanings for this field, + and whether the values are considered a guaranteed API. + The value should be a CamelCase string. + This field may not be empty. + maxLength: 1024 + minLength: 1 + pattern: ^[A-Za-z]([A-Za-z0-9_,:]*[A-Za-z0-9_])?$ + type: string + status: + description: status of the condition, one of True, False, + Unknown. + enum: + - "True" + - "False" + - Unknown + type: string + type: + description: type of condition in CamelCase or in foo.example.com/CamelCase. + maxLength: 316 + pattern: ^([a-z0-9]([-a-z0-9]*[a-z0-9])?(\.[a-z0-9]([-a-z0-9]*[a-z0-9])?)*/)?(([A-Za-z0-9][-A-Za-z0-9_.]*)?[A-Za-z0-9])$ + type: string + required: + - lastTransitionTime + - message + - reason + - status + - type + type: object + type: array + name: + description: The name of the detector. + type: string + required: + - name + type: object + type: array + filters: + description: List of statuses for each filter in the pipeline. + items: + properties: + conditions: + description: The current status conditions of the filter. + items: + description: Condition contains details for one aspect of + the current state of this API Resource. + properties: + lastTransitionTime: + description: |- + lastTransitionTime is the last time the condition transitioned from one status to another. + This should be when the underlying condition changed. If that is not known, then using the time when the API field changed is acceptable. + format: date-time + type: string + message: + description: |- + message is a human readable message indicating details about the transition. + This may be an empty string. + maxLength: 32768 + type: string + observedGeneration: + description: |- + observedGeneration represents the .metadata.generation that the condition was set based upon. + For instance, if .metadata.generation is currently 12, but the .status.conditions[x].observedGeneration is 9, the condition is out of date + with respect to the current state of the instance. + format: int64 + minimum: 0 + type: integer + reason: + description: |- + reason contains a programmatic identifier indicating the reason for the condition's last transition. + Producers of specific condition types may define expected values and meanings for this field, + and whether the values are considered a guaranteed API. + The value should be a CamelCase string. + This field may not be empty. + maxLength: 1024 + minLength: 1 + pattern: ^[A-Za-z]([A-Za-z0-9_,:]*[A-Za-z0-9_])?$ + type: string + status: + description: status of the condition, one of True, False, + Unknown. 
+ enum: + - "True" + - "False" + - Unknown + type: string + type: + description: type of condition in CamelCase or in foo.example.com/CamelCase. + maxLength: 316 + pattern: ^([a-z0-9]([-a-z0-9]*[a-z0-9])?(\.[a-z0-9]([-a-z0-9]*[a-z0-9])?)*/)?(([A-Za-z0-9][-A-Za-z0-9_.]*)?[A-Za-z0-9])$ + type: string + required: + - lastTransitionTime + - message + - reason + - status + - type + type: object + type: array + name: + description: The name of the filter. + type: string + required: + - name + type: object + type: array + weighers: + description: List of statuses for each weigher in the pipeline. + items: + properties: + conditions: + description: The current status conditions of the weigher. + items: + description: Condition contains details for one aspect of + the current state of this API Resource. + properties: + lastTransitionTime: + description: |- + lastTransitionTime is the last time the condition transitioned from one status to another. + This should be when the underlying condition changed. If that is not known, then using the time when the API field changed is acceptable. + format: date-time + type: string + message: + description: |- + message is a human readable message indicating details about the transition. + This may be an empty string. + maxLength: 32768 + type: string + observedGeneration: + description: |- + observedGeneration represents the .metadata.generation that the condition was set based upon. + For instance, if .metadata.generation is currently 12, but the .status.conditions[x].observedGeneration is 9, the condition is out of date + with respect to the current state of the instance. + format: int64 + minimum: 0 + type: integer + reason: + description: |- + reason contains a programmatic identifier indicating the reason for the condition's last transition. + Producers of specific condition types may define expected values and meanings for this field, + and whether the values are considered a guaranteed API. + The value should be a CamelCase string. + This field may not be empty. + maxLength: 1024 + minLength: 1 + pattern: ^[A-Za-z]([A-Za-z0-9_,:]*[A-Za-z0-9_])?$ + type: string + status: + description: status of the condition, one of True, False, + Unknown. + enum: + - "True" + - "False" + - Unknown + type: string + type: + description: type of condition in CamelCase or in foo.example.com/CamelCase. + maxLength: 316 + pattern: ^([a-z0-9]([-a-z0-9]*[a-z0-9])?(\.[a-z0-9]([-a-z0-9]*[a-z0-9])?)*/)?(([A-Za-z0-9][-A-Za-z0-9_.]*)?[A-Za-z0-9])$ + type: string + required: + - lastTransitionTime + - message + - reason + - status + - type + type: object + type: array + name: + description: The name of the weigher. + type: string + required: + - name + type: object + type: array type: object required: - spec diff --git a/helm/bundles/cortex-cinder/templates/pipelines.yaml b/helm/bundles/cortex-cinder/templates/pipelines.yaml index 1e876cff0..72c47b019 100644 --- a/helm/bundles/cortex-cinder/templates/pipelines.yaml +++ b/helm/bundles/cortex-cinder/templates/pipelines.yaml @@ -11,4 +11,5 @@ spec: for additional filtering and weighing via this external scheduler pipeline. Cortex returns a ranked list of hosts back to cinder for final selection. 
type: filter-weigher - steps: [] + filters: [] + weighers: [] \ No newline at end of file diff --git a/helm/bundles/cortex-ironcore/templates/pipelines.yaml b/helm/bundles/cortex-ironcore/templates/pipelines.yaml index 231e95e47..5598e60da 100644 --- a/helm/bundles/cortex-ironcore/templates/pipelines.yaml +++ b/helm/bundles/cortex-ironcore/templates/pipelines.yaml @@ -9,12 +9,10 @@ spec: This pipeline is used to schedule ironcore machines onto machinepools. type: filter-weigher createDecisions: true - steps: - - type: weigher - impl: noop + filters: [] + weighers: + - name: noop description: | This is only a passthrough step which assigns a zero-weight to all machinepool candidates. It is used as a placeholder step in the ironcore machines scheduler pipeline. - knowledges: [] - mandatory: false diff --git a/helm/bundles/cortex-manila/templates/pipelines.yaml b/helm/bundles/cortex-manila/templates/pipelines.yaml index aba1e5313..80f4999d3 100644 --- a/helm/bundles/cortex-manila/templates/pipelines.yaml +++ b/helm/bundles/cortex-manila/templates/pipelines.yaml @@ -11,15 +11,15 @@ spec: for additional filtering and weighing via this external scheduler pipeline. Cortex returns a ranked list of hosts back to manila for final selection. type: filter-weigher - steps: - - type: weigher - impl: netapp_cpu_usage_balancing + filters: [] + weighers: + - name: netapp_cpu_usage_balancing description: | This step uses netapp storage pool cpu metrics condensed into a feature to balance manila share placements across available storage pools. Its main purpose is to avoid cpu overutilization on a storage pool which may lead to performance degradation for shares placed on that pool. - opts: + params: # Min-max scaling for gap-fitting based on CPU usage (pct) avgCPUUsageLowerBound: 0 # pct avgCPUUsageUpperBound: 10 # pct @@ -29,4 +29,3 @@ spec: maxCPUUsageUpperBound: 10 # pct maxCPUUsageActivationLowerBound: 0.0 maxCPUUsageActivationUpperBound: -0.25 - mandatory: false diff --git a/helm/bundles/cortex-nova/templates/pipelines.yaml b/helm/bundles/cortex-nova/templates/pipelines.yaml index c3b13acc4..4c6148019 100644 --- a/helm/bundles/cortex-nova/templates/pipelines.yaml +++ b/helm/bundles/cortex-nova/templates/pipelines.yaml @@ -1,4 +1,3 @@ -{{- $createDecisions := .Values.pipelines.createDecisions | default false }} --- apiVersion: cortex.cloud/v1alpha1 kind: Pipeline @@ -14,47 +13,35 @@ spec: This is the pipeline used for VMware. type: filter-weigher createDecisions: false - steps: - - type: weigher - impl: vmware_hana_binpacking + filters: [] + weighers: + - name: vmware_hana_binpacking description: | This step pulls HANA VMs onto the smallest possible gaps on HANA-exclusive VMware hosts. In this way hosts with much free space are held free for larger HANA VMs, improving overall packing efficiency for HANA workloads. - knowledges: - - name: host-utilization - - name: host-capabilities - opts: + params: ramUtilizedAfterLowerBoundPct: 0 ramUtilizedAfterUpperBoundPct: 100 ramUtilizedAfterActivationLowerBound: 0.0 ramUtilizedAfterActivationUpperBound: 1.0 - mandatory: false - - type: weigher - impl: vmware_general_purpose_balancing + - name: vmware_general_purpose_balancing description: | This step balances non-HANA VMs across non-HANA exclusive VMware hosts. It pulls vms onto the freeest hosts possible to ensure an even distribution of workloads across the available infrastructure. 
- knowledges: - - name: host-utilization - - name: host-capabilities - opts: + params: ramUtilizedLowerBoundPct: 0 ramUtilizedUpperBoundPct: 100 ramUtilizedActivationLowerBound: 1.0 ramUtilizedActivationUpperBound: 0.0 - mandatory: false - - type: weigher - impl: vmware_avoid_long_term_contended_hosts + - name: vmware_avoid_long_term_contended_hosts description: | This step avoids placing vms on vmware hosts with a high CPU contention over a longer period of time, based on vrops contention metrics. In particular, this step looks at a longer time window of 4 weeks to identify hosts that are consistently contended. - knowledges: - - name: vmware-long-term-contended-hosts - opts: + params: avgCPUContentionLowerBound: 0 # pct avgCPUContentionUpperBound: 10 # pct avgCPUContentionActivationLowerBound: 0.0 @@ -63,17 +50,13 @@ spec: maxCPUContentionUpperBound: 10 # pct maxCPUContentionActivationLowerBound: 0.0 maxCPUContentionActivationUpperBound: -0.25 - mandatory: false - - type: weigher - impl: vmware_avoid_short_term_contended_hosts + - name: vmware_avoid_short_term_contended_hosts description: | This step avoids placing vms on vmware hosts with a high CPU contention over a shorter period of time, based on vrops contention metrics. In particular, this step looks at a shorter time window of 20 minutes to identify hosts that are currently contended. - knowledges: - - name: vmware-short-term-contended-hosts - opts: + params: avgCPUContentionLowerBound: 0 # pct avgCPUContentionUpperBound: 10 # pct avgCPUContentionActivationLowerBound: 0.0 @@ -82,4 +65,3 @@ spec: maxCPUContentionUpperBound: 10 # pct maxCPUContentionActivationLowerBound: 0.0 maxCPUContentionActivationUpperBound: -0.25 - mandatory: false diff --git a/helm/bundles/cortex-nova/templates/pipelines_kvm.yaml b/helm/bundles/cortex-nova/templates/pipelines_kvm.yaml index 0ba7926da..89214ae63 100644 --- a/helm/bundles/cortex-nova/templates/pipelines_kvm.yaml +++ b/helm/bundles/cortex-nova/templates/pipelines_kvm.yaml @@ -1,4 +1,3 @@ -{{- $createDecisions := .Values.pipelines.createDecisions | default false }} {{- if .Values.kvm.enabled }} --- apiVersion: cortex.cloud/v1alpha1 @@ -14,10 +13,9 @@ spec: Cortex returns a ranked list of hosts back to nova for final selection. This is the pipeline used for KVM hypervisors (qemu and cloud-hypervisor). type: filter-weigher - {{- if $createDecisions }} createDecisions: true - {{- end }} - steps: [] + filters: [] + weighers: [] --- apiVersion: cortex.cloud/v1alpha1 kind: Pipeline @@ -32,81 +30,67 @@ spec: cortex's weighing steps to provide an optimized host selection for the reservation. This is the pipeline used for KVM hypervisors (qemu and cloud-hypervisor). type: filter-weigher - {{- if $createDecisions }} createDecisions: true - {{- end }} - steps: - - type: filter - impl: filter_host_instructions + filters: + - name: filter_host_instructions description: | This step will consider the `ignore_hosts` and `force_hosts` instructions from the nova scheduler request spec to filter out or exclusively allow certain hosts. - knowledges: [] - - type: filter - impl: filter_has_enough_capacity + - name: filter_has_enough_capacity description: | This step will filter out hosts that do not have enough available capacity to host the requested flavor. If enabled, this step will subtract the current reservations residing on this host from the available capacity. - opts: + params: # If reserved space should be locked even for matching requests. 
# For the reservations pipeline, we don't want to unlock # reserved space, to avoid reservations for the same project # and flavor to overlap. lockReserved: true - - type: filter - impl: filter_has_requested_traits + - name: filter_has_requested_traits description: | This step filters hosts that do not have the requested traits given by the nova flavor extra spec: "trait:": "forbidden" means the host must not have the specified trait. "trait:": "required" means the host must have the specified trait. - - type: filter - impl: filter_has_accelerators + - name: filter_has_accelerators description: | This step will filter out hosts without the trait `COMPUTE_ACCELERATORS` if the nova flavor extra specs request accelerators via "accel:device_profile". - - type: filter - impl: filter_correct_az + - name: filter_correct_az description: | This step will filter out hosts whose aggregate information indicates they are not placed in the requested availability zone. - - type: filter - impl: filter_status_conditions + - name: filter_status_conditions description: | This step will filter out hosts for which the hypervisor status conditions do not meet the expected values, for example, that the hypervisor is ready and not disabled. - - type: filter - impl: filter_maintenance + - name: filter_maintenance description: | This step will filter out hosts that are currently in maintenance mode that prevents scheduling, for example, manual maintenance or termination. - - type: filter - impl: filter_external_customer + - name: filter_external_customer description: | This step prefix-matches the domain name for external customer domains and filters out hosts that are not intended for external customers. It considers the `CUSTOM_EXTERNAL_CUSTOMER_SUPPORTED` trait on hosts as well as the `domain_name` scheduler hint from the nova request spec. - opts: + params: domainNamePrefixes: ["iaas-"] - - type: filter - impl: filter_packed_virtqueue + - name: filter_packed_virtqueue description: | If the flavor extra specs contain the `hw:virtio_packed_ring` key, or the image properties contain the `hw_virtio_packed_ring` key, this step will filter out hosts that do not have the `COMPUTE_NET_VIRTIO_PACKED` trait. - - type: filter - impl: filter_allowed_projects + - name: filter_allowed_projects description: | This step filters hosts based on allowed projects defined in the hypervisor resource. Note that hosts allowing all projects are still accessible and will not be filtered out. In this way some hypervisors are made accessible to some projects only. - - type: filter - impl: filter_capabilities + - name: filter_capabilities description: | This step will filter out hosts that do not meet the compute capabilities requested by the nova flavor extra specs, like `{"arch": "x86_64", @@ -115,29 +99,26 @@ spec: Note: currently, advanced boolean/numeric operators for the capabilities like `>`, `!`, ... are not supported because they are not used by any of our flavors in production. - - type: filter - impl: filter_instance_group_affinity + - name: filter_instance_group_affinity description: | This step selects hosts in the instance group specified in the nova scheduler request spec. - - type: filter - impl: filter_instance_group_anti_affinity + - name: filter_instance_group_anti_affinity description: | This step selects hosts not in the instance group specified in the nova scheduler request spec, but only until the max_server_per_host limit is reached (default = 1). 
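Note on the renamed `params` blocks: below is a hedged sketch of how the `lockReserved` value configured above could reach the filter. The FilterHasEnoughCapacityOpts struct and its LockReserved field appear in the existing filter tests elsewhere in this change; the JSON tag and the decode helper are illustrative assumptions, not part of the diff.

package filters

import "encoding/json"

// FilterHasEnoughCapacityOpts mirrors the params block configured on the
// filter_has_enough_capacity step. Only LockReserved is known from the
// existing tests; everything else here is a sketch.
type FilterHasEnoughCapacityOpts struct {
	// If true, reserved space stays locked even for requests that match the
	// reservation's project and flavor (see the params comment above).
	LockReserved bool `json:"lockReserved"`
}

// decodeParams illustrates unmarshalling the free-form params object into the
// step's options struct.
func decodeParams(raw []byte) (FilterHasEnoughCapacityOpts, error) {
	var opts FilterHasEnoughCapacityOpts
	err := json.Unmarshal(raw, &opts)
	return opts, err
}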
- - type: filter - impl: filter_live_migratable + - name: filter_live_migratable description: | This step ensures that the target host of a live migration can accept the migrating VM, by checking cpu architecture, cpu features, emulated devices, and cpu modes. - - type: filter - impl: filter_requested_destination + - name: filter_requested_destination description: | This step filters hosts based on the `requested_destination` instruction from the nova scheduler request spec. It supports filtering by host and by aggregates. + weighers: [] --- apiVersion: cortex.cloud/v1alpha1 kind: Pipeline @@ -149,19 +130,13 @@ spec: This pipeline runs steps that select virtual machines to deschedule from compute hosts in order to optimize resource usage and performance. This is the pipeline used for KVM hypervisors (qemu and cloud-hypervisor). - type: descheduler - {{- if $createDecisions }} + type: detector createDecisions: true - {{- end }} - steps: - - type: descheduler - impl: avoid_high_steal_pct + detectors: + - name: avoid_high_steal_pct description: | This step will deschedule VMs once they reach this CPU steal percentage over the observed time span. - knowledges: - - name: kvm-libvirt-domain-cpu-steal-pct - opts: + params: maxStealPctOverObservedTimeSpan: 20.0 - mandatory: false {{- end }} \ No newline at end of file diff --git a/helm/bundles/cortex-nova/values.yaml b/helm/bundles/cortex-nova/values.yaml index a2aece78a..fcbce1cfd 100644 --- a/helm/bundles/cortex-nova/values.yaml +++ b/helm/bundles/cortex-nova/values.yaml @@ -62,10 +62,6 @@ openstack: enabled: false <<: *sharedSSOCert -pipelines: - # Use this flag to disable the creation of decisions across all pipelines. - createDecisions: false - kvm: # Use this flag to enable/disable KVM host related features. enabled: false diff --git a/helm/bundles/cortex-pods/templates/pipelines.yaml b/helm/bundles/cortex-pods/templates/pipelines.yaml index ed41ca3b2..0dd3babd0 100644 --- a/helm/bundles/cortex-pods/templates/pipelines.yaml +++ b/helm/bundles/cortex-pods/templates/pipelines.yaml @@ -9,25 +9,17 @@ spec: This pipeline is used to schedule pods onto nodes. type: filter-weigher createDecisions: true - steps: - - type: filter - impl: noop + filters: + - name: noop description: | This is only a passthrough step which lets all pod candidates through. It is used as a placeholder step in the pods scheduler pipeline. - knowledges: [] - mandatory: false - - type: filter - impl: taint + - name: taint description: | Filters nodes based on taints, excluding nodes with NoSchedule taints unless the pod has matching tolerations. - knowledges: [] - mandatory: true - - type: filter - impl: nodeaffinity + - name: nodeaffinity description: | Filters nodes based on pod's node affinity requirements, matching nodes that satisfy the specified label selectors. - knowledges: [] - mandatory: true + weighers: [] diff --git a/internal/knowledge/datasources/openstack/controller.go b/internal/knowledge/datasources/openstack/controller.go index 7daec347c..95227aa1b 100644 --- a/internal/knowledge/datasources/openstack/controller.go +++ b/internal/knowledge/datasources/openstack/controller.go @@ -60,7 +60,7 @@ func (r *OpenStackDatasourceReconciler) Reconcile(ctx context.Context, req ctrl. 
log.Info("skipping datasource, not an openstack datasource", "name", datasource.Name) return ctrl.Result{}, nil } - if datasource.Status.NextSyncTime.After(time.Now()) { + if datasource.Status.NextSyncTime.After(time.Now()) && datasource.Status.NumberOfObjects != 0 { log.Info("skipping datasource sync, not yet time", "name", datasource.Name) return ctrl.Result{RequeueAfter: time.Until(datasource.Status.NextSyncTime.Time)}, nil } diff --git a/internal/knowledge/datasources/prometheus/controller.go b/internal/knowledge/datasources/prometheus/controller.go index 089283d80..23bf6725c 100644 --- a/internal/knowledge/datasources/prometheus/controller.go +++ b/internal/knowledge/datasources/prometheus/controller.go @@ -52,7 +52,7 @@ func (r *PrometheusDatasourceReconciler) Reconcile(ctx context.Context, req ctrl log.Info("skipping datasource, not a prometheus datasource", "name", datasource.Name) return ctrl.Result{}, nil } - if datasource.Status.NextSyncTime.After(time.Now()) { + if datasource.Status.NextSyncTime.After(time.Now()) && datasource.Status.NumberOfObjects != 0 { log.Info("skipping datasource sync, not yet time", "name", datasource.Name) return ctrl.Result{RequeueAfter: time.Until(datasource.Status.NextSyncTime.Time)}, nil } diff --git a/internal/knowledge/extractor/controller.go b/internal/knowledge/extractor/controller.go index 527203ede..69c276080 100644 --- a/internal/knowledge/extractor/controller.go +++ b/internal/knowledge/extractor/controller.go @@ -47,7 +47,7 @@ func (r *KnowledgeReconciler) Reconcile(ctx context.Context, req ctrl.Request) ( // Sanity checks. lastExtracted := knowledge.Status.LastExtracted.Time recency := knowledge.Spec.Recency.Duration - if lastExtracted.Add(recency).After(time.Now()) { + if lastExtracted.Add(recency).After(time.Now()) && knowledge.Status.RawLength != 0 { log.Info("skipping knowledge extraction, not yet time", "name", knowledge.Name) return ctrl.Result{RequeueAfter: time.Until(lastExtracted.Add(recency))}, nil } diff --git a/internal/knowledge/extractor/controller_test.go b/internal/knowledge/extractor/controller_test.go index d58cc0702..09bf170d2 100644 --- a/internal/knowledge/extractor/controller_test.go +++ b/internal/knowledge/extractor/controller_test.go @@ -84,6 +84,7 @@ func TestKnowledgeReconciler_Reconcile_SkipRecentExtraction(t *testing.T) { }, Status: v1alpha1.KnowledgeStatus{ LastExtracted: metav1.NewTime(recentTime), + RawLength: 100, // Indicate that there is existing data }, } diff --git a/internal/scheduling/decisions/cinder/cleanup.go b/internal/scheduling/cinder/decisions_cleanup.go similarity index 97% rename from internal/scheduling/decisions/cinder/cleanup.go rename to internal/scheduling/cinder/decisions_cleanup.go index b25f66aa4..b4774a450 100644 --- a/internal/scheduling/decisions/cinder/cleanup.go +++ b/internal/scheduling/cinder/decisions_cleanup.go @@ -20,7 +20,7 @@ import ( ) // Delete all decisions for cinder volumes that have been deleted. 
-func Cleanup(ctx context.Context, client client.Client, conf conf.Config) error { +func DecisionsCleanup(ctx context.Context, client client.Client, conf conf.Config) error { var authenticatedHTTP = http.DefaultClient if conf.SSOSecretRef != nil { var err error diff --git a/internal/scheduling/decisions/cinder/cleanup_test.go b/internal/scheduling/cinder/decisions_cleanup_test.go similarity index 98% rename from internal/scheduling/decisions/cinder/cleanup_test.go rename to internal/scheduling/cinder/decisions_cleanup_test.go index 778f508e6..be3a9dbf5 100644 --- a/internal/scheduling/decisions/cinder/cleanup_test.go +++ b/internal/scheduling/cinder/decisions_cleanup_test.go @@ -289,7 +289,7 @@ func TestCleanupCinder(t *testing.T) { Namespace: "default", }, } - err := Cleanup(context.Background(), client, config) + err := DecisionsCleanup(context.Background(), client, config) if tt.expectError && err == nil { t.Error("Expected error but got none") @@ -377,7 +377,7 @@ func TestCleanupCinderDecisionsCancel(t *testing.T) { defer cancel() // This should exit quickly due to context cancellation - if err := Cleanup(ctx, client, config); err != nil { + if err := DecisionsCleanup(ctx, client, config); err != nil { if !errors.Is(err, context.DeadlineExceeded) { t.Errorf("Unexpected error during cleanup: %v", err) } diff --git a/internal/scheduling/e2e/cinder/checks.go b/internal/scheduling/cinder/e2e_checks.go similarity index 100% rename from internal/scheduling/e2e/cinder/checks.go rename to internal/scheduling/cinder/e2e_checks.go diff --git a/internal/scheduling/external/cinder/api.go b/internal/scheduling/cinder/external_scheduler_api.go similarity index 99% rename from internal/scheduling/external/cinder/api.go rename to internal/scheduling/cinder/external_scheduler_api.go index 00aa8becf..3df397a68 100644 --- a/internal/scheduling/external/cinder/api.go +++ b/internal/scheduling/cinder/external_scheduler_api.go @@ -1,7 +1,7 @@ // Copyright SAP SE // SPDX-License-Identifier: Apache-2.0 -package http +package cinder import ( "bytes" diff --git a/internal/scheduling/external/cinder/api_test.go b/internal/scheduling/cinder/external_scheduler_api_test.go similarity index 88% rename from internal/scheduling/external/cinder/api_test.go rename to internal/scheduling/cinder/external_scheduler_api_test.go index f6f6c253d..480a5a6d5 100644 --- a/internal/scheduling/external/cinder/api_test.go +++ b/internal/scheduling/cinder/external_scheduler_api_test.go @@ -1,7 +1,7 @@ // Copyright SAP SE // SPDX-License-Identifier: Apache-2.0 -package http +package cinder import ( "bytes" @@ -308,6 +308,55 @@ func TestHTTPAPI_CinderExternalScheduler(t *testing.T) { } } +func TestHTTPAPI_inferPipelineName(t *testing.T) { + config := conf.Config{SchedulingDomain: "test-operator"} + delegate := &mockHTTPAPIDelegate{} + api := NewAPI(config, delegate).(*httpAPI) + + tests := []struct { + name string + request cinderapi.ExternalSchedulerRequest + expectedName string + expectError bool + }{ + { + name: "returns default pipeline name", + request: cinderapi.ExternalSchedulerRequest{ + Hosts: []cinderapi.ExternalSchedulerHost{ + {VolumeHost: "host1"}, + }, + Weights: map[string]float64{ + "host1": 1.0, + }, + }, + expectedName: "cinder-external-scheduler", + expectError: false, + }, + { + name: "returns default pipeline name for empty request", + request: cinderapi.ExternalSchedulerRequest{}, + expectedName: "cinder-external-scheduler", + expectError: false, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t 
*testing.T) { + pipelineName, err := api.inferPipelineName(tt.request) + + if tt.expectError && err == nil { + t.Error("expected error, got nil") + } + if !tt.expectError && err != nil { + t.Errorf("expected no error, got %v", err) + } + if pipelineName != tt.expectedName { + t.Errorf("expected pipeline name %s, got %s", tt.expectedName, pipelineName) + } + }) + } +} + func TestHTTPAPI_CinderExternalScheduler_DecisionCreation(t *testing.T) { config := conf.Config{SchedulingDomain: v1alpha1.SchedulingDomainCinder} diff --git a/internal/scheduling/decisions/cinder/pipeline_controller.go b/internal/scheduling/cinder/filter_weigher_pipeline_controller.go similarity index 85% rename from internal/scheduling/decisions/cinder/pipeline_controller.go rename to internal/scheduling/cinder/filter_weigher_pipeline_controller.go index 41d7d378e..b6c456b99 100644 --- a/internal/scheduling/decisions/cinder/pipeline_controller.go +++ b/internal/scheduling/cinder/filter_weigher_pipeline_controller.go @@ -33,26 +33,26 @@ import ( // // Additionally, the controller watches for pipeline and step changes to // reconfigure the pipelines as needed. -type DecisionPipelineController struct { +type FilterWeigherPipelineController struct { // Toolbox shared between all pipeline controllers. - lib.BasePipelineController[lib.Pipeline[api.ExternalSchedulerRequest]] + lib.BasePipelineController[lib.FilterWeigherPipeline[api.ExternalSchedulerRequest]] // Mutex to only allow one process at a time processMu sync.Mutex // Monitor to pass down to all pipelines. - Monitor lib.PipelineMonitor + Monitor lib.FilterWeigherPipelineMonitor // Config for the scheduling operator. Conf conf.Config } // The type of pipeline this controller manages. -func (c *DecisionPipelineController) PipelineType() v1alpha1.PipelineType { +func (c *FilterWeigherPipelineController) PipelineType() v1alpha1.PipelineType { return v1alpha1.PipelineTypeFilterWeigher } // Callback executed when kubernetes asks to reconcile a decision resource. -func (c *DecisionPipelineController) Reconcile(ctx context.Context, req ctrl.Request) (ctrl.Result, error) { +func (c *FilterWeigherPipelineController) Reconcile(ctx context.Context, req ctrl.Request) (ctrl.Result, error) { c.processMu.Lock() defer c.processMu.Unlock() @@ -72,7 +72,7 @@ func (c *DecisionPipelineController) Reconcile(ctx context.Context, req ctrl.Req } // Process the decision from the API. Should create and return the updated decision. -func (c *DecisionPipelineController) ProcessNewDecisionFromAPI(ctx context.Context, decision *v1alpha1.Decision) error { +func (c *FilterWeigherPipelineController) ProcessNewDecisionFromAPI(ctx context.Context, decision *v1alpha1.Decision) error { c.processMu.Lock() defer c.processMu.Unlock() @@ -111,7 +111,7 @@ func (c *DecisionPipelineController) ProcessNewDecisionFromAPI(ctx context.Conte return err } -func (c *DecisionPipelineController) process(ctx context.Context, decision *v1alpha1.Decision) error { +func (c *FilterWeigherPipelineController) process(ctx context.Context, decision *v1alpha1.Decision) error { log := ctrl.LoggerFrom(ctx) startedAt := time.Now() // So we can measure sync duration. @@ -141,15 +141,20 @@ func (c *DecisionPipelineController) process(ctx context.Context, decision *v1al } // The base controller will delegate the pipeline creation down to this method. 
-func (c *DecisionPipelineController) InitPipeline( +func (c *FilterWeigherPipelineController) InitPipeline( ctx context.Context, p v1alpha1.Pipeline, -) (lib.Pipeline[api.ExternalSchedulerRequest], error) { - - return lib.NewPipeline(ctx, c.Client, p.Name, supportedSteps, p.Spec.Steps, c.Monitor) +) lib.PipelineInitResult[lib.FilterWeigherPipeline[api.ExternalSchedulerRequest]] { + + return lib.InitNewFilterWeigherPipeline( + ctx, c.Client, p.Name, + supportedFilters, p.Spec.Filters, + supportedWeighers, p.Spec.Weighers, + c.Monitor, + ) } -func (c *DecisionPipelineController) SetupWithManager(mgr manager.Manager, mcl *multicluster.Client) error { +func (c *FilterWeigherPipelineController) SetupWithManager(mgr manager.Manager, mcl *multicluster.Client) error { c.Initializer = c c.SchedulingDomain = v1alpha1.SchedulingDomainCinder if err := mgr.Add(manager.RunnableFunc(c.InitAllPipelines)); err != nil { diff --git a/internal/scheduling/decisions/cinder/pipeline_controller_test.go b/internal/scheduling/cinder/filter_weigher_pipeline_controller_test.go similarity index 76% rename from internal/scheduling/decisions/cinder/pipeline_controller_test.go rename to internal/scheduling/cinder/filter_weigher_pipeline_controller_test.go index 091958392..62ac788a9 100644 --- a/internal/scheduling/decisions/cinder/pipeline_controller_test.go +++ b/internal/scheduling/cinder/filter_weigher_pipeline_controller_test.go @@ -23,7 +23,7 @@ import ( metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" ) -func TestDecisionPipelineController_Reconcile(t *testing.T) { +func TestFilterWeigherPipelineController_Reconcile(t *testing.T) { scheme := runtime.NewScheme() if err := v1alpha1.AddToScheme(scheme); err != nil { t.Fatalf("Failed to add v1alpha1 scheme: %v", err) @@ -84,7 +84,8 @@ func TestDecisionPipelineController_Reconcile(t *testing.T) { Spec: v1alpha1.PipelineSpec{ Type: v1alpha1.PipelineTypeFilterWeigher, SchedulingDomain: v1alpha1.SchedulingDomainCinder, - Steps: []v1alpha1.StepSpec{}, + Filters: []v1alpha1.FilterSpec{}, + Weighers: []v1alpha1.WeigherSpec{}, }, }, expectError: false, @@ -112,7 +113,8 @@ func TestDecisionPipelineController_Reconcile(t *testing.T) { Spec: v1alpha1.PipelineSpec{ Type: v1alpha1.PipelineTypeFilterWeigher, SchedulingDomain: v1alpha1.SchedulingDomainCinder, - Steps: []v1alpha1.StepSpec{}, + Filters: []v1alpha1.FilterSpec{}, + Weighers: []v1alpha1.WeigherSpec{}, }, }, expectError: true, @@ -154,32 +156,33 @@ func TestDecisionPipelineController_Reconcile(t *testing.T) { WithStatusSubresource(&v1alpha1.Decision{}). 
Build() - controller := &DecisionPipelineController{ - BasePipelineController: lib.BasePipelineController[lib.Pipeline[api.ExternalSchedulerRequest]]{ + controller := &FilterWeigherPipelineController{ + BasePipelineController: lib.BasePipelineController[lib.FilterWeigherPipeline[api.ExternalSchedulerRequest]]{ Client: client, - Pipelines: make(map[string]lib.Pipeline[api.ExternalSchedulerRequest]), + Pipelines: make(map[string]lib.FilterWeigherPipeline[api.ExternalSchedulerRequest]), }, - Monitor: lib.PipelineMonitor{}, + Monitor: lib.FilterWeigherPipelineMonitor{}, Conf: conf.Config{ SchedulingDomain: v1alpha1.SchedulingDomainCinder, }, } if tt.pipeline != nil { - pipeline, err := controller.InitPipeline(t.Context(), v1alpha1.Pipeline{ + initResult := controller.InitPipeline(t.Context(), v1alpha1.Pipeline{ ObjectMeta: metav1.ObjectMeta{ Name: "test-pipeline", }, Spec: v1alpha1.PipelineSpec{ Type: v1alpha1.PipelineTypeFilterWeigher, SchedulingDomain: v1alpha1.SchedulingDomainCinder, - Steps: []v1alpha1.StepSpec{}, + Filters: []v1alpha1.FilterSpec{}, + Weighers: []v1alpha1.WeigherSpec{}, }, }) - if err != nil { - t.Fatalf("Failed to init pipeline: %v", err) + if len(initResult.FilterErrors) > 0 || len(initResult.WeigherErrors) > 0 { + t.Fatalf("Failed to init pipeline: %v", initResult) } - controller.Pipelines[tt.pipeline.Name] = pipeline + controller.Pipelines[tt.pipeline.Name] = initResult.Pipeline } req := ctrl.Request{ @@ -217,7 +220,7 @@ func TestDecisionPipelineController_Reconcile(t *testing.T) { } } -func TestDecisionPipelineController_ProcessNewDecisionFromAPI(t *testing.T) { +func TestFilterWeigherPipelineController_ProcessNewDecisionFromAPI(t *testing.T) { scheme := runtime.NewScheme() if err := v1alpha1.AddToScheme(scheme); err != nil { t.Fatalf("Failed to add v1alpha1 scheme: %v", err) @@ -281,7 +284,8 @@ func TestDecisionPipelineController_ProcessNewDecisionFromAPI(t *testing.T) { Type: v1alpha1.PipelineTypeFilterWeigher, SchedulingDomain: v1alpha1.SchedulingDomainCinder, CreateDecisions: true, - Steps: []v1alpha1.StepSpec{}, + Filters: []v1alpha1.FilterSpec{}, + Weighers: []v1alpha1.WeigherSpec{}, }, }, createDecisions: true, @@ -314,7 +318,8 @@ func TestDecisionPipelineController_ProcessNewDecisionFromAPI(t *testing.T) { Type: v1alpha1.PipelineTypeFilterWeigher, SchedulingDomain: v1alpha1.SchedulingDomainCinder, CreateDecisions: false, - Steps: []v1alpha1.StepSpec{}, + Filters: []v1alpha1.FilterSpec{}, + Weighers: []v1alpha1.WeigherSpec{}, }, }, createDecisions: false, @@ -367,7 +372,8 @@ func TestDecisionPipelineController_ProcessNewDecisionFromAPI(t *testing.T) { Type: v1alpha1.PipelineTypeFilterWeigher, SchedulingDomain: v1alpha1.SchedulingDomainCinder, CreateDecisions: true, - Steps: []v1alpha1.StepSpec{}, + Filters: []v1alpha1.FilterSpec{}, + Weighers: []v1alpha1.WeigherSpec{}, }, }, createDecisions: true, @@ -390,13 +396,13 @@ func TestDecisionPipelineController_ProcessNewDecisionFromAPI(t *testing.T) { WithStatusSubresource(&v1alpha1.Decision{}). 
Build() - controller := &DecisionPipelineController{ - BasePipelineController: lib.BasePipelineController[lib.Pipeline[api.ExternalSchedulerRequest]]{ + controller := &FilterWeigherPipelineController{ + BasePipelineController: lib.BasePipelineController[lib.FilterWeigherPipeline[api.ExternalSchedulerRequest]]{ Client: client, - Pipelines: make(map[string]lib.Pipeline[api.ExternalSchedulerRequest]), + Pipelines: make(map[string]lib.FilterWeigherPipeline[api.ExternalSchedulerRequest]), PipelineConfigs: make(map[string]v1alpha1.Pipeline), }, - Monitor: lib.PipelineMonitor{}, + Monitor: lib.FilterWeigherPipelineMonitor{}, Conf: conf.Config{ SchedulingDomain: v1alpha1.SchedulingDomainCinder, }, @@ -404,11 +410,11 @@ func TestDecisionPipelineController_ProcessNewDecisionFromAPI(t *testing.T) { if tt.pipelineConfig != nil { controller.PipelineConfigs[tt.pipelineConfig.Name] = *tt.pipelineConfig - pipeline, err := controller.InitPipeline(t.Context(), *tt.pipelineConfig) - if err != nil { - t.Fatalf("Failed to init pipeline: %v", err) + initResult := controller.InitPipeline(t.Context(), *tt.pipelineConfig) + if len(initResult.FilterErrors) > 0 || len(initResult.WeigherErrors) > 0 { + t.Fatalf("Failed to init pipeline: %v", initResult) } - controller.Pipelines[tt.pipelineConfig.Name] = pipeline + controller.Pipelines[tt.pipelineConfig.Name] = initResult.Pipeline } err := controller.ProcessNewDecisionFromAPI(context.Background(), tt.decision) @@ -463,54 +469,73 @@ func TestDecisionPipelineController_ProcessNewDecisionFromAPI(t *testing.T) { } } -func TestDecisionPipelineController_InitPipeline(t *testing.T) { - controller := &DecisionPipelineController{ - Monitor: lib.PipelineMonitor{}, +func TestFilterWeigherPipelineController_PipelineType(t *testing.T) { + controller := &FilterWeigherPipelineController{} + + pipelineType := controller.PipelineType() + + if pipelineType != v1alpha1.PipelineTypeFilterWeigher { + t.Errorf("expected pipeline type %s, got %s", v1alpha1.PipelineTypeFilterWeigher, pipelineType) + } +} + +func TestFilterWeigherPipelineController_InitPipeline(t *testing.T) { + controller := &FilterWeigherPipelineController{ + Monitor: lib.FilterWeigherPipelineMonitor{}, } tests := []struct { - name string - steps []v1alpha1.StepSpec - expectError bool + name string + filters []v1alpha1.FilterSpec + weighers []v1alpha1.WeigherSpec + expectNonCriticalError bool + expectCriticalError bool }{ { - name: "empty steps", - steps: []v1alpha1.StepSpec{}, - expectError: false, + name: "empty steps", + filters: []v1alpha1.FilterSpec{}, + weighers: []v1alpha1.WeigherSpec{}, + expectNonCriticalError: false, + expectCriticalError: false, }, { name: "unsupported step", - steps: []v1alpha1.StepSpec{ + filters: []v1alpha1.FilterSpec{ { - Type: v1alpha1.StepTypeFilter, - Impl: "test-plugin", + Name: "test-plugin", }, }, - expectError: true, // Expected because test-plugin is not in supportedSteps + expectNonCriticalError: false, + expectCriticalError: true, // Expected because test-plugin is not in supportedSteps }, } for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { - pipeline, err := controller.InitPipeline(t.Context(), v1alpha1.Pipeline{ + initResult := controller.InitPipeline(t.Context(), v1alpha1.Pipeline{ ObjectMeta: metav1.ObjectMeta{ Name: "test-pipeline", }, Spec: v1alpha1.PipelineSpec{ Type: v1alpha1.PipelineTypeFilterWeigher, SchedulingDomain: v1alpha1.SchedulingDomainCinder, - Steps: tt.steps, + Filters: tt.filters, + Weighers: tt.weighers, }, }) - if tt.expectError && err == nil { + 
if tt.expectCriticalError && len(initResult.FilterErrors) == 0 { t.Error("Expected error but got none") } - if !tt.expectError && err != nil { - t.Errorf("Expected no error but got: %v", err) + if !tt.expectCriticalError && len(initResult.FilterErrors) > 0 { + t.Errorf("Expected no error but got: %v", initResult.FilterErrors) + } + + if tt.expectNonCriticalError && len(initResult.WeigherErrors) == 0 { + t.Error("Expected non-critical error but got none") } - if !tt.expectError && pipeline == nil { - t.Error("Expected pipeline but got nil") + if !tt.expectNonCriticalError && len(initResult.WeigherErrors) > 0 { + t.Errorf("Expected no non-critical error but got: %v", initResult.WeigherErrors) } }) } diff --git a/internal/scheduling/cinder/supported_filters.go b/internal/scheduling/cinder/supported_filters.go new file mode 100644 index 000000000..f751ee9dd --- /dev/null +++ b/internal/scheduling/cinder/supported_filters.go @@ -0,0 +1,14 @@ +// Copyright SAP SE +// SPDX-License-Identifier: Apache-2.0 + +package cinder + +import ( + api "github.com/cobaltcore-dev/cortex/api/delegation/cinder" + "github.com/cobaltcore-dev/cortex/internal/scheduling/lib" +) + +type CinderFilter = lib.Filter[api.ExternalSchedulerRequest] + +// Configuration of filters supported by the cinder scheduling. +var supportedFilters = map[string]func() CinderFilter{} diff --git a/internal/scheduling/cinder/supported_weighers.go b/internal/scheduling/cinder/supported_weighers.go new file mode 100644 index 000000000..cc45cf26e --- /dev/null +++ b/internal/scheduling/cinder/supported_weighers.go @@ -0,0 +1,14 @@ +// Copyright SAP SE +// SPDX-License-Identifier: Apache-2.0 + +package cinder + +import ( + api "github.com/cobaltcore-dev/cortex/api/delegation/cinder" + "github.com/cobaltcore-dev/cortex/internal/scheduling/lib" +) + +type CinderWeigher = lib.Weigher[api.ExternalSchedulerRequest] + +// Configuration of weighers supported by the cinder scheduling. +var supportedWeighers = map[string]func() CinderWeigher{} diff --git a/internal/scheduling/decisions/cinder/supported_steps.go b/internal/scheduling/decisions/cinder/supported_steps.go deleted file mode 100644 index 307d44a4d..000000000 --- a/internal/scheduling/decisions/cinder/supported_steps.go +++ /dev/null @@ -1,15 +0,0 @@ -// Copyright SAP SE -// SPDX-License-Identifier: Apache-2.0 - -package cinder - -import ( - api "github.com/cobaltcore-dev/cortex/api/delegation/cinder" - "github.com/cobaltcore-dev/cortex/internal/scheduling/lib" -) - -type CinderStep = lib.Step[api.ExternalSchedulerRequest] - -// Configuration of steps supported by the scheduler. -// The steps actually used by the scheduler are defined through the configuration file. -var supportedSteps = map[string]func() CinderStep{} diff --git a/internal/scheduling/decisions/machines/supported_steps.go b/internal/scheduling/decisions/machines/supported_steps.go deleted file mode 100644 index cfa9b13eb..000000000 --- a/internal/scheduling/decisions/machines/supported_steps.go +++ /dev/null @@ -1,17 +0,0 @@ -// Copyright SAP SE -// SPDX-License-Identifier: Apache-2.0 - -package machines - -import ( - "github.com/cobaltcore-dev/cortex/api/delegation/ironcore" - "github.com/cobaltcore-dev/cortex/internal/scheduling/lib" -) - -type MachineStep = lib.Step[ironcore.MachinePipelineRequest] - -// Configuration of steps supported by the scheduling. -// The steps actually used by the scheduler are defined through the configuration file. 
-var supportedSteps = map[string]func() MachineStep{ - "noop": func() MachineStep { return &NoopFilter{} }, -} diff --git a/internal/scheduling/decisions/manila/supported_steps.go b/internal/scheduling/decisions/manila/supported_steps.go deleted file mode 100644 index a9ec7ebd1..000000000 --- a/internal/scheduling/decisions/manila/supported_steps.go +++ /dev/null @@ -1,18 +0,0 @@ -// Copyright SAP SE -// SPDX-License-Identifier: Apache-2.0 - -package manila - -import ( - api "github.com/cobaltcore-dev/cortex/api/delegation/manila" - "github.com/cobaltcore-dev/cortex/internal/scheduling/decisions/manila/plugins/weighers" - "github.com/cobaltcore-dev/cortex/internal/scheduling/lib" -) - -type ManilaStep = lib.Step[api.ExternalSchedulerRequest] - -// Configuration of steps supported by the scheduling. -// The steps actually used by the scheduler are defined through the configuration file. -var supportedSteps = map[string]func() ManilaStep{ - "netapp_cpu_usage_balancing": func() ManilaStep { return &weighers.NetappCPUUsageBalancingStep{} }, -} diff --git a/internal/scheduling/decisions/nova/plugins/filters/filter_has_enough_capacity_test.go b/internal/scheduling/decisions/nova/plugins/filters/filter_has_enough_capacity_test.go deleted file mode 100644 index e1bdaaa84..000000000 --- a/internal/scheduling/decisions/nova/plugins/filters/filter_has_enough_capacity_test.go +++ /dev/null @@ -1,807 +0,0 @@ -// Copyright SAP SE -// SPDX-License-Identifier: Apache-2.0 - -package filters - -import ( - "log/slog" - "testing" - - api "github.com/cobaltcore-dev/cortex/api/delegation/nova" - "github.com/cobaltcore-dev/cortex/api/v1alpha1" - hv1 "github.com/cobaltcore-dev/openstack-hypervisor-operator/api/v1" - "k8s.io/apimachinery/pkg/api/resource" - v1 "k8s.io/apimachinery/pkg/apis/meta/v1" - "sigs.k8s.io/controller-runtime/pkg/client" - "sigs.k8s.io/controller-runtime/pkg/client/fake" -) - -func TestFilterHasEnoughCapacity_Run(t *testing.T) { - scheme, err := hv1.SchemeBuilder.Build() - if err != nil { - t.Fatalf("expected no error building hypervisor scheme, got %v", err) - } - if err := v1alpha1.AddToScheme(scheme); err != nil { - t.Fatalf("expected no error adding v1alpha1 to scheme, got %v", err) - } - - // Define hypervisors with various capacity configurations - hvs := []client.Object{ - &hv1.Hypervisor{ - ObjectMeta: v1.ObjectMeta{ - Name: "host1", - }, - Status: hv1.HypervisorStatus{ - Capacity: map[string]resource.Quantity{ - "cpu": resource.MustParse("32"), // 32 vCPUs - "memory": resource.MustParse("64Gi"), // 64 GiB = 68719476736 bytes - }, - Allocation: map[string]resource.Quantity{ - "cpu": resource.MustParse("8"), // 8 vCPUs used - "memory": resource.MustParse("16Gi"), // 16 GiB used - }, - }, - }, - &hv1.Hypervisor{ - ObjectMeta: v1.ObjectMeta{ - Name: "host2", - }, - Status: hv1.HypervisorStatus{ - Capacity: map[string]resource.Quantity{ - "cpu": resource.MustParse("16"), - "memory": resource.MustParse("32Gi"), - }, - Allocation: map[string]resource.Quantity{ - "cpu": resource.MustParse("14"), - "memory": resource.MustParse("28Gi"), - }, - }, - }, - &hv1.Hypervisor{ - ObjectMeta: v1.ObjectMeta{ - Name: "host3", - }, - Status: hv1.HypervisorStatus{ - Capacity: map[string]resource.Quantity{ - "cpu": resource.MustParse("64"), - "memory": resource.MustParse("128Gi"), - }, - Allocation: map[string]resource.Quantity{ - "cpu": resource.MustParse("0"), - "memory": resource.MustParse("0"), - }, - }, - }, - &hv1.Hypervisor{ - ObjectMeta: v1.ObjectMeta{ - Name: "host4", - }, - Status: 
hv1.HypervisorStatus{ - Capacity: map[string]resource.Quantity{ - "cpu": resource.MustParse("8"), - "memory": resource.MustParse("16Gi"), - }, - Allocation: map[string]resource.Quantity{ - "cpu": resource.MustParse("4"), - "memory": resource.MustParse("12Gi"), - }, - }, - }, - &hv1.Hypervisor{ - ObjectMeta: v1.ObjectMeta{ - Name: "host5", - }, - Status: hv1.HypervisorStatus{ - Capacity: map[string]resource.Quantity{ - "cpu": resource.MustParse("48"), - "memory": resource.MustParse("96Gi"), - }, - Allocation: map[string]resource.Quantity{ - "cpu": resource.MustParse("40"), - "memory": resource.MustParse("80Gi"), - }, - }, - }, - } - - tests := []struct { - name string - request api.ExternalSchedulerRequest - reservations []client.Object - opts FilterHasEnoughCapacityOpts - expectedHosts []string - filteredHosts []string - expectError bool - }{ - { - name: "Single instance with sufficient capacity on all hosts", - request: api.ExternalSchedulerRequest{ - Spec: api.NovaObject[api.NovaSpec]{ - Data: api.NovaSpec{ - NumInstances: 1, - Flavor: api.NovaObject[api.NovaFlavor]{ - Data: api.NovaFlavor{ - Name: "m1.small", - VCPUs: 2, - MemoryMB: 2048, // 2 GB - }, - }, - }, - }, - Hosts: []api.ExternalSchedulerHost{ - {ComputeHost: "host1"}, - {ComputeHost: "host2"}, - {ComputeHost: "host3"}, - {ComputeHost: "host4"}, - {ComputeHost: "host5"}, - }, - }, - expectedHosts: []string{"host1", "host2", "host3", "host4", "host5"}, - filteredHosts: []string{}, - }, - { - name: "Single instance - filter host with insufficient CPU", - request: api.ExternalSchedulerRequest{ - Spec: api.NovaObject[api.NovaSpec]{ - Data: api.NovaSpec{ - NumInstances: 1, - Flavor: api.NovaObject[api.NovaFlavor]{ - Data: api.NovaFlavor{ - Name: "m1.large", - VCPUs: 4, - MemoryMB: 4096, - }, - }, - }, - }, - Hosts: []api.ExternalSchedulerHost{ - {ComputeHost: "host1"}, - {ComputeHost: "host2"}, - {ComputeHost: "host4"}, - }, - }, - expectedHosts: []string{"host1", "host4"}, - filteredHosts: []string{"host2"}, - }, - { - name: "Single instance - filter host with insufficient memory", - request: api.ExternalSchedulerRequest{ - Spec: api.NovaObject[api.NovaSpec]{ - Data: api.NovaSpec{ - NumInstances: 1, - Flavor: api.NovaObject[api.NovaFlavor]{ - Data: api.NovaFlavor{ - Name: "m1.xlarge", - VCPUs: 2, - MemoryMB: 20480, // 20 GB - }, - }, - }, - }, - Hosts: []api.ExternalSchedulerHost{ - {ComputeHost: "host1"}, - {ComputeHost: "host2"}, - {ComputeHost: "host5"}, - }, - }, - expectedHosts: []string{"host1"}, - filteredHosts: []string{"host2", "host5"}, - }, - { - name: "Multiple instances - require capacity for all on same host", - request: api.ExternalSchedulerRequest{ - Spec: api.NovaObject[api.NovaSpec]{ - Data: api.NovaSpec{ - NumInstances: 3, - Flavor: api.NovaObject[api.NovaFlavor]{ - Data: api.NovaFlavor{ - Name: "m1.medium", - VCPUs: 4, - MemoryMB: 8192, // 8 GB - }, - }, - }, - }, - Hosts: []api.ExternalSchedulerHost{ - {ComputeHost: "host1"}, - {ComputeHost: "host3"}, - {ComputeHost: "host5"}, - }, - }, - expectedHosts: []string{"host1", "host3"}, - filteredHosts: []string{"host5"}, - }, - { - name: "No hosts have sufficient capacity", - request: api.ExternalSchedulerRequest{ - Spec: api.NovaObject[api.NovaSpec]{ - Data: api.NovaSpec{ - NumInstances: 1, - Flavor: api.NovaObject[api.NovaFlavor]{ - Data: api.NovaFlavor{ - Name: "m1.huge", - VCPUs: 32, - MemoryMB: 65536, // 64 GB - }, - }, - }, - }, - Hosts: []api.ExternalSchedulerHost{ - {ComputeHost: "host1"}, - {ComputeHost: "host2"}, - {ComputeHost: "host4"}, - }, - }, - 
expectedHosts: []string{}, - filteredHosts: []string{"host1", "host2", "host4"}, - }, - { - name: "Active reservation locks resources - filter host", - request: api.ExternalSchedulerRequest{ - Spec: api.NovaObject[api.NovaSpec]{ - Data: api.NovaSpec{ - ProjectID: "project-1", - NumInstances: 1, - Flavor: api.NovaObject[api.NovaFlavor]{ - Data: api.NovaFlavor{ - Name: "m1.small", - VCPUs: 2, - MemoryMB: 2048, - }, - }, - }, - }, - Hosts: []api.ExternalSchedulerHost{ - {ComputeHost: "host4"}, - }, - }, - reservations: []client.Object{ - &v1alpha1.Reservation{ - ObjectMeta: v1.ObjectMeta{ - Name: "reservation-1", - }, - Spec: v1alpha1.ReservationSpec{ - Scheduler: v1alpha1.ReservationSchedulerSpec{ - CortexNova: &v1alpha1.ReservationSchedulerSpecCortexNova{ - ProjectID: "project-2", - FlavorName: "m1.medium", - }, - }, - Requests: map[string]resource.Quantity{ - "cpu": resource.MustParse("2"), - "memory": resource.MustParse("2Gi"), - }, - }, - Status: v1alpha1.ReservationStatus{ - Phase: v1alpha1.ReservationStatusPhaseActive, - Host: "host4", - }, - }, - }, - expectedHosts: []string{"host4"}, - filteredHosts: []string{}, - }, - { - name: "Matching reservation unlocks resources - host passes", - request: api.ExternalSchedulerRequest{ - Spec: api.NovaObject[api.NovaSpec]{ - Data: api.NovaSpec{ - ProjectID: "project-1", - NumInstances: 1, - Flavor: api.NovaObject[api.NovaFlavor]{ - Data: api.NovaFlavor{ - Name: "m1.small", - VCPUs: 2, - MemoryMB: 2048, - }, - }, - }, - }, - Hosts: []api.ExternalSchedulerHost{ - {ComputeHost: "host4"}, - }, - }, - reservations: []client.Object{ - &v1alpha1.Reservation{ - ObjectMeta: v1.ObjectMeta{ - Name: "reservation-matching", - }, - Spec: v1alpha1.ReservationSpec{ - Scheduler: v1alpha1.ReservationSchedulerSpec{ - CortexNova: &v1alpha1.ReservationSchedulerSpecCortexNova{ - ProjectID: "project-1", - FlavorName: "m1.small", - }, - }, - Requests: map[string]resource.Quantity{ - "cpu": resource.MustParse("2"), - "memory": resource.MustParse("2Gi"), - }, - }, - Status: v1alpha1.ReservationStatus{ - Phase: v1alpha1.ReservationStatusPhaseActive, - Host: "host4", - }, - }, - }, - opts: FilterHasEnoughCapacityOpts{LockReserved: false}, - expectedHosts: []string{"host4"}, - filteredHosts: []string{}, - }, - { - name: "LockReserved option - matching reservation still locks resources", - request: api.ExternalSchedulerRequest{ - Spec: api.NovaObject[api.NovaSpec]{ - Data: api.NovaSpec{ - ProjectID: "project-1", - NumInstances: 1, - Flavor: api.NovaObject[api.NovaFlavor]{ - Data: api.NovaFlavor{ - Name: "m1.small", - VCPUs: 2, - MemoryMB: 2048, - }, - }, - }, - }, - Hosts: []api.ExternalSchedulerHost{ - {ComputeHost: "host4"}, - }, - }, - reservations: []client.Object{ - &v1alpha1.Reservation{ - ObjectMeta: v1.ObjectMeta{ - Name: "reservation-locked", - }, - Spec: v1alpha1.ReservationSpec{ - Scheduler: v1alpha1.ReservationSchedulerSpec{ - CortexNova: &v1alpha1.ReservationSchedulerSpecCortexNova{ - ProjectID: "project-1", - FlavorName: "m1.small", - }, - }, - Requests: map[string]resource.Quantity{ - "cpu": resource.MustParse("2"), - "memory": resource.MustParse("2Gi"), - }, - }, - Status: v1alpha1.ReservationStatus{ - Phase: v1alpha1.ReservationStatusPhaseActive, - Host: "host4", - }, - }, - }, - opts: FilterHasEnoughCapacityOpts{LockReserved: true}, - expectedHosts: []string{"host4"}, - filteredHosts: []string{}, - }, - { - name: "Inactive reservation does not affect capacity", - request: api.ExternalSchedulerRequest{ - Spec: api.NovaObject[api.NovaSpec]{ - Data: 
api.NovaSpec{ - ProjectID: "project-1", - NumInstances: 1, - Flavor: api.NovaObject[api.NovaFlavor]{ - Data: api.NovaFlavor{ - Name: "m1.small", - VCPUs: 2, - MemoryMB: 2048, - }, - }, - }, - }, - Hosts: []api.ExternalSchedulerHost{ - {ComputeHost: "host4"}, - }, - }, - reservations: []client.Object{ - &v1alpha1.Reservation{ - ObjectMeta: v1.ObjectMeta{ - Name: "reservation-inactive", - }, - Spec: v1alpha1.ReservationSpec{ - Scheduler: v1alpha1.ReservationSchedulerSpec{ - CortexNova: &v1alpha1.ReservationSchedulerSpecCortexNova{ - ProjectID: "project-2", - FlavorName: "m1.medium", - }, - }, - Requests: map[string]resource.Quantity{ - "cpu": resource.MustParse("2"), - "memory": resource.MustParse("2Gi"), - }, - }, - Status: v1alpha1.ReservationStatus{ - Phase: v1alpha1.ReservationStatusPhaseFailed, - Host: "host4", - }, - }, - }, - expectedHosts: []string{"host4"}, - filteredHosts: []string{}, - }, - { - name: "Reservation without CortexNova scheduler is ignored", - request: api.ExternalSchedulerRequest{ - Spec: api.NovaObject[api.NovaSpec]{ - Data: api.NovaSpec{ - ProjectID: "project-1", - NumInstances: 1, - Flavor: api.NovaObject[api.NovaFlavor]{ - Data: api.NovaFlavor{ - Name: "m1.small", - VCPUs: 2, - MemoryMB: 2048, - }, - }, - }, - }, - Hosts: []api.ExternalSchedulerHost{ - {ComputeHost: "host4"}, - }, - }, - reservations: []client.Object{ - &v1alpha1.Reservation{ - ObjectMeta: v1.ObjectMeta{ - Name: "reservation-other-scheduler", - }, - Spec: v1alpha1.ReservationSpec{ - Scheduler: v1alpha1.ReservationSchedulerSpec{ - CortexNova: nil, - }, - Requests: map[string]resource.Quantity{ - "cpu": resource.MustParse("2"), - "memory": resource.MustParse("2Gi"), - }, - }, - Status: v1alpha1.ReservationStatus{ - Phase: v1alpha1.ReservationStatusPhaseActive, - Host: "host4", - }, - }, - }, - expectedHosts: []string{"host4"}, - filteredHosts: []string{}, - }, - { - name: "Multiple reservations on different hosts", - request: api.ExternalSchedulerRequest{ - Spec: api.NovaObject[api.NovaSpec]{ - Data: api.NovaSpec{ - ProjectID: "project-1", - NumInstances: 1, - Flavor: api.NovaObject[api.NovaFlavor]{ - Data: api.NovaFlavor{ - Name: "m1.small", - VCPUs: 2, - MemoryMB: 2048, - }, - }, - }, - }, - Hosts: []api.ExternalSchedulerHost{ - {ComputeHost: "host2"}, - {ComputeHost: "host4"}, - {ComputeHost: "host5"}, - }, - }, - reservations: []client.Object{ - &v1alpha1.Reservation{ - ObjectMeta: v1.ObjectMeta{ - Name: "reservation-host2", - }, - Spec: v1alpha1.ReservationSpec{ - Scheduler: v1alpha1.ReservationSchedulerSpec{ - CortexNova: &v1alpha1.ReservationSchedulerSpecCortexNova{ - ProjectID: "project-2", - FlavorName: "m1.medium", - }, - }, - Requests: map[string]resource.Quantity{ - "cpu": resource.MustParse("2"), - "memory": resource.MustParse("4Gi"), - }, - }, - Status: v1alpha1.ReservationStatus{ - Phase: v1alpha1.ReservationStatusPhaseActive, - Host: "host2", - }, - }, - &v1alpha1.Reservation{ - ObjectMeta: v1.ObjectMeta{ - Name: "reservation-host5", - }, - Spec: v1alpha1.ReservationSpec{ - Scheduler: v1alpha1.ReservationSchedulerSpec{ - CortexNova: &v1alpha1.ReservationSchedulerSpecCortexNova{ - ProjectID: "project-3", - FlavorName: "m1.large", - }, - }, - Requests: map[string]resource.Quantity{ - "cpu": resource.MustParse("4"), - "memory": resource.MustParse("8Gi"), - }, - }, - Status: v1alpha1.ReservationStatus{ - Phase: v1alpha1.ReservationStatusPhaseActive, - Host: "host5", - }, - }, - }, - expectedHosts: []string{"host4", "host5"}, - filteredHosts: []string{"host2"}, - }, - { - name: "Empty 
host list", - request: api.ExternalSchedulerRequest{ - Spec: api.NovaObject[api.NovaSpec]{ - Data: api.NovaSpec{ - NumInstances: 1, - Flavor: api.NovaObject[api.NovaFlavor]{ - Data: api.NovaFlavor{ - Name: "m1.small", - VCPUs: 2, - MemoryMB: 2048, - }, - }, - }, - }, - Hosts: []api.ExternalSchedulerHost{}, - }, - expectedHosts: []string{}, - filteredHosts: []string{}, - }, - { - name: "Host not in database is filtered out", - request: api.ExternalSchedulerRequest{ - Spec: api.NovaObject[api.NovaSpec]{ - Data: api.NovaSpec{ - NumInstances: 1, - Flavor: api.NovaObject[api.NovaFlavor]{ - Data: api.NovaFlavor{ - Name: "m1.small", - VCPUs: 2, - MemoryMB: 2048, - }, - }, - }, - }, - Hosts: []api.ExternalSchedulerHost{ - {ComputeHost: "host1"}, - {ComputeHost: "host-unknown"}, - }, - }, - expectedHosts: []string{"host1"}, - filteredHosts: []string{"host-unknown"}, - }, - { - name: "Large number of instances - edge case", - request: api.ExternalSchedulerRequest{ - Spec: api.NovaObject[api.NovaSpec]{ - Data: api.NovaSpec{ - NumInstances: 10, - Flavor: api.NovaObject[api.NovaFlavor]{ - Data: api.NovaFlavor{ - Name: "m1.tiny", - VCPUs: 1, - MemoryMB: 512, - }, - }, - }, - }, - Hosts: []api.ExternalSchedulerHost{ - {ComputeHost: "host1"}, - {ComputeHost: "host3"}, - }, - }, - expectedHosts: []string{"host1", "host3"}, - filteredHosts: []string{}, - }, - { - name: "Flavor with zero VCPUs - error case", - request: api.ExternalSchedulerRequest{ - Spec: api.NovaObject[api.NovaSpec]{ - Data: api.NovaSpec{ - NumInstances: 1, - Flavor: api.NovaObject[api.NovaFlavor]{ - Data: api.NovaFlavor{ - Name: "invalid-flavor", - VCPUs: 0, - MemoryMB: 2048, - }, - }, - }, - }, - Hosts: []api.ExternalSchedulerHost{ - {ComputeHost: "host1"}, - }, - }, - expectError: true, - }, - { - name: "Flavor with zero memory - error case", - request: api.ExternalSchedulerRequest{ - Spec: api.NovaObject[api.NovaSpec]{ - Data: api.NovaSpec{ - NumInstances: 1, - Flavor: api.NovaObject[api.NovaFlavor]{ - Data: api.NovaFlavor{ - Name: "invalid-flavor", - VCPUs: 2, - MemoryMB: 0, - }, - }, - }, - }, - Hosts: []api.ExternalSchedulerHost{ - {ComputeHost: "host1"}, - }, - }, - expectError: true, - }, - { - name: "Memory boundary - exactly enough memory", - request: api.ExternalSchedulerRequest{ - Spec: api.NovaObject[api.NovaSpec]{ - Data: api.NovaSpec{ - NumInstances: 1, - Flavor: api.NovaObject[api.NovaFlavor]{ - Data: api.NovaFlavor{ - Name: "m1.exact", - VCPUs: 2, - MemoryMB: 4096, // Exactly 4 GB - }, - }, - }, - }, - Hosts: []api.ExternalSchedulerHost{ - {ComputeHost: "host4"}, // Has 4 GB free (16-12) - }, - }, - expectedHosts: []string{"host4"}, - filteredHosts: []string{}, - }, - { - name: "CPU boundary - exactly enough CPU", - request: api.ExternalSchedulerRequest{ - Spec: api.NovaObject[api.NovaSpec]{ - Data: api.NovaSpec{ - NumInstances: 1, - Flavor: api.NovaObject[api.NovaFlavor]{ - Data: api.NovaFlavor{ - Name: "m1.exact-cpu", - VCPUs: 2, // Exactly 2 vCPUs - MemoryMB: 1024, - }, - }, - }, - }, - Hosts: []api.ExternalSchedulerHost{ - {ComputeHost: "host2"}, // Has 2 vCPUs free (16-14) - }, - }, - expectedHosts: []string{"host2"}, - filteredHosts: []string{}, - }, - { - name: "Complex scenario with multiple hosts and reservations", - request: api.ExternalSchedulerRequest{ - Spec: api.NovaObject[api.NovaSpec]{ - Data: api.NovaSpec{ - ProjectID: "project-test", - NumInstances: 2, - Flavor: api.NovaObject[api.NovaFlavor]{ - Data: api.NovaFlavor{ - Name: "m1.test", - VCPUs: 4, - MemoryMB: 8192, - }, - }, - }, - }, - Hosts: 
[]api.ExternalSchedulerHost{ - {ComputeHost: "host1"}, - {ComputeHost: "host3"}, - {ComputeHost: "host5"}, - }, - }, - reservations: []client.Object{ - &v1alpha1.Reservation{ - ObjectMeta: v1.ObjectMeta{ - Name: "reservation-host1-matching", - }, - Spec: v1alpha1.ReservationSpec{ - Scheduler: v1alpha1.ReservationSchedulerSpec{ - CortexNova: &v1alpha1.ReservationSchedulerSpecCortexNova{ - ProjectID: "project-test", - FlavorName: "m1.test", - }, - }, - Requests: map[string]resource.Quantity{ - "cpu": resource.MustParse("8"), - "memory": resource.MustParse("16Gi"), - }, - }, - Status: v1alpha1.ReservationStatus{ - Phase: v1alpha1.ReservationStatusPhaseActive, - Host: "host1", - }, - }, - &v1alpha1.Reservation{ - ObjectMeta: v1.ObjectMeta{ - Name: "reservation-host5-nonmatching", - }, - Spec: v1alpha1.ReservationSpec{ - Scheduler: v1alpha1.ReservationSchedulerSpec{ - CortexNova: &v1alpha1.ReservationSchedulerSpecCortexNova{ - ProjectID: "project-other", - FlavorName: "m1.other", - }, - }, - Requests: map[string]resource.Quantity{ - "cpu": resource.MustParse("4"), - "memory": resource.MustParse("8Gi"), - }, - }, - Status: v1alpha1.ReservationStatus{ - Phase: v1alpha1.ReservationStatusPhaseActive, - Host: "host5", - }, - }, - }, - opts: FilterHasEnoughCapacityOpts{LockReserved: false}, - expectedHosts: []string{"host1", "host3"}, - filteredHosts: []string{"host5"}, - }, - } - - for _, tt := range tests { - t.Run(tt.name, func(t *testing.T) { - // Build the fake client with hypervisors and reservations - objects := make([]client.Object, 0, len(hvs)+len(tt.reservations)) - objects = append(objects, hvs...) - objects = append(objects, tt.reservations...) - - step := &FilterHasEnoughCapacity{} - step.Options = tt.opts - step.Client = fake.NewClientBuilder(). - WithScheme(scheme). - WithObjects(objects...). 
- Build() - - result, err := step.Run(slog.Default(), tt.request) - - if tt.expectError { - if err == nil { - t.Fatalf("expected error, got nil") - } - return - } - - if err != nil { - t.Fatalf("expected no error, got %v", err) - } - - // Check expected hosts are present - for _, host := range tt.expectedHosts { - if _, ok := result.Activations[host]; !ok { - t.Errorf("expected host %s to be present in activations", host) - } - } - - // Check filtered hosts are not present - for _, host := range tt.filteredHosts { - if _, ok := result.Activations[host]; ok { - t.Errorf("expected host %s to be filtered out", host) - } - } - - // Check total count - if len(result.Activations) != len(tt.expectedHosts) { - t.Errorf("expected %d hosts, got %d", len(tt.expectedHosts), len(result.Activations)) - } - }) - } -} diff --git a/internal/scheduling/decisions/nova/plugins/weighers/vmware_anti_affinity_noisy_projects_test.go b/internal/scheduling/decisions/nova/plugins/weighers/vmware_anti_affinity_noisy_projects_test.go deleted file mode 100644 index af653fd7a..000000000 --- a/internal/scheduling/decisions/nova/plugins/weighers/vmware_anti_affinity_noisy_projects_test.go +++ /dev/null @@ -1,126 +0,0 @@ -// Copyright SAP SE -// SPDX-License-Identifier: Apache-2.0 - -package weighers - -import ( - "log/slog" - "testing" - - api "github.com/cobaltcore-dev/cortex/api/delegation/nova" - "github.com/cobaltcore-dev/cortex/api/v1alpha1" - "github.com/cobaltcore-dev/cortex/internal/knowledge/extractor/plugins/compute" - v1 "k8s.io/apimachinery/pkg/apis/meta/v1" - "sigs.k8s.io/controller-runtime/pkg/client/fake" -) - -func TestVMwareAntiAffinityNoisyProjectsStep_Run(t *testing.T) { - scheme, err := v1alpha1.SchemeBuilder.Build() - if err != nil { - t.Fatalf("expected no error, got %v", err) - } - - vropsProjectNoisiness, err := v1alpha1.BoxFeatureList([]any{ - &compute.VROpsProjectNoisiness{Project: "project1", ComputeHost: "host1", AvgCPUOfProject: 25.0}, - &compute.VROpsProjectNoisiness{Project: "project1", ComputeHost: "host2", AvgCPUOfProject: 30.0}, - &compute.VROpsProjectNoisiness{Project: "project2", ComputeHost: "host3", AvgCPUOfProject: 15.0}, - }) - if err != nil { - t.Fatalf("expected no error, got %v", err) - } - - step := &VMwareAntiAffinityNoisyProjectsStep{} - step.Options.AvgCPUUsageLowerBound = 20.0 - step.Options.AvgCPUUsageUpperBound = 100.0 - step.Options.AvgCPUUsageActivationLowerBound = 0.0 - step.Options.AvgCPUUsageActivationUpperBound = -0.5 - step.Client = fake.NewClientBuilder(). - WithScheme(scheme). - WithObjects(&v1alpha1.Knowledge{ - ObjectMeta: v1.ObjectMeta{Name: "vmware-project-noisiness"}, - Status: v1alpha1.KnowledgeStatus{Raw: vropsProjectNoisiness}, - }). 
- Build() - - tests := []struct { - name string - request api.ExternalSchedulerRequest - downvotedHosts map[string]struct{} - }{ - { - name: "Noisy project", - request: api.ExternalSchedulerRequest{ - Spec: api.NovaObject[api.NovaSpec]{ - Data: api.NovaSpec{ - ProjectID: "project1", - }, - }, - VMware: true, - Hosts: []api.ExternalSchedulerHost{ - {ComputeHost: "host1"}, - {ComputeHost: "host2"}, - {ComputeHost: "host3"}, - }, - }, - downvotedHosts: map[string]struct{}{ - "host1": {}, - "host2": {}, - }, - }, - { - name: "Non-noisy project", - request: api.ExternalSchedulerRequest{ - Spec: api.NovaObject[api.NovaSpec]{ - Data: api.NovaSpec{ - ProjectID: "project2", - }, - }, - VMware: true, - Hosts: []api.ExternalSchedulerHost{ - {ComputeHost: "host1"}, - {ComputeHost: "host2"}, - {ComputeHost: "host3"}, - }, - }, - downvotedHosts: map[string]struct{}{}, - }, - { - name: "No noisy project data", - request: api.ExternalSchedulerRequest{ - Spec: api.NovaObject[api.NovaSpec]{ - Data: api.NovaSpec{ - ProjectID: "project3", - }, - }, - VMware: true, - Hosts: []api.ExternalSchedulerHost{ - {ComputeHost: "host1"}, - {ComputeHost: "host2"}, - {ComputeHost: "host3"}, - }, - }, - downvotedHosts: map[string]struct{}{}, - }, - } - - for _, tt := range tests { - t.Run(tt.name, func(t *testing.T) { - result, err := step.Run(slog.Default(), tt.request) - if err != nil { - t.Fatalf("expected no error, got %v", err) - } - // Check that the weights have decreased - for host, weight := range result.Activations { - if _, ok := tt.downvotedHosts[host]; ok { - if weight >= 0 { - t.Errorf("expected weight for host %s to be less than 0, got %f", host, weight) - } - } else { - if weight != 0 { - t.Errorf("expected weight for host %s to be 0, got %f", host, weight) - } - } - } - }) - } -} diff --git a/internal/scheduling/decisions/nova/plugins/weighers/vmware_avoid_long_term_contended_hosts_test.go b/internal/scheduling/decisions/nova/plugins/weighers/vmware_avoid_long_term_contended_hosts_test.go deleted file mode 100644 index 72a7378a4..000000000 --- a/internal/scheduling/decisions/nova/plugins/weighers/vmware_avoid_long_term_contended_hosts_test.go +++ /dev/null @@ -1,105 +0,0 @@ -// Copyright SAP SE -// SPDX-License-Identifier: Apache-2.0 - -package weighers - -import ( - "log/slog" - "testing" - - api "github.com/cobaltcore-dev/cortex/api/delegation/nova" - "github.com/cobaltcore-dev/cortex/api/v1alpha1" - "github.com/cobaltcore-dev/cortex/internal/knowledge/extractor/plugins/compute" - v1 "k8s.io/apimachinery/pkg/apis/meta/v1" - "sigs.k8s.io/controller-runtime/pkg/client/fake" -) - -func TestVMwareAvoidLongTermContendedHostsStep_Run(t *testing.T) { - scheme, err := v1alpha1.SchemeBuilder.Build() - if err != nil { - t.Fatalf("expected no error, got %v", err) - } - - vropsHostsystemContentionLongTerm, err := v1alpha1.BoxFeatureList([]any{ - &compute.VROpsHostsystemContentionLongTerm{ComputeHost: "host1", AvgCPUContention: 0.0, MaxCPUContention: 0.0}, - &compute.VROpsHostsystemContentionLongTerm{ComputeHost: "host2", AvgCPUContention: 100.0, MaxCPUContention: 0.0}, - &compute.VROpsHostsystemContentionLongTerm{ComputeHost: "host3", AvgCPUContention: 0.0, MaxCPUContention: 100.0}, - &compute.VROpsHostsystemContentionLongTerm{ComputeHost: "host4", AvgCPUContention: 100.0, MaxCPUContention: 100.0}, - }) - if err != nil { - t.Fatalf("expected no error, got %v", err) - } - - // Create an instance of the step - step := &VMwareAvoidLongTermContendedHostsStep{} - step.Options.AvgCPUContentionLowerBound = 0 - 
step.Options.AvgCPUContentionUpperBound = 100 - step.Options.AvgCPUContentionActivationLowerBound = 0.0 - step.Options.AvgCPUContentionActivationUpperBound = -1.0 - step.Options.MaxCPUContentionLowerBound = 0 - step.Options.MaxCPUContentionUpperBound = 100 - step.Options.MaxCPUContentionActivationLowerBound = 0.0 - step.Options.MaxCPUContentionActivationUpperBound = -1.0 - step.Client = fake.NewClientBuilder(). - WithScheme(scheme). - WithObjects(&v1alpha1.Knowledge{ - ObjectMeta: v1.ObjectMeta{Name: "vmware-long-term-contended-hosts"}, - Status: v1alpha1.KnowledgeStatus{Raw: vropsHostsystemContentionLongTerm}, - }). - Build() - - tests := []struct { - name string - request api.ExternalSchedulerRequest - expected map[string]float64 - }{ - { - name: "Avoid contended hosts", - request: api.ExternalSchedulerRequest{ - VMware: true, - Hosts: []api.ExternalSchedulerHost{ - {ComputeHost: "host1"}, - {ComputeHost: "host2"}, - {ComputeHost: "host3"}, - {ComputeHost: "host4"}, - }, - }, - expected: map[string]float64{ - "host1": 0, - "host2": -1, - "host3": -1, - "host4": -2, // Max and avg contention stack up. - }, - }, - { - name: "Missing data", - request: api.ExternalSchedulerRequest{ - VMware: true, - Hosts: []api.ExternalSchedulerHost{ - {ComputeHost: "host4"}, - {ComputeHost: "host5"}, - }, - }, - expected: map[string]float64{ - "host4": -2, - "host5": 0, // No data but still contained in the result. - }, - }, - } - - for _, tt := range tests { - t.Run(tt.name, func(t *testing.T) { - result, err := step.Run(slog.Default(), tt.request) - if err != nil { - t.Fatalf("expected no error, got %v", err) - } - // Check that the weights have decreased - for host, weight := range result.Activations { - expected := tt.expected[host] - if weight != expected { - t.Errorf("expected weight for host %s to be %f, got %f", host, expected, weight) - } - } - }) - } -} diff --git a/internal/scheduling/decisions/nova/plugins/weighers/vmware_avoid_short_term_contended_hosts_test.go b/internal/scheduling/decisions/nova/plugins/weighers/vmware_avoid_short_term_contended_hosts_test.go deleted file mode 100644 index 25cbded43..000000000 --- a/internal/scheduling/decisions/nova/plugins/weighers/vmware_avoid_short_term_contended_hosts_test.go +++ /dev/null @@ -1,105 +0,0 @@ -// Copyright SAP SE -// SPDX-License-Identifier: Apache-2.0 - -package weighers - -import ( - "log/slog" - "testing" - - api "github.com/cobaltcore-dev/cortex/api/delegation/nova" - "github.com/cobaltcore-dev/cortex/api/v1alpha1" - "github.com/cobaltcore-dev/cortex/internal/knowledge/extractor/plugins/compute" - v1 "k8s.io/apimachinery/pkg/apis/meta/v1" - "sigs.k8s.io/controller-runtime/pkg/client/fake" -) - -func TestVMwareAvoidShortTermContendedHostsStep_Run(t *testing.T) { - scheme, err := v1alpha1.SchemeBuilder.Build() - if err != nil { - t.Fatalf("expected no error, got %v", err) - } - - vropsHostsystemContentionShortTerm, err := v1alpha1.BoxFeatureList([]any{ - &compute.VROpsHostsystemContentionShortTerm{ComputeHost: "host1", AvgCPUContention: 0.0, MaxCPUContention: 0.0}, - &compute.VROpsHostsystemContentionShortTerm{ComputeHost: "host2", AvgCPUContention: 100.0, MaxCPUContention: 0.0}, - &compute.VROpsHostsystemContentionShortTerm{ComputeHost: "host3", AvgCPUContention: 0.0, MaxCPUContention: 100.0}, - &compute.VROpsHostsystemContentionShortTerm{ComputeHost: "host4", AvgCPUContention: 100.0, MaxCPUContention: 100.0}, - }) - if err != nil { - t.Fatalf("expected no error, got %v", err) - } - - // Create an instance of the step - step := 
&VMwareAvoidShortTermContendedHostsStep{} - step.Options.AvgCPUContentionLowerBound = 0 - step.Options.AvgCPUContentionUpperBound = 100 - step.Options.AvgCPUContentionActivationLowerBound = 0.0 - step.Options.AvgCPUContentionActivationUpperBound = -1.0 - step.Options.MaxCPUContentionLowerBound = 0 - step.Options.MaxCPUContentionUpperBound = 100 - step.Options.MaxCPUContentionActivationLowerBound = 0.0 - step.Options.MaxCPUContentionActivationUpperBound = -1.0 - step.Client = fake.NewClientBuilder(). - WithScheme(scheme). - WithObjects(&v1alpha1.Knowledge{ - ObjectMeta: v1.ObjectMeta{Name: "vmware-short-term-contended-hosts"}, - Status: v1alpha1.KnowledgeStatus{Raw: vropsHostsystemContentionShortTerm}, - }). - Build() - - tests := []struct { - name string - request api.ExternalSchedulerRequest - expected map[string]float64 - }{ - { - name: "Avoid contended hosts", - request: api.ExternalSchedulerRequest{ - VMware: true, - Hosts: []api.ExternalSchedulerHost{ - {ComputeHost: "host1"}, - {ComputeHost: "host2"}, - {ComputeHost: "host3"}, - {ComputeHost: "host4"}, - }, - }, - expected: map[string]float64{ - "host1": 0, - "host2": -1, - "host3": -1, - "host4": -2, // Max and avg contention stack up. - }, - }, - { - name: "Missing data", - request: api.ExternalSchedulerRequest{ - VMware: true, - Hosts: []api.ExternalSchedulerHost{ - {ComputeHost: "host4"}, - {ComputeHost: "host5"}, // No data for host5 - }, - }, - expected: map[string]float64{ - "host4": -2, - "host5": 0, // No data but still contained in the result. - }, - }, - } - - for _, tt := range tests { - t.Run(tt.name, func(t *testing.T) { - result, err := step.Run(slog.Default(), tt.request) - if err != nil { - t.Fatalf("expected no error, got %v", err) - } - // Check that the weights have decreased - for host, weight := range result.Activations { - expected := tt.expected[host] - if weight != expected { - t.Errorf("expected weight for host %s to be %f, got %f", host, expected, weight) - } - } - }) - } -} diff --git a/internal/scheduling/decisions/nova/supported_steps.go b/internal/scheduling/decisions/nova/supported_steps.go deleted file mode 100644 index 3a223f5fc..000000000 --- a/internal/scheduling/decisions/nova/supported_steps.go +++ /dev/null @@ -1,38 +0,0 @@ -// Copyright SAP SE -// SPDX-License-Identifier: Apache-2.0 - -package nova - -import ( - api "github.com/cobaltcore-dev/cortex/api/delegation/nova" - "github.com/cobaltcore-dev/cortex/internal/scheduling/decisions/nova/plugins/filters" - "github.com/cobaltcore-dev/cortex/internal/scheduling/decisions/nova/plugins/weighers" - "github.com/cobaltcore-dev/cortex/internal/scheduling/lib" -) - -type NovaStep = lib.Step[api.ExternalSchedulerRequest] - -// Configuration of steps supported by the scheduling. -// The steps actually used by the scheduler are defined through the configuration file. 
-var supportedSteps = map[string]func() NovaStep{ - "vmware_anti_affinity_noisy_projects": func() NovaStep { return &weighers.VMwareAntiAffinityNoisyProjectsStep{} }, - "vmware_avoid_long_term_contended_hosts": func() NovaStep { return &weighers.VMwareAvoidLongTermContendedHostsStep{} }, - "vmware_avoid_short_term_contended_hosts": func() NovaStep { return &weighers.VMwareAvoidShortTermContendedHostsStep{} }, - "vmware_hana_binpacking": func() NovaStep { return &weighers.VMwareHanaBinpackingStep{} }, - "vmware_general_purpose_balancing": func() NovaStep { return &weighers.VMwareGeneralPurposeBalancingStep{} }, - "filter_has_accelerators": func() NovaStep { return &filters.FilterHasAcceleratorsStep{} }, - "filter_correct_az": func() NovaStep { return &filters.FilterCorrectAZStep{} }, - "filter_status_conditions": func() NovaStep { return &filters.FilterStatusConditionsStep{} }, - "filter_maintenance": func() NovaStep { return &filters.FilterMaintenanceStep{} }, - "filter_packed_virtqueue": func() NovaStep { return &filters.FilterPackedVirtqueueStep{} }, - "filter_external_customer": func() NovaStep { return &filters.FilterExternalCustomerStep{} }, - "filter_allowed_projects": func() NovaStep { return &filters.FilterAllowedProjectsStep{} }, - "filter_capabilities": func() NovaStep { return &filters.FilterCapabilitiesStep{} }, - "filter_has_requested_traits": func() NovaStep { return &filters.FilterHasRequestedTraits{} }, - "filter_has_enough_capacity": func() NovaStep { return &filters.FilterHasEnoughCapacity{} }, - "filter_host_instructions": func() NovaStep { return &filters.FilterHostInstructionsStep{} }, - "filter_instance_group_affinity": func() NovaStep { return &filters.FilterInstanceGroupAffinityStep{} }, - "filter_instance_group_anti_affinity": func() NovaStep { return &filters.FilterInstanceGroupAntiAffinityStep{} }, - "filter_live_migratable": func() NovaStep { return &filters.FilterLiveMigratableStep{} }, - "filter_requested_destination": func() NovaStep { return &filters.FilterRequestedDestinationStep{} }, -} diff --git a/internal/scheduling/decisions/pods/supported_steps.go b/internal/scheduling/decisions/pods/supported_steps.go deleted file mode 100644 index ea0017c53..000000000 --- a/internal/scheduling/decisions/pods/supported_steps.go +++ /dev/null @@ -1,23 +0,0 @@ -// Copyright SAP SE -// SPDX-License-Identifier: Apache-2.0 - -package pods - -import ( - "github.com/cobaltcore-dev/cortex/api/delegation/pods" - "github.com/cobaltcore-dev/cortex/internal/scheduling/decisions/pods/plugins/filters" - "github.com/cobaltcore-dev/cortex/internal/scheduling/decisions/pods/plugins/weighers" - "github.com/cobaltcore-dev/cortex/internal/scheduling/lib" -) - -type PodStep = lib.Step[pods.PodPipelineRequest] - -// Configuration of steps supported by the scheduling. -// The steps actually used by the scheduler are defined through the configuration file. 
-var supportedSteps = map[string]func() PodStep{ - "noop": func() PodStep { return &filters.NoopFilter{} }, - "taint": func() PodStep { return &filters.TaintFilter{} }, - "nodeaffinity": func() PodStep { return &filters.NodeAffinityFilter{} }, - "nodecapacity": func() PodStep { return &filters.NodeCapacityFilter{} }, - "binpack": func() PodStep { return &weighers.BinpackingStep{} }, -} diff --git a/internal/scheduling/descheduling/nova/pipeline.go b/internal/scheduling/descheduling/nova/pipeline.go deleted file mode 100644 index 838620d58..000000000 --- a/internal/scheduling/descheduling/nova/pipeline.go +++ /dev/null @@ -1,185 +0,0 @@ -// Copyright SAP SE -// SPDX-License-Identifier: Apache-2.0 - -package nova - -import ( - "context" - "errors" - "log/slog" - "slices" - "strings" - "sync" - - "github.com/cobaltcore-dev/cortex/api/v1alpha1" - "github.com/cobaltcore-dev/cortex/internal/scheduling/descheduling/nova/plugins" - "github.com/prometheus/client_golang/prometheus" - "sigs.k8s.io/controller-runtime/pkg/client" -) - -type Pipeline struct { - // Kubernetes client to create descheduling resources. - client.Client - // Cycle detector to avoid cycles in descheduling. - CycleDetector CycleDetector - // Monitor to use for tracking the pipeline. - Monitor Monitor - - // The order in which scheduler steps are applied, by their step name. - order []string - // The steps by their name. - steps map[string]Step -} - -func (p *Pipeline) Init( - ctx context.Context, - confedSteps []v1alpha1.StepSpec, - supportedSteps map[string]Step, -) error { - - p.order = []string{} - // Load all steps from the configuration. - p.steps = make(map[string]Step, len(confedSteps)) - for _, stepConf := range confedSteps { - step, ok := supportedSteps[stepConf.Impl] - if !ok { - return errors.New("descheduler: unsupported step: " + stepConf.Impl) - } - step = monitorStep(step, stepConf, p.Monitor) - if err := step.Init(ctx, p.Client, stepConf); err != nil { - return err - } - p.steps[stepConf.Impl] = step - p.order = append(p.order, stepConf.Impl) - slog.Info("descheduler: added step", "name", stepConf.Impl) - } - return nil -} - -// Execute the descheduler steps in parallel and collect the decisions made by -// each step. -func (p *Pipeline) run() map[string][]plugins.Decision { - if p.Monitor.pipelineRunTimer != nil { - timer := prometheus.NewTimer(p.Monitor.pipelineRunTimer) - defer timer.ObserveDuration() - } - var lock sync.Mutex - decisionsByStep := map[string][]plugins.Decision{} - var wg sync.WaitGroup - for stepName, step := range p.steps { - wg.Go(func() { - slog.Info("descheduler: running step") - decisions, err := step.Run() - if errors.Is(err, ErrStepSkipped) { - slog.Info("descheduler: step skipped") - return - } - if err != nil { - slog.Error("descheduler: failed to run step", "error", err) - return - } - slog.Info("descheduler: finished step") - lock.Lock() - defer lock.Unlock() - decisionsByStep[stepName] = decisions - }) - } - wg.Wait() - return decisionsByStep -} - -// Combine the decisions made by each step into a single list of vms to deschedule. -func (p *Pipeline) combine(decisionsByStep map[string][]plugins.Decision) []plugins.Decision { - // Order the step names to have a consistent order of processing. - stepNames := make([]string, 0, len(decisionsByStep)) - for stepName := range decisionsByStep { - stepNames = append(stepNames, stepName) - } - slices.Sort(stepNames) - // If there are more than one decision for the same vm, we need to combine them. 
- decisionsByVMID := make(map[string][]plugins.Decision) - for _, stepName := range stepNames { - decisions := decisionsByStep[stepName] - for _, decision := range decisions { - decisionsByVMID[decision.VMID] = append(decisionsByVMID[decision.VMID], decision) - } - } - - combinedDecisions := make([]plugins.Decision, 0, len(decisionsByVMID)) - for vmID, decisions := range decisionsByVMID { - if len(decisions) == 1 { - combinedDecisions = append(combinedDecisions, decisions[0]) - continue - } - // If the host is not the same in all decisions, we need to skip this vm. - host := decisions[0].Host - sameHost := true - for _, decision := range decisions[1:] { - if decision.Host != host { - sameHost = false - break - } - } - if !sameHost { - slog.Error("descheduler: skipping vm with conflicting origin hosts", "vmId", vmID, "decisions", decisions) - continue - } - var reasonBuilder strings.Builder - reasonBuilder.WriteString("multiple reasons: ") - for i, decision := range decisions { - if i > 0 { - reasonBuilder.WriteString("; ") - } - reasonBuilder.WriteString(decision.Reason) - } - combinedDecisions = append(combinedDecisions, plugins.Decision{ - VMID: vmID, - Reason: reasonBuilder.String(), - Host: host, - }) - } - - slog.Info("descheduler: combined decisions", "combined", combinedDecisions) - return combinedDecisions -} - -func (p *Pipeline) createDeschedulings(ctx context.Context) error { - decisionsByStep := p.run() - if len(decisionsByStep) == 0 { - slog.Info("descheduler: no decisions made in this run") - return nil - } - slog.Info("descheduler: decisions made", "decisionsByStep", decisionsByStep) - decisions := p.combine(decisionsByStep) - var err error - decisions, err = p.CycleDetector.Filter(ctx, decisions) - if err != nil { - slog.Error("descheduler: failed to filter decisions for cycles", "error", err) - return err - } - for _, decision := range decisions { - // Precaution: If a descheduling for the VM already exists, skip it. - // The TTL controller will clean up old deschedulings so the vm - // can be descheduled again later if needed, or we can manually - // delete the descheduling if we want to deschedule the VM again. 
- var existing v1alpha1.Descheduling - err := p.Get(ctx, client.ObjectKey{Name: decision.VMID}, &existing) - if err == nil { - slog.Info("descheduler: descheduling already exists for VM, skipping", "vmId", decision.VMID) - continue - } - - descheduling := &v1alpha1.Descheduling{} - descheduling.Name = decision.VMID - descheduling.Spec.Ref = decision.VMID - descheduling.Spec.RefType = v1alpha1.DeschedulingSpecVMReferenceNovaServerUUID - descheduling.Spec.PrevHostType = v1alpha1.DeschedulingSpecHostTypeNovaComputeHostName - descheduling.Spec.PrevHost = decision.Host - descheduling.Spec.Reason = decision.Reason - if err := p.Create(ctx, descheduling); err != nil { - return err - } - slog.Info("descheduler: created descheduling", "vmId", decision.VMID, "host", decision.Host, "reason", decision.Reason) - } - return nil -} diff --git a/internal/scheduling/descheduling/nova/pipeline_controller_test.go b/internal/scheduling/descheduling/nova/pipeline_controller_test.go deleted file mode 100644 index 78369e5a0..000000000 --- a/internal/scheduling/descheduling/nova/pipeline_controller_test.go +++ /dev/null @@ -1,142 +0,0 @@ -// Copyright SAP SE -// SPDX-License-Identifier: Apache-2.0 - -package nova - -import ( - "context" - "testing" - - "github.com/cobaltcore-dev/cortex/api/v1alpha1" - - "github.com/cobaltcore-dev/cortex/internal/scheduling/descheduling/nova/plugins" - "github.com/cobaltcore-dev/cortex/internal/scheduling/lib" - "github.com/cobaltcore-dev/cortex/pkg/conf" - "k8s.io/apimachinery/pkg/runtime" - ctrl "sigs.k8s.io/controller-runtime" - "sigs.k8s.io/controller-runtime/pkg/client" - "sigs.k8s.io/controller-runtime/pkg/client/fake" -) - -type mockCycleDetector struct{} - -func (m *mockCycleDetector) Init(ctx context.Context, client client.Client, conf conf.Config) error { - return nil -} - -func (m *mockCycleDetector) Filter(ctx context.Context, decisions []plugins.Decision) ([]plugins.Decision, error) { - return decisions, nil -} - -type mockControllerStep struct{} - -func (m *mockControllerStep) Run() ([]plugins.Decision, error) { - return nil, nil -} -func (m *mockControllerStep) Init(ctx context.Context, client client.Client, step v1alpha1.StepSpec) error { - return nil -} - -func TestDeschedulingsPipelineController_InitPipeline(t *testing.T) { - tests := []struct { - name string - steps []v1alpha1.StepSpec - expectError bool - expectedError string - }{ - { - name: "successful pipeline initialization", - steps: []v1alpha1.StepSpec{ - { - Type: v1alpha1.StepTypeDescheduler, - Impl: "mock-step", - }, - }, - expectError: false, - }, - { - name: "unsupported step", - steps: []v1alpha1.StepSpec{ - { - - Type: v1alpha1.StepTypeDescheduler, - Impl: "unsupported", - }, - }, - expectError: true, - expectedError: "descheduler: unsupported step: unsupported", - }, - { - name: "empty steps", - steps: []v1alpha1.StepSpec{}, - expectError: false, - }, - } - - for _, tt := range tests { - t.Run(tt.name, func(t *testing.T) { - controller := &DeschedulingsPipelineController{ - Monitor: NewPipelineMonitor(), - CycleDetector: &mockCycleDetector{}, - } - - pipeline := Pipeline{ - CycleDetector: controller.CycleDetector, - Monitor: controller.Monitor, - } - err := pipeline.Init(t.Context(), tt.steps, map[string]Step{ - "mock-step": &mockControllerStep{}, - }) - - if tt.expectError { - if err == nil { - t.Error("expected error but got none") - } - if tt.expectedError != "" && err.Error() != tt.expectedError { - t.Errorf("expected error %q, got %q", tt.expectedError, err.Error()) - } - return - } - - if 
err != nil { - t.Errorf("unexpected error: %v", err) - return - } - - if pipeline.CycleDetector != controller.CycleDetector { - t.Error("expected pipeline to have cycle detector set") - } - - if pipeline.Monitor != controller.Monitor { - t.Error("expected pipeline to have monitor set") - } - }) - } -} - -func TestDeschedulingsPipelineController_Reconcile(t *testing.T) { - scheme := runtime.NewScheme() - err := v1alpha1.AddToScheme(scheme) - if err != nil { - t.Fatalf("Failed to add v1alpha1 scheme: %v", err) - } - - client := fake.NewClientBuilder().WithScheme(scheme).Build() - - controller := &DeschedulingsPipelineController{ - BasePipelineController: lib.BasePipelineController[*Pipeline]{ - Client: client, - }, - } - - req := ctrl.Request{} - result, err := controller.Reconcile(t.Context(), req) - - if err != nil { - t.Errorf("unexpected error: %v", err) - } - - if result.RequeueAfter > 0 { - t.Error("expected no requeue") - } -} diff --git a/internal/scheduling/descheduling/nova/pipeline_test.go b/internal/scheduling/descheduling/nova/pipeline_test.go deleted file mode 100644 index c5e00b9f8..000000000 --- a/internal/scheduling/descheduling/nova/pipeline_test.go +++ /dev/null @@ -1,370 +0,0 @@ -// Copyright SAP SE -// SPDX-License-Identifier: Apache-2.0 - -package nova - -import ( - "context" - "errors" - "reflect" - "testing" - - "github.com/cobaltcore-dev/cortex/api/v1alpha1" - "github.com/cobaltcore-dev/cortex/internal/scheduling/descheduling/nova/plugins" - "sigs.k8s.io/controller-runtime/pkg/client" -) - -// Mock implementations for testing pipeline functionality - -type mockPipelineStep struct { - decisions []plugins.Decision - runError error - initError error - initialized bool -} - -func (m *mockPipelineStep) Run() ([]plugins.Decision, error) { - if m.runError != nil { - return nil, m.runError - } - return m.decisions, nil -} - -func (m *mockPipelineStep) Init(ctx context.Context, client client.Client, step v1alpha1.StepSpec) error { - if m.initError != nil { - return m.initError - } - m.initialized = true - return nil -} - -func TestPipeline_Init(t *testing.T) { - tests := []struct { - name string - supportedSteps map[string]Step - confedSteps []v1alpha1.StepSpec - expectedSteps int - expectedError bool - }{ - { - name: "successful initialization with single step", - supportedSteps: map[string]Step{ - "test-step": &mockPipelineStep{}, - }, - confedSteps: []v1alpha1.StepSpec{{ - Impl: "test-step", - Type: v1alpha1.StepTypeDescheduler, - }}, - expectedSteps: 1, - }, - { - name: "initialization with unsupported step", - supportedSteps: map[string]Step{ - "test-step": &mockPipelineStep{}, - }, - confedSteps: []v1alpha1.StepSpec{{ - Impl: "unsupported-step", - Type: v1alpha1.StepTypeDescheduler, - }}, - expectedError: true, - }, - { - name: "initialization with step init error", - supportedSteps: map[string]Step{ - "failing-step": &mockPipelineStep{initError: errors.New("init failed")}, - }, - confedSteps: []v1alpha1.StepSpec{{ - Impl: "failing-step", - Type: v1alpha1.StepTypeDescheduler, - }}, - expectedError: true, - }, - { - name: "initialization with multiple steps", - supportedSteps: map[string]Step{ - "step1": &mockPipelineStep{}, - "step2": &mockPipelineStep{}, - }, - confedSteps: []v1alpha1.StepSpec{ - { - Impl: "step1", - Type: v1alpha1.StepTypeDescheduler, - }, - { - Impl: "step2", - Type: v1alpha1.StepTypeDescheduler, - }, - }, - expectedSteps: 2, - }, - } - - for _, tt := range tests { - t.Run(tt.name, func(t *testing.T) { - pipeline := &Pipeline{} - - err := 
pipeline.Init(t.Context(), tt.confedSteps, tt.supportedSteps) - if tt.expectedError { - if err == nil { - t.Fatalf("expected error during initialization, got none") - } - return - } - if err != nil { - t.Fatalf("Failed to initialize pipeline: %v", err) - } - - if len(pipeline.steps) != tt.expectedSteps { - t.Errorf("expected %d steps, got %d", tt.expectedSteps, len(pipeline.steps)) - } - - // Verify that successfully initialized steps are actually initialized - for _, step := range pipeline.steps { - if stepMonitor, ok := step.(StepMonitor); ok { - if mockStep, ok := stepMonitor.step.(*mockPipelineStep); ok { - if !mockStep.initialized { - t.Error("step was not properly initialized") - } - } - } - } - }) - } -} - -func TestPipeline_run(t *testing.T) { - tests := []struct { - name string - steps map[string]Step - order []string - expectedResults map[string][]plugins.Decision - }{ - { - name: "successful run with single step", - steps: map[string]Step{ - "test-step": &mockPipelineStep{ - decisions: []plugins.Decision{ - {VMID: "vm1", Reason: "test reason", Host: "host1"}, - }, - }, - }, - order: []string{"test-step"}, - expectedResults: map[string][]plugins.Decision{ - "test-step": { - {VMID: "vm1", Reason: "test reason", Host: "host1"}, - }, - }, - }, - { - name: "run with step error", - steps: map[string]Step{ - "failing-step": &mockPipelineStep{ - runError: errors.New("step failed"), - }, - }, - order: []string{"failing-step"}, - expectedResults: map[string][]plugins.Decision{}, - }, - { - name: "run with step skipped", - steps: map[string]Step{ - "skipped-step": &mockPipelineStep{ - runError: ErrStepSkipped, - }, - }, - order: []string{"skipped-step"}, - expectedResults: map[string][]plugins.Decision{}, - }, - { - name: "run with multiple steps", - steps: map[string]Step{ - "step1": &mockPipelineStep{ - decisions: []plugins.Decision{ - {VMID: "vm1", Reason: "reason1", Host: "host1"}, - }, - }, - "step2": &mockPipelineStep{ - decisions: []plugins.Decision{ - {VMID: "vm2", Reason: "reason2", Host: "host2"}, - }, - }, - }, - order: []string{"step1", "step2"}, - expectedResults: map[string][]plugins.Decision{ - "step1": { - {VMID: "vm1", Reason: "reason1", Host: "host1"}, - }, - "step2": { - {VMID: "vm2", Reason: "reason2", Host: "host2"}, - }, - }, - }, - } - - for _, tt := range tests { - t.Run(tt.name, func(t *testing.T) { - pipeline := &Pipeline{ - steps: tt.steps, - order: tt.order, - } - - results := pipeline.run() - - if !reflect.DeepEqual(results, tt.expectedResults) { - t.Errorf("expected results %v, got %v", tt.expectedResults, results) - } - }) - } -} - -func TestPipeline_combine(t *testing.T) { - tests := []struct { - name string - decisionsByStep map[string][]plugins.Decision - expectedDecisions []plugins.Decision - }{ - { - name: "single decision per VM", - decisionsByStep: map[string][]plugins.Decision{ - "step1": { - {VMID: "vm1", Reason: "reason1", Host: "host1"}, - {VMID: "vm2", Reason: "reason2", Host: "host2"}, - }, - }, - expectedDecisions: []plugins.Decision{ - {VMID: "vm1", Reason: "reason1", Host: "host1"}, - {VMID: "vm2", Reason: "reason2", Host: "host2"}, - }, - }, - { - name: "multiple decisions for same VM with same host", - decisionsByStep: map[string][]plugins.Decision{ - "step1": { - {VMID: "vm1", Reason: "reason1", Host: "host1"}, - }, - "step2": { - {VMID: "vm1", Reason: "reason2", Host: "host1"}, - }, - }, - expectedDecisions: []plugins.Decision{ - {VMID: "vm1", Reason: "multiple reasons: reason1; reason2", Host: "host1"}, - }, - }, - { - name: "multiple 
decisions for same VM with different hosts", - decisionsByStep: map[string][]plugins.Decision{ - "step1": { - {VMID: "vm1", Reason: "reason1", Host: "host1"}, - }, - "step2": { - {VMID: "vm1", Reason: "reason2", Host: "host2"}, - }, - }, - expectedDecisions: []plugins.Decision{}, // Should be skipped due to conflicting hosts - }, - { - name: "mixed scenario", - decisionsByStep: map[string][]plugins.Decision{ - "step1": { - {VMID: "vm1", Reason: "reason1", Host: "host1"}, - {VMID: "vm2", Reason: "reason2", Host: "host2"}, - }, - "step2": { - {VMID: "vm1", Reason: "reason3", Host: "host1"}, - {VMID: "vm3", Reason: "reason4", Host: "host3"}, - }, - }, - expectedDecisions: []plugins.Decision{ - {VMID: "vm1", Reason: "multiple reasons: reason1; reason3", Host: "host1"}, - {VMID: "vm2", Reason: "reason2", Host: "host2"}, - {VMID: "vm3", Reason: "reason4", Host: "host3"}, - }, - }, - { - name: "empty input", - decisionsByStep: map[string][]plugins.Decision{}, - expectedDecisions: []plugins.Decision{}, - }, - } - - for _, tt := range tests { - t.Run(tt.name, func(t *testing.T) { - pipeline := &Pipeline{} - results := pipeline.combine(tt.decisionsByStep) - - // Sort results for consistent comparison - if len(results) != len(tt.expectedDecisions) { - t.Errorf("expected %d decisions, got %d", len(tt.expectedDecisions), len(results)) - return - } - - // Create maps for easier comparison (order doesn't matter) - expectedMap := make(map[string]plugins.Decision) - for _, d := range tt.expectedDecisions { - expectedMap[d.VMID] = d - } - - resultMap := make(map[string]plugins.Decision) - for _, d := range results { - resultMap[d.VMID] = d - } - - if !reflect.DeepEqual(expectedMap, resultMap) { - t.Errorf("expected decisions %v, got %v", tt.expectedDecisions, results) - } - }) - } -} - -func TestSupportedSteps(t *testing.T) { - // Test that SupportedSteps is properly initialized - if len(supportedSteps) == 0 { - t.Error("SupportedSteps should not be empty") - } -} - -// Benchmark tests -func BenchmarkPipeline_run(b *testing.B) { - steps := map[string]Step{ - "step1": &mockPipelineStep{ - decisions: []plugins.Decision{ - {VMID: "vm1", Reason: "bench reason", Host: "host1"}, - }, - }, - "step2": &mockPipelineStep{ - decisions: []plugins.Decision{ - {VMID: "vm2", Reason: "bench reason", Host: "host2"}, - }, - }, - } - - pipeline := &Pipeline{ - steps: steps, - order: []string{"step1", "step2"}, - } - - b.ResetTimer() - for range b.N { - pipeline.run() - } -} - -func BenchmarkPipeline_combine(b *testing.B) { - decisionsByStep := map[string][]plugins.Decision{ - "step1": { - {VMID: "vm1", Reason: "reason1", Host: "host1"}, - {VMID: "vm2", Reason: "reason2", Host: "host2"}, - }, - "step2": { - {VMID: "vm1", Reason: "reason3", Host: "host1"}, - {VMID: "vm3", Reason: "reason4", Host: "host3"}, - }, - } - - pipeline := &Pipeline{} - - b.ResetTimer() - for range b.N { - pipeline.combine(decisionsByStep) - } -} diff --git a/internal/scheduling/descheduling/nova/plugins/base.go b/internal/scheduling/descheduling/nova/plugins/base.go deleted file mode 100644 index f312f402b..000000000 --- a/internal/scheduling/descheduling/nova/plugins/base.go +++ /dev/null @@ -1,41 +0,0 @@ -// Copyright SAP SE -// SPDX-License-Identifier: Apache-2.0 - -package plugins - -import ( - "context" - - "github.com/cobaltcore-dev/cortex/api/v1alpha1" - "github.com/cobaltcore-dev/cortex/pkg/conf" - "sigs.k8s.io/controller-runtime/pkg/client" -) - -// Common base for all steps that provides some functionality -// that would otherwise be 
duplicated across all steps. -type BaseStep[Opts any] struct { - // Options to pass via yaml to this step. - conf.JsonOpts[Opts] - // The kubernetes client to use. - Client client.Client -} - -// Init the step with the database and options. -func (s *BaseStep[Opts]) Init(ctx context.Context, client client.Client, step v1alpha1.StepSpec) error { - opts := conf.NewRawOptsBytes(step.Opts.Raw) - if err := s.Load(opts); err != nil { - return err - } - - s.Client = client - return nil -} - -type Decision struct { - // Get the VM ID for which this decision applies. - VMID string - // Get a human-readable reason for this decision. - Reason string - // Get the compute host where the vm should be migrated away from. - Host string -} diff --git a/internal/scheduling/descheduling/nova/plugins/base_test.go b/internal/scheduling/descheduling/nova/plugins/base_test.go deleted file mode 100644 index f646523bb..000000000 --- a/internal/scheduling/descheduling/nova/plugins/base_test.go +++ /dev/null @@ -1,43 +0,0 @@ -// Copyright SAP SE -// SPDX-License-Identifier: Apache-2.0 - -package plugins - -import ( - "testing" - - "github.com/cobaltcore-dev/cortex/api/v1alpha1" - "k8s.io/apimachinery/pkg/runtime" - "sigs.k8s.io/controller-runtime/pkg/client/fake" -) - -type MockOptions struct { - Option1 string `json:"option1"` - Option2 int `json:"option2"` -} - -func (o MockOptions) Validate() error { - return nil -} - -func TestBaseStep_Init(t *testing.T) { - step := BaseStep[MockOptions]{} - cl := fake.NewClientBuilder().Build() - err := step.Init(t.Context(), cl, v1alpha1.StepSpec{ - Opts: runtime.RawExtension{Raw: []byte(`{ - "option1": "value1", - "option2": 2 - }`)}, - }) - if err != nil { - t.Fatalf("expected no error, got %v", err) - } - - if step.Options.Option1 != "value1" { - t.Errorf("expected Option1 to be 'value1', got %s", step.Options.Option1) - } - - if step.Options.Option2 != 2 { - t.Errorf("expected Option2 to be 2, got %d", step.Options.Option2) - } -} diff --git a/internal/scheduling/descheduling/nova/step.go b/internal/scheduling/descheduling/nova/step.go deleted file mode 100644 index 7c53bc991..000000000 --- a/internal/scheduling/descheduling/nova/step.go +++ /dev/null @@ -1,25 +0,0 @@ -// Copyright SAP SE -// SPDX-License-Identifier: Apache-2.0 - -package nova - -import ( - "context" - "errors" - - "github.com/cobaltcore-dev/cortex/api/v1alpha1" - "github.com/cobaltcore-dev/cortex/internal/scheduling/descheduling/nova/plugins" - "sigs.k8s.io/controller-runtime/pkg/client" -) - -var ( - // This error is returned from the step at any time when the step should be skipped. - ErrStepSkipped = errors.New("step skipped") -) - -type Step interface { - // Get the VMs on their current hosts that should be considered for descheduling. - Run() ([]plugins.Decision, error) - // Configure the step with a database and options. 
- Init(ctx context.Context, client client.Client, step v1alpha1.StepSpec) error -} diff --git a/internal/scheduling/descheduling/nova/step_test.go b/internal/scheduling/descheduling/nova/step_test.go deleted file mode 100644 index 2c60931c4..000000000 --- a/internal/scheduling/descheduling/nova/step_test.go +++ /dev/null @@ -1,58 +0,0 @@ -// Copyright SAP SE -// SPDX-License-Identifier: Apache-2.0 - -package nova - -import ( - "testing" - - "github.com/cobaltcore-dev/cortex/pkg/conf" - "github.com/cobaltcore-dev/cortex/pkg/db" - "sigs.k8s.io/controller-runtime/pkg/client" -) - -type MockOptions struct { - Option1 string `json:"option1"` - Option2 int `json:"option2"` -} - -func (o MockOptions) Validate() error { - return nil -} - -type BaseStep struct { - Options MockOptions - DB *db.DB - Client client.Client -} - -func (s *BaseStep) Init(db *db.DB, client client.Client, opts conf.RawOpts) error { - s.DB = db - s.Client = client - // Use the actual unmarshal logic from conf.RawOpts - if err := opts.Unmarshal(&s.Options); err != nil { - return err - } - return s.Options.Validate() -} - -func TestBaseStep_Init(t *testing.T) { - opts := conf.NewRawOpts(`{ - "option1": "value1", - "option2": 2 - }`) - - step := &BaseStep{} - err := step.Init(nil, nil, opts) - if err != nil { - t.Fatalf("expected no error, got %v", err) - } - - if step.Options.Option1 != "value1" { - t.Errorf("expected Option1 to be 'value1', got %s", step.Options.Option1) - } - - if step.Options.Option2 != 2 { - t.Errorf("expected Option2 to be 2, got %d", step.Options.Option2) - } -} diff --git a/internal/scheduling/descheduling/nova/supported_steps.go b/internal/scheduling/descheduling/nova/supported_steps.go deleted file mode 100644 index fc9fb0d29..000000000 --- a/internal/scheduling/descheduling/nova/supported_steps.go +++ /dev/null @@ -1,12 +0,0 @@ -// Copyright SAP SE -// SPDX-License-Identifier: Apache-2.0 - -package nova - -import "github.com/cobaltcore-dev/cortex/internal/scheduling/descheduling/nova/plugins/kvm" - -// Configuration of steps supported by the descheduler. -// The steps actually used by the scheduler are defined through the configuration file. 
-var supportedSteps = map[string]Step{ - "avoid_high_steal_pct": &kvm.AvoidHighStealPctStep{}, -} diff --git a/internal/scheduling/decisions/explanation/controller.go b/internal/scheduling/explanation/controller.go similarity index 100% rename from internal/scheduling/decisions/explanation/controller.go rename to internal/scheduling/explanation/controller.go diff --git a/internal/scheduling/decisions/explanation/controller_test.go b/internal/scheduling/explanation/controller_test.go similarity index 100% rename from internal/scheduling/decisions/explanation/controller_test.go rename to internal/scheduling/explanation/controller_test.go diff --git a/internal/scheduling/decisions/explanation/explainer.go b/internal/scheduling/explanation/explainer.go similarity index 100% rename from internal/scheduling/decisions/explanation/explainer.go rename to internal/scheduling/explanation/explainer.go diff --git a/internal/scheduling/decisions/explanation/explainer_test.go b/internal/scheduling/explanation/explainer_test.go similarity index 100% rename from internal/scheduling/decisions/explanation/explainer_test.go rename to internal/scheduling/explanation/explainer_test.go diff --git a/internal/scheduling/decisions/explanation/templates.go b/internal/scheduling/explanation/templates.go similarity index 100% rename from internal/scheduling/decisions/explanation/templates.go rename to internal/scheduling/explanation/templates.go diff --git a/internal/scheduling/decisions/explanation/types.go b/internal/scheduling/explanation/types.go similarity index 100% rename from internal/scheduling/decisions/explanation/types.go rename to internal/scheduling/explanation/types.go diff --git a/internal/scheduling/lib/activation.go b/internal/scheduling/lib/activation.go index b74f965c2..78704e7e5 100644 --- a/internal/scheduling/lib/activation.go +++ b/internal/scheduling/lib/activation.go @@ -18,14 +18,14 @@ func (m *ActivationFunction) Norm(activation float64) float64 { // Apply the activation function to the weights map. // All hosts that are not in the activations map are removed. -func (m *ActivationFunction) Apply(in, activations map[string]float64) map[string]float64 { +func (m *ActivationFunction) Apply(in, activations map[string]float64, multiplier float64) map[string]float64 { for host, prevWeight := range in { // Remove hosts that are not in the weights map. if _, ok := activations[host]; !ok { delete(in, host) } else { // Apply the activation from the step. 
- (in)[host] = prevWeight + math.Tanh(activations[host]) + (in)[host] = prevWeight + multiplier*math.Tanh(activations[host]) } } return in diff --git a/internal/scheduling/lib/activation_test.go b/internal/scheduling/lib/activation_test.go index 018890aad..408f1c12f 100644 --- a/internal/scheduling/lib/activation_test.go +++ b/internal/scheduling/lib/activation_test.go @@ -67,7 +67,7 @@ func TestActivationFunction_Apply(t *testing.T) { for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { - result := af.Apply(tt.in, tt.activations) + result := af.Apply(tt.in, tt.activations, 1.0) if len(result) != len(tt.expected) { t.Fatalf("expected %d hosts, got %d", len(tt.expected), len(result)) } diff --git a/internal/scheduling/lib/detector.go b/internal/scheduling/lib/detector.go new file mode 100644 index 000000000..752c637cd --- /dev/null +++ b/internal/scheduling/lib/detector.go @@ -0,0 +1,79 @@ +// Copyright SAP SE +// SPDX-License-Identifier: Apache-2.0 + +package lib + +import ( + "context" + "errors" + "fmt" + + "github.com/cobaltcore-dev/cortex/api/v1alpha1" + "github.com/cobaltcore-dev/cortex/pkg/conf" + corev1 "k8s.io/api/core/v1" + "k8s.io/apimachinery/pkg/api/meta" + "sigs.k8s.io/controller-runtime/pkg/client" +) + +type Detection interface { + // Get the ID of the detected resource. + GetResource() string + // Get the host on which this resource is currently located. + GetHost() string + // Get the reason for the detection. + GetReason() string + // Set the reason for the detection. + WithReason(reason string) Detection +} + +type Detector[DetectionType Detection] interface { + // Detect resources such as VMs on their current hosts that should be + // considered for descheduling. + Run() ([]DetectionType, error) + // Configure the step with a database and options. + Init(ctx context.Context, client client.Client, step v1alpha1.DetectorSpec) error +} + +// Common base for all descheduler steps that provides some functionality +// that would otherwise be duplicated across all steps. +type BaseDetector[Opts any] struct { + // Options to pass via yaml to this step. + conf.JsonOpts[Opts] + // The kubernetes client to use. + Client client.Client +} + +// Init the step with the database and options. +func (d *BaseDetector[Opts]) Init(ctx context.Context, client client.Client, step v1alpha1.DetectorSpec) error { + d.Client = client + + opts := conf.NewRawOptsBytes(step.Params.Raw) + if err := d.Load(opts); err != nil { + return err + } + return nil +} + +// Check if all knowledges are ready, and if not, return an error indicating why not. +func (d *BaseDetector[Opts]) CheckKnowledges(ctx context.Context, kns ...corev1.ObjectReference) error { + if d.Client == nil { + return errors.New("kubernetes client not initialized") + } + for _, objRef := range kns { + knowledge := &v1alpha1.Knowledge{} + if err := d.Client.Get(ctx, client.ObjectKey{ + Name: objRef.Name, + Namespace: objRef.Namespace, + }, knowledge); err != nil { + return fmt.Errorf("failed to get knowledge %s: %w", objRef.Name, err) + } + // Check if the knowledge status conditions indicate an error. 
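Usage note for the Apply change above: a minimal sketch of how a caller might pass the new multiplier argument. The variable names and the 0.5 value are assumptions for illustration only; passing 1.0 reproduces the previous behavior, as the updated activation test does.

af := ActivationFunction{}
weights := map[string]float64{"host1": 0.0, "host2": 0.0}
activations := map[string]float64{"host1": -1.0} // "host2" is absent from activations and will be removed
weights = af.Apply(weights, activations, 0.5)
// weights["host1"] is now 0.5*math.Tanh(-1.0); "host2" has been deleted from the map.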
+ if meta.IsStatusConditionFalse(knowledge.Status.Conditions, v1alpha1.KnowledgeConditionReady) { + return fmt.Errorf("knowledge %s not ready", objRef.Name) + } + if knowledge.Status.RawLength == 0 { + return fmt.Errorf("knowledge %s not ready, no data available", objRef.Name) + } + } + return nil +} diff --git a/internal/scheduling/lib/detector_cycle_breaker.go b/internal/scheduling/lib/detector_cycle_breaker.go new file mode 100644 index 000000000..1a9d01509 --- /dev/null +++ b/internal/scheduling/lib/detector_cycle_breaker.go @@ -0,0 +1,18 @@ +// Copyright SAP SE +// SPDX-License-Identifier: Apache-2.0 + +package lib + +import ( + "context" + + "github.com/cobaltcore-dev/cortex/pkg/conf" + "sigs.k8s.io/controller-runtime/pkg/client" +) + +type DetectorCycleBreaker[DetectionType Detection] interface { + // Initialize the cycle detector with needed clients. + Init(ctx context.Context, client client.Client, conf conf.Config) error + // Filter descheduling decisions to avoid cycles. + Filter(ctx context.Context, decisions []DetectionType) ([]DetectionType, error) +} diff --git a/internal/scheduling/descheduling/nova/monitor.go b/internal/scheduling/lib/detector_monitor.go similarity index 56% rename from internal/scheduling/descheduling/nova/monitor.go rename to internal/scheduling/lib/detector_monitor.go index 239c2f921..cb307d410 100644 --- a/internal/scheduling/descheduling/nova/monitor.go +++ b/internal/scheduling/lib/detector_monitor.go @@ -1,79 +1,69 @@ // Copyright SAP SE // SPDX-License-Identifier: Apache-2.0 -package nova +package lib import ( "context" "github.com/cobaltcore-dev/cortex/api/v1alpha1" - "github.com/cobaltcore-dev/cortex/internal/scheduling/descheduling/nova/plugins" "github.com/prometheus/client_golang/prometheus" "sigs.k8s.io/controller-runtime/pkg/client" ) -type Monitor struct { +type DetectorPipelineMonitor struct { // A histogram to measure how long each step takes to run. stepRunTimer *prometheus.HistogramVec // A counter to measure how many vm ids are selected for descheduling by each step. stepDeschedulingCounter *prometheus.GaugeVec // A histogram to measure how long the pipeline takes to run in total. pipelineRunTimer prometheus.Histogram - // A histogram to measure how long it takes to deschedule a VM. - deschedulingRunTimer *prometheus.HistogramVec // The name of the pipeline being monitored. 
PipelineName string } -func NewPipelineMonitor() Monitor { - return Monitor{ +func NewDetectorPipelineMonitor() DetectorPipelineMonitor { + return DetectorPipelineMonitor{ stepRunTimer: prometheus.NewHistogramVec(prometheus.HistogramOpts{ - Name: "cortex_descheduler_pipeline_step_run_duration_seconds", - Help: "Duration of descheduler pipeline step run", + Name: "cortex_detector_pipeline_step_run_duration_seconds", + Help: "Duration of detector pipeline step run", Buckets: prometheus.ExponentialBuckets(0.001, 2, 21), // 0.001s to ~1048s in 21 buckets }, []string{"step"}), stepDeschedulingCounter: prometheus.NewGaugeVec(prometheus.GaugeOpts{ - Name: "cortex_descheduler_pipeline_step_vms_descheduled", - Help: "Number of vms descheduled by a descheduler pipeline step", + Name: "cortex_detector_pipeline_step_detections", + Help: "Number of resources detected by a detector pipeline step", }, []string{"step"}), pipelineRunTimer: prometheus.NewHistogram(prometheus.HistogramOpts{ - Name: "cortex_descheduler_pipeline_run_duration_seconds", + Name: "cortex_detector_pipeline_run_duration_seconds", Help: "Duration of descheduler pipeline run", Buckets: prometheus.DefBuckets, }), - deschedulingRunTimer: prometheus.NewHistogramVec(prometheus.HistogramOpts{ - Name: "cortex_descheduler_pipeline_vm_descheduling_duration_seconds", - Help: "Duration of descheduling a VM in the descheduler pipeline", - Buckets: prometheus.ExponentialBuckets(0.001, 2, 21), // 0.001s to ~1048s in 21 buckets - }, []string{"error", "skipped", "source_host", "target_host", "vm_id"}), } } // Get a copied pipeline monitor with the name set, after binding the metrics. -func (m Monitor) SubPipeline(name string) Monitor { +func (m DetectorPipelineMonitor) SubPipeline(name string) DetectorPipelineMonitor { cp := m cp.PipelineName = name return cp } -func (m *Monitor) Describe(ch chan<- *prometheus.Desc) { +func (m *DetectorPipelineMonitor) Describe(ch chan<- *prometheus.Desc) { m.stepRunTimer.Describe(ch) m.stepDeschedulingCounter.Describe(ch) m.pipelineRunTimer.Describe(ch) - m.deschedulingRunTimer.Describe(ch) } -func (m *Monitor) Collect(ch chan<- prometheus.Metric) { +func (m *DetectorPipelineMonitor) Collect(ch chan<- prometheus.Metric) { m.stepRunTimer.Collect(ch) m.stepDeschedulingCounter.Collect(ch) m.pipelineRunTimer.Collect(ch) - m.deschedulingRunTimer.Collect(ch) } -type StepMonitor struct { +type DetectorMonitor[DetectionType Detection] struct { // The step being monitored. - step Step + step Detector[DetectionType] // The name of this step. stepName string // A timer to measure how long the step takes to run. @@ -82,42 +72,49 @@ type StepMonitor struct { descheduledCounter prometheus.Counter } -// Monitor a step by wrapping it with a StepMonitor. -func monitorStep(step Step, conf v1alpha1.StepSpec, monitor Monitor) StepMonitor { - name := conf.Impl +// Monitor a descheduler step by wrapping it with a DetectorMonitor. 
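
A possible wiring sketch for the renamed monitor (the registry handling and pipeline name are assumptions, not part of this change): Describe and Collect are implemented on the pointer receiver, so the monitor is registered once and per-pipeline copies obtained via SubPipeline share the same underlying metric vectors.

package exampledetectors

import (
	"github.com/cobaltcore-dev/cortex/internal/scheduling/lib"
	"github.com/prometheus/client_golang/prometheus"
)

// Register the shared metrics once; SubPipeline hands out copies that reuse
// the same metric vectors, so no per-pipeline registration is needed.
func newNovaDetectorMonitor(reg prometheus.Registerer) (lib.DetectorPipelineMonitor, error) {
	monitor := lib.NewDetectorPipelineMonitor()
	if err := reg.Register(&monitor); err != nil {
		return lib.DetectorPipelineMonitor{}, err
	}
	return monitor.SubPipeline("nova"), nil
}
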
+func monitorDetector[DetectionType Detection]( + step Detector[DetectionType], + conf v1alpha1.DetectorSpec, + monitor DetectorPipelineMonitor, +) DetectorMonitor[DetectionType] { + var runTimer prometheus.Observer if monitor.stepRunTimer != nil { - runTimer = monitor.stepRunTimer.WithLabelValues(name) + runTimer = monitor.stepRunTimer.WithLabelValues(conf.Name) } var descheduledCounter prometheus.Counter if monitor.stepDeschedulingCounter != nil { - descheduledCounter = monitor.stepDeschedulingCounter.WithLabelValues(name) + descheduledCounter = monitor.stepDeschedulingCounter.WithLabelValues(conf.Name) } - return StepMonitor{ + return DetectorMonitor[DetectionType]{ step: step, - stepName: name, + stepName: conf.Name, runTimer: runTimer, descheduledCounter: descheduledCounter, } } // Initialize the step with the database and options. -func (m StepMonitor) Init(ctx context.Context, client client.Client, step v1alpha1.StepSpec) error { +func (m DetectorMonitor[DetectionType]) Init( + ctx context.Context, client client.Client, step v1alpha1.DetectorSpec, +) error { + return m.step.Init(ctx, client, step) } // Run the step and measure its execution time. -func (m StepMonitor) Run() ([]plugins.Decision, error) { +func (m DetectorMonitor[DetectionType]) Run() ([]DetectionType, error) { if m.runTimer != nil { timer := prometheus.NewTimer(m.runTimer) defer timer.ObserveDuration() } - vmsToDeschedule, err := m.step.Run() + detections, err := m.step.Run() if err != nil { return nil, err } if m.descheduledCounter != nil { - m.descheduledCounter.Add(float64(len(vmsToDeschedule))) + m.descheduledCounter.Add(float64(len(detections))) } - return vmsToDeschedule, nil + return detections, nil } diff --git a/internal/scheduling/descheduling/nova/monitor_test.go b/internal/scheduling/lib/detector_monitor_test.go similarity index 56% rename from internal/scheduling/descheduling/nova/monitor_test.go rename to internal/scheduling/lib/detector_monitor_test.go index 7c665af06..008d9411f 100644 --- a/internal/scheduling/descheduling/nova/monitor_test.go +++ b/internal/scheduling/lib/detector_monitor_test.go @@ -1,7 +1,7 @@ // Copyright SAP SE // SPDX-License-Identifier: Apache-2.0 -package nova +package lib import ( "context" @@ -9,15 +9,14 @@ import ( "testing" "github.com/cobaltcore-dev/cortex/api/v1alpha1" - "github.com/cobaltcore-dev/cortex/internal/scheduling/descheduling/nova/plugins" "github.com/prometheus/client_golang/prometheus" "github.com/prometheus/client_golang/prometheus/testutil" "sigs.k8s.io/controller-runtime/pkg/client" "sigs.k8s.io/controller-runtime/pkg/client/fake" ) -func TestNewPipelineMonitor(t *testing.T) { - monitor := NewPipelineMonitor() +func TestNewDetectorPipelineMonitor(t *testing.T) { + monitor := NewDetectorPipelineMonitor() if monitor.stepRunTimer == nil { t.Error("expected stepRunTimer to be initialized") @@ -28,13 +27,10 @@ func TestNewPipelineMonitor(t *testing.T) { if monitor.pipelineRunTimer == nil { t.Error("expected pipelineRunTimer to be initialized") } - if monitor.deschedulingRunTimer == nil { - t.Error("expected deschedulingRunTimer to be initialized") - } } func TestMonitor_Describe(t *testing.T) { - monitor := NewPipelineMonitor() + monitor := NewDetectorPipelineMonitor() descs := make(chan *prometheus.Desc, 10) go func() { @@ -53,7 +49,7 @@ func TestMonitor_Describe(t *testing.T) { } func TestMonitor_Collect(t *testing.T) { - monitor := NewPipelineMonitor() + monitor := NewDetectorPipelineMonitor() metrics := make(chan prometheus.Metric, 10) go func() { @@ 
-73,33 +69,33 @@ func TestMonitor_Collect(t *testing.T) { } type mockMonitorStep struct { - decisions []plugins.Decision + decisions []mockDetection initError error runError error initCalled bool runCalled bool } -func (m *mockMonitorStep) Init(ctx context.Context, client client.Client, step v1alpha1.StepSpec) error { +func (m *mockMonitorStep) Init(ctx context.Context, client client.Client, step v1alpha1.DetectorSpec) error { m.initCalled = true return m.initError } -func (m *mockMonitorStep) Run() ([]plugins.Decision, error) { +func (m *mockMonitorStep) Run() ([]mockDetection, error) { m.runCalled = true return m.decisions, m.runError } func TestMonitorStep(t *testing.T) { - monitor := NewPipelineMonitor() + monitor := NewDetectorPipelineMonitor() step := &mockMonitorStep{ - decisions: []plugins.Decision{ - {VMID: "vm1", Reason: "test"}, + decisions: []mockDetection{ + {resource: "vm1", reason: "test"}, }, } - conf := v1alpha1.StepSpec{Impl: "test-step"} + conf := v1alpha1.DetectorSpec{Name: "test-step"} - monitoredStep := monitorStep(step, conf, monitor) + monitoredStep := monitorDetector(step, conf, monitor) if monitoredStep.step != step { t.Error("expected wrapped step to be preserved") @@ -115,11 +111,11 @@ func TestMonitorStep(t *testing.T) { } func TestStepMonitor_Init(t *testing.T) { - monitor := NewPipelineMonitor() + monitor := NewDetectorPipelineMonitor() step := &mockMonitorStep{} - conf := v1alpha1.StepSpec{Impl: "test-step"} + conf := v1alpha1.DetectorSpec{Name: "test-step"} - monitoredStep := monitorStep(step, conf, monitor) + monitoredStep := monitorDetector(step, conf, monitor) client := fake.NewClientBuilder().Build() err := monitoredStep.Init(context.Background(), client, conf) @@ -134,13 +130,13 @@ func TestStepMonitor_Init(t *testing.T) { } func TestStepMonitor_Init_WithError(t *testing.T) { - monitor := NewPipelineMonitor() + monitor := NewDetectorPipelineMonitor() expectedErr := errors.New("init failed") step := &mockMonitorStep{ initError: expectedErr, } - conf := v1alpha1.StepSpec{Impl: "test-step"} - monitoredStep := monitorStep(step, conf, monitor) + conf := v1alpha1.DetectorSpec{Name: "test-step"} + monitoredStep := monitorDetector(step, conf, monitor) client := fake.NewClientBuilder().Build() err := monitoredStep.Init(context.Background(), client, conf) @@ -151,16 +147,16 @@ func TestStepMonitor_Init_WithError(t *testing.T) { } func TestStepMonitor_Run(t *testing.T) { - monitor := NewPipelineMonitor() - decisions := []plugins.Decision{ - {VMID: "vm1", Reason: "test1"}, - {VMID: "vm2", Reason: "test2"}, + monitor := NewDetectorPipelineMonitor() + decisions := []mockDetection{ + {resource: "vm1", reason: "test1"}, + {resource: "vm2", reason: "test2"}, } step := &mockMonitorStep{ decisions: decisions, } - conf := v1alpha1.StepSpec{Impl: "test-step"} - monitoredStep := monitorStep(step, conf, monitor) + conf := v1alpha1.DetectorSpec{Name: "test-step"} + monitoredStep := monitorDetector(step, conf, monitor) result, err := monitoredStep.Run() @@ -184,13 +180,13 @@ func TestStepMonitor_Run(t *testing.T) { } func TestStepMonitor_Run_WithError(t *testing.T) { - monitor := NewPipelineMonitor() + monitor := NewDetectorPipelineMonitor() expectedErr := errors.New("run failed") step := &mockMonitorStep{ runError: expectedErr, } - conf := v1alpha1.StepSpec{Impl: "test-step"} - monitoredStep := monitorStep(step, conf, monitor) + conf := v1alpha1.DetectorSpec{Name: "test-step"} + monitoredStep := monitorDetector(step, conf, monitor) result, err := monitoredStep.Run() @@ 
-210,12 +206,12 @@ func TestStepMonitor_Run_WithError(t *testing.T) { } func TestStepMonitor_Run_EmptyResult(t *testing.T) { - monitor := NewPipelineMonitor() + monitor := NewDetectorPipelineMonitor() step := &mockMonitorStep{ - decisions: []plugins.Decision{}, // Empty slice + decisions: []mockDetection{}, // Empty slice } - conf := v1alpha1.StepSpec{Impl: "test-step"} - monitoredStep := monitorStep(step, conf, monitor) + conf := v1alpha1.DetectorSpec{Name: "test-step"} + monitoredStep := monitorDetector(step, conf, monitor) result, err := monitoredStep.Run() @@ -236,14 +232,14 @@ func TestStepMonitor_Run_EmptyResult(t *testing.T) { func TestMonitorStep_WithNilMonitor(t *testing.T) { // Test with empty monitor (nil fields) - monitor := Monitor{} + monitor := DetectorPipelineMonitor{} step := &mockMonitorStep{ - decisions: []plugins.Decision{ - {VMID: "vm1", Reason: "test"}, + decisions: []mockDetection{ + {resource: "vm1", reason: "test"}, }, } - conf := v1alpha1.StepSpec{Impl: "test-step"} - monitoredStep := monitorStep(step, conf, monitor) + conf := v1alpha1.DetectorSpec{Name: "test-step"} + monitoredStep := monitorDetector(step, conf, monitor) // Should not panic with nil timers/counters result, err := monitoredStep.Run() @@ -260,3 +256,65 @@ func TestMonitorStep_WithNilMonitor(t *testing.T) { t.Error("expected Run to be called on wrapped step") } } + +func TestDetectorPipelineMonitor_SubPipeline(t *testing.T) { + tests := []struct { + name string + originalName string + newPipelineName string + expectedOriginal string + expectedNew string + }{ + { + name: "creates copy with new name", + originalName: "original-pipeline", + newPipelineName: "new-pipeline", + expectedOriginal: "original-pipeline", + expectedNew: "new-pipeline", + }, + { + name: "works with empty original name", + originalName: "", + newPipelineName: "new-pipeline", + expectedOriginal: "", + expectedNew: "new-pipeline", + }, + { + name: "works with empty new name", + originalName: "original-pipeline", + newPipelineName: "", + expectedOriginal: "original-pipeline", + expectedNew: "", + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + original := NewDetectorPipelineMonitor() + original.PipelineName = tt.originalName + + copied := original.SubPipeline(tt.newPipelineName) + + // Check that original is unchanged + if original.PipelineName != tt.expectedOriginal { + t.Errorf("original pipeline name changed, expected %s, got %s", tt.expectedOriginal, original.PipelineName) + } + + // Check that copy has new name + if copied.PipelineName != tt.expectedNew { + t.Errorf("copied pipeline name incorrect, expected %s, got %s", tt.expectedNew, copied.PipelineName) + } + + // Verify that the metrics are shared (same pointers) + if copied.stepRunTimer != original.stepRunTimer { + t.Error("expected stepRunTimer to be shared between original and copy") + } + if copied.stepDeschedulingCounter != original.stepDeschedulingCounter { + t.Error("expected stepDeschedulingCounter to be shared between original and copy") + } + if copied.pipelineRunTimer != original.pipelineRunTimer { + t.Error("expected pipelineRunTimer to be shared between original and copy") + } + }) + } +} diff --git a/internal/scheduling/lib/detector_pipeline.go b/internal/scheduling/lib/detector_pipeline.go new file mode 100644 index 000000000..a029776fc --- /dev/null +++ b/internal/scheduling/lib/detector_pipeline.go @@ -0,0 +1,149 @@ +// Copyright SAP SE +// SPDX-License-Identifier: Apache-2.0 + +package lib + +import ( + "context" + "errors" + 
"log/slog" + "slices" + "strings" + "sync" + + "github.com/cobaltcore-dev/cortex/api/v1alpha1" + "github.com/prometheus/client_golang/prometheus" + "sigs.k8s.io/controller-runtime/pkg/client" +) + +type DetectorPipeline[DetectionType Detection] struct { + // Kubernetes client to create descheduling resources. + client.Client + // Cycle detector to avoid cycles in descheduling. + DetectorCycleBreaker DetectorCycleBreaker[DetectionType] + // Monitor to use for tracking the pipeline. + Monitor DetectorPipelineMonitor + + // The order in which scheduler steps are applied, by their step name. + order []string + // The steps by their name. + steps map[string]Detector[DetectionType] +} + +func (p *DetectorPipeline[DetectionType]) Init( + ctx context.Context, + confedSteps []v1alpha1.DetectorSpec, + supportedSteps map[string]Detector[DetectionType], +) (detectorErrs map[string]error) { + + p.order = []string{} + // Load all steps from the configuration. + p.steps = make(map[string]Detector[DetectionType], len(confedSteps)) + detectorErrs = make(map[string]error) + for _, stepConf := range confedSteps { + step, ok := supportedSteps[stepConf.Name] + if !ok { + detectorErrs[stepConf.Name] = errors.New("descheduler: unsupported step name: " + stepConf.Name) + continue + } + step = monitorDetector(step, stepConf, p.Monitor) + if err := step.Init(ctx, p.Client, stepConf); err != nil { + detectorErrs[stepConf.Name] = errors.New("descheduler: failed to initialize step " + stepConf.Name + ": " + err.Error()) + continue + } + p.steps[stepConf.Name] = step + p.order = append(p.order, stepConf.Name) + slog.Info("descheduler: added step", "name", stepConf.Name) + } + return detectorErrs +} + +// Execute the descheduler steps in parallel and collect the decisions made by +// each step. +func (p *DetectorPipeline[DetectionType]) Run() map[string][]DetectionType { + if p.Monitor.pipelineRunTimer != nil { + timer := prometheus.NewTimer(p.Monitor.pipelineRunTimer) + defer timer.ObserveDuration() + } + var lock sync.Mutex + decisionsByStep := map[string][]DetectionType{} + var wg sync.WaitGroup + for stepName, step := range p.steps { + wg.Go(func() { + slog.Info("descheduler: running step") + decisions, err := step.Run() + if errors.Is(err, ErrStepSkipped) { + slog.Info("descheduler: step skipped") + return + } + if err != nil { + slog.Error("descheduler: failed to run step", "error", err) + return + } + slog.Info("descheduler: finished step") + lock.Lock() + defer lock.Unlock() + decisionsByStep[stepName] = decisions + }) + } + wg.Wait() + return decisionsByStep +} + +// Combine the decisions made by each step into a single list of resources to deschedule. +func (p *DetectorPipeline[DetectionType]) Combine(decisionsByStep map[string][]DetectionType) []DetectionType { + // Order the step names to have a consistent order of processing. + stepNames := make([]string, 0, len(decisionsByStep)) + for stepName := range decisionsByStep { + stepNames = append(stepNames, stepName) + } + slices.Sort(stepNames) + // If there are more than one decision for the same resource, we need to combine them. 
+ decisionsByResource := make(map[string][]DetectionType) + for _, stepName := range stepNames { + decisions := decisionsByStep[stepName] + for _, decision := range decisions { + decisionsByResource[decision.GetResource()] = append( + decisionsByResource[decision.GetResource()], decision, + ) + } + } + + combinedDecisions := make([]DetectionType, 0, len(decisionsByResource)) + for resource, decisions := range decisionsByResource { + if len(decisions) == 0 { + continue + } + if len(decisions) == 1 { + combinedDecisions = append(combinedDecisions, decisions[0]) + continue + } + // All hosts should be the same for the same resource. + host := decisions[0].GetHost() + sameHost := true + for _, decision := range decisions[1:] { + if decision.GetHost() != host { + sameHost = false + break + } + } + if !sameHost { + slog.Error("descheduler: conflicting hosts for combined decisions", "resource", resource, "decisions", decisions) + continue + } + var reasonBuilder strings.Builder + reasonBuilder.WriteString("multiple reasons: ") + for i, decision := range decisions { + if i > 0 { + reasonBuilder.WriteString("; ") + } + reasonBuilder.WriteString(decision.GetReason()) + } + mergedDecision := decisions[0] + mergedDecision = mergedDecision.WithReason(reasonBuilder.String()).(DetectionType) + combinedDecisions = append(combinedDecisions, mergedDecision) + } + + slog.Info("descheduler: combined decisions", "combined", combinedDecisions) + return combinedDecisions +} diff --git a/internal/scheduling/lib/detector_pipeline_test.go b/internal/scheduling/lib/detector_pipeline_test.go new file mode 100644 index 000000000..4d1857f1f --- /dev/null +++ b/internal/scheduling/lib/detector_pipeline_test.go @@ -0,0 +1,361 @@ +// Copyright SAP SE +// SPDX-License-Identifier: Apache-2.0 + +package lib + +import ( + "context" + "errors" + "strings" + "testing" + + "github.com/cobaltcore-dev/cortex/api/v1alpha1" + "k8s.io/apimachinery/pkg/runtime" + "sigs.k8s.io/controller-runtime/pkg/client" + "sigs.k8s.io/controller-runtime/pkg/client/fake" +) + +// mockDetectorStep implements Detector[mockDetection] +type mockDetectorStep struct { + decisions []mockDetection + initErr error + runErr error +} + +func (m *mockDetectorStep) Init(ctx context.Context, client client.Client, step v1alpha1.DetectorSpec) error { + return m.initErr +} + +func (m *mockDetectorStep) Run() ([]mockDetection, error) { + return m.decisions, m.runErr +} + +func TestDetectorPipeline_Init(t *testing.T) { + tests := []struct { + name string + confedSteps []v1alpha1.DetectorSpec + supportedSteps map[string]Detector[mockDetection] + expectNonCritical bool + expectedStepsCount int + }{ + { + name: "successful init with one step", + confedSteps: []v1alpha1.DetectorSpec{ + {Name: "step1", Params: runtime.RawExtension{Raw: []byte("{}")}}, + }, + supportedSteps: map[string]Detector[mockDetection]{ + "step1": &mockDetectorStep{}, + }, + expectNonCritical: false, + expectedStepsCount: 1, + }, + { + name: "successful init with multiple steps", + confedSteps: []v1alpha1.DetectorSpec{ + {Name: "step1", Params: runtime.RawExtension{Raw: []byte("{}")}}, + {Name: "step2", Params: runtime.RawExtension{Raw: []byte("{}")}}, + }, + supportedSteps: map[string]Detector[mockDetection]{ + "step1": &mockDetectorStep{}, + "step2": &mockDetectorStep{}, + }, + expectNonCritical: false, + expectedStepsCount: 2, + }, + { + name: "unsupported step returns non-critical error", + confedSteps: []v1alpha1.DetectorSpec{ + {Name: "unsupported_step", Params: runtime.RawExtension{Raw: 
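
As a usage sketch of the Combine method above (the detection type and the second step name are made up; only avoid_high_steal_pct appears elsewhere in the tree): two steps flagging the same VM on the same host collapse into one decision with a merged reason, while detections that disagree on the host would be dropped with an error log.

package exampledetectors

import "github.com/cobaltcore-dev/cortex/internal/scheduling/lib"

// det is a minimal Detection implementation for this sketch.
type det struct{ id, host, reason string }

func (d det) GetResource() string               { return d.id }
func (d det) GetHost() string                   { return d.host }
func (d det) GetReason() string                 { return d.reason }
func (d det) WithReason(r string) lib.Detection { d.reason = r; return d }

func combinedExample() []det {
	p := &lib.DetectorPipeline[det]{}
	// Expected: a single decision for vm1 with the reason
	// "multiple reasons: steal time above threshold; noisy neighbour".
	return p.Combine(map[string][]det{
		"avoid_high_steal_pct": {{id: "vm1", host: "host1", reason: "steal time above threshold"}},
		"noisy_neighbour":      {{id: "vm1", host: "host1", reason: "noisy neighbour"}},
	})
}
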
[]byte("{}")}}, + }, + supportedSteps: map[string]Detector[mockDetection]{}, + expectNonCritical: true, + expectedStepsCount: 0, + }, + { + name: "step init error returns non-critical error", + confedSteps: []v1alpha1.DetectorSpec{ + {Name: "failing_step", Params: runtime.RawExtension{Raw: []byte("{}")}}, + }, + supportedSteps: map[string]Detector[mockDetection]{ + "failing_step": &mockDetectorStep{initErr: errors.New("init failed")}, + }, + expectNonCritical: true, + expectedStepsCount: 0, + }, + { + name: "empty configuration", + confedSteps: []v1alpha1.DetectorSpec{}, + supportedSteps: map[string]Detector[mockDetection]{}, + expectNonCritical: false, + expectedStepsCount: 0, + }, + { + name: "mixed valid and invalid steps", + confedSteps: []v1alpha1.DetectorSpec{ + {Name: "valid_step", Params: runtime.RawExtension{Raw: []byte("{}")}}, + {Name: "invalid_step", Params: runtime.RawExtension{Raw: []byte("{}")}}, + }, + supportedSteps: map[string]Detector[mockDetection]{ + "valid_step": &mockDetectorStep{}, + }, + expectNonCritical: true, + expectedStepsCount: 1, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + cl := fake.NewClientBuilder().Build() + pipeline := &DetectorPipeline[mockDetection]{ + Client: cl, + Monitor: DetectorPipelineMonitor{}, + } + + errs := pipeline.Init( + context.Background(), + tt.confedSteps, + tt.supportedSteps, + ) + + if tt.expectNonCritical && len(errs) == 0 { + t.Errorf("expected non-critical errors, got none") + } + if !tt.expectNonCritical && len(errs) > 0 { + t.Errorf("did not expect non-critical errors, got: %v", errs) + } + if len(pipeline.steps) != tt.expectedStepsCount { + t.Errorf("expected %d steps, got %d", tt.expectedStepsCount, len(pipeline.steps)) + } + }) + } +} + +func TestDetectorPipeline_Run(t *testing.T) { + tests := []struct { + name string + steps map[string]Detector[mockDetection] + expectedCount int + expectedSteps []string + stepWithErrors bool + }{ + { + name: "run single step successfully", + steps: map[string]Detector[mockDetection]{ + "step1": &mockDetectorStep{ + decisions: []mockDetection{ + {resource: "vm1", host: "host1", reason: "reason1"}, + }, + }, + }, + expectedCount: 1, + expectedSteps: []string{"step1"}, + }, + { + name: "run multiple steps successfully", + steps: map[string]Detector[mockDetection]{ + "step1": &mockDetectorStep{ + decisions: []mockDetection{ + {resource: "vm1", host: "host1", reason: "reason1"}, + }, + }, + "step2": &mockDetectorStep{ + decisions: []mockDetection{ + {resource: "vm2", host: "host2", reason: "reason2"}, + }, + }, + }, + expectedCount: 2, + expectedSteps: []string{"step1", "step2"}, + }, + { + name: "step with error is skipped", + steps: map[string]Detector[mockDetection]{ + "failing_step": &mockDetectorStep{ + runErr: errors.New("run failed"), + }, + "working_step": &mockDetectorStep{ + decisions: []mockDetection{ + {resource: "vm1", host: "host1", reason: "reason1"}, + }, + }, + }, + expectedCount: 1, + expectedSteps: []string{"working_step"}, + stepWithErrors: true, + }, + { + name: "step returning ErrStepSkipped is skipped", + steps: map[string]Detector[mockDetection]{ + "skipped_step": &mockDetectorStep{ + runErr: ErrStepSkipped, + }, + "working_step": &mockDetectorStep{ + decisions: []mockDetection{ + {resource: "vm1", host: "host1", reason: "reason1"}, + }, + }, + }, + expectedCount: 1, + expectedSteps: []string{"working_step"}, + }, + { + name: "empty pipeline", + steps: map[string]Detector[mockDetection]{}, + expectedCount: 0, + expectedSteps: 
[]string{}, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + pipeline := &DetectorPipeline[mockDetection]{ + steps: tt.steps, + Monitor: DetectorPipelineMonitor{}, + } + + result := pipeline.Run() + + if len(result) != tt.expectedCount { + t.Errorf("expected %d step results, got %d", tt.expectedCount, len(result)) + } + + for _, stepName := range tt.expectedSteps { + if _, ok := result[stepName]; !ok { + t.Errorf("expected step %s in result", stepName) + } + } + }) + } +} + +func TestDetectorPipeline_Combine(t *testing.T) { + tests := []struct { + name string + decisionsByStep map[string][]mockDetection + expectedCount int + expectConflict bool + }{ + { + name: "combine single decision", + decisionsByStep: map[string][]mockDetection{ + "step1": { + {resource: "vm1", host: "host1", reason: "reason1"}, + }, + }, + expectedCount: 1, + }, + { + name: "combine multiple decisions from different steps", + decisionsByStep: map[string][]mockDetection{ + "step1": { + {resource: "vm1", host: "host1", reason: "reason1"}, + }, + "step2": { + {resource: "vm2", host: "host2", reason: "reason2"}, + }, + }, + expectedCount: 2, + }, + { + name: "combine decisions for same resource with same host", + decisionsByStep: map[string][]mockDetection{ + "step1": { + {resource: "vm1", host: "host1", reason: "reason1"}, + }, + "step2": { + {resource: "vm1", host: "host1", reason: "reason2"}, + }, + }, + expectedCount: 1, + }, + { + name: "conflicting hosts for same resource", + decisionsByStep: map[string][]mockDetection{ + "step1": { + {resource: "vm1", host: "host1", reason: "reason1"}, + }, + "step2": { + {resource: "vm1", host: "host2", reason: "reason2"}, + }, + }, + expectedCount: 0, + expectConflict: true, + }, + { + name: "empty decisions", + decisionsByStep: map[string][]mockDetection{}, + expectedCount: 0, + }, + { + name: "step with empty decisions", + decisionsByStep: map[string][]mockDetection{ + "step1": {}, + }, + expectedCount: 0, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + pipeline := &DetectorPipeline[mockDetection]{} + + result := pipeline.Combine(tt.decisionsByStep) + + if len(result) != tt.expectedCount { + t.Errorf("expected %d combined decisions, got %d", tt.expectedCount, len(result)) + } + }) + } +} + +func TestDetectorPipeline_Combine_MergedReason(t *testing.T) { + pipeline := &DetectorPipeline[mockDetection]{} + + decisionsByStep := map[string][]mockDetection{ + "step1": { + {resource: "vm1", host: "host1", reason: "reason1"}, + }, + "step2": { + {resource: "vm1", host: "host1", reason: "reason2"}, + }, + } + + result := pipeline.Combine(decisionsByStep) + + if len(result) != 1 { + t.Fatalf("expected 1 combined decision, got %d", len(result)) + } + + // The merged reason should contain both original reasons + reason := result[0].GetReason() + if reason == "" { + t.Error("expected non-empty reason") + } + if reason != "multiple reasons: reason1; reason2" && reason != "multiple reasons: reason2; reason1" { + // The order might vary due to map iteration order + if !strings.Contains(reason, "reason1") || !strings.Contains(reason, "reason2") { + t.Errorf("expected reason to contain both 'reason1' and 'reason2', got %s", reason) + } + } +} + +func TestDetectorPipeline_RunWithMonitor(t *testing.T) { + // Test that Run works with a proper monitor + monitor := NewDetectorPipelineMonitor() + pipeline := &DetectorPipeline[mockDetection]{ + steps: map[string]Detector[mockDetection]{ + "step1": &mockDetectorStep{ + decisions: 
[]mockDetection{ + {resource: "vm1", host: "host1", reason: "reason1"}, + }, + }, + }, + Monitor: monitor, + } + + result := pipeline.Run() + + if len(result) != 1 { + t.Errorf("expected 1 step result, got %d", len(result)) + } +} diff --git a/internal/scheduling/lib/detector_test.go b/internal/scheduling/lib/detector_test.go new file mode 100644 index 000000000..e80f07a7a --- /dev/null +++ b/internal/scheduling/lib/detector_test.go @@ -0,0 +1,256 @@ +// Copyright SAP SE +// SPDX-License-Identifier: Apache-2.0 + +package lib + +import ( + "strings" + "testing" + + "github.com/cobaltcore-dev/cortex/api/v1alpha1" + corev1 "k8s.io/api/core/v1" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/runtime" + "sigs.k8s.io/controller-runtime/pkg/client/fake" +) + +type mockDetection struct { + resource string + host string + reason string +} + +func (d mockDetection) GetResource() string { return d.resource } +func (d mockDetection) GetHost() string { return d.host } +func (d mockDetection) GetReason() string { return d.reason } +func (d mockDetection) WithReason(reason string) Detection { d.reason = reason; return d } + +type mockDetectorOptions struct { + Option1 string `json:"option1"` + Option2 int `json:"option2"` +} + +func (o mockDetectorOptions) Validate() error { + return nil +} + +func TestDetector_Init(t *testing.T) { + step := BaseDetector[mockDetectorOptions]{} + cl := fake.NewClientBuilder().Build() + err := step.Init(t.Context(), cl, v1alpha1.DetectorSpec{ + Params: runtime.RawExtension{Raw: []byte(`{ + "option1": "value1", + "option2": 2 + }`)}, + }) + if err != nil { + t.Fatalf("expected no error, got %v", err) + } + + if step.Options.Option1 != "value1" { + t.Errorf("expected Option1 to be 'value1', got %s", step.Options.Option1) + } + + if step.Options.Option2 != 2 { + t.Errorf("expected Option2 to be 2, got %d", step.Options.Option2) + } +} + +func TestDetector_Init_InvalidJSON(t *testing.T) { + step := BaseDetector[mockDetectorOptions]{} + cl := fake.NewClientBuilder().Build() + err := step.Init(t.Context(), cl, v1alpha1.DetectorSpec{ + Params: runtime.RawExtension{Raw: []byte(`{invalid json}`)}, + }) + if err == nil { + t.Fatal("expected error for invalid JSON, got nil") + } +} + +func TestBaseDetector_CheckKnowledges(t *testing.T) { + scheme := runtime.NewScheme() + if err := v1alpha1.AddToScheme(scheme); err != nil { + t.Fatalf("failed to add scheme: %v", err) + } + + tests := []struct { + name string + knowledges []v1alpha1.Knowledge + refs []corev1.ObjectReference + expectError bool + errorMsg string + }{ + { + name: "all knowledges ready", + knowledges: []v1alpha1.Knowledge{ + { + ObjectMeta: metav1.ObjectMeta{ + Name: "knowledge1", + Namespace: "default", + }, + Status: v1alpha1.KnowledgeStatus{ + RawLength: 10, + Conditions: []metav1.Condition{ + { + Type: v1alpha1.KnowledgeConditionReady, + Status: metav1.ConditionTrue, + }, + }, + }, + }, + }, + refs: []corev1.ObjectReference{ + {Name: "knowledge1", Namespace: "default"}, + }, + expectError: false, + }, + { + name: "knowledge not found", + knowledges: []v1alpha1.Knowledge{}, + refs: []corev1.ObjectReference{ + {Name: "missing-knowledge", Namespace: "default"}, + }, + expectError: true, + errorMsg: "failed to get knowledge", + }, + { + name: "knowledge not ready - condition false", + knowledges: []v1alpha1.Knowledge{ + { + ObjectMeta: metav1.ObjectMeta{ + Name: "knowledge1", + Namespace: "default", + }, + Status: v1alpha1.KnowledgeStatus{ + RawLength: 10, + Conditions: []metav1.Condition{ + { + 
Type: v1alpha1.KnowledgeConditionReady, + Status: metav1.ConditionFalse, + }, + }, + }, + }, + }, + refs: []corev1.ObjectReference{ + {Name: "knowledge1", Namespace: "default"}, + }, + expectError: true, + errorMsg: "not ready", + }, + { + name: "knowledge not ready - no data", + knowledges: []v1alpha1.Knowledge{ + { + ObjectMeta: metav1.ObjectMeta{ + Name: "knowledge1", + Namespace: "default", + }, + Status: v1alpha1.KnowledgeStatus{ + RawLength: 0, + Conditions: []metav1.Condition{ + { + Type: v1alpha1.KnowledgeConditionReady, + Status: metav1.ConditionTrue, + }, + }, + }, + }, + }, + refs: []corev1.ObjectReference{ + {Name: "knowledge1", Namespace: "default"}, + }, + expectError: true, + errorMsg: "no data available", + }, + { + name: "empty knowledge list", + knowledges: []v1alpha1.Knowledge{}, + refs: []corev1.ObjectReference{}, + expectError: false, + }, + { + name: "multiple knowledges all ready", + knowledges: []v1alpha1.Knowledge{ + { + ObjectMeta: metav1.ObjectMeta{ + Name: "knowledge1", + Namespace: "default", + }, + Status: v1alpha1.KnowledgeStatus{ + RawLength: 10, + Conditions: []metav1.Condition{ + { + Type: v1alpha1.KnowledgeConditionReady, + Status: metav1.ConditionTrue, + }, + }, + }, + }, + { + ObjectMeta: metav1.ObjectMeta{ + Name: "knowledge2", + Namespace: "default", + }, + Status: v1alpha1.KnowledgeStatus{ + RawLength: 5, + Conditions: []metav1.Condition{ + { + Type: v1alpha1.KnowledgeConditionReady, + Status: metav1.ConditionTrue, + }, + }, + }, + }, + }, + refs: []corev1.ObjectReference{ + {Name: "knowledge1", Namespace: "default"}, + {Name: "knowledge2", Namespace: "default"}, + }, + expectError: false, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + clientBuilder := fake.NewClientBuilder().WithScheme(scheme) + for i := range tt.knowledges { + clientBuilder = clientBuilder.WithObjects(&tt.knowledges[i]) + } + cl := clientBuilder.Build() + + detector := &BaseDetector[mockDetectorOptions]{ + Client: cl, + } + + err := detector.CheckKnowledges(t.Context(), tt.refs...) + + if tt.expectError && err == nil { + t.Error("expected error but got nil") + } + if !tt.expectError && err != nil { + t.Errorf("expected no error but got: %v", err) + } + if tt.expectError && err != nil && tt.errorMsg != "" { + if !strings.Contains(err.Error(), tt.errorMsg) { + t.Errorf("expected error message to contain %q, got %q", tt.errorMsg, err.Error()) + } + } + }) + } +} + +func TestBaseDetector_CheckKnowledges_NilClient(t *testing.T) { + detector := &BaseDetector[mockDetectorOptions]{ + Client: nil, + } + + err := detector.CheckKnowledges(t.Context(), corev1.ObjectReference{Name: "test", Namespace: "default"}) + + if err == nil { + t.Error("expected error for nil client but got nil") + } + if !strings.Contains(err.Error(), "client not initialized") { + t.Errorf("expected error message about client not initialized, got %q", err.Error()) + } +} diff --git a/internal/scheduling/lib/errors.go b/internal/scheduling/lib/errors.go new file mode 100644 index 000000000..59735f9a5 --- /dev/null +++ b/internal/scheduling/lib/errors.go @@ -0,0 +1,13 @@ +// Copyright SAP SE +// SPDX-License-Identifier: Apache-2.0 + +package lib + +import ( + "errors" +) + +var ( + // This error is returned from the step at any time when the step should be skipped. 
+ ErrStepSkipped = errors.New("step skipped") +) diff --git a/internal/scheduling/lib/filter.go b/internal/scheduling/lib/filter.go new file mode 100644 index 000000000..552a6bd2d --- /dev/null +++ b/internal/scheduling/lib/filter.go @@ -0,0 +1,30 @@ +// Copyright SAP SE +// SPDX-License-Identifier: Apache-2.0 + +package lib + +import ( + "context" + + "github.com/cobaltcore-dev/cortex/api/v1alpha1" + "sigs.k8s.io/controller-runtime/pkg/client" +) + +// Interface for a filter as part of the scheduling pipeline. +type Filter[RequestType FilterWeigherPipelineRequest] interface { + FilterWeigherPipelineStep[RequestType] + + // Configure the filter and initialize things like a database connection. + Init(ctx context.Context, client client.Client, step v1alpha1.FilterSpec) error +} + +// Common base for all steps that provides some functionality +// that would otherwise be duplicated across all steps. +type BaseFilter[RequestType FilterWeigherPipelineRequest, Opts FilterWeigherPipelineStepOpts] struct { + BaseFilterWeigherPipelineStep[RequestType, Opts] +} + +// Init the filter with the database and options. +func (s *BaseFilter[RequestType, Opts]) Init(ctx context.Context, client client.Client, step v1alpha1.FilterSpec) error { + return s.BaseFilterWeigherPipelineStep.Init(ctx, client, step.Params) +} diff --git a/internal/scheduling/lib/filter_monitor.go b/internal/scheduling/lib/filter_monitor.go new file mode 100644 index 000000000..c60dbe090 --- /dev/null +++ b/internal/scheduling/lib/filter_monitor.go @@ -0,0 +1,43 @@ +// Copyright SAP SE +// SPDX-License-Identifier: Apache-2.0 + +package lib + +import ( + "context" + "log/slog" + + "github.com/cobaltcore-dev/cortex/api/v1alpha1" + "sigs.k8s.io/controller-runtime/pkg/client" +) + +// Wraps a scheduler filter to monitor its execution. +type FilterMonitor[RequestType FilterWeigherPipelineRequest] struct { + // The filter to monitor. + filter Filter[RequestType] + // The monitor tracking the step's execution. + monitor *FilterWeigherPipelineStepMonitor[RequestType] +} + +// Wrap the given filter with a monitor. +func monitorFilter[RequestType FilterWeigherPipelineRequest]( + filter Filter[RequestType], + stepName string, + m FilterWeigherPipelineMonitor, +) *FilterMonitor[RequestType] { + + return &FilterMonitor[RequestType]{ + filter: filter, + monitor: monitorStep[RequestType](stepName, m), + } +} + +// Initialize the wrapped filter. +func (fm *FilterMonitor[RequestType]) Init(ctx context.Context, client client.Client, step v1alpha1.FilterSpec) error { + return fm.filter.Init(ctx, client, step) +} + +// Run the filter and observe its execution. 
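
For reference, a toy filter that satisfies the Filter interface above without embedding BaseFilter (all names are illustrative; a real step would typically embed BaseFilter to get params decoding and the Kubernetes client): it keeps only subjects whose incoming weight is non-negative by returning activations for them.

package examplefilters

import (
	"context"
	"log/slog"

	"github.com/cobaltcore-dev/cortex/api/v1alpha1"
	"github.com/cobaltcore-dev/cortex/internal/scheduling/lib"
	"sigs.k8s.io/controller-runtime/pkg/client"
)

// demoFilter keeps only subjects whose incoming weight is non-negative.
type demoFilter[R lib.FilterWeigherPipelineRequest] struct{}

func (f *demoFilter[R]) Init(ctx context.Context, cl client.Client, step v1alpha1.FilterSpec) error {
	return nil // nothing to configure for this toy filter
}

func (f *demoFilter[R]) Run(log *slog.Logger, req R) (*lib.FilterWeigherPipelineStepResult, error) {
	weights := req.GetWeights()
	activations := map[string]float64{}
	for _, subject := range req.GetSubjects() {
		if weights[subject] >= 0 {
			// An entry in Activations means "keep this subject"; anything
			// missing is removed from the request by FilterSubjects.
			activations[subject] = 0
		}
	}
	return &lib.FilterWeigherPipelineStepResult{Activations: activations}, nil
}

Such a step would presumably be offered to the pipeline through a factory entry in the supportedFilters map passed to the pipeline constructor further down, e.g. "demo_filter": func() lib.Filter[R] { return &demoFilter[R]{} }.
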
+func (fm *FilterMonitor[RequestType]) Run(traceLog *slog.Logger, request RequestType) (*FilterWeigherPipelineStepResult, error) { + return fm.monitor.RunWrapped(traceLog, request, fm.filter) +} diff --git a/internal/scheduling/lib/filter_monitor_test.go b/internal/scheduling/lib/filter_monitor_test.go new file mode 100644 index 000000000..2e0e2e33f --- /dev/null +++ b/internal/scheduling/lib/filter_monitor_test.go @@ -0,0 +1,118 @@ +// Copyright SAP SE +// SPDX-License-Identifier: Apache-2.0 + +package lib + +import ( + "context" + "log/slog" + "testing" + + "github.com/cobaltcore-dev/cortex/api/v1alpha1" + "k8s.io/apimachinery/pkg/runtime" + "sigs.k8s.io/controller-runtime/pkg/client" + "sigs.k8s.io/controller-runtime/pkg/client/fake" +) + +func TestMonitorFilter(t *testing.T) { + monitor := FilterWeigherPipelineMonitor{ + PipelineName: "test-pipeline", + } + + mockFilter := &mockFilter[mockFilterWeigherPipelineRequest]{ + InitFunc: func(ctx context.Context, cl client.Client, step v1alpha1.FilterSpec) error { + return nil + }, + RunFunc: func(traceLog *slog.Logger, request mockFilterWeigherPipelineRequest) (*FilterWeigherPipelineStepResult, error) { + return &FilterWeigherPipelineStepResult{ + Activations: map[string]float64{"host1": 0.5, "host2": 1.0}, + }, nil + }, + } + + fm := monitorFilter(mockFilter, "test-filter", monitor) + if fm == nil { + t.Fatal("expected filter monitor, got nil") + } + if fm.filter == nil { + t.Error("expected filter to be set") + } + if fm.monitor == nil { + t.Error("expected monitor to be set") + } + if fm.monitor.stepName != "test-filter" { + t.Errorf("expected step name 'test-filter', got '%s'", fm.monitor.stepName) + } +} + +func TestFilterMonitor_Init(t *testing.T) { + initCalled := false + mockFilter := &mockFilter[mockFilterWeigherPipelineRequest]{ + InitFunc: func(ctx context.Context, cl client.Client, step v1alpha1.FilterSpec) error { + initCalled = true + return nil + }, + } + + monitor := FilterWeigherPipelineMonitor{ + PipelineName: "test-pipeline", + } + fm := monitorFilter(mockFilter, "test-filter", monitor) + + scheme := runtime.NewScheme() + cl := fake.NewClientBuilder().WithScheme(scheme).Build() + + err := fm.Init(t.Context(), cl, v1alpha1.FilterSpec{ + Name: "test-filter", + Params: runtime.RawExtension{ + Raw: []byte(`{}`), + }, + }) + if err != nil { + t.Errorf("expected no error, got %v", err) + } + if !initCalled { + t.Error("expected Init to be called on wrapped filter") + } +} + +func TestFilterMonitor_Run(t *testing.T) { + runCalled := false + mockFilter := &mockFilter[mockFilterWeigherPipelineRequest]{ + RunFunc: func(traceLog *slog.Logger, request mockFilterWeigherPipelineRequest) (*FilterWeigherPipelineStepResult, error) { + runCalled = true + return &FilterWeigherPipelineStepResult{ + Activations: map[string]float64{"host1": 0.5, "host2": 1.0}, + }, nil + }, + } + + runTimer := &mockObserver{} + removedSubjectsObserver := &mockObserver{} + monitor := FilterWeigherPipelineMonitor{ + PipelineName: "test-pipeline", + } + fm := monitorFilter(mockFilter, "test-filter", monitor) + // Manually set monitors for testing + fm.monitor.runTimer = runTimer + fm.monitor.removedSubjectsObserver = removedSubjectsObserver + + request := mockFilterWeigherPipelineRequest{ + Subjects: []string{"host1", "host2", "host3"}, + Weights: map[string]float64{"host1": 0.1, "host2": 0.2, "host3": 0.3}, + } + + result, err := fm.Run(slog.Default(), request) + if err != nil { + t.Errorf("expected no error, got %v", err) + } + if !runCalled { + 
t.Error("expected Run to be called on wrapped filter") + } + if result == nil { + t.Fatal("expected result, got nil") + } + if len(result.Activations) != 2 { + t.Errorf("expected 2 activations, got %d", len(result.Activations)) + } +} diff --git a/internal/scheduling/lib/filter_test.go b/internal/scheduling/lib/filter_test.go new file mode 100644 index 000000000..ac7c4d1d7 --- /dev/null +++ b/internal/scheduling/lib/filter_test.go @@ -0,0 +1,96 @@ +// Copyright SAP SE +// SPDX-License-Identifier: Apache-2.0 + +package lib + +import ( + "context" + "log/slog" + "testing" + + "github.com/cobaltcore-dev/cortex/api/v1alpha1" + "k8s.io/apimachinery/pkg/runtime" + "sigs.k8s.io/controller-runtime/pkg/client" + "sigs.k8s.io/controller-runtime/pkg/client/fake" +) + +type mockFilter[RequestType FilterWeigherPipelineRequest] struct { + InitFunc func(ctx context.Context, client client.Client, step v1alpha1.FilterSpec) error + RunFunc func(traceLog *slog.Logger, request RequestType) (*FilterWeigherPipelineStepResult, error) +} + +func (m *mockFilter[RequestType]) Init(ctx context.Context, client client.Client, step v1alpha1.FilterSpec) error { + if m.InitFunc == nil { + return nil + } + return m.InitFunc(ctx, client, step) +} +func (m *mockFilter[RequestType]) Run(traceLog *slog.Logger, request RequestType) (*FilterWeigherPipelineStepResult, error) { + if m.RunFunc == nil { + return &FilterWeigherPipelineStepResult{}, nil + } + return m.RunFunc(traceLog, request) +} + +// filterTestOptions implements FilterWeigherPipelineStepOpts for testing. +type filterTestOptions struct{} + +func (o filterTestOptions) Validate() error { return nil } + +func TestBaseFilter_Init(t *testing.T) { + tests := []struct { + name string + filterSpec v1alpha1.FilterSpec + expectError bool + }{ + { + name: "successful initialization with valid params", + filterSpec: v1alpha1.FilterSpec{ + Name: "test-filter", + Params: runtime.RawExtension{ + Raw: []byte(`{}`), + }, + }, + expectError: false, + }, + { + name: "successful initialization with empty params", + filterSpec: v1alpha1.FilterSpec{ + Name: "test-filter", + Params: runtime.RawExtension{ + Raw: []byte(`{}`), + }, + }, + expectError: false, + }, + { + name: "error on invalid JSON params", + filterSpec: v1alpha1.FilterSpec{ + Name: "test-filter", + Params: runtime.RawExtension{ + Raw: []byte(`{invalid json}`), + }, + }, + expectError: true, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + filter := &BaseFilter[mockFilterWeigherPipelineRequest, filterTestOptions]{} + cl := fake.NewClientBuilder().Build() + + err := filter.Init(t.Context(), cl, tt.filterSpec) + + if tt.expectError && err == nil { + t.Error("expected error but got nil") + } + if !tt.expectError && err != nil { + t.Errorf("expected no error but got: %v", err) + } + if !tt.expectError && filter.Client == nil { + t.Error("expected client to be set but it was nil") + } + }) + } +} diff --git a/internal/scheduling/lib/filter_validation.go b/internal/scheduling/lib/filter_validation.go new file mode 100644 index 000000000..3117cffe0 --- /dev/null +++ b/internal/scheduling/lib/filter_validation.go @@ -0,0 +1,51 @@ +// Copyright SAP SE +// SPDX-License-Identifier: Apache-2.0 + +package lib + +import ( + "context" + "errors" + "log/slog" + + "github.com/cobaltcore-dev/cortex/api/v1alpha1" + "sigs.k8s.io/controller-runtime/pkg/client" +) + +// Wrapper for scheduler steps that validates them before/after execution. 
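
A standalone sketch of the invariant the validator below enforces, with made-up host names: subjects are deduplicated first (the same host can appear several times, e.g. once per hypervisor), and a filter may only shrink that set, never grow it.

package main

import (
	"errors"
	"fmt"
)

// validate mirrors the rule below: deduplicate the incoming subjects, then
// make sure the filter did not return more subjects than it was given.
func validate(subjectsIn []string, activations map[string]float64) error {
	dedup := map[string]struct{}{}
	for _, s := range subjectsIn {
		dedup[s] = struct{}{}
	}
	if len(activations) > len(dedup) {
		return errors.New("safety: number of subjects increased during step execution")
	}
	return nil
}

func main() {
	in := []string{"host1", "host1", "host2"} // host1 listed twice, e.g. per hypervisor
	fmt.Println(validate(in, map[string]float64{"host1": 0}))                         // <nil>: removing subjects is fine
	fmt.Println(validate(in, map[string]float64{"host1": 0, "host2": 0, "host3": 0})) // error: a subject was added
}
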
+type FilterValidator[RequestType FilterWeigherPipelineRequest] struct { + // The wrapped filter to validate. + Filter Filter[RequestType] +} + +// Initialize the wrapped filter with the database and options. +func (s *FilterValidator[RequestType]) Init(ctx context.Context, client client.Client, step v1alpha1.FilterSpec) error { + slog.Info("scheduler: init validation for step", "name", step.Name) + return s.Filter.Init(ctx, client, step) +} + +// Validate the wrapped filter with the database and options. +func validateFilter[RequestType FilterWeigherPipelineRequest](filter Filter[RequestType]) *FilterValidator[RequestType] { + return &FilterValidator[RequestType]{Filter: filter} +} + +// Run the filter and validate what happens. +func (s *FilterValidator[RequestType]) Run(traceLog *slog.Logger, request RequestType) (*FilterWeigherPipelineStepResult, error) { + result, err := s.Filter.Run(traceLog, request) + if err != nil { + return nil, err + } + // Note that for some schedulers the same subject (e.g. compute host) may + // appear multiple times if there is a substruct (e.g. hypervisor hostname). + // Since cortex will only schedule on the subject level and not below, + // we need to deduplicate the subjects first before the validation. + deduplicated := map[string]struct{}{} + for _, subject := range request.GetSubjects() { + deduplicated[subject] = struct{}{} + } + // Filters can only remove subjects, not add new ones. + if len(result.Activations) > len(deduplicated) { + return nil, errors.New("safety: number of subjects increased during step execution") + } + return result, nil +} diff --git a/internal/scheduling/lib/filter_validation_test.go b/internal/scheduling/lib/filter_validation_test.go new file mode 100644 index 000000000..750ef5b62 --- /dev/null +++ b/internal/scheduling/lib/filter_validation_test.go @@ -0,0 +1,191 @@ +// Copyright SAP SE +// SPDX-License-Identifier: Apache-2.0 + +package lib + +import ( + "context" + "errors" + "log/slog" + "testing" + + "github.com/cobaltcore-dev/cortex/api/v1alpha1" + "k8s.io/apimachinery/pkg/runtime" + "sigs.k8s.io/controller-runtime/pkg/client" + "sigs.k8s.io/controller-runtime/pkg/client/fake" +) + +func TestValidateFilter(t *testing.T) { + filter := &mockFilter[mockFilterWeigherPipelineRequest]{} + validator := validateFilter(filter) + + if validator == nil { + t.Fatal("expected validator but got nil") + } + if validator.Filter != filter { + t.Error("expected filter to be set in validator") + } +} + +func TestFilterValidator_Init(t *testing.T) { + tests := []struct { + name string + filterSpec v1alpha1.FilterSpec + initError error + expectError bool + }{ + { + name: "successful initialization", + filterSpec: v1alpha1.FilterSpec{ + Name: "test-filter", + Params: runtime.RawExtension{ + Raw: []byte(`{}`), + }, + }, + initError: nil, + expectError: false, + }, + { + name: "initialization error", + filterSpec: v1alpha1.FilterSpec{ + Name: "test-filter", + Params: runtime.RawExtension{ + Raw: []byte(`{}`), + }, + }, + initError: errors.New("init error"), + expectError: true, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + filter := &mockFilter[mockFilterWeigherPipelineRequest]{ + InitFunc: func(_ context.Context, _ client.Client, _ v1alpha1.FilterSpec) error { + return tt.initError + }, + } + validator := validateFilter(filter) + cl := fake.NewClientBuilder().Build() + + err := validator.Init(t.Context(), cl, tt.filterSpec) + + if tt.expectError && err == nil { + t.Error("expected error but got nil") + } + if 
!tt.expectError && err != nil { + t.Errorf("expected no error but got: %v", err) + } + }) + } +} + +func TestFilterValidator_Run(t *testing.T) { + tests := []struct { + name string + subjects []string + runResult *FilterWeigherPipelineStepResult + runError error + expectError bool + errorContains string + }{ + { + name: "successful run - filter removes some subjects", + subjects: []string{"host1", "host2", "host3"}, + runResult: &FilterWeigherPipelineStepResult{ + Activations: map[string]float64{ + "host1": 1.0, + "host2": 1.0, + }, + }, + runError: nil, + expectError: false, + }, + { + name: "successful run - filter keeps all subjects", + subjects: []string{"host1", "host2"}, + runResult: &FilterWeigherPipelineStepResult{ + Activations: map[string]float64{ + "host1": 1.0, + "host2": 1.0, + }, + }, + runError: nil, + expectError: false, + }, + { + name: "run error from filter", + subjects: []string{"host1"}, + runResult: nil, + runError: errors.New("filter error"), + expectError: true, + }, + { + name: "validation error - subjects increased", + subjects: []string{"host1"}, + runResult: &FilterWeigherPipelineStepResult{ + Activations: map[string]float64{ + "host1": 1.0, + "host2": 1.0, + "host3": 1.0, + }, + }, + runError: nil, + expectError: true, + errorContains: "number of subjects increased", + }, + { + name: "handle duplicate subjects in request", + subjects: []string{"host1", "host1", "host2"}, + runResult: &FilterWeigherPipelineStepResult{ + Activations: map[string]float64{ + "host1": 1.0, + "host2": 1.0, + }, + }, + runError: nil, + expectError: false, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + filter := &mockFilter[mockFilterWeigherPipelineRequest]{ + RunFunc: func(traceLog *slog.Logger, request mockFilterWeigherPipelineRequest) (*FilterWeigherPipelineStepResult, error) { + return tt.runResult, tt.runError + }, + } + validator := validateFilter(filter) + request := mockFilterWeigherPipelineRequest{ + Subjects: tt.subjects, + } + traceLog := slog.Default() + + result, err := validator.Run(traceLog, request) + + if tt.expectError && err == nil { + t.Error("expected error but got nil") + } + if !tt.expectError && err != nil { + t.Errorf("expected no error but got: %v", err) + } + if tt.expectError && tt.errorContains != "" && err != nil { + if !containsStr(err.Error(), tt.errorContains) { + t.Errorf("expected error to contain %q, got %q", tt.errorContains, err.Error()) + } + } + if !tt.expectError && result == nil { + t.Error("expected result but got nil") + } + }) + } +} + +func containsStr(s, substr string) bool { + for i := 0; i <= len(s)-len(substr); i++ { + if s[i:i+len(substr)] == substr { + return true + } + } + return false +} diff --git a/internal/scheduling/lib/filter_weigher_pipeline.go b/internal/scheduling/lib/filter_weigher_pipeline.go new file mode 100644 index 000000000..76c306d86 --- /dev/null +++ b/internal/scheduling/lib/filter_weigher_pipeline.go @@ -0,0 +1,294 @@ +// Copyright SAP SE +// SPDX-License-Identifier: Apache-2.0 + +package lib + +import ( + "context" + "errors" + "log/slog" + "maps" + "math" + "slices" + "sort" + "sync" + + "github.com/cobaltcore-dev/cortex/api/v1alpha1" + "sigs.k8s.io/controller-runtime/pkg/client" +) + +type FilterWeigherPipeline[RequestType FilterWeigherPipelineRequest] interface { + // Run the scheduling pipeline with the given request. + Run(request RequestType) (v1alpha1.DecisionResult, error) +} + +// Pipeline of scheduler steps. 
+type filterWeigherPipeline[RequestType FilterWeigherPipelineRequest] struct { + // The activation function to use when combining the + // results of the scheduler steps. + ActivationFunction + // The order in which filters are applied, by their step name. + filtersOrder []string + // The filters by their name. + filters map[string]Filter[RequestType] + // The order in which weighers are applied, by their step name. + weighersOrder []string + // The weighers by their name. + weighers map[string]Weigher[RequestType] + // Multipliers to apply to weigher outputs. + weighersMultipliers map[string]float64 + // Monitor to observe the pipeline. + monitor FilterWeigherPipelineMonitor +} + +// Create a new pipeline with filters and weighers contained in the configuration. +func InitNewFilterWeigherPipeline[RequestType FilterWeigherPipelineRequest]( + ctx context.Context, + client client.Client, + name string, + supportedFilters map[string]func() Filter[RequestType], + confedFilters []v1alpha1.FilterSpec, + supportedWeighers map[string]func() Weigher[RequestType], + confedWeighers []v1alpha1.WeigherSpec, + monitor FilterWeigherPipelineMonitor, +) PipelineInitResult[FilterWeigherPipeline[RequestType]] { + + pipelineMonitor := monitor.SubPipeline(name) + + // Load all filters from the configuration. + filtersByName := make(map[string]Filter[RequestType], len(confedFilters)) + filtersOrder := []string{} + filterErrors := make(map[string]error) + for _, filterConfig := range confedFilters { + slog.Info("scheduler: configuring filter", "name", filterConfig.Name) + slog.Info("supported:", "filters", maps.Keys(supportedFilters)) + makeFilter, ok := supportedFilters[filterConfig.Name] + if !ok { + slog.Error("scheduler: unsupported filter", "name", filterConfig.Name) + filterErrors[filterConfig.Name] = errors.New("unsupported filter name: " + filterConfig.Name) + continue + } + filter := makeFilter() + filter = validateFilter(filter) + filter = monitorFilter(filter, filterConfig.Name, pipelineMonitor) + if err := filter.Init(ctx, client, filterConfig); err != nil { + slog.Error("scheduler: failed to initialize filter", "name", filterConfig.Name, "error", err) + filterErrors[filterConfig.Name] = errors.New("failed to initialize filter: " + err.Error()) + continue + } + filtersByName[filterConfig.Name] = filter + filtersOrder = append(filtersOrder, filterConfig.Name) + slog.Info("scheduler: added filter", "name", filterConfig.Name) + } + + // Load all weighers from the configuration. + weighersByName := make(map[string]Weigher[RequestType], len(confedWeighers)) + weighersMultipliers := make(map[string]float64, len(confedWeighers)) + weighersOrder := []string{} + weigherErrors := make(map[string]error) + for _, weigherConfig := range confedWeighers { + slog.Info("scheduler: configuring weigher", "name", weigherConfig.Name) + slog.Info("supported:", "weighers", maps.Keys(supportedWeighers)) + makeWeigher, ok := supportedWeighers[weigherConfig.Name] + if !ok { + slog.Error("scheduler: unsupported weigher", "name", weigherConfig.Name) + weigherErrors[weigherConfig.Name] = errors.New("unsupported weigher name: " + weigherConfig.Name) + continue + } + weigher := makeWeigher() + // Validate that the weigher doesn't unexpectedly filter out hosts. 
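
A hypothetical caller of InitNewFilterWeigherPipeline above might consume the returned PipelineInitResult (assembled a little further down) like this; the function and pipeline name are illustrative, and per-step errors are treated as non-critical, matching how the tests assert on them.

package examplefilters

import (
	"context"
	"log/slog"

	"github.com/cobaltcore-dev/cortex/api/v1alpha1"
	"github.com/cobaltcore-dev/cortex/internal/scheduling/lib"
	"sigs.k8s.io/controller-runtime/pkg/client"
)

// buildPipeline wires CRD-provided specs against the steps a controller knows
// about; misconfigured steps are logged and skipped rather than failing hard.
func buildPipeline[R lib.FilterWeigherPipelineRequest](
	ctx context.Context,
	k8s client.Client,
	filters []v1alpha1.FilterSpec,
	weighers []v1alpha1.WeigherSpec,
	supportedFilters map[string]func() lib.Filter[R],
	supportedWeighers map[string]func() lib.Weigher[R],
) lib.FilterWeigherPipeline[R] {
	res := lib.InitNewFilterWeigherPipeline(
		ctx, k8s, "example",
		supportedFilters, filters,
		supportedWeighers, weighers,
		lib.NewPipelineMonitor(),
	)
	for name, err := range res.FilterErrors {
		slog.Error("skipping misconfigured filter", "name", name, "error", err)
	}
	for name, err := range res.WeigherErrors {
		slog.Error("skipping misconfigured weigher", "name", name, "error", err)
	}
	return res.Pipeline
}
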
+ weigher = validateWeigher(weigher) + weigher = monitorWeigher(weigher, weigherConfig.Name, pipelineMonitor) + if err := weigher.Init(ctx, client, weigherConfig); err != nil { + slog.Error("scheduler: failed to initialize weigher", "name", weigherConfig.Name, "error", err) + weigherErrors[weigherConfig.Name] = errors.New("failed to initialize weigher: " + err.Error()) + continue + } + weighersByName[weigherConfig.Name] = weigher + weighersOrder = append(weighersOrder, weigherConfig.Name) + if weigherConfig.Multiplier == nil { + weighersMultipliers[weigherConfig.Name] = 1.0 + } else { + weighersMultipliers[weigherConfig.Name] = *weigherConfig.Multiplier + } + slog.Info("scheduler: added weigher", "name", weigherConfig.Name) + } + + return PipelineInitResult[FilterWeigherPipeline[RequestType]]{ + FilterErrors: filterErrors, + WeigherErrors: weigherErrors, + Pipeline: &filterWeigherPipeline[RequestType]{ + filtersOrder: filtersOrder, + filters: filtersByName, + weighersOrder: weighersOrder, + weighers: weighersByName, + weighersMultipliers: weighersMultipliers, + monitor: pipelineMonitor, + }, + } +} + +// Execute filters and collect their activations by step name. +// During this process, the request is mutated to only include the +// remaining subjects. +func (p *filterWeigherPipeline[RequestType]) runFilters( + log *slog.Logger, + request RequestType, +) (filteredRequest RequestType) { + + filteredRequest = request + for _, filterName := range p.filtersOrder { + filter := p.filters[filterName] + stepLog := log.With("filter", filterName) + stepLog.Info("scheduler: running filter") + result, err := filter.Run(stepLog, filteredRequest) + if errors.Is(err, ErrStepSkipped) { + stepLog.Info("scheduler: filter skipped") + continue + } + if err != nil { + stepLog.Error("scheduler: failed to run filter", "error", err) + continue + } + stepLog.Info("scheduler: finished filter") + // Mutate the request to only include the remaining subjects. + // Assume the resulting request type is the same as the input type. + filteredRequest = filteredRequest.FilterSubjects(result.Activations).(RequestType) + } + return filteredRequest +} + +// Execute weighers and collect their activations by step name. +func (p *filterWeigherPipeline[RequestType]) runWeighers( + log *slog.Logger, + filteredRequest RequestType, +) map[string]map[string]float64 { + + activationsByStep := map[string]map[string]float64{} + // Weighers can be run in parallel as they do not modify the request. + var lock sync.Mutex + var wg sync.WaitGroup + for _, weigherName := range p.weighersOrder { + weigher := p.weighers[weigherName] + wg.Go(func() { + stepLog := log.With("weigher", weigherName) + stepLog.Info("scheduler: running weigher") + result, err := weigher.Run(stepLog, filteredRequest) + if errors.Is(err, ErrStepSkipped) { + stepLog.Info("scheduler: weigher skipped") + return + } + if err != nil { + stepLog.Error("scheduler: failed to run weigher", "error", err) + return + } + stepLog.Info("scheduler: finished weigher") + lock.Lock() + defer lock.Unlock() + activationsByStep[weigherName] = result.Activations + }) + } + wg.Wait() + return activationsByStep +} + +// Apply an initial weight to the subjects. +// +// Context: +// Openstack schedulers may give us very large (positive/negative) weights such as +// -99,000 or 99,000 (Nova). We want to respect these values, but still adjust them +// to a meaningful value. If the scheduler really doesn't want us to run on a subject, it +// should run a filter instead of setting a weight. 
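
A quick standalone illustration of the normalization described in the comment above: tanh keeps small weights roughly proportional while clamping the extreme sentinel-like values some schedulers send into [-1, 1].

package main

import (
	"fmt"
	"math"
)

func main() {
	for _, w := range []float64{-99000, -1, 0, 0.5, 99000} {
		fmt.Printf("%9.1f -> %+.3f\n", w, math.Tanh(w))
	}
	// -99000.0 -> -1.000
	//     -1.0 -> -0.762
	//      0.0 -> +0.000
	//      0.5 -> +0.462
	//  99000.0 -> +1.000
}
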
+func (p *filterWeigherPipeline[RequestType]) normalizeInputWeights(weights map[string]float64) map[string]float64 { + normalizedWeights := make(map[string]float64, len(weights)) + for subjectname, weight := range weights { + normalizedWeights[subjectname] = math.Tanh(weight) + } + return normalizedWeights +} + +// Apply the step weights to the input weights. +func (p *filterWeigherPipeline[RequestType]) applyWeights( + stepWeights map[string]map[string]float64, + inWeights map[string]float64, +) map[string]float64 { + // Copy to avoid modifying the original weights. + outWeights := make(map[string]float64, len(inWeights)) + maps.Copy(outWeights, inWeights) + + // Apply all activations in the strict order defined by the configuration. + for _, weigherName := range p.weighersOrder { + weigherActivations, ok := stepWeights[weigherName] + if !ok { + // This is ok, since steps can be skipped. + continue + } + multiplier, ok := p.weighersMultipliers[weigherName] + if !ok { + multiplier = 1.0 + } + outWeights = p.Apply(outWeights, weigherActivations, multiplier) + } + return outWeights +} + +// Sort the subjects by their weights. +func (s *filterWeigherPipeline[RequestType]) sortSubjectsByWeights(weights map[string]float64) []string { + // Sort the subjects (keys) by their weights. + subjects := slices.Collect(maps.Keys(weights)) + sort.Slice(subjects, func(i, j int) bool { + return weights[subjects[i]] > weights[subjects[j]] + }) + return subjects +} + +// Evaluate the pipeline and return a list of subjects in order of preference. +func (p *filterWeigherPipeline[RequestType]) Run(request RequestType) (v1alpha1.DecisionResult, error) { + slogArgs := request.GetTraceLogArgs() + slogArgsAny := make([]any, 0, len(slogArgs)) + for _, arg := range slogArgs { + slogArgsAny = append(slogArgsAny, arg) + } + traceLog := slog.With(slogArgsAny...) + + subjectsIn := request.GetSubjects() + traceLog.Info("scheduler: starting pipeline", "subjects", subjectsIn) + + // Normalize the input weights so we can apply step weights meaningfully. + inWeights := p.normalizeInputWeights(request.GetWeights()) + traceLog.Info("scheduler: input weights", "weights", inWeights) + + // Run filters first to reduce the number of subjects. + // Any weights assigned to filtered out subjects are ignored. + filteredRequest := p.runFilters(traceLog, request) + traceLog.Info( + "scheduler: finished filters", + "remainingSubjects", filteredRequest.GetSubjects(), + ) + + // Run weighers on the filtered subjects. + remainingWeights := make(map[string]float64, len(filteredRequest.GetSubjects())) + for _, subject := range filteredRequest.GetSubjects() { + remainingWeights[subject] = inWeights[subject] + } + stepWeights := p.runWeighers(traceLog, filteredRequest) + outWeights := p.applyWeights(stepWeights, remainingWeights) + traceLog.Info("scheduler: output weights", "weights", outWeights) + + subjects := p.sortSubjectsByWeights(outWeights) + traceLog.Info("scheduler: sorted subjects", "subjects", subjects) + + // Collect some metrics about the pipeline execution. 
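+ // The observation runs in its own goroutine so that metric collection does not
+ // delay returning the scheduling decision.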
+ go p.monitor.observePipelineResult(request, subjects) + + result := v1alpha1.DecisionResult{ + RawInWeights: request.GetWeights(), + NormalizedInWeights: inWeights, + AggregatedOutWeights: outWeights, + OrderedHosts: subjects, + } + if len(subjects) > 0 { + result.TargetHost = &subjects[0] + } + return result, nil +} diff --git a/internal/scheduling/lib/pipeline_monitor.go b/internal/scheduling/lib/filter_weigher_pipeline_monitor.go similarity index 82% rename from internal/scheduling/lib/pipeline_monitor.go rename to internal/scheduling/lib/filter_weigher_pipeline_monitor.go index 5fd7f48a9..0b55eda94 100644 --- a/internal/scheduling/lib/pipeline_monitor.go +++ b/internal/scheduling/lib/filter_weigher_pipeline_monitor.go @@ -8,7 +8,7 @@ import ( ) // Collection of Prometheus metrics to monitor scheduler pipeline -type PipelineMonitor struct { +type FilterWeigherPipelineMonitor struct { // The pipeline name is used to differentiate between different pipelines. PipelineName string @@ -33,68 +33,68 @@ type PipelineMonitor struct { } // Create a new scheduler monitor and register the necessary Prometheus metrics. -func NewPipelineMonitor() PipelineMonitor { +func NewPipelineMonitor() FilterWeigherPipelineMonitor { buckets := []float64{} buckets = append(buckets, prometheus.LinearBuckets(0, 1, 10)...) buckets = append(buckets, prometheus.LinearBuckets(10, 10, 4)...) buckets = append(buckets, prometheus.LinearBuckets(50, 50, 6)...) stepReorderingsObserver := prometheus.NewHistogramVec(prometheus.HistogramOpts{ - Name: "cortex_scheduler_pipeline_step_shift_origin", + Name: "cortex_filter_weigher_pipeline_step_shift_origin", Help: "From which index of the subject list the subject came from originally.", Buckets: buckets, }, []string{"pipeline", "step", "outidx"}) - return PipelineMonitor{ + return FilterWeigherPipelineMonitor{ stepRunTimer: prometheus.NewHistogramVec(prometheus.HistogramOpts{ - Name: "cortex_scheduler_pipeline_step_run_duration_seconds", + Name: "cortex_filter_weigher_pipeline_step_run_duration_seconds", Help: "Duration of scheduler pipeline step run", Buckets: prometheus.DefBuckets, }, []string{"pipeline", "step"}), stepSubjectWeight: prometheus.NewGaugeVec(prometheus.GaugeOpts{ - Name: "cortex_scheduler_pipeline_step_weight_modification", + Name: "cortex_filter_weigher_pipeline_step_weight_modification", Help: "Modification of subject weight by scheduler pipeline step", }, []string{"pipeline", "subject", "step"}), stepRemovedSubjectsObserver: prometheus.NewHistogramVec(prometheus.HistogramOpts{ - Name: "cortex_scheduler_pipeline_step_removed_subjects", + Name: "cortex_filter_weigher_pipeline_step_removed_subjects", Help: "Number of subjects removed by scheduler pipeline step", Buckets: prometheus.ExponentialBucketsRange(1, 1000, 10), }, []string{"pipeline", "step"}), stepReorderingsObserver: stepReorderingsObserver, stepImpactObserver: prometheus.NewHistogramVec(prometheus.HistogramOpts{ - Name: "cortex_scheduler_pipeline_step_impact", + Name: "cortex_filter_weigher_pipeline_step_impact", Help: "Impact of the step on the subjects", Buckets: prometheus.ExponentialBucketsRange(0.01, 1000, 20), }, []string{"pipeline", "step", "stat", "unit"}), pipelineRunTimer: prometheus.NewHistogramVec(prometheus.HistogramOpts{ - Name: "cortex_scheduler_pipeline_run_duration_seconds", + Name: "cortex_filter_weigher_pipeline_run_duration_seconds", Help: "Duration of scheduler pipeline run", Buckets: prometheus.DefBuckets, }, []string{"pipeline"}), subjectNumberInObserver: 
prometheus.NewHistogramVec(prometheus.HistogramOpts{ - Name: "cortex_scheduler_pipeline_subject_number_in", + Name: "cortex_filter_weigher_pipeline_subject_number_in", Help: "Number of subjects going into the scheduler pipeline", Buckets: prometheus.ExponentialBucketsRange(1, 1000, 10), }, []string{"pipeline"}), subjectNumberOutObserver: prometheus.NewHistogramVec(prometheus.HistogramOpts{ - Name: "cortex_scheduler_pipeline_subject_number_out", + Name: "cortex_filter_weigher_pipeline_subject_number_out", Help: "Number of subjects coming out of the scheduler pipeline", Buckets: prometheus.ExponentialBucketsRange(1, 1000, 10), }, []string{"pipeline"}), requestCounter: prometheus.NewCounterVec(prometheus.CounterOpts{ - Name: "cortex_scheduler_pipeline_requests_total", + Name: "cortex_filter_weigher_pipeline_requests_total", Help: "Total number of requests processed by the scheduler.", }, []string{"pipeline"}), } } // Get a copied pipeline monitor with the name set, after binding the metrics. -func (m PipelineMonitor) SubPipeline(name string) PipelineMonitor { +func (m FilterWeigherPipelineMonitor) SubPipeline(name string) FilterWeigherPipelineMonitor { cp := m cp.PipelineName = name return cp } // Observe a scheduler pipeline result: subjects going in, and subjects going out. -func (m *PipelineMonitor) observePipelineResult(request PipelineRequest, result []string) { +func (m *FilterWeigherPipelineMonitor) observePipelineResult(request FilterWeigherPipelineRequest, result []string) { // Observe the number of subjects going into the scheduler pipeline. if m.subjectNumberInObserver != nil { m.subjectNumberInObserver. @@ -115,7 +115,7 @@ func (m *PipelineMonitor) observePipelineResult(request PipelineRequest, result } } -func (m *PipelineMonitor) Describe(ch chan<- *prometheus.Desc) { +func (m *FilterWeigherPipelineMonitor) Describe(ch chan<- *prometheus.Desc) { m.stepRunTimer.Describe(ch) m.stepSubjectWeight.Describe(ch) m.stepRemovedSubjectsObserver.Describe(ch) @@ -127,7 +127,7 @@ func (m *PipelineMonitor) Describe(ch chan<- *prometheus.Desc) { m.requestCounter.Describe(ch) } -func (m *PipelineMonitor) Collect(ch chan<- prometheus.Metric) { +func (m *FilterWeigherPipelineMonitor) Collect(ch chan<- prometheus.Metric) { m.stepRunTimer.Collect(ch) m.stepSubjectWeight.Collect(ch) m.stepRemovedSubjectsObserver.Collect(ch) diff --git a/internal/scheduling/lib/filter_weigher_pipeline_monitor_test.go b/internal/scheduling/lib/filter_weigher_pipeline_monitor_test.go new file mode 100644 index 000000000..023a23ae1 --- /dev/null +++ b/internal/scheduling/lib/filter_weigher_pipeline_monitor_test.go @@ -0,0 +1,117 @@ +// Copyright SAP SE +// SPDX-License-Identifier: Apache-2.0 + +package lib + +import ( + "strings" + "testing" + + "github.com/cobaltcore-dev/cortex/pkg/monitoring" + "github.com/prometheus/client_golang/prometheus" + "github.com/prometheus/client_golang/prometheus/testutil" +) + +func TestSchedulerMonitor(t *testing.T) { + registry := &monitoring.Registry{RegistererGatherer: prometheus.NewRegistry()} + monitor := NewPipelineMonitor().SubPipeline("test") + registry.MustRegister(&monitor) + + // Test stepRunTimer + expectedStepRunTimer := strings.NewReader(` + # HELP cortex_filter_weigher_pipeline_step_run_duration_seconds Duration of scheduler pipeline step run + # TYPE cortex_filter_weigher_pipeline_step_run_duration_seconds histogram + cortex_filter_weigher_pipeline_step_run_duration_seconds_bucket{pipeline="test",step="test_step",le="0.005"} 1 + 
cortex_filter_weigher_pipeline_step_run_duration_seconds_bucket{pipeline="test",step="test_step",le="0.01"} 1 + cortex_filter_weigher_pipeline_step_run_duration_seconds_bucket{pipeline="test",step="test_step",le="0.025"} 1 + cortex_filter_weigher_pipeline_step_run_duration_seconds_bucket{pipeline="test",step="test_step",le="0.05"} 1 + cortex_filter_weigher_pipeline_step_run_duration_seconds_bucket{pipeline="test",step="test_step",le="0.1"} 1 + cortex_filter_weigher_pipeline_step_run_duration_seconds_bucket{pipeline="test",step="test_step",le="0.25"} 1 + cortex_filter_weigher_pipeline_step_run_duration_seconds_bucket{pipeline="test",step="test_step",le="0.5"} 1 + cortex_filter_weigher_pipeline_step_run_duration_seconds_bucket{pipeline="test",step="test_step",le="1"} 1 + cortex_filter_weigher_pipeline_step_run_duration_seconds_bucket{pipeline="test",step="test_step",le="2.5"} 1 + cortex_filter_weigher_pipeline_step_run_duration_seconds_bucket{pipeline="test",step="test_step",le="5"} 1 + cortex_filter_weigher_pipeline_step_run_duration_seconds_bucket{pipeline="test",step="test_step",le="10"} 1 + cortex_filter_weigher_pipeline_step_run_duration_seconds_bucket{pipeline="test",step="test_step",le="+Inf"} 1 + cortex_filter_weigher_pipeline_step_run_duration_seconds_sum{pipeline="test",step="test_step"} 0 + cortex_filter_weigher_pipeline_step_run_duration_seconds_count{pipeline="test",step="test_step"} 1 + `) + monitor.stepRunTimer.WithLabelValues("test", "test_step").Observe(0) + err := testutil.GatherAndCompare(registry, expectedStepRunTimer, "cortex_filter_weigher_pipeline_step_run_duration_seconds") + if err != nil { + t.Fatalf("stepRunTimer test failed: %v", err) + } + + // Test stepSubjectWeight + expectedStepSubjectWeight := strings.NewReader(` + # HELP cortex_filter_weigher_pipeline_step_weight_modification Modification of subject weight by scheduler pipeline step + # TYPE cortex_filter_weigher_pipeline_step_weight_modification gauge + cortex_filter_weigher_pipeline_step_weight_modification{pipeline="test",step="test_step",subject="test_subject"} 42 + `) + monitor.stepSubjectWeight.WithLabelValues("test", "test_subject", "test_step").Set(42) + err = testutil.GatherAndCompare(registry, expectedStepSubjectWeight, "cortex_filter_weigher_pipeline_step_weight_modification") + if err != nil { + t.Fatalf("stepSubjectWeight test failed: %v", err) + } + + // Test stepRemovedSubjectsObserver + expectedRemovedSubjectsObserver := strings.NewReader(` + # HELP cortex_filter_weigher_pipeline_step_removed_subjects Number of subjects removed by scheduler pipeline step + # TYPE cortex_filter_weigher_pipeline_step_removed_subjects histogram + cortex_filter_weigher_pipeline_step_removed_subjects_bucket{pipeline="test",step="test_step",le="1"} 1 + cortex_filter_weigher_pipeline_step_removed_subjects_bucket{pipeline="test",step="test_step",le="2.154434690031884"} 1 + cortex_filter_weigher_pipeline_step_removed_subjects_bucket{pipeline="test",step="test_step",le="4.641588833612779"} 1 + cortex_filter_weigher_pipeline_step_removed_subjects_bucket{pipeline="test",step="test_step",le="10.000000000000002"} 1 + cortex_filter_weigher_pipeline_step_removed_subjects_bucket{pipeline="test",step="test_step",le="21.544346900318843"} 1 + cortex_filter_weigher_pipeline_step_removed_subjects_bucket{pipeline="test",step="test_step",le="46.4158883361278"} 1 + cortex_filter_weigher_pipeline_step_removed_subjects_bucket{pipeline="test",step="test_step",le="100.00000000000003"} 1 + 
cortex_filter_weigher_pipeline_step_removed_subjects_bucket{pipeline="test",step="test_step",le="215.44346900318845"} 1 + cortex_filter_weigher_pipeline_step_removed_subjects_bucket{pipeline="test",step="test_step",le="464.15888336127813"} 1 + cortex_filter_weigher_pipeline_step_removed_subjects_bucket{pipeline="test",step="test_step",le="1000.0000000000006"} 1 + cortex_filter_weigher_pipeline_step_removed_subjects_bucket{pipeline="test",step="test_step",le="+Inf"} 1 + cortex_filter_weigher_pipeline_step_removed_subjects_sum{pipeline="test",step="test_step"} 1 + cortex_filter_weigher_pipeline_step_removed_subjects_count{pipeline="test",step="test_step"} 1 + `) + monitor.stepRemovedSubjectsObserver.WithLabelValues("test", "test_step").Observe(1) + err = testutil.GatherAndCompare(registry, expectedRemovedSubjectsObserver, "cortex_filter_weigher_pipeline_step_removed_subjects") + if err != nil { + t.Fatalf("stepRemovedSubjectsObserver test failed: %v", err) + } + + // Test pipelineRunTimer + expectedPipelineRunTimer := strings.NewReader(` + # HELP cortex_filter_weigher_pipeline_run_duration_seconds Duration of scheduler pipeline run + # TYPE cortex_filter_weigher_pipeline_run_duration_seconds histogram + cortex_filter_weigher_pipeline_run_duration_seconds_bucket{pipeline="test",le="0.005"} 1 + cortex_filter_weigher_pipeline_run_duration_seconds_bucket{pipeline="test",le="0.01"} 1 + cortex_filter_weigher_pipeline_run_duration_seconds_bucket{pipeline="test",le="0.025"} 1 + cortex_filter_weigher_pipeline_run_duration_seconds_bucket{pipeline="test",le="0.05"} 1 + cortex_filter_weigher_pipeline_run_duration_seconds_bucket{pipeline="test",le="0.1"} 1 + cortex_filter_weigher_pipeline_run_duration_seconds_bucket{pipeline="test",le="0.25"} 1 + cortex_filter_weigher_pipeline_run_duration_seconds_bucket{pipeline="test",le="0.5"} 1 + cortex_filter_weigher_pipeline_run_duration_seconds_bucket{pipeline="test",le="1"} 1 + cortex_filter_weigher_pipeline_run_duration_seconds_bucket{pipeline="test",le="2.5"} 1 + cortex_filter_weigher_pipeline_run_duration_seconds_bucket{pipeline="test",le="5"} 1 + cortex_filter_weigher_pipeline_run_duration_seconds_bucket{pipeline="test",le="10"} 1 + cortex_filter_weigher_pipeline_run_duration_seconds_bucket{pipeline="test",le="+Inf"} 1 + cortex_filter_weigher_pipeline_run_duration_seconds_sum{pipeline="test"} 0 + cortex_filter_weigher_pipeline_run_duration_seconds_count{pipeline="test"} 1 + `) + monitor.pipelineRunTimer.WithLabelValues("test").Observe(0) + err = testutil.GatherAndCompare(registry, expectedPipelineRunTimer, "cortex_filter_weigher_pipeline_run_duration_seconds") + if err != nil { + t.Fatalf("pipelineRunTimer test failed: %v", err) + } + + // Test requestCounter + expectedRequestCounter := strings.NewReader(` + # HELP cortex_filter_weigher_pipeline_requests_total Total number of requests processed by the scheduler. 
+ # TYPE cortex_filter_weigher_pipeline_requests_total counter + cortex_filter_weigher_pipeline_requests_total{pipeline="test"} 3 + `) + monitor.requestCounter.WithLabelValues("test").Add(3) + err = testutil.GatherAndCompare(registry, expectedRequestCounter, "cortex_filter_weigher_pipeline_requests_total") + if err != nil { + t.Fatalf("requestCounter test failed: %v", err) + } +} diff --git a/internal/scheduling/lib/filter_weigher_pipeline_request.go b/internal/scheduling/lib/filter_weigher_pipeline_request.go new file mode 100644 index 000000000..38387f6b2 --- /dev/null +++ b/internal/scheduling/lib/filter_weigher_pipeline_request.go @@ -0,0 +1,21 @@ +// Copyright SAP SE +// SPDX-License-Identifier: Apache-2.0 + +package lib + +import "log/slog" + +type FilterWeigherPipelineRequest interface { + // Get the subjects that went in the pipeline. + GetSubjects() []string + // This function can be used by the pipeline to obtain a mutated version + // of the request with only the given subjects remaining. This is helpful + // for steps that filter out subjects. Subjects not included in the map + // are considered as filtered out, and won't be reconsidered in later steps. + FilterSubjects(includedSubjects map[string]float64) FilterWeigherPipelineRequest + // Get the weights for the subjects. + GetWeights() map[string]float64 + // Get logging args to be used in the step's trace log. + // Usually, this will be the request context including the request ID. + GetTraceLogArgs() []slog.Attr +} diff --git a/internal/scheduling/lib/filter_weigher_pipeline_request_test.go b/internal/scheduling/lib/filter_weigher_pipeline_request_test.go new file mode 100644 index 000000000..70752853c --- /dev/null +++ b/internal/scheduling/lib/filter_weigher_pipeline_request_test.go @@ -0,0 +1,29 @@ +// Copyright SAP SE +// SPDX-License-Identifier: Apache-2.0 + +package lib + +import "log/slog" + +type mockFilterWeigherPipelineRequest struct { + WeightKeys []string + TraceLogArgs []slog.Attr + Subjects []string + Weights map[string]float64 + Pipeline string +} + +func (m mockFilterWeigherPipelineRequest) GetWeightKeys() []string { return m.WeightKeys } +func (m mockFilterWeigherPipelineRequest) GetTraceLogArgs() []slog.Attr { return m.TraceLogArgs } +func (m mockFilterWeigherPipelineRequest) GetSubjects() []string { return m.Subjects } +func (m mockFilterWeigherPipelineRequest) GetWeights() map[string]float64 { return m.Weights } +func (m mockFilterWeigherPipelineRequest) GetPipeline() string { return m.Pipeline } + +func (m mockFilterWeigherPipelineRequest) FilterSubjects(subjects map[string]float64) FilterWeigherPipelineRequest { + filteredSubjects := make([]string, 0, len(subjects)) + for subject := range subjects { + filteredSubjects = append(filteredSubjects, subject) + } + m.Subjects = filteredSubjects + return m +} diff --git a/internal/scheduling/lib/filter_weigher_pipeline_step.go b/internal/scheduling/lib/filter_weigher_pipeline_step.go new file mode 100644 index 000000000..81394a2ef --- /dev/null +++ b/internal/scheduling/lib/filter_weigher_pipeline_step.go @@ -0,0 +1,77 @@ +// Copyright SAP SE +// SPDX-License-Identifier: Apache-2.0 + +package lib + +import ( + "context" + "log/slog" + + "github.com/cobaltcore-dev/cortex/pkg/conf" + "k8s.io/apimachinery/pkg/runtime" + "sigs.k8s.io/controller-runtime/pkg/client" +) + +// Steps can be chained together to form a scheduling pipeline. +type FilterWeigherPipelineStep[RequestType FilterWeigherPipelineRequest] interface { + // Run this step in the scheduling pipeline. 
+ // + // The request is immutable and modifications are stored in the result. + // This allows steps to be run in parallel (e.g. weighers) without passing + // mutable state around. + // + // All hosts that should not be filtered out must be included in the returned + // map of activations. I.e., filters implementing this interface should + // remove activations by omitting them from the returned map. + // + // Filters implementing this interface should adjust activation + // values in the returned map, including all hosts from the request. + // + // A traceLog is provided that contains the global request id and should + // be used to log the step's execution. + Run(traceLog *slog.Logger, request RequestType) (*FilterWeigherPipelineStepResult, error) +} + +// Common base for all steps that provides some functionality +// that would otherwise be duplicated across all steps. +type BaseFilterWeigherPipelineStep[RequestType FilterWeigherPipelineRequest, Opts FilterWeigherPipelineStepOpts] struct { + // Options to pass via yaml to this step. + conf.JsonOpts[Opts] + // The activation function to use. + ActivationFunction + // The kubernetes client to use. + Client client.Client +} + +// Init the step with the database and options. +func (s *BaseFilterWeigherPipelineStep[RequestType, Opts]) Init(ctx context.Context, client client.Client, params runtime.RawExtension) error { + opts := conf.NewRawOptsBytes(params.Raw) + if err := s.Load(opts); err != nil { + return err + } + if err := s.Options.Validate(); err != nil { + return err + } + + s.Client = client + return nil +} + +// Get a default result (no action) for the input weight keys given in the request. +// Use this to initialize the result before applying filtering/weighing logic. +func (s *BaseFilterWeigherPipelineStep[RequestType, Opts]) IncludeAllHostsFromRequest(request RequestType) *FilterWeigherPipelineStepResult { + activations := make(map[string]float64) + for _, subject := range request.GetSubjects() { + activations[subject] = s.NoEffect() + } + stats := make(map[string]FilterWeigherPipelineStepStatistics) + return &FilterWeigherPipelineStepResult{Activations: activations, Statistics: stats} +} + +// Get default statistics for the input weight keys given in the request. +func (s *BaseFilterWeigherPipelineStep[RequestType, Opts]) PrepareStats(request RequestType, unit string) FilterWeigherPipelineStepStatistics { + return FilterWeigherPipelineStepStatistics{ + Unit: unit, + Subjects: make(map[string]float64, len(request.GetSubjects())), + } +} diff --git a/internal/scheduling/lib/step_monitor.go b/internal/scheduling/lib/filter_weigher_pipeline_step_monitor.go similarity index 90% rename from internal/scheduling/lib/step_monitor.go rename to internal/scheduling/lib/filter_weigher_pipeline_step_monitor.go index ed6a79bdf..c46b4b28d 100644 --- a/internal/scheduling/lib/step_monitor.go +++ b/internal/scheduling/lib/filter_weigher_pipeline_step_monitor.go @@ -4,7 +4,6 @@ package lib import ( - "context" "fmt" "log/slog" "maps" @@ -14,13 +13,11 @@ import ( "strconv" "strings" - "github.com/cobaltcore-dev/cortex/api/v1alpha1" "github.com/prometheus/client_golang/prometheus" - "sigs.k8s.io/controller-runtime/pkg/client" ) // Wraps a scheduler step to monitor its execution. -type StepMonitor[RequestType PipelineRequest] struct { +type FilterWeigherPipelineStepMonitor[RequestType FilterWeigherPipelineRequest] struct { // Mixin that can be embedded in a step to provide some activation function tooling. 
ActivationFunction @@ -29,8 +26,6 @@ type StepMonitor[RequestType PipelineRequest] struct { // The name of this step. stepName string - // The wrapped scheduler step to monitor. - Step Step[RequestType] // A timer to measure how long the step takes to run. runTimer prometheus.Observer // A metric to monitor how much the step modifies the weights of the subjects. @@ -43,21 +38,8 @@ type StepMonitor[RequestType PipelineRequest] struct { stepImpactObserver *prometheus.HistogramVec } -// Initialize the wrapped step with the database and options. -func (s *StepMonitor[RequestType]) Init(ctx context.Context, client client.Client, step v1alpha1.StepSpec) error { - return s.Step.Init(ctx, client, step) -} - // Schedule using the wrapped step and measure the time it takes. -func monitorStep[RequestType PipelineRequest]( - _ context.Context, - _ client.Client, - step v1alpha1.StepSpec, - impl Step[RequestType], - m PipelineMonitor, -) *StepMonitor[RequestType] { - - stepName := step.Impl +func monitorStep[RequestType FilterWeigherPipelineRequest](stepName string, m FilterWeigherPipelineMonitor) *FilterWeigherPipelineStepMonitor[RequestType] { var runTimer prometheus.Observer if m.stepRunTimer != nil { runTimer = m.stepRunTimer. @@ -68,11 +50,10 @@ func monitorStep[RequestType PipelineRequest]( removedSubjectsObserver = m.stepRemovedSubjectsObserver. WithLabelValues(m.PipelineName, stepName) } - return &StepMonitor[RequestType]{ - Step: impl, + return &FilterWeigherPipelineStepMonitor[RequestType]{ + runTimer: runTimer, stepName: stepName, pipelineName: m.PipelineName, - runTimer: runTimer, stepSubjectWeight: m.stepSubjectWeight, removedSubjectsObserver: removedSubjectsObserver, stepReorderingsObserver: m.stepReorderingsObserver, @@ -81,14 +62,19 @@ func monitorStep[RequestType PipelineRequest]( } // Run the step and observe its execution. 
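+// The step is passed to RunWrapped explicitly rather than stored on the monitor,
+// so the monitor holds only metric state and can observe any step.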
-func (s *StepMonitor[RequestType]) Run(traceLog *slog.Logger, request RequestType) (*StepResult, error) { +func (s *FilterWeigherPipelineStepMonitor[RequestType]) RunWrapped( + traceLog *slog.Logger, + request RequestType, + step FilterWeigherPipelineStep[RequestType], +) (*FilterWeigherPipelineStepResult, error) { + if s.runTimer != nil { timer := prometheus.NewTimer(s.runTimer) defer timer.ObserveDuration() } inWeights := request.GetWeights() - stepResult, err := s.Step.Run(traceLog, request) + stepResult, err := step.Run(traceLog, request) if err != nil { return nil, err } diff --git a/internal/scheduling/lib/step_monitor_test.go b/internal/scheduling/lib/filter_weigher_pipeline_step_monitor_test.go similarity index 88% rename from internal/scheduling/lib/step_monitor_test.go rename to internal/scheduling/lib/filter_weigher_pipeline_step_monitor_test.go index c248ec576..c26385425 100644 --- a/internal/scheduling/lib/step_monitor_test.go +++ b/internal/scheduling/lib/filter_weigher_pipeline_step_monitor_test.go @@ -21,24 +21,24 @@ func (m *mockObserver) Observe(value float64) { func TestStepMonitorRun(t *testing.T) { runTimer := &mockObserver{} removedSubjectsObserver := &mockObserver{} - monitor := &StepMonitor[mockPipelineRequest]{ - stepName: "mock_step", - Step: &mockStep[mockPipelineRequest]{ - RunFunc: func(traceLog *slog.Logger, request mockPipelineRequest) (*StepResult, error) { - return &StepResult{ - Activations: map[string]float64{"subject1": 0.1, "subject2": 1.0, "subject3": 0.0}, - }, nil - }, - }, + monitor := &FilterWeigherPipelineStepMonitor[mockFilterWeigherPipelineRequest]{ + stepName: "mock_step", runTimer: runTimer, stepSubjectWeight: nil, removedSubjectsObserver: removedSubjectsObserver, } - request := mockPipelineRequest{ + step := &mockWeigher[mockFilterWeigherPipelineRequest]{ + RunFunc: func(traceLog *slog.Logger, request mockFilterWeigherPipelineRequest) (*FilterWeigherPipelineStepResult, error) { + return &FilterWeigherPipelineStepResult{ + Activations: map[string]float64{"subject1": 0.1, "subject2": 1.0, "subject3": 0.0}, + }, nil + }, + } + request := mockFilterWeigherPipelineRequest{ Subjects: []string{"subject1", "subject2", "subject3"}, Weights: map[string]float64{"subject1": 0.2, "subject2": 0.1, "subject3": 0.0}, } - if _, err := monitor.Run(slog.Default(), request); err != nil { + if _, err := monitor.RunWrapped(slog.Default(), request, step); err != nil { t.Fatalf("Run() error = %v, want nil", err) } if len(removedSubjectsObserver.Observations) != 1 { diff --git a/internal/scheduling/lib/filter_weigher_pipeline_step_opts.go b/internal/scheduling/lib/filter_weigher_pipeline_step_opts.go new file mode 100644 index 000000000..1a26fc472 --- /dev/null +++ b/internal/scheduling/lib/filter_weigher_pipeline_step_opts.go @@ -0,0 +1,15 @@ +// Copyright SAP SE +// SPDX-License-Identifier: Apache-2.0 + +package lib + +// Interface to which step options must conform. +type FilterWeigherPipelineStepOpts interface { + // Validate the options for this step. + Validate() error +} + +// Empty step opts conforming to the StepOpts interface (validation always succeeds). 
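+// That is, it satisfies the FilterWeigherPipelineStepOpts interface defined above.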
+type EmptyFilterWeigherPipelineStepOpts struct{} + +func (EmptyFilterWeigherPipelineStepOpts) Validate() error { return nil } diff --git a/internal/scheduling/lib/filter_weigher_pipeline_step_opts_test.go b/internal/scheduling/lib/filter_weigher_pipeline_step_opts_test.go new file mode 100644 index 000000000..fe09706b6 --- /dev/null +++ b/internal/scheduling/lib/filter_weigher_pipeline_step_opts_test.go @@ -0,0 +1,24 @@ +// Copyright SAP SE +// SPDX-License-Identifier: Apache-2.0 + +package lib + +import ( + "testing" +) + +type MockOptions struct { + Option1 string `json:"option1"` + Option2 int `json:"option2"` +} + +func (o MockOptions) Validate() error { + return nil +} + +func TestEmptyFilterWeigherPipelineStepOpts_Validate(t *testing.T) { + opts := EmptyFilterWeigherPipelineStepOpts{} + if err := opts.Validate(); err != nil { + t.Errorf("expected no error, got %v", err) + } +} diff --git a/internal/scheduling/lib/result.go b/internal/scheduling/lib/filter_weigher_pipeline_step_result.go similarity index 82% rename from internal/scheduling/lib/result.go rename to internal/scheduling/lib/filter_weigher_pipeline_step_result.go index 0c6340858..6dc3cf8d9 100644 --- a/internal/scheduling/lib/result.go +++ b/internal/scheduling/lib/filter_weigher_pipeline_step_result.go @@ -3,7 +3,7 @@ package lib -type StepResult struct { +type FilterWeigherPipelineStepResult struct { // The activations calculated by this step. Activations map[string]float64 @@ -22,10 +22,10 @@ type StepResult struct { // // These statistics are used to display the step's effect on the hosts. // For example: max cpu contention: before [ 100%, 50%, 40% ], after [ 40%, 50%, 100% ] - Statistics map[string]StepStatistics + Statistics map[string]FilterWeigherPipelineStepStatistics } -type StepStatistics struct { +type FilterWeigherPipelineStepStatistics struct { // The unit of the statistic. Unit string // The subjects and their values. diff --git a/internal/scheduling/lib/filter_weigher_pipeline_step_test.go b/internal/scheduling/lib/filter_weigher_pipeline_step_test.go new file mode 100644 index 000000000..826e8b6f1 --- /dev/null +++ b/internal/scheduling/lib/filter_weigher_pipeline_step_test.go @@ -0,0 +1,189 @@ +// Copyright SAP SE +// SPDX-License-Identifier: Apache-2.0 + +package lib + +import ( + "errors" + "testing" + + "k8s.io/apimachinery/pkg/runtime" + "sigs.k8s.io/controller-runtime/pkg/client/fake" +) + +// testStepOptions implements FilterWeigherPipelineStepOpts for testing. 
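+// Its Validate method returns the configured ValidateError, so tests can exercise
+// both the success and the failure path of option validation.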
+type testStepOptions struct { + ValidateError error +} + +func (o testStepOptions) Validate() error { + return o.ValidateError +} + +func TestBaseFilterWeigherPipelineStep_Init(t *testing.T) { + tests := []struct { + name string + params runtime.RawExtension + expectError bool + }{ + { + name: "successful initialization with valid params", + params: runtime.RawExtension{ + Raw: []byte(`{}`), + }, + expectError: false, + }, + { + name: "successful initialization with empty params", + params: runtime.RawExtension{ + Raw: []byte(`{}`), + }, + expectError: false, + }, + { + name: "error on invalid JSON params", + params: runtime.RawExtension{ + Raw: []byte(`{invalid json}`), + }, + expectError: true, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + step := &BaseFilterWeigherPipelineStep[mockFilterWeigherPipelineRequest, testStepOptions]{} + cl := fake.NewClientBuilder().Build() + + err := step.Init(t.Context(), cl, tt.params) + + if tt.expectError && err == nil { + t.Error("expected error but got nil") + } + if !tt.expectError && err != nil { + t.Errorf("expected no error but got: %v", err) + } + if !tt.expectError && step.Client == nil { + t.Error("expected client to be set but it was nil") + } + }) + } +} + +func TestBaseFilterWeigherPipelineStep_Init_ValidationError(t *testing.T) { + // We need a custom type with a Validate method that returns an error + step := &BaseFilterWeigherPipelineStep[mockFilterWeigherPipelineRequest, failingValidationOptions]{} + cl := fake.NewClientBuilder().Build() + + err := step.Init(t.Context(), cl, runtime.RawExtension{Raw: []byte(`{}`)}) + if err == nil { + t.Error("expected error from validation but got nil") + } +} + +type failingValidationOptions struct{} + +func (o failingValidationOptions) Validate() error { + return errors.New("validation failed") +} + +func TestBaseFilterWeigherPipelineStep_IncludeAllHostsFromRequest(t *testing.T) { + tests := []struct { + name string + subjects []string + expectedCount int + }{ + { + name: "multiple subjects", + subjects: []string{"host1", "host2", "host3"}, + expectedCount: 3, + }, + { + name: "single subject", + subjects: []string{"host1"}, + expectedCount: 1, + }, + { + name: "empty subjects", + subjects: []string{}, + expectedCount: 0, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + step := &BaseFilterWeigherPipelineStep[mockFilterWeigherPipelineRequest, testStepOptions]{ + ActivationFunction: ActivationFunction{}, + } + + request := mockFilterWeigherPipelineRequest{ + Subjects: tt.subjects, + } + + result := step.IncludeAllHostsFromRequest(request) + + if result == nil { + t.Fatal("expected result but got nil") + } + if len(result.Activations) != tt.expectedCount { + t.Errorf("expected %d activations, got %d", tt.expectedCount, len(result.Activations)) + } + for _, subject := range tt.subjects { + if _, ok := result.Activations[subject]; !ok { + t.Errorf("expected subject %s in activations", subject) + } + } + if result.Statistics == nil { + t.Error("expected statistics to be initialized") + } + }) + } +} + +func TestBaseFilterWeigherPipelineStep_PrepareStats(t *testing.T) { + tests := []struct { + name string + subjects []string + unit string + expectedUnit string + }{ + { + name: "with subjects and unit", + subjects: []string{"host1", "host2", "host3"}, + unit: "percentage", + expectedUnit: "percentage", + }, + { + name: "empty subjects", + subjects: []string{}, + unit: "count", + expectedUnit: "count", + }, + { + name: "empty unit", + subjects: 
[]string{"host1"}, + unit: "", + expectedUnit: "", + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + step := &BaseFilterWeigherPipelineStep[mockFilterWeigherPipelineRequest, testStepOptions]{} + + request := mockFilterWeigherPipelineRequest{ + Subjects: tt.subjects, + } + + stats := step.PrepareStats(request, tt.unit) + + if stats.Unit != tt.expectedUnit { + t.Errorf("expected unit %s, got %s", tt.expectedUnit, stats.Unit) + } + if stats.Subjects == nil { + t.Error("expected subjects map to be initialized") + } + // Maps don't have a cap() function, but we can verify the map is initialized + // and works correctly by checking it's not nil (already done above) + }) + } +} diff --git a/internal/scheduling/lib/filter_weigher_pipeline_test.go b/internal/scheduling/lib/filter_weigher_pipeline_test.go new file mode 100644 index 000000000..aa80cbc9d --- /dev/null +++ b/internal/scheduling/lib/filter_weigher_pipeline_test.go @@ -0,0 +1,385 @@ +// Copyright SAP SE +// SPDX-License-Identifier: Apache-2.0 + +package lib + +import ( + "context" + "log/slog" + "math" + "testing" + + "github.com/cobaltcore-dev/cortex/api/v1alpha1" + "k8s.io/apimachinery/pkg/runtime" + "sigs.k8s.io/controller-runtime/pkg/client" + "sigs.k8s.io/controller-runtime/pkg/client/fake" +) + +// Mock pipeline type for testing +type mockPipeline struct { + name string +} + +func TestPipeline_Run(t *testing.T) { + // Create an instance of the pipeline with a mock step + pipeline := &filterWeigherPipeline[mockFilterWeigherPipelineRequest]{ + filters: map[string]Filter[mockFilterWeigherPipelineRequest]{ + "mock_filter": &mockFilter[mockFilterWeigherPipelineRequest]{ + RunFunc: func(traceLog *slog.Logger, request mockFilterWeigherPipelineRequest) (*FilterWeigherPipelineStepResult, error) { + // Filter out host3 + return &FilterWeigherPipelineStepResult{ + Activations: map[string]float64{ + "host1": 0.0, + "host2": 0.0, + }, + }, nil + }, + }, + }, + filtersOrder: []string{"mock_filter"}, + weighers: map[string]Weigher[mockFilterWeigherPipelineRequest]{ + "mock_weigher": &mockWeigher[mockFilterWeigherPipelineRequest]{ + RunFunc: func(traceLog *slog.Logger, request mockFilterWeigherPipelineRequest) (*FilterWeigherPipelineStepResult, error) { + // Assign weights to hosts + activations := map[string]float64{ + "host1": 0.5, + "host2": 1.0, + "host3": -0.5, + } + return &FilterWeigherPipelineStepResult{ + Activations: activations, + }, nil + }, + }, + }, + weighersOrder: []string{"mock_weigher"}, + } + + tests := []struct { + name string + request mockFilterWeigherPipelineRequest + expectedResult []string + }{ + { + name: "Single step pipeline", + request: mockFilterWeigherPipelineRequest{ + Subjects: []string{"host1", "host2", "host3"}, + Weights: map[string]float64{"host1": 0.0, "host2": 0.0, "host3": 0.0}, + }, + expectedResult: []string{"host2", "host1"}, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + result, err := pipeline.Run(tt.request) + if err != nil { + t.Fatalf("expected no error, got %v", err) + } + if len(result.OrderedHosts) != len(tt.expectedResult) { + t.Fatalf("expected %d results, got %d", len(tt.expectedResult), len(result.OrderedHosts)) + } + for i, host := range tt.expectedResult { + if result.OrderedHosts[i] != host { + t.Errorf("expected host %s at position %d, got %s", host, i, result.OrderedHosts[i]) + } + } + }) + } +} + +func TestPipeline_NormalizeNovaWeights(t *testing.T) { + p := &filterWeigherPipeline[mockFilterWeigherPipelineRequest]{} + + tests 
:= []struct { + name string + weights map[string]float64 + expected map[string]float64 + }{ + { + name: "Normalize weights", + weights: map[string]float64{ + "host1": 1000.0, + "host2": -1000.0, + "host3": 0.0, + }, + expected: map[string]float64{ + "host1": 1.0, + "host2": -1.0, + "host3": 0.0, + }, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + result := p.normalizeInputWeights(tt.weights) + for host, weight := range tt.expected { + if result[host] != weight { + t.Errorf("expected weight %f for host %s, got %f", weight, host, result[host]) + } + } + }) + } +} + +func TestPipeline_ApplyStepWeights(t *testing.T) { + p := &filterWeigherPipeline[mockFilterWeigherPipelineRequest]{ + weighers: map[string]Weigher[mockFilterWeigherPipelineRequest]{}, + weighersOrder: []string{"step1", "step2"}, + } + + tests := []struct { + name string + stepWeights map[string]map[string]float64 + inWeights map[string]float64 + expectedResult map[string]float64 + }{ + { + name: "Apply step weights", + stepWeights: map[string]map[string]float64{ + "step1": {"host1": 0.5, "host2": 0.2}, + "step2": {"host1": 0.3, "host2": 0.4}, + }, + inWeights: map[string]float64{ + "host1": 1.0, + "host2": 1.0, + }, + expectedResult: map[string]float64{ + "host1": 1.0 + math.Tanh(0.5) + math.Tanh(0.3), + "host2": 1.0 + math.Tanh(0.2) + math.Tanh(0.4), + }, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + result := p.applyWeights(tt.stepWeights, tt.inWeights) + for host, weight := range tt.expectedResult { + if result[host] != weight { + t.Errorf("expected weight %f for host %s, got %f", weight, host, result[host]) + } + } + }) + } +} + +func TestPipeline_SortHostsByWeights(t *testing.T) { + p := &filterWeigherPipeline[mockFilterWeigherPipelineRequest]{} + + tests := []struct { + name string + weights map[string]float64 + expected []string + }{ + { + name: "Sort hosts by weights", + weights: map[string]float64{ + "host1": 0.5, + "host2": 1.0, + "host3": 0.2, + }, + expected: []string{"host2", "host1", "host3"}, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + result := p.sortSubjectsByWeights(tt.weights) + for i, host := range tt.expected { + if result[i] != host { + t.Errorf("expected host %s at position %d, got %s", host, i, result[i]) + } + } + }) + } +} + +func TestPipeline_RunFilters(t *testing.T) { + mockStep := &mockFilter[mockFilterWeigherPipelineRequest]{ + RunFunc: func(traceLog *slog.Logger, request mockFilterWeigherPipelineRequest) (*FilterWeigherPipelineStepResult, error) { + // Filter out host3 + return &FilterWeigherPipelineStepResult{ + Activations: map[string]float64{ + "host1": 0.0, + "host2": 0.0, + }, + }, nil + }, + } + p := &filterWeigherPipeline[mockFilterWeigherPipelineRequest]{ + filtersOrder: []string{ + "mock_filter", + }, + filters: map[string]Filter[mockFilterWeigherPipelineRequest]{ + "mock_filter": mockStep, + }, + } + + request := mockFilterWeigherPipelineRequest{ + Subjects: []string{"host1", "host2"}, + Weights: map[string]float64{"host1": 0.0, "host2": 0.0, "host3": 0.0}, + } + + req := p.runFilters(slog.Default(), request) + if len(req.Subjects) != 2 { + t.Fatalf("expected 2 step results, got %d", len(req.Subjects)) + } +} + +func TestInitNewFilterWeigherPipeline_Success(t *testing.T) { + scheme := runtime.NewScheme() + cl := fake.NewClientBuilder().WithScheme(scheme).Build() + + supportedFilters := map[string]func() Filter[mockFilterWeigherPipelineRequest]{ + "test-filter": func() 
Filter[mockFilterWeigherPipelineRequest] { + return &mockFilter[mockFilterWeigherPipelineRequest]{ + InitFunc: func(ctx context.Context, c client.Client, step v1alpha1.FilterSpec) error { + return nil + }, + } + }, + } + + supportedWeighers := map[string]func() Weigher[mockFilterWeigherPipelineRequest]{ + "test-weigher": func() Weigher[mockFilterWeigherPipelineRequest] { + return &mockWeigher[mockFilterWeigherPipelineRequest]{ + InitFunc: func(ctx context.Context, c client.Client, step v1alpha1.WeigherSpec) error { + return nil + }, + } + }, + } + + confedFilters := []v1alpha1.FilterSpec{ + { + Name: "test-filter", + Params: runtime.RawExtension{ + Raw: []byte(`{}`), + }, + }, + } + + confedWeighers := []v1alpha1.WeigherSpec{ + { + Name: "test-weigher", + Params: runtime.RawExtension{ + Raw: []byte(`{}`), + }, + }, + } + + monitor := FilterWeigherPipelineMonitor{ + PipelineName: "test-pipeline", + } + + result := InitNewFilterWeigherPipeline( + t.Context(), + cl, + "test-pipeline", + supportedFilters, + confedFilters, + supportedWeighers, + confedWeighers, + monitor, + ) + + if len(result.FilterErrors) != 0 { + t.Fatalf("expected no filter error, got %v", result.FilterErrors) + } + if len(result.WeigherErrors) != 0 { + t.Fatalf("expected no weigher error, got %v", result.WeigherErrors) + } + if result.Pipeline == nil { + t.Fatal("expected pipeline, got nil") + } +} + +func TestInitNewFilterWeigherPipeline_UnsupportedFilter(t *testing.T) { + scheme := runtime.NewScheme() + cl := fake.NewClientBuilder().WithScheme(scheme).Build() + + supportedFilters := map[string]func() Filter[mockFilterWeigherPipelineRequest]{} + supportedWeighers := map[string]func() Weigher[mockFilterWeigherPipelineRequest]{} + + confedFilters := []v1alpha1.FilterSpec{ + { + Name: "unsupported-filter", + Params: runtime.RawExtension{ + Raw: []byte(`{}`), + }, + }, + } + + monitor := FilterWeigherPipelineMonitor{ + PipelineName: "test-pipeline", + } + + result := InitNewFilterWeigherPipeline( + t.Context(), + cl, + "test-pipeline", + supportedFilters, + confedFilters, + supportedWeighers, + nil, + monitor, + ) + + if result.FilterErrors["unsupported-filter"] == nil { + t.Fatal("expected critical error for unsupported filter, got nil") + } +} + +func TestInitNewFilterWeigherPipeline_UnsupportedWeigher(t *testing.T) { + scheme := runtime.NewScheme() + cl := fake.NewClientBuilder().WithScheme(scheme).Build() + + supportedFilters := map[string]func() Filter[mockFilterWeigherPipelineRequest]{} + supportedWeighers := map[string]func() Weigher[mockFilterWeigherPipelineRequest]{} + + confedWeighers := []v1alpha1.WeigherSpec{ + { + Name: "unsupported-weigher", + Params: runtime.RawExtension{ + Raw: []byte(`{}`), + }, + }, + } + + monitor := FilterWeigherPipelineMonitor{ + PipelineName: "test-pipeline", + } + + result := InitNewFilterWeigherPipeline( + t.Context(), + cl, + "test-pipeline", + supportedFilters, + nil, + supportedWeighers, + confedWeighers, + monitor, + ) + + if result.WeigherErrors["unsupported-weigher"] == nil { + t.Fatal("expected error for unsupported weigher, got nil") + } + if len(result.FilterErrors) != 0 { + t.Fatalf("expected no filter error, got %v", result.FilterErrors) + } +} + +func TestFilterWeigherPipelineMonitor_SubPipeline(t *testing.T) { + monitor := NewPipelineMonitor() + + subPipeline := monitor.SubPipeline("test-sub-pipeline") + + if subPipeline.PipelineName != "test-sub-pipeline" { + t.Errorf("expected pipeline name 'test-sub-pipeline', got '%s'", subPipeline.PipelineName) + } + // Verify that 
the original monitor is not modified + if monitor.PipelineName == "test-sub-pipeline" { + t.Error("original monitor should not be modified") + } +} diff --git a/internal/scheduling/lib/pipeline.go b/internal/scheduling/lib/pipeline.go deleted file mode 100644 index 48b05b084..000000000 --- a/internal/scheduling/lib/pipeline.go +++ /dev/null @@ -1,212 +0,0 @@ -// Copyright SAP SE -// SPDX-License-Identifier: Apache-2.0 - -package lib - -import ( - "context" - "errors" - "log/slog" - "maps" - "math" - "slices" - "sort" - "sync" - - "github.com/cobaltcore-dev/cortex/api/v1alpha1" - "sigs.k8s.io/controller-runtime/pkg/client" -) - -type Pipeline[RequestType PipelineRequest] interface { - // Run the scheduling pipeline with the given request. - Run(request RequestType) (v1alpha1.DecisionResult, error) -} - -// Pipeline of scheduler steps. -type pipeline[RequestType PipelineRequest] struct { - // The activation function to use when combining the - // results of the scheduler steps. - ActivationFunction - // The order in which scheduler steps are applied, by their step name. - order []string - // The steps by their name. - steps map[string]Step[RequestType] - // Monitor to observe the pipeline. - monitor PipelineMonitor -} - -type StepWrapper[RequestType PipelineRequest] func( - ctx context.Context, - client client.Client, - step v1alpha1.StepSpec, - impl Step[RequestType], -) (Step[RequestType], error) - -// Create a new pipeline with steps contained in the configuration. -func NewPipeline[RequestType PipelineRequest]( - ctx context.Context, - client client.Client, - name string, - supportedSteps map[string]func() Step[RequestType], - confedSteps []v1alpha1.StepSpec, - monitor PipelineMonitor, -) (Pipeline[RequestType], error) { - - // Load all steps from the configuration. - stepsByName := make(map[string]Step[RequestType], len(confedSteps)) - order := []string{} - - pipelineMonitor := monitor.SubPipeline(name) - - for _, stepConfig := range confedSteps { - slog.Info("scheduler: configuring step", "name", stepConfig.Impl) - slog.Info("supported:", "steps", maps.Keys(supportedSteps)) - makeStep, ok := supportedSteps[stepConfig.Impl] - if !ok { - return nil, errors.New("unsupported scheduler step impl: " + stepConfig.Impl) - } - step := makeStep() - if stepConfig.Type == v1alpha1.StepTypeWeigher && stepConfig.Weigher != nil { - step = validateStep(step, stepConfig.Weigher.DisabledValidations) - } - step = monitorStep(ctx, client, stepConfig, step, pipelineMonitor) - if err := step.Init(ctx, client, stepConfig); err != nil { - return nil, errors.New("failed to initialize pipeline step: " + err.Error()) - } - stepsByName[stepConfig.Impl] = step - order = append(order, stepConfig.Impl) - slog.Info( - "scheduler: added step", - "name", stepConfig.Impl, - ) - } - return &pipeline[RequestType]{ - // All steps can be run in parallel. - order: order, - steps: stepsByName, - monitor: pipelineMonitor, - }, nil -} - -// Execute the scheduler steps in groups of the execution order. -// The steps are run in parallel. 
-func (p *pipeline[RequestType]) runSteps(log *slog.Logger, request RequestType) map[string]map[string]float64 { - var lock sync.Mutex - activationsByStep := map[string]map[string]float64{} - var wg sync.WaitGroup - for _, stepName := range p.order { - step := p.steps[stepName] - wg.Go(func() { - stepLog := log.With("stepName", stepName) - stepLog.Info("scheduler: running step") - result, err := step.Run(stepLog, request) - if errors.Is(err, ErrStepSkipped) { - stepLog.Info("scheduler: step skipped") - return - } - if err != nil { - stepLog.Error("scheduler: failed to run step", "error", err) - return - } - stepLog.Info("scheduler: finished step") - lock.Lock() - defer lock.Unlock() - activationsByStep[stepName] = result.Activations - }) - } - wg.Wait() - return activationsByStep -} - -// Apply an initial weight to the subjects. -// -// Context: -// Openstack schedulers may give us very large (positive/negative) weights such as -// -99,000 or 99,000 (Nova). We want to respect these values, but still adjust them -// to a meaningful value. If the scheduler really doesn't want us to run on a subject, it -// should run a filter instead of setting a weight. -func (p *pipeline[RequestType]) normalizeInputWeights(weights map[string]float64) map[string]float64 { - normalizedWeights := make(map[string]float64, len(weights)) - for subjectname, weight := range weights { - normalizedWeights[subjectname] = math.Tanh(weight) - } - return normalizedWeights -} - -// Apply the step weights to the input weights. -func (p *pipeline[RequestType]) applyStepWeights( - stepWeights map[string]map[string]float64, - inWeights map[string]float64, -) map[string]float64 { - // Copy to avoid modifying the original weights. - outWeights := make(map[string]float64, len(inWeights)) - maps.Copy(outWeights, inWeights) - - // Apply all activations in the strict order defined by the configuration. - for _, stepName := range p.order { - stepActivations, ok := stepWeights[stepName] - if !ok { - // This is ok, since steps can be skipped. - continue - } - outWeights = p.Apply(outWeights, stepActivations) - } - return outWeights -} - -// Sort the subjects by their weights. -func (s *pipeline[RequestType]) sortSubjectsByWeights(weights map[string]float64) []string { - // Sort the subjects (keys) by their weights. - subjects := slices.Collect(maps.Keys(weights)) - sort.Slice(subjects, func(i, j int) bool { - return weights[subjects[i]] > weights[subjects[j]] - }) - return subjects -} - -// Evaluate the pipeline and return a list of subjects in order of preference. -func (p *pipeline[RequestType]) Run(request RequestType) (v1alpha1.DecisionResult, error) { - slogArgs := request.GetTraceLogArgs() - slogArgsAny := make([]any, 0, len(slogArgs)) - for _, arg := range slogArgs { - slogArgsAny = append(slogArgsAny, arg) - } - traceLog := slog.With(slogArgsAny...) - - subjectsIn := request.GetSubjects() - traceLog.Info("scheduler: starting pipeline", "subjects", subjectsIn) - - // Get weights from the scheduler steps, apply them to the input weights, and - // sort the subjects by their weights. The input weights are normalized before - // applying the step weights. 
- stepWeights := p.runSteps(traceLog, request) - traceLog.Info("scheduler: finished pipeline") - inWeights := p.normalizeInputWeights(request.GetWeights()) - traceLog.Info("scheduler: input weights", "weights", inWeights) - outWeights := p.applyStepWeights(stepWeights, inWeights) - traceLog.Info("scheduler: output weights", "weights", outWeights) - subjects := p.sortSubjectsByWeights(outWeights) - traceLog.Info("scheduler: sorted subjects", "subjects", subjects) - - // Collect some metrics about the pipeline execution. - go p.monitor.observePipelineResult(request, subjects) - - result := v1alpha1.DecisionResult{ - RawInWeights: request.GetWeights(), - NormalizedInWeights: inWeights, - AggregatedOutWeights: outWeights, - OrderedHosts: subjects, - } - if len(subjects) > 0 { - result.TargetHost = &subjects[0] - } - for _, stepName := range p.order { - if activations, ok := stepWeights[stepName]; ok { - result.StepResults = append(result.StepResults, v1alpha1.StepResult{ - StepName: stepName, - Activations: activations, - }) - } - } - return result, nil -} diff --git a/internal/scheduling/lib/pipeline_controller.go b/internal/scheduling/lib/pipeline_controller.go index 731d4d048..895e5e192 100644 --- a/internal/scheduling/lib/pipeline_controller.go +++ b/internal/scheduling/lib/pipeline_controller.go @@ -5,6 +5,7 @@ package lib import ( "context" + "errors" "fmt" "github.com/cobaltcore-dev/cortex/api/v1alpha1" @@ -17,23 +18,6 @@ import ( "sigs.k8s.io/controller-runtime/pkg/reconcile" ) -// The base pipeline controller will delegate some methods to the parent -// controller struct. The parent controller only needs to conform to this -// interface and set the delegate field accordingly. -type PipelineInitializer[PipelineType any] interface { - // Initialize a new pipeline with the given steps. - // - // This method is delegated to the parent controller, when a pipeline needs - // to be newly initialized or re-initialized to update it in the pipeline - // map. - InitPipeline(ctx context.Context, p v1alpha1.Pipeline) (PipelineType, error) - // Get the accepted pipeline type for this controller. - // - // This is used to filter pipelines when listing existing pipelines on - // startup or when reacting to pipeline events. - PipelineType() v1alpha1.PipelineType -} - // Base controller for decision pipelines. type BasePipelineController[PipelineType any] struct { // Initialized pipelines by their name. @@ -88,37 +72,74 @@ func (c *BasePipelineController[PipelineType]) handlePipelineChange( log := ctrl.LoggerFrom(ctx) old := obj.DeepCopy() - // Check if all steps are ready. If not, check if the step is mandatory. 
- obj.Status.TotalSteps = len(obj.Spec.Steps) - obj.Status.ReadySteps = 0 - for _, step := range obj.Spec.Steps { - err := c.checkStepReady(ctx, &step) - if err == nil { - obj.Status.ReadySteps++ - continue + initResult := c.Initializer.InitPipeline(ctx, *obj) + + obj.Status.Filters = []v1alpha1.FilterStatus{} + for _, filter := range obj.Spec.Filters { + fs := v1alpha1.FilterStatus{Name: filter.Name} + if err, ok := initResult.FilterErrors[filter.Name]; ok { + meta.SetStatusCondition(&fs.Conditions, metav1.Condition{ + Type: v1alpha1.FilterConditionReady, + Status: metav1.ConditionFalse, + Reason: "FilterInitFailed", + Message: err.Error(), + }) + } else { + meta.SetStatusCondition(&fs.Conditions, metav1.Condition{ + Type: v1alpha1.FilterConditionReady, + Status: metav1.ConditionTrue, + Reason: "FilterReady", + Message: "filter is ready", + }) } - if step.Mandatory { - meta.SetStatusCondition(&obj.Status.Conditions, metav1.Condition{ - Type: v1alpha1.PipelineConditionReady, + obj.Status.Filters = append(obj.Status.Filters, fs) + } + + obj.Status.Weighers = []v1alpha1.WeigherStatus{} + for _, weigher := range obj.Spec.Weighers { + ws := v1alpha1.WeigherStatus{Name: weigher.Name} + if err, ok := initResult.WeigherErrors[weigher.Name]; ok { + meta.SetStatusCondition(&ws.Conditions, metav1.Condition{ + Type: v1alpha1.WeigherConditionReady, Status: metav1.ConditionFalse, - Reason: "MandatoryStepNotReady", - Message: fmt.Sprintf("mandatory step %s not ready: %s", step.Impl, err.Error()), + Reason: "WeigherInitFailed", + Message: err.Error(), + }) + } else { + meta.SetStatusCondition(&ws.Conditions, metav1.Condition{ + Type: v1alpha1.WeigherConditionReady, + Status: metav1.ConditionTrue, + Reason: "WeigherReady", + Message: "weigher is ready", }) - patch := client.MergeFrom(old) - if err := c.Status().Patch(ctx, obj, patch); err != nil { - log.Error(err, "failed to patch pipeline status", "pipelineName", obj.Name) - } - delete(c.Pipelines, obj.Name) - delete(c.PipelineConfigs, obj.Name) - return } + obj.Status.Weighers = append(obj.Status.Weighers, ws) } - obj.Status.StepsReadyFrac = fmt.Sprintf("%d/%d", obj.Status.ReadySteps, obj.Status.TotalSteps) - var err error - c.Pipelines[obj.Name], err = c.Initializer.InitPipeline(ctx, *obj) - c.PipelineConfigs[obj.Name] = *obj - if err != nil { + obj.Status.Detectors = []v1alpha1.DetectorStatus{} + for _, detector := range obj.Spec.Detectors { + ds := v1alpha1.DetectorStatus{Name: detector.Name} + if err, ok := initResult.DetectorErrors[detector.Name]; ok { + meta.SetStatusCondition(&ds.Conditions, metav1.Condition{ + Type: v1alpha1.DetectorConditionReady, + Status: metav1.ConditionFalse, + Reason: "DetectorInitFailed", + Message: err.Error(), + }) + } else { + meta.SetStatusCondition(&ds.Conditions, metav1.Condition{ + Type: v1alpha1.DetectorConditionReady, + Status: metav1.ConditionTrue, + Reason: "DetectorReady", + Message: "detector is ready", + }) + } + obj.Status.Detectors = append(obj.Status.Detectors, ds) + } + + // If there was a critical error, the pipeline cannot be used. 
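+ // Filter failures are treated as critical because filters decide which subjects may
+ // remain at all; weigher and detector failures (handled further below) are reported
+ // in the status but do not prevent the pipeline from being used.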
+ if len(initResult.FilterErrors) > 0 { + err := errors.New("one or more filters failed to initialize") log.Error(err, "failed to create pipeline", "pipelineName", obj.Name) meta.SetStatusCondition(&obj.Status.Conditions, metav1.Condition{ Type: v1alpha1.PipelineConditionReady, @@ -126,6 +147,12 @@ func (c *BasePipelineController[PipelineType]) handlePipelineChange( Reason: "PipelineInitFailed", Message: err.Error(), }) + meta.SetStatusCondition(&obj.Status.Conditions, metav1.Condition{ + Type: v1alpha1.PipelineConditionAllStepsReady, + Status: metav1.ConditionFalse, + Reason: "PipelineInitFailed", + Message: err.Error(), + }) patch := client.MergeFrom(old) if err := c.Status().Patch(ctx, obj, patch); err != nil { log.Error(err, "failed to patch pipeline status", "pipelineName", obj.Name) @@ -134,6 +161,29 @@ func (c *BasePipelineController[PipelineType]) handlePipelineChange( delete(c.PipelineConfigs, obj.Name) return } + + // If there was a non-critical error, continue running the pipeline but + // report the error in the pipeline status. + if len(initResult.WeigherErrors) > 0 || len(initResult.DetectorErrors) > 0 { + err := errors.New("one or more weighers or detectors failed to initialize") + log.Error(err, "non-critical error during pipeline initialization", "pipelineName", obj.Name) + meta.SetStatusCondition(&obj.Status.Conditions, metav1.Condition{ + Type: v1alpha1.PipelineConditionAllStepsReady, + Status: metav1.ConditionFalse, + Reason: "SomeStepsNotReady", + Message: err.Error(), + }) + } else { + meta.SetStatusCondition(&obj.Status.Conditions, metav1.Condition{ + Type: v1alpha1.PipelineConditionAllStepsReady, + Status: metav1.ConditionTrue, + Reason: "AllStepsReady", + Message: "all pipeline steps are ready", + }) + } + + c.Pipelines[obj.Name] = initResult.Pipeline + c.PipelineConfigs[obj.Name] = *obj log.Info("pipeline created and ready", "pipelineName", obj.Name) meta.SetStatusCondition(&obj.Status.Conditions, metav1.Condition{ Type: v1alpha1.PipelineConditionReady, @@ -190,46 +240,7 @@ func (c *BasePipelineController[PipelineType]) HandlePipelineDeleted( delete(c.PipelineConfigs, pipelineConf.Name) } -// Check if a step is ready, and if not, return an error indicating why not. -func (c *BasePipelineController[PipelineType]) checkStepReady( - ctx context.Context, - obj *v1alpha1.StepSpec, -) error { - - log := ctrl.LoggerFrom(ctx) - // Check the status of all knowledges depending on this step. - readyKnowledges := 0 - totalKnowledges := len(obj.Knowledges) - for _, knowledgeRef := range obj.Knowledges { - knowledge := &v1alpha1.Knowledge{} - if err := c.Get(ctx, client.ObjectKey{ - Name: knowledgeRef.Name, - Namespace: knowledgeRef.Namespace, - }, knowledge); err != nil { - log.Error(err, "failed to get knowledge depending on step", "knowledgeName", knowledgeRef.Name) - continue - } - // Check if the knowledge status conditions indicate an error. - if meta.IsStatusConditionFalse(knowledge.Status.Conditions, v1alpha1.KnowledgeConditionReady) { - log.Info("knowledge not ready due to error condition", "knowledgeName", knowledgeRef.Name) - continue - } - if knowledge.Status.RawLength == 0 { - log.Info("knowledge not ready, no data available", "knowledgeName", knowledgeRef.Name) - continue - } - readyKnowledges++ - } - if readyKnowledges != totalKnowledges { - return fmt.Errorf( - "%d/%d knowledges ready", - readyKnowledges, totalKnowledges, - ) - } - return nil -} - -// Handle a knowledge creation, update, or delete event from watching knowledge resources. 
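With checkStepReady removed, readiness now hinges entirely on what the delegate's InitPipeline returns. The following is a minimal sketch of a conforming initializer; the controller type, the pipeline struct, and the known-step registries are invented for illustration, and only the PipelineInitResult / PipelineInitializer contract comes from this patch.

```go
package example

import (
	"context"
	"fmt"

	"github.com/cobaltcore-dev/cortex/api/v1alpha1"
	"github.com/cobaltcore-dev/cortex/internal/scheduling/lib"
)

// Hypothetical pipeline type and controller, used only for illustration.
type myPipeline struct {
	filterNames  []string
	weigherNames []string
}

type MyController struct {
	knownFilters  map[string]bool // step names this controller implements
	knownWeighers map[string]bool
}

// InitPipeline collects per-step errors instead of failing on the first one,
// matching the PipelineInitResult contract introduced here.
func (c *MyController) InitPipeline(ctx context.Context, p v1alpha1.Pipeline) lib.PipelineInitResult[myPipeline] {
	result := lib.PipelineInitResult[myPipeline]{
		FilterErrors:   map[string]error{},
		WeigherErrors:  map[string]error{},
		DetectorErrors: map[string]error{},
	}
	pipe := myPipeline{}
	for _, f := range p.Spec.Filters {
		if !c.knownFilters[f.Name] {
			result.FilterErrors[f.Name] = fmt.Errorf("unknown filter %q", f.Name)
			continue
		}
		pipe.filterNames = append(pipe.filterNames, f.Name)
	}
	for _, w := range p.Spec.Weighers {
		if !c.knownWeighers[w.Name] {
			result.WeigherErrors[w.Name] = fmt.Errorf("unknown weigher %q", w.Name)
			continue
		}
		pipe.weigherNames = append(pipe.weigherNames, w.Name)
	}
	result.Pipeline = pipe
	return result
}

func (c *MyController) PipelineType() v1alpha1.PipelineType {
	return v1alpha1.PipelineTypeFilterWeigher
}
```

Only filter failures are treated as fatal by the base controller, so an initializer like this can still hand back a usable pipeline even when some weighers or detectors end up in the error maps.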
+// Handle a knowledge creation, readiness update, or delete event from watching knowledge resources. func (c *BasePipelineController[PipelineType]) handleKnowledgeChange( ctx context.Context, obj *v1alpha1.Knowledge, @@ -240,27 +251,23 @@ func (c *BasePipelineController[PipelineType]) handleKnowledgeChange( return } log := ctrl.LoggerFrom(ctx) - log.Info("knowledge changed, re-evaluating dependent pipelines", "knowledgeName", obj.Name) + log.Info("knowledge changed, re-evaluating all pipelines", "knowledgeName", obj.Name) // Find all pipelines depending on this knowledge and re-evaluate them. var pipelines v1alpha1.PipelineList if err := c.List(ctx, &pipelines); err != nil { - log.Error(err, "failed to list pipelines for knowledge", "knowledgeName", obj.Name) + log.Error(err, "failed to list pipelines for knowledge change", "knowledgeName", obj.Name) return } for _, pipeline := range pipelines.Items { - needsUpdate := false - for _, step := range pipeline.Spec.Steps { - for _, knowledgeRef := range step.Knowledges { - if knowledgeRef.Name == obj.Name && knowledgeRef.Namespace == obj.Namespace { - needsUpdate = true - break - } - } + // TODO: Not all pipelines may depend on this knowledge. At the moment + // we re-evaluate all pipelines matching this controller. + if pipeline.Spec.SchedulingDomain != c.SchedulingDomain { + continue } - if needsUpdate { - log.Info("re-evaluating pipeline due to knowledge change", "pipelineName", pipeline.Name) - c.handlePipelineChange(ctx, &pipeline, queue) + if pipeline.Spec.Type != c.Initializer.PipelineType() { + continue } + c.handlePipelineChange(ctx, &pipeline, queue) } } diff --git a/internal/scheduling/lib/pipeline_controller_test.go b/internal/scheduling/lib/pipeline_controller_test.go index 9fb7b9b90..9166288fe 100644 --- a/internal/scheduling/lib/pipeline_controller_test.go +++ b/internal/scheduling/lib/pipeline_controller_test.go @@ -5,9 +5,9 @@ package lib import ( "context" + "errors" "testing" - corev1 "k8s.io/api/core/v1" "k8s.io/apimachinery/pkg/api/meta" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/runtime" @@ -18,28 +18,6 @@ import ( "github.com/cobaltcore-dev/cortex/api/v1alpha1" ) -// Mock pipeline type for testing -type mockPipeline struct { - name string -} - -// Mock PipelineInitializer for testing -type mockPipelineInitializer struct { - pipelineType v1alpha1.PipelineType - initPipelineFunc func(ctx context.Context, p v1alpha1.Pipeline) (mockPipeline, error) -} - -func (m *mockPipelineInitializer) InitPipeline(ctx context.Context, p v1alpha1.Pipeline) (mockPipeline, error) { - if m.initPipelineFunc != nil { - return m.initPipelineFunc(ctx, p) - } - return mockPipeline{name: p.Name}, nil -} - -func (m *mockPipelineInitializer) PipelineType() v1alpha1.PipelineType { - return m.pipelineType -} - func TestBasePipelineController_InitAllPipelines(t *testing.T) { scheme := runtime.NewScheme() if err := v1alpha1.AddToScheme(scheme); err != nil { @@ -72,7 +50,8 @@ func TestBasePipelineController_InitAllPipelines(t *testing.T) { Spec: v1alpha1.PipelineSpec{ SchedulingDomain: v1alpha1.SchedulingDomainNova, Type: v1alpha1.PipelineTypeFilterWeigher, - Steps: []v1alpha1.StepSpec{}, + Filters: []v1alpha1.FilterSpec{}, + Weighers: []v1alpha1.WeigherSpec{}, }, }, }, @@ -91,7 +70,8 @@ func TestBasePipelineController_InitAllPipelines(t *testing.T) { Spec: v1alpha1.PipelineSpec{ SchedulingDomain: v1alpha1.SchedulingDomainNova, Type: v1alpha1.PipelineTypeFilterWeigher, - Steps: []v1alpha1.StepSpec{}, + Filters: 
[]v1alpha1.FilterSpec{}, + Weighers: []v1alpha1.WeigherSpec{}, }, }, { @@ -101,7 +81,8 @@ func TestBasePipelineController_InitAllPipelines(t *testing.T) { Spec: v1alpha1.PipelineSpec{ SchedulingDomain: v1alpha1.SchedulingDomainCinder, Type: v1alpha1.PipelineTypeFilterWeigher, - Steps: []v1alpha1.StepSpec{}, + Filters: []v1alpha1.FilterSpec{}, + Weighers: []v1alpha1.WeigherSpec{}, }, }, { @@ -110,8 +91,9 @@ func TestBasePipelineController_InitAllPipelines(t *testing.T) { }, Spec: v1alpha1.PipelineSpec{ SchedulingDomain: v1alpha1.SchedulingDomainNova, - Type: v1alpha1.PipelineTypeDescheduler, - Steps: []v1alpha1.StepSpec{}, + Type: v1alpha1.PipelineTypeDetector, + Filters: []v1alpha1.FilterSpec{}, + Weighers: []v1alpha1.WeigherSpec{}, }, }, { @@ -121,7 +103,8 @@ func TestBasePipelineController_InitAllPipelines(t *testing.T) { Spec: v1alpha1.PipelineSpec{ SchedulingDomain: v1alpha1.SchedulingDomainNova, Type: v1alpha1.PipelineTypeFilterWeigher, - Steps: []v1alpha1.StepSpec{}, + Filters: []v1alpha1.FilterSpec{}, + Weighers: []v1alpha1.WeigherSpec{}, }, }, }, @@ -197,14 +180,14 @@ func TestBasePipelineController_handlePipelineChange(t *testing.T) { Spec: v1alpha1.PipelineSpec{ SchedulingDomain: v1alpha1.SchedulingDomainNova, Type: v1alpha1.PipelineTypeFilterWeigher, - Steps: []v1alpha1.StepSpec{ + Filters: []v1alpha1.FilterSpec{ { - Type: v1alpha1.StepTypeFilter, - Impl: "test-filter", - Mandatory: true, - Knowledges: []corev1.ObjectReference{ - {Name: "knowledge-1", Namespace: "default"}, - }, + Name: "test-filter", + }, + }, + Weighers: []v1alpha1.WeigherSpec{ + { + Name: "test-weigher", }, }, }, @@ -227,58 +210,6 @@ func TestBasePipelineController_handlePipelineChange(t *testing.T) { expectReady: true, expectInMap: true, }, - { - name: "pipeline with mandatory step not ready", - pipeline: &v1alpha1.Pipeline{ - ObjectMeta: metav1.ObjectMeta{ - Name: "test-pipeline-not-ready", - }, - Spec: v1alpha1.PipelineSpec{ - SchedulingDomain: v1alpha1.SchedulingDomainNova, - Type: v1alpha1.PipelineTypeFilterWeigher, - Steps: []v1alpha1.StepSpec{ - { - Type: v1alpha1.StepTypeFilter, - Impl: "test-filter", - Mandatory: true, - Knowledges: []corev1.ObjectReference{ - {Name: "missing-knowledge", Namespace: "default"}, - }, - }, - }, - }, - }, - knowledges: []v1alpha1.Knowledge{}, - schedulingDomain: v1alpha1.SchedulingDomainNova, - expectReady: false, - expectInMap: false, - }, - { - name: "pipeline with optional step not ready", - pipeline: &v1alpha1.Pipeline{ - ObjectMeta: metav1.ObjectMeta{ - Name: "test-pipeline-optional", - }, - Spec: v1alpha1.PipelineSpec{ - SchedulingDomain: v1alpha1.SchedulingDomainNova, - Type: v1alpha1.PipelineTypeFilterWeigher, - Steps: []v1alpha1.StepSpec{ - { - Type: v1alpha1.StepTypeFilter, - Impl: "test-filter", - Mandatory: false, - Knowledges: []corev1.ObjectReference{ - {Name: "missing-knowledge", Namespace: "default"}, - }, - }, - }, - }, - }, - knowledges: []v1alpha1.Knowledge{}, - schedulingDomain: v1alpha1.SchedulingDomainNova, - expectReady: true, - expectInMap: true, - }, { name: "pipeline init fails", pipeline: &v1alpha1.Pipeline{ @@ -288,7 +219,7 @@ func TestBasePipelineController_handlePipelineChange(t *testing.T) { Spec: v1alpha1.PipelineSpec{ SchedulingDomain: v1alpha1.SchedulingDomainNova, Type: v1alpha1.PipelineTypeFilterWeigher, - Steps: []v1alpha1.StepSpec{}, + Weighers: []v1alpha1.WeigherSpec{}, }, }, knowledges: []v1alpha1.Knowledge{}, @@ -306,7 +237,7 @@ func TestBasePipelineController_handlePipelineChange(t *testing.T) { Spec: v1alpha1.PipelineSpec{ 
SchedulingDomain: v1alpha1.SchedulingDomainCinder, Type: v1alpha1.PipelineTypeFilterWeigher, - Steps: []v1alpha1.StepSpec{}, + Weighers: []v1alpha1.WeigherSpec{}, }, }, knowledges: []v1alpha1.Knowledge{}, @@ -314,51 +245,6 @@ func TestBasePipelineController_handlePipelineChange(t *testing.T) { expectReady: false, expectInMap: false, }, - { - name: "pipeline with knowledge in error state", - pipeline: &v1alpha1.Pipeline{ - ObjectMeta: metav1.ObjectMeta{ - Name: "test-pipeline-knowledge-error", - }, - Spec: v1alpha1.PipelineSpec{ - SchedulingDomain: v1alpha1.SchedulingDomainNova, - Type: v1alpha1.PipelineTypeFilterWeigher, - Steps: []v1alpha1.StepSpec{ - { - Type: v1alpha1.StepTypeFilter, - Impl: "test-filter", - Mandatory: true, - Knowledges: []corev1.ObjectReference{ - {Name: "error-knowledge", Namespace: "default"}, - }, - }, - }, - }, - }, - knowledges: []v1alpha1.Knowledge{ - { - ObjectMeta: metav1.ObjectMeta{ - Name: "error-knowledge", - Namespace: "default", - }, - Spec: v1alpha1.KnowledgeSpec{ - SchedulingDomain: v1alpha1.SchedulingDomainNova, - }, - Status: v1alpha1.KnowledgeStatus{ - RawLength: 10, - Conditions: []metav1.Condition{ - { - Type: v1alpha1.KnowledgeConditionReady, - Status: metav1.ConditionFalse, - }, - }, - }, - }, - }, - schedulingDomain: v1alpha1.SchedulingDomainNova, - expectReady: false, - expectInMap: false, - }, } for _, tt := range tests { @@ -379,8 +265,12 @@ func TestBasePipelineController_handlePipelineChange(t *testing.T) { } if tt.initPipelineError { - initializer.initPipelineFunc = func(ctx context.Context, p v1alpha1.Pipeline) (mockPipeline, error) { - return mockPipeline{}, context.Canceled + initializer.initPipelineFunc = func(ctx context.Context, p v1alpha1.Pipeline) PipelineInitResult[mockPipeline] { + return PipelineInitResult[mockPipeline]{ + FilterErrors: map[string]error{ + "test-filter": errors.New("failed to init filter"), + }, + } } } @@ -429,7 +319,8 @@ func TestBasePipelineController_HandlePipelineCreated(t *testing.T) { Spec: v1alpha1.PipelineSpec{ SchedulingDomain: v1alpha1.SchedulingDomainNova, Type: v1alpha1.PipelineTypeFilterWeigher, - Steps: []v1alpha1.StepSpec{}, + Filters: []v1alpha1.FilterSpec{}, + Weighers: []v1alpha1.WeigherSpec{}, }, } @@ -473,7 +364,8 @@ func TestBasePipelineController_HandlePipelineUpdated(t *testing.T) { Spec: v1alpha1.PipelineSpec{ SchedulingDomain: v1alpha1.SchedulingDomainNova, Type: v1alpha1.PipelineTypeFilterWeigher, - Steps: []v1alpha1.StepSpec{}, + Filters: []v1alpha1.FilterSpec{}, + Weighers: []v1alpha1.WeigherSpec{}, }, } @@ -547,205 +439,6 @@ func TestBasePipelineController_HandlePipelineDeleted(t *testing.T) { } } -func TestBasePipelineController_checkStepReady(t *testing.T) { - scheme := runtime.NewScheme() - if err := v1alpha1.AddToScheme(scheme); err != nil { - t.Fatalf("Failed to add v1alpha1 scheme: %v", err) - } - - tests := []struct { - name string - step v1alpha1.StepSpec - knowledges []v1alpha1.Knowledge - expectError bool - }{ - { - name: "step with no knowledge dependencies", - step: v1alpha1.StepSpec{ - Type: v1alpha1.StepTypeFilter, - Impl: "test-filter", - Knowledges: []corev1.ObjectReference{}, - }, - knowledges: []v1alpha1.Knowledge{}, - expectError: false, - }, - { - name: "step with ready knowledge", - step: v1alpha1.StepSpec{ - Type: v1alpha1.StepTypeFilter, - Impl: "test-filter", - Knowledges: []corev1.ObjectReference{ - {Name: "ready-knowledge", Namespace: "default"}, - }, - }, - knowledges: []v1alpha1.Knowledge{ - { - ObjectMeta: metav1.ObjectMeta{ - Name: "ready-knowledge", - 
Namespace: "default", - }, - Status: v1alpha1.KnowledgeStatus{ - RawLength: 10, - }, - }, - }, - expectError: false, - }, - { - name: "step with knowledge in error state", - step: v1alpha1.StepSpec{ - Type: v1alpha1.StepTypeFilter, - Impl: "test-filter", - Knowledges: []corev1.ObjectReference{ - {Name: "error-knowledge", Namespace: "default"}, - }, - }, - knowledges: []v1alpha1.Knowledge{ - { - ObjectMeta: metav1.ObjectMeta{ - Name: "error-knowledge", - Namespace: "default", - }, - Status: v1alpha1.KnowledgeStatus{ - Conditions: []metav1.Condition{ - { - Type: v1alpha1.KnowledgeConditionReady, - Status: metav1.ConditionFalse, - }, - }, - }, - }, - }, - expectError: true, - }, - { - name: "step with knowledge with no data", - step: v1alpha1.StepSpec{ - Type: v1alpha1.StepTypeFilter, - Impl: "test-filter", - Knowledges: []corev1.ObjectReference{ - {Name: "no-data-knowledge", Namespace: "default"}, - }, - }, - knowledges: []v1alpha1.Knowledge{ - { - ObjectMeta: metav1.ObjectMeta{ - Name: "no-data-knowledge", - Namespace: "default", - }, - Status: v1alpha1.KnowledgeStatus{ - RawLength: 0, - }, - }, - }, - expectError: true, - }, - { - name: "step with missing knowledge", - step: v1alpha1.StepSpec{ - Type: v1alpha1.StepTypeFilter, - Impl: "test-filter", - Knowledges: []corev1.ObjectReference{ - {Name: "missing-knowledge", Namespace: "default"}, - }, - }, - knowledges: []v1alpha1.Knowledge{}, - expectError: true, - }, - { - name: "step with multiple knowledges, all ready", - step: v1alpha1.StepSpec{ - Type: v1alpha1.StepTypeFilter, - Impl: "test-filter", - Knowledges: []corev1.ObjectReference{ - {Name: "knowledge-1", Namespace: "default"}, - {Name: "knowledge-2", Namespace: "default"}, - }, - }, - knowledges: []v1alpha1.Knowledge{ - { - ObjectMeta: metav1.ObjectMeta{ - Name: "knowledge-1", - Namespace: "default", - }, - Status: v1alpha1.KnowledgeStatus{ - RawLength: 10, - }, - }, - { - ObjectMeta: metav1.ObjectMeta{ - Name: "knowledge-2", - Namespace: "default", - }, - Status: v1alpha1.KnowledgeStatus{ - RawLength: 5, - }, - }, - }, - expectError: false, - }, - { - name: "step with multiple knowledges, some not ready", - step: v1alpha1.StepSpec{ - Type: v1alpha1.StepTypeFilter, - Impl: "test-filter", - Knowledges: []corev1.ObjectReference{ - {Name: "ready-knowledge", Namespace: "default"}, - {Name: "not-ready-knowledge", Namespace: "default"}, - }, - }, - knowledges: []v1alpha1.Knowledge{ - { - ObjectMeta: metav1.ObjectMeta{ - Name: "ready-knowledge", - Namespace: "default", - }, - Status: v1alpha1.KnowledgeStatus{ - RawLength: 10, - }, - }, - { - ObjectMeta: metav1.ObjectMeta{ - Name: "not-ready-knowledge", - Namespace: "default", - }, - Status: v1alpha1.KnowledgeStatus{ - RawLength: 0, - }, - }, - }, - expectError: true, - }, - } - - for _, tt := range tests { - t.Run(tt.name, func(t *testing.T) { - objects := make([]client.Object, len(tt.knowledges)) - for i := range tt.knowledges { - objects[i] = &tt.knowledges[i] - } - - fakeClient := fake.NewClientBuilder(). - WithScheme(scheme). - WithObjects(objects...). 
- Build() - - controller := &BasePipelineController[mockPipeline]{ - Client: fakeClient, - } - - err := controller.checkStepReady(context.Background(), &tt.step) - - if tt.expectError && err == nil { - t.Error("Expected error but got none") - } - if !tt.expectError && err != nil { - t.Errorf("Expected no error but got: %v", err) - } - }) - } -} - func TestBasePipelineController_handleKnowledgeChange(t *testing.T) { scheme := runtime.NewScheme() if err := v1alpha1.AddToScheme(scheme); err != nil { @@ -760,7 +453,7 @@ func TestBasePipelineController_handleKnowledgeChange(t *testing.T) { expectReEvaluated []string }{ { - name: "knowledge change triggers dependent pipeline re-evaluation", + name: "knowledge change triggers pipeline re-evaluation", knowledge: &v1alpha1.Knowledge{ ObjectMeta: metav1.ObjectMeta{ Name: "test-knowledge", @@ -776,43 +469,35 @@ func TestBasePipelineController_handleKnowledgeChange(t *testing.T) { pipelines: []v1alpha1.Pipeline{ { ObjectMeta: metav1.ObjectMeta{ - Name: "dependent-pipeline", + Name: "pipeline-1", }, Spec: v1alpha1.PipelineSpec{ SchedulingDomain: v1alpha1.SchedulingDomainNova, Type: v1alpha1.PipelineTypeFilterWeigher, - Steps: []v1alpha1.StepSpec{ + Weighers: []v1alpha1.WeigherSpec{ { - Type: v1alpha1.StepTypeFilter, - Impl: "test-filter", - Knowledges: []corev1.ObjectReference{ - {Name: "test-knowledge", Namespace: "default"}, - }, + Name: "test-weigher", }, }, }, }, { ObjectMeta: metav1.ObjectMeta{ - Name: "independent-pipeline", + Name: "pipeline-2", }, Spec: v1alpha1.PipelineSpec{ SchedulingDomain: v1alpha1.SchedulingDomainNova, Type: v1alpha1.PipelineTypeFilterWeigher, - Steps: []v1alpha1.StepSpec{ + Weighers: []v1alpha1.WeigherSpec{ { - Type: v1alpha1.StepTypeFilter, - Impl: "test-filter", - Knowledges: []corev1.ObjectReference{ - {Name: "other-knowledge", Namespace: "default"}, - }, + Name: "test-weigher", }, }, }, }, }, schedulingDomain: v1alpha1.SchedulingDomainNova, - expectReEvaluated: []string{"dependent-pipeline"}, + expectReEvaluated: []string{"pipeline-1", "pipeline-2"}, }, { name: "knowledge change in different scheduling domain", @@ -833,13 +518,9 @@ func TestBasePipelineController_handleKnowledgeChange(t *testing.T) { Spec: v1alpha1.PipelineSpec{ SchedulingDomain: v1alpha1.SchedulingDomainNova, Type: v1alpha1.PipelineTypeFilterWeigher, - Steps: []v1alpha1.StepSpec{ + Weighers: []v1alpha1.WeigherSpec{ { - Type: v1alpha1.StepTypeFilter, - Impl: "test-filter", - Knowledges: []corev1.ObjectReference{ - {Name: "test-knowledge", Namespace: "default"}, - }, + Name: "test-weigher", }, }, }, @@ -911,13 +592,9 @@ func TestBasePipelineController_HandleKnowledgeCreated(t *testing.T) { Spec: v1alpha1.PipelineSpec{ SchedulingDomain: v1alpha1.SchedulingDomainNova, Type: v1alpha1.PipelineTypeFilterWeigher, - Steps: []v1alpha1.StepSpec{ + Weighers: []v1alpha1.WeigherSpec{ { - Type: v1alpha1.StepTypeFilter, - Impl: "test-filter", - Knowledges: []corev1.ObjectReference{ - {Name: "test-knowledge", Namespace: "default"}, - }, + Name: "test-weigher", }, }, }, @@ -1063,13 +740,9 @@ func TestBasePipelineController_HandleKnowledgeUpdated(t *testing.T) { Spec: v1alpha1.PipelineSpec{ SchedulingDomain: v1alpha1.SchedulingDomainNova, Type: v1alpha1.PipelineTypeFilterWeigher, - Steps: []v1alpha1.StepSpec{ + Weighers: []v1alpha1.WeigherSpec{ { - Type: v1alpha1.StepTypeFilter, - Impl: "test-filter", - Knowledges: []corev1.ObjectReference{ - {Name: "test-knowledge", Namespace: "default"}, - }, + Name: "test-weigher", }, }, }, @@ -1132,14 +805,9 @@ func 
TestBasePipelineController_HandleKnowledgeDeleted(t *testing.T) { Spec: v1alpha1.PipelineSpec{ SchedulingDomain: v1alpha1.SchedulingDomainNova, Type: v1alpha1.PipelineTypeFilterWeigher, - Steps: []v1alpha1.StepSpec{ + Weighers: []v1alpha1.WeigherSpec{ { - Type: v1alpha1.StepTypeFilter, - Impl: "test-filter", - Mandatory: true, - Knowledges: []corev1.ObjectReference{ - {Name: "test-knowledge", Namespace: "default"}, - }, + Name: "test-weigher", }, }, }, @@ -1169,10 +837,8 @@ func TestBasePipelineController_HandleKnowledgeDeleted(t *testing.T) { controller.HandleKnowledgeDeleted(context.Background(), evt, nil) - // When knowledge is deleted, the pipeline is re-evaluated. - // Since the knowledge is now missing and the step is mandatory, - // the pipeline should be removed from the map. - if _, exists := controller.Pipelines[pipeline.Name]; exists { - t.Error("Expected pipeline to be removed after knowledge deletion due to mandatory step") + // Check that the pipeline was re-evaluated and is still in the map + if _, exists := controller.Pipelines[pipeline.Name]; !exists { + t.Error("Expected pipeline to be re-evaluated after knowledge deletion") } } diff --git a/internal/scheduling/lib/pipeline_initializer.go b/internal/scheduling/lib/pipeline_initializer.go new file mode 100644 index 000000000..b51d72e4e --- /dev/null +++ b/internal/scheduling/lib/pipeline_initializer.go @@ -0,0 +1,41 @@ +// Copyright SAP SE +// SPDX-License-Identifier: Apache-2.0 + +package lib + +import ( + "context" + + "github.com/cobaltcore-dev/cortex/api/v1alpha1" +) + +// Result returned by the InitPipeline interface method. +type PipelineInitResult[PipelineType any] struct { + // The pipeline, if successfully created. + Pipeline PipelineType + + // Errors for filters, if any, by their name. + FilterErrors map[string]error + // Errors for weighers, if any, by their name. + WeigherErrors map[string]error + // Errors for detectors, if any, by their name. + DetectorErrors map[string]error +} + +// The base pipeline controller will delegate some methods to the parent +// controller struct. The parent controller only needs to conform to this +// interface and set the delegate field accordingly. +type PipelineInitializer[PipelineType any] interface { + // Initialize a new pipeline with the given steps. + // + // This method is delegated to the parent controller, when a pipeline needs + // to be newly initialized or re-initialized to update it in the pipeline + // map. + InitPipeline(ctx context.Context, p v1alpha1.Pipeline) PipelineInitResult[PipelineType] + + // Get the accepted pipeline type for this controller. + // + // This is used to filter pipelines when listing existing pipelines on + // startup or when reacting to pipeline events. 
+ PipelineType() v1alpha1.PipelineType +} diff --git a/internal/scheduling/lib/pipeline_initializer_test.go b/internal/scheduling/lib/pipeline_initializer_test.go new file mode 100644 index 000000000..f7764a245 --- /dev/null +++ b/internal/scheduling/lib/pipeline_initializer_test.go @@ -0,0 +1,30 @@ +// Copyright SAP SE +// SPDX-License-Identifier: Apache-2.0 + +package lib + +import ( + "context" + + "github.com/cobaltcore-dev/cortex/api/v1alpha1" +) + +// Mock PipelineInitializer for testing +type mockPipelineInitializer struct { + pipelineType v1alpha1.PipelineType + initPipelineFunc func(ctx context.Context, p v1alpha1.Pipeline) PipelineInitResult[mockPipeline] +} + +func (m *mockPipelineInitializer) InitPipeline( + ctx context.Context, p v1alpha1.Pipeline, +) PipelineInitResult[mockPipeline] { + + if m.initPipelineFunc != nil { + return m.initPipelineFunc(ctx, p) + } + return PipelineInitResult[mockPipeline]{Pipeline: mockPipeline{name: p.Name}} +} + +func (m *mockPipelineInitializer) PipelineType() v1alpha1.PipelineType { + return m.pipelineType +} diff --git a/internal/scheduling/lib/pipeline_monitor_test.go b/internal/scheduling/lib/pipeline_monitor_test.go deleted file mode 100644 index a0c009293..000000000 --- a/internal/scheduling/lib/pipeline_monitor_test.go +++ /dev/null @@ -1,117 +0,0 @@ -// Copyright SAP SE -// SPDX-License-Identifier: Apache-2.0 - -package lib - -import ( - "strings" - "testing" - - "github.com/cobaltcore-dev/cortex/pkg/monitoring" - "github.com/prometheus/client_golang/prometheus" - "github.com/prometheus/client_golang/prometheus/testutil" -) - -func TestSchedulerMonitor(t *testing.T) { - registry := &monitoring.Registry{RegistererGatherer: prometheus.NewRegistry()} - monitor := NewPipelineMonitor().SubPipeline("test") - registry.MustRegister(&monitor) - - // Test stepRunTimer - expectedStepRunTimer := strings.NewReader(` - # HELP cortex_scheduler_pipeline_step_run_duration_seconds Duration of scheduler pipeline step run - # TYPE cortex_scheduler_pipeline_step_run_duration_seconds histogram - cortex_scheduler_pipeline_step_run_duration_seconds_bucket{pipeline="test",step="test_step",le="0.005"} 1 - cortex_scheduler_pipeline_step_run_duration_seconds_bucket{pipeline="test",step="test_step",le="0.01"} 1 - cortex_scheduler_pipeline_step_run_duration_seconds_bucket{pipeline="test",step="test_step",le="0.025"} 1 - cortex_scheduler_pipeline_step_run_duration_seconds_bucket{pipeline="test",step="test_step",le="0.05"} 1 - cortex_scheduler_pipeline_step_run_duration_seconds_bucket{pipeline="test",step="test_step",le="0.1"} 1 - cortex_scheduler_pipeline_step_run_duration_seconds_bucket{pipeline="test",step="test_step",le="0.25"} 1 - cortex_scheduler_pipeline_step_run_duration_seconds_bucket{pipeline="test",step="test_step",le="0.5"} 1 - cortex_scheduler_pipeline_step_run_duration_seconds_bucket{pipeline="test",step="test_step",le="1"} 1 - cortex_scheduler_pipeline_step_run_duration_seconds_bucket{pipeline="test",step="test_step",le="2.5"} 1 - cortex_scheduler_pipeline_step_run_duration_seconds_bucket{pipeline="test",step="test_step",le="5"} 1 - cortex_scheduler_pipeline_step_run_duration_seconds_bucket{pipeline="test",step="test_step",le="10"} 1 - cortex_scheduler_pipeline_step_run_duration_seconds_bucket{pipeline="test",step="test_step",le="+Inf"} 1 - cortex_scheduler_pipeline_step_run_duration_seconds_sum{pipeline="test",step="test_step"} 0 - cortex_scheduler_pipeline_step_run_duration_seconds_count{pipeline="test",step="test_step"} 1 - `) - 
monitor.stepRunTimer.WithLabelValues("test", "test_step").Observe(0) - err := testutil.GatherAndCompare(registry, expectedStepRunTimer, "cortex_scheduler_pipeline_step_run_duration_seconds") - if err != nil { - t.Fatalf("stepRunTimer test failed: %v", err) - } - - // Test stepSubjectWeight - expectedStepSubjectWeight := strings.NewReader(` - # HELP cortex_scheduler_pipeline_step_weight_modification Modification of subject weight by scheduler pipeline step - # TYPE cortex_scheduler_pipeline_step_weight_modification gauge - cortex_scheduler_pipeline_step_weight_modification{pipeline="test",step="test_step",subject="test_subject"} 42 - `) - monitor.stepSubjectWeight.WithLabelValues("test", "test_subject", "test_step").Set(42) - err = testutil.GatherAndCompare(registry, expectedStepSubjectWeight, "cortex_scheduler_pipeline_step_weight_modification") - if err != nil { - t.Fatalf("stepSubjectWeight test failed: %v", err) - } - - // Test stepRemovedSubjectsObserver - expectedRemovedSubjectsObserver := strings.NewReader(` - # HELP cortex_scheduler_pipeline_step_removed_subjects Number of subjects removed by scheduler pipeline step - # TYPE cortex_scheduler_pipeline_step_removed_subjects histogram - cortex_scheduler_pipeline_step_removed_subjects_bucket{pipeline="test",step="test_step",le="1"} 1 - cortex_scheduler_pipeline_step_removed_subjects_bucket{pipeline="test",step="test_step",le="2.154434690031884"} 1 - cortex_scheduler_pipeline_step_removed_subjects_bucket{pipeline="test",step="test_step",le="4.641588833612779"} 1 - cortex_scheduler_pipeline_step_removed_subjects_bucket{pipeline="test",step="test_step",le="10.000000000000002"} 1 - cortex_scheduler_pipeline_step_removed_subjects_bucket{pipeline="test",step="test_step",le="21.544346900318843"} 1 - cortex_scheduler_pipeline_step_removed_subjects_bucket{pipeline="test",step="test_step",le="46.4158883361278"} 1 - cortex_scheduler_pipeline_step_removed_subjects_bucket{pipeline="test",step="test_step",le="100.00000000000003"} 1 - cortex_scheduler_pipeline_step_removed_subjects_bucket{pipeline="test",step="test_step",le="215.44346900318845"} 1 - cortex_scheduler_pipeline_step_removed_subjects_bucket{pipeline="test",step="test_step",le="464.15888336127813"} 1 - cortex_scheduler_pipeline_step_removed_subjects_bucket{pipeline="test",step="test_step",le="1000.0000000000006"} 1 - cortex_scheduler_pipeline_step_removed_subjects_bucket{pipeline="test",step="test_step",le="+Inf"} 1 - cortex_scheduler_pipeline_step_removed_subjects_sum{pipeline="test",step="test_step"} 1 - cortex_scheduler_pipeline_step_removed_subjects_count{pipeline="test",step="test_step"} 1 - `) - monitor.stepRemovedSubjectsObserver.WithLabelValues("test", "test_step").Observe(1) - err = testutil.GatherAndCompare(registry, expectedRemovedSubjectsObserver, "cortex_scheduler_pipeline_step_removed_subjects") - if err != nil { - t.Fatalf("stepRemovedSubjectsObserver test failed: %v", err) - } - - // Test pipelineRunTimer - expectedPipelineRunTimer := strings.NewReader(` - # HELP cortex_scheduler_pipeline_run_duration_seconds Duration of scheduler pipeline run - # TYPE cortex_scheduler_pipeline_run_duration_seconds histogram - cortex_scheduler_pipeline_run_duration_seconds_bucket{pipeline="test",le="0.005"} 1 - cortex_scheduler_pipeline_run_duration_seconds_bucket{pipeline="test",le="0.01"} 1 - cortex_scheduler_pipeline_run_duration_seconds_bucket{pipeline="test",le="0.025"} 1 - cortex_scheduler_pipeline_run_duration_seconds_bucket{pipeline="test",le="0.05"} 1 - 
cortex_scheduler_pipeline_run_duration_seconds_bucket{pipeline="test",le="0.1"} 1 - cortex_scheduler_pipeline_run_duration_seconds_bucket{pipeline="test",le="0.25"} 1 - cortex_scheduler_pipeline_run_duration_seconds_bucket{pipeline="test",le="0.5"} 1 - cortex_scheduler_pipeline_run_duration_seconds_bucket{pipeline="test",le="1"} 1 - cortex_scheduler_pipeline_run_duration_seconds_bucket{pipeline="test",le="2.5"} 1 - cortex_scheduler_pipeline_run_duration_seconds_bucket{pipeline="test",le="5"} 1 - cortex_scheduler_pipeline_run_duration_seconds_bucket{pipeline="test",le="10"} 1 - cortex_scheduler_pipeline_run_duration_seconds_bucket{pipeline="test",le="+Inf"} 1 - cortex_scheduler_pipeline_run_duration_seconds_sum{pipeline="test"} 0 - cortex_scheduler_pipeline_run_duration_seconds_count{pipeline="test"} 1 - `) - monitor.pipelineRunTimer.WithLabelValues("test").Observe(0) - err = testutil.GatherAndCompare(registry, expectedPipelineRunTimer, "cortex_scheduler_pipeline_run_duration_seconds") - if err != nil { - t.Fatalf("pipelineRunTimer test failed: %v", err) - } - - // Test requestCounter - expectedRequestCounter := strings.NewReader(` - # HELP cortex_scheduler_pipeline_requests_total Total number of requests processed by the scheduler. - # TYPE cortex_scheduler_pipeline_requests_total counter - cortex_scheduler_pipeline_requests_total{pipeline="test"} 3 - `) - monitor.requestCounter.WithLabelValues("test").Add(3) - err = testutil.GatherAndCompare(registry, expectedRequestCounter, "cortex_scheduler_pipeline_requests_total") - if err != nil { - t.Fatalf("requestCounter test failed: %v", err) - } -} diff --git a/internal/scheduling/lib/pipeline_test.go b/internal/scheduling/lib/pipeline_test.go deleted file mode 100644 index 9a8651384..000000000 --- a/internal/scheduling/lib/pipeline_test.go +++ /dev/null @@ -1,215 +0,0 @@ -// Copyright SAP SE -// SPDX-License-Identifier: Apache-2.0 - -package lib - -import ( - "context" - "log/slog" - "math" - "testing" - - "github.com/cobaltcore-dev/cortex/api/v1alpha1" - "sigs.k8s.io/controller-runtime/pkg/client" -) - -type mockPipelineStep struct { - err error - name string -} - -func (m *mockPipelineStep) Init(ctx context.Context, client client.Client, step v1alpha1.StepSpec) error { - return nil -} - -func (m *mockPipelineStep) Run(traceLog *slog.Logger, request mockPipelineRequest) (*StepResult, error) { - if m.err != nil { - return nil, m.err - } - return &StepResult{ - Activations: map[string]float64{"host1": 0.0, "host2": 1.0}, - }, nil -} - -func TestPipeline_Run(t *testing.T) { - // Create an instance of the pipeline with a mock step - pipeline := &pipeline[mockPipelineRequest]{ - steps: map[string]Step[mockPipelineRequest]{ - "mock_pipeline_step": &mockPipelineStep{ - name: "mock_pipeline_step", - }, - }, - order: []string{ - "mock_pipeline_step", - }, - } - - tests := []struct { - name string - request mockPipelineRequest - expectedResult []string - }{ - { - name: "Single step pipeline", - request: mockPipelineRequest{ - Subjects: []string{"host1", "host2", "host3"}, - Weights: map[string]float64{"host1": 0.0, "host2": 0.0, "host3": 0.0}, - }, - expectedResult: []string{"host2", "host1"}, - }, - } - - for _, tt := range tests { - t.Run(tt.name, func(t *testing.T) { - result, err := pipeline.Run(tt.request) - if err != nil { - t.Fatalf("expected no error, got %v", err) - } - if len(result.OrderedHosts) != len(tt.expectedResult) { - t.Fatalf("expected %d results, got %d", len(tt.expectedResult), len(result.OrderedHosts)) - } - for i, host := range 
tt.expectedResult { - if result.OrderedHosts[i] != host { - t.Errorf("expected host %s at position %d, got %s", host, i, result.OrderedHosts[i]) - } - } - }) - } -} - -func TestPipeline_NormalizeNovaWeights(t *testing.T) { - p := &pipeline[mockPipelineRequest]{} - - tests := []struct { - name string - weights map[string]float64 - expected map[string]float64 - }{ - { - name: "Normalize weights", - weights: map[string]float64{ - "host1": 1000.0, - "host2": -1000.0, - "host3": 0.0, - }, - expected: map[string]float64{ - "host1": 1.0, - "host2": -1.0, - "host3": 0.0, - }, - }, - } - - for _, tt := range tests { - t.Run(tt.name, func(t *testing.T) { - result := p.normalizeInputWeights(tt.weights) - for host, weight := range tt.expected { - if result[host] != weight { - t.Errorf("expected weight %f for host %s, got %f", weight, host, result[host]) - } - } - }) - } -} - -func TestPipeline_ApplyStepWeights(t *testing.T) { - p := &pipeline[mockPipelineRequest]{ - steps: map[string]Step[mockPipelineRequest]{}, - order: []string{"step1", "step2"}, - } - - tests := []struct { - name string - stepWeights map[string]map[string]float64 - inWeights map[string]float64 - expectedResult map[string]float64 - }{ - { - name: "Apply step weights", - stepWeights: map[string]map[string]float64{ - "step1": {"host1": 0.5, "host2": 0.2}, - "step2": {"host1": 0.3, "host2": 0.4}, - }, - inWeights: map[string]float64{ - "host1": 1.0, - "host2": 1.0, - }, - expectedResult: map[string]float64{ - "host1": 1.0 + math.Tanh(0.5) + math.Tanh(0.3), - "host2": 1.0 + math.Tanh(0.2) + math.Tanh(0.4), - }, - }, - } - - for _, tt := range tests { - t.Run(tt.name, func(t *testing.T) { - result := p.applyStepWeights(tt.stepWeights, tt.inWeights) - for host, weight := range tt.expectedResult { - if result[host] != weight { - t.Errorf("expected weight %f for host %s, got %f", weight, host, result[host]) - } - } - }) - } -} - -func TestPipeline_SortHostsByWeights(t *testing.T) { - p := &pipeline[mockPipelineRequest]{} - - tests := []struct { - name string - weights map[string]float64 - expected []string - }{ - { - name: "Sort hosts by weights", - weights: map[string]float64{ - "host1": 0.5, - "host2": 1.0, - "host3": 0.2, - }, - expected: []string{"host2", "host1", "host3"}, - }, - } - - for _, tt := range tests { - t.Run(tt.name, func(t *testing.T) { - result := p.sortSubjectsByWeights(tt.weights) - for i, host := range tt.expected { - if result[i] != host { - t.Errorf("expected host %s at position %d, got %s", host, i, result[i]) - } - } - }) - } -} - -func TestPipeline_RunSteps(t *testing.T) { - mockStep := &mockPipelineStep{ - name: "mock_pipeline_step", - } - p := &pipeline[mockPipelineRequest]{ - order: []string{ - "mock_pipeline_step", - }, - steps: map[string]Step[mockPipelineRequest]{ - "mock_pipeline_step": mockStep, - }, - } - - request := mockPipelineRequest{ - Subjects: []string{"host1", "host2"}, - Weights: map[string]float64{"host1": 0.0, "host2": 0.0}, - } - - result := p.runSteps(slog.Default(), request) - if len(result) != 1 { - t.Fatalf("expected 1 step result, got %d", len(result)) - } - if _, ok := result["mock_pipeline_step"]; !ok { - t.Fatalf("expected result for step 'mock_pipeline_step'") - } - if result["mock_pipeline_step"]["host2"] != 1.0 { - t.Errorf("expected weight 1.0 for host2, got %f", result["mock_pipeline_step"]["host2"]) - } -} diff --git a/internal/scheduling/lib/request.go b/internal/scheduling/lib/request.go deleted file mode 100644 index 6ad7ef1c6..000000000 --- 
a/internal/scheduling/lib/request.go +++ /dev/null @@ -1,16 +0,0 @@ -// Copyright SAP SE -// SPDX-License-Identifier: Apache-2.0 - -package lib - -import "log/slog" - -type PipelineRequest interface { - // Get the subjects that went in the pipeline. - GetSubjects() []string - // Get the weights for the subjects. - GetWeights() map[string]float64 - // Get logging args to be used in the step's trace log. - // Usually, this will be the request context including the request ID. - GetTraceLogArgs() []slog.Attr -} diff --git a/internal/scheduling/lib/request_test.go b/internal/scheduling/lib/request_test.go deleted file mode 100644 index 8706d81f8..000000000 --- a/internal/scheduling/lib/request_test.go +++ /dev/null @@ -1,24 +0,0 @@ -// Copyright SAP SE -// SPDX-License-Identifier: Apache-2.0 - -package lib - -import "log/slog" - -type mockPipelineRequest struct { - WeightKeys []string - TraceLogArgs []slog.Attr - Subjects []string - Weights map[string]float64 - Pipeline string -} - -func (m mockPipelineRequest) GetWeightKeys() []string { return m.WeightKeys } -func (m mockPipelineRequest) GetTraceLogArgs() []slog.Attr { return m.TraceLogArgs } -func (m mockPipelineRequest) GetSubjects() []string { return m.Subjects } -func (m mockPipelineRequest) GetWeights() map[string]float64 { return m.Weights } -func (m mockPipelineRequest) GetPipeline() string { return m.Pipeline } -func (m mockPipelineRequest) WithPipeline(pipeline string) PipelineRequest { - m.Pipeline = pipeline - return m -} diff --git a/internal/scheduling/lib/step.go b/internal/scheduling/lib/step.go deleted file mode 100644 index a25c55a2a..000000000 --- a/internal/scheduling/lib/step.go +++ /dev/null @@ -1,85 +0,0 @@ -// Copyright SAP SE -// SPDX-License-Identifier: Apache-2.0 - -package lib - -import ( - "context" - "errors" - "log/slog" - - "github.com/cobaltcore-dev/cortex/api/v1alpha1" - "github.com/cobaltcore-dev/cortex/pkg/conf" - "sigs.k8s.io/controller-runtime/pkg/client" -) - -var ( - // This error is returned from the step at any time when the step should be skipped. - ErrStepSkipped = errors.New("step skipped") -) - -// Interface to which step options must conform. -type StepOpts interface { - // Validate the options for this step. - Validate() error -} - -// Empty step opts conforming to the StepOpts interface (validation always succeeds). -type EmptyStepOpts struct{} - -func (EmptyStepOpts) Validate() error { return nil } - -// Interface for a scheduler step. -type Step[RequestType PipelineRequest] interface { - // Configure the step and initialize things like a database connection. - Init(ctx context.Context, client client.Client, step v1alpha1.StepSpec) error - // Run this step of the scheduling pipeline. - // Return a map of keys to activation values. Important: keys that are - // not in the map are considered as filtered out. - // Provide a traceLog that contains the global request id and should - // be used to log the step's execution. - Run(traceLog *slog.Logger, request RequestType) (*StepResult, error) -} - -// Common base for all steps that provides some functionality -// that would otherwise be duplicated across all steps. -type BaseStep[RequestType PipelineRequest, Opts StepOpts] struct { - // Options to pass via yaml to this step. - conf.JsonOpts[Opts] - // The activation function to use. - ActivationFunction - // The kubernetes client to use. - Client client.Client -} - -// Init the step with the database and options. 
-func (s *BaseStep[RequestType, Opts]) Init(ctx context.Context, client client.Client, step v1alpha1.StepSpec) error { - opts := conf.NewRawOptsBytes(step.Opts.Raw) - if err := s.Load(opts); err != nil { - return err - } - if err := s.Options.Validate(); err != nil { - return err - } - - s.Client = client - return nil -} - -// Get a default result (no action) for the input weight keys given in the request. -func (s *BaseStep[RequestType, Opts]) PrepareResult(request RequestType) *StepResult { - activations := make(map[string]float64) - for _, subject := range request.GetSubjects() { - activations[subject] = s.NoEffect() - } - stats := make(map[string]StepStatistics) - return &StepResult{Activations: activations, Statistics: stats} -} - -// Get default statistics for the input weight keys given in the request. -func (s *BaseStep[RequestType, Opts]) PrepareStats(request PipelineRequest, unit string) StepStatistics { - return StepStatistics{ - Unit: unit, - Subjects: make(map[string]float64, len(request.GetSubjects())), - } -} diff --git a/internal/scheduling/lib/step_test.go b/internal/scheduling/lib/step_test.go deleted file mode 100644 index 31d335cd3..000000000 --- a/internal/scheduling/lib/step_test.go +++ /dev/null @@ -1,33 +0,0 @@ -// Copyright SAP SE -// SPDX-License-Identifier: Apache-2.0 - -package lib - -import ( - "context" - "log/slog" - - "github.com/cobaltcore-dev/cortex/api/v1alpha1" - "sigs.k8s.io/controller-runtime/pkg/client" -) - -type mockStep[RequestType PipelineRequest] struct { - InitFunc func(ctx context.Context, client client.Client, step v1alpha1.StepSpec) error - RunFunc func(traceLog *slog.Logger, request RequestType) (*StepResult, error) -} - -func (m *mockStep[RequestType]) Init(ctx context.Context, client client.Client, step v1alpha1.StepSpec) error { - return m.InitFunc(ctx, client, step) -} -func (m *mockStep[RequestType]) Run(traceLog *slog.Logger, request RequestType) (*StepResult, error) { - return m.RunFunc(traceLog, request) -} - -type MockOptions struct { - Option1 string `json:"option1"` - Option2 int `json:"option2"` -} - -func (o MockOptions) Validate() error { - return nil -} diff --git a/internal/scheduling/lib/step_validation.go b/internal/scheduling/lib/step_validation.go deleted file mode 100644 index 638bf6e84..000000000 --- a/internal/scheduling/lib/step_validation.go +++ /dev/null @@ -1,68 +0,0 @@ -// Copyright SAP SE -// SPDX-License-Identifier: Apache-2.0 - -package lib - -import ( - "context" - "errors" - "log/slog" - - "github.com/cobaltcore-dev/cortex/api/v1alpha1" - "sigs.k8s.io/controller-runtime/pkg/client" -) - -// Wrapper for scheduler steps that validates them before/after execution. -type StepValidator[RequestType PipelineRequest] struct { - // The wrapped step to validate. - Step Step[RequestType] - // By default, we execute all validations. However, through the config, - // we can also disable some validations if necessary. - DisabledValidations v1alpha1.DisabledValidationsSpec -} - -// Initialize the wrapped step with the database and options. -func (s *StepValidator[RequestType]) Init(ctx context.Context, client client.Client, step v1alpha1.StepSpec) error { - slog.Info( - "scheduler: init validation for step", "name", step.Impl, - "disabled", s.DisabledValidations, - ) - return s.Step.Init(ctx, client, step) -} - -// Validate the wrapped step with the database and options. 
-func validateStep[RequestType PipelineRequest](step Step[RequestType], disabledValidations v1alpha1.DisabledValidationsSpec) *StepValidator[RequestType] { - return &StepValidator[RequestType]{ - Step: step, - DisabledValidations: disabledValidations, - } -} - -// Run the step and validate what happens. -func (s *StepValidator[RequestType]) Run(traceLog *slog.Logger, request RequestType) (*StepResult, error) { - result, err := s.Step.Run(traceLog, request) - if err != nil { - return nil, err - } - // If not disabled, validate that the number of subjects stayed the same. - // Note that for some schedulers the same subject (e.g. compute host) may - // appear multiple times if there is a substruct (e.g. hypervisor hostname). - // Since cortex will only schedule on the subject level and not below, - // we need to deduplicate the subjects first before the validation. - if !s.DisabledValidations.SameSubjectNumberInOut { - deduplicated := map[string]struct{}{} - for _, subject := range request.GetSubjects() { - deduplicated[subject] = struct{}{} - } - if len(result.Activations) != len(deduplicated) { - return nil, errors.New("safety: number of (deduplicated) subjects changed during step execution") - } - } - // If not disabled, validate that some subjects remain. - if !s.DisabledValidations.SomeSubjectsRemain { - if len(result.Activations) == 0 { - return nil, errors.New("safety: no subjects remain after step execution") - } - } - return result, nil -} diff --git a/internal/scheduling/lib/step_validation_test.go b/internal/scheduling/lib/step_validation_test.go deleted file mode 100644 index 2cdd5a52f..000000000 --- a/internal/scheduling/lib/step_validation_test.go +++ /dev/null @@ -1,135 +0,0 @@ -// Copyright SAP SE -// SPDX-License-Identifier: Apache-2.0 - -package lib - -import ( - "log/slog" - "reflect" - "testing" - - "github.com/cobaltcore-dev/cortex/api/v1alpha1" -) - -func TestStepValidator_Run_ValidHosts(t *testing.T) { - mockStep := &mockStep[mockPipelineRequest]{ - RunFunc: func(traceLog *slog.Logger, request mockPipelineRequest) (*StepResult, error) { - return &StepResult{ - Activations: map[string]float64{ - "host1": 1.0, - "host2": 1.0, - }, - }, nil - }, - } - - request := mockPipelineRequest{ - Subjects: []string{"subject1", "subject2"}, - } - - validator := StepValidator[mockPipelineRequest]{ - Step: mockStep, - DisabledValidations: v1alpha1.DisabledValidationsSpec{ - SameSubjectNumberInOut: false, - }, - } - - result, err := validator.Run(slog.Default(), request) - if err != nil { - t.Errorf("Run() error = %v, want nil", err) - } - - expectedWeights := map[string]float64{ - "host1": 1.0, - "host2": 1.0, - } - - if !reflect.DeepEqual(result.Activations, expectedWeights) { - t.Errorf("Run() weights = %v, want %v", result.Activations, expectedWeights) - } -} - -func TestStepValidator_Run_HostNumberMismatch(t *testing.T) { - mockStep := &mockStep[mockPipelineRequest]{ - RunFunc: func(traceLog *slog.Logger, request mockPipelineRequest) (*StepResult, error) { - return &StepResult{ - Activations: map[string]float64{ - "host1": 1.0, - }, - }, nil - }, - } - - request := mockPipelineRequest{ - Subjects: []string{"subject1", "subject2"}, - } - - validator := StepValidator[mockPipelineRequest]{ - Step: mockStep, - DisabledValidations: v1alpha1.DisabledValidationsSpec{ - SameSubjectNumberInOut: false, - }, - } - - result, err := validator.Run(slog.Default(), request) - if err == nil { - t.Errorf("Run() error = nil, want error") - } - - if result != nil { - t.Errorf("Run() weights = %v, want 
nil", result.Activations) - } - - expectedError := "safety: number of (deduplicated) subjects changed during step execution" - if err.Error() != expectedError { - t.Errorf("Run() error = %v, want %v", err.Error(), expectedError) - } -} - -func TestStepValidator_Run_DisabledValidation(t *testing.T) { - mockStep := &mockStep[mockPipelineRequest]{ - RunFunc: func(traceLog *slog.Logger, request mockPipelineRequest) (*StepResult, error) { - return &StepResult{ - Activations: map[string]float64{ - "host1": 1.0, - }, - }, nil - }, - } - - request := mockPipelineRequest{ - Subjects: []string{"subject1"}, - } - - validator := StepValidator[mockPipelineRequest]{ - Step: mockStep, - DisabledValidations: v1alpha1.DisabledValidationsSpec{ - SameSubjectNumberInOut: true, // Validation is disabled - }, - } - - result, err := validator.Run(slog.Default(), request) - if err != nil { - t.Errorf("Run() error = %v, want nil", err) - } - - expectedWeights := map[string]float64{ - "host1": 1.0, - } - - if !reflect.DeepEqual(result.Activations, expectedWeights) { - t.Errorf("Run() weights = %v, want %v", result.Activations, expectedWeights) - } -} - -func TestValidateStep(t *testing.T) { - mockStep := &mockStep[mockPipelineRequest]{} - disabledValidations := v1alpha1.DisabledValidationsSpec{ - SameSubjectNumberInOut: true, - } - - validator := validateStep(mockStep, disabledValidations) - if !reflect.DeepEqual(validator.DisabledValidations, disabledValidations) { - t.Errorf("validateStep() DisabledValidations = %v, want %v", validator.DisabledValidations, disabledValidations) - } -} diff --git a/internal/scheduling/lib/weigher.go b/internal/scheduling/lib/weigher.go new file mode 100644 index 000000000..ef3d213b3 --- /dev/null +++ b/internal/scheduling/lib/weigher.go @@ -0,0 +1,58 @@ +// Copyright SAP SE +// SPDX-License-Identifier: Apache-2.0 + +package lib + +import ( + "context" + "errors" + "fmt" + + "github.com/cobaltcore-dev/cortex/api/v1alpha1" + corev1 "k8s.io/api/core/v1" + "k8s.io/apimachinery/pkg/api/meta" + "sigs.k8s.io/controller-runtime/pkg/client" +) + +// Interface for a weigher as part of the scheduling pipeline. +type Weigher[RequestType FilterWeigherPipelineRequest] interface { + FilterWeigherPipelineStep[RequestType] + + // Configure the step and initialize things like a database connection. + Init(ctx context.Context, client client.Client, step v1alpha1.WeigherSpec) error +} + +// Common base for all steps that provides some functionality +// that would otherwise be duplicated across all steps. +type BaseWeigher[RequestType FilterWeigherPipelineRequest, Opts FilterWeigherPipelineStepOpts] struct { + BaseFilterWeigherPipelineStep[RequestType, Opts] +} + +// Init the weigher with the database and options. +func (s *BaseWeigher[RequestType, Opts]) Init(ctx context.Context, client client.Client, step v1alpha1.WeigherSpec) error { + return s.BaseFilterWeigherPipelineStep.Init(ctx, client, step.Params) +} + +// Check if all knowledges are ready, and if not, return an error indicating why not. 
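BaseWeigher delegates option loading to the shared step base, and the CheckKnowledges helper defined directly below lets a step fail its initialization early when a knowledge dependency is missing or empty. The following is a rough sketch of a concrete weigher building on this; the weigher name, its options struct, the referenced knowledge, and the assumption that the request still exposes GetSubjects() are illustrative only and not part of this patch.

```go
package weighers // hypothetical package for concrete weigher implementations

import (
	"context"
	"errors"
	"log/slog"

	"github.com/cobaltcore-dev/cortex/api/v1alpha1"
	"github.com/cobaltcore-dev/cortex/internal/scheduling/lib"
	corev1 "k8s.io/api/core/v1"
	"sigs.k8s.io/controller-runtime/pkg/client"
)

// Hypothetical options decoded from WeigherSpec.Params.
type availabilityOpts struct {
	MaxBonus float64 `json:"maxBonus"`
}

func (o availabilityOpts) Validate() error {
	if o.MaxBonus < 0 {
		return errors.New("maxBonus must not be negative")
	}
	return nil
}

// Hypothetical weigher built on the new BaseWeigher.
type AvailabilityWeigher[RequestType lib.FilterWeigherPipelineRequest] struct {
	lib.BaseWeigher[RequestType, availabilityOpts]
}

func (w *AvailabilityWeigher[RequestType]) Init(ctx context.Context, cl client.Client, step v1alpha1.WeigherSpec) error {
	if err := w.BaseWeigher.Init(ctx, cl, step); err != nil {
		return err
	}
	// Fail early if the knowledge this weigher reads is not ready yet.
	// Knowledge name and namespace are placeholders.
	return w.CheckKnowledges(ctx, corev1.ObjectReference{
		Name: "host-utilization", Namespace: "cortex",
	})
}

func (w *AvailabilityWeigher[RequestType]) Run(traceLog *slog.Logger, request RequestType) (*lib.FilterWeigherPipelineStepResult, error) {
	activations := map[string]float64{}
	// Assumed: the request interface still offers GetSubjects(), as the
	// previous PipelineRequest did. Real logic would derive each activation
	// from knowledge data instead of the neutral 0.0 used here.
	for _, subject := range request.GetSubjects() {
		activations[subject] = 0.0
	}
	return &lib.FilterWeigherPipelineStepResult{Activations: activations}, nil
}
```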
+func (d *BaseFilterWeigherPipelineStep[RequestType, Opts]) CheckKnowledges(ctx context.Context, kns ...corev1.ObjectReference) error { + if d.Client == nil { + return errors.New("kubernetes client not initialized") + } + for _, objRef := range kns { + knowledge := &v1alpha1.Knowledge{} + if err := d.Client.Get(ctx, client.ObjectKey{ + Name: objRef.Name, + Namespace: objRef.Namespace, + }, knowledge); err != nil { + return fmt.Errorf("failed to get knowledge %s: %w", objRef.Name, err) + } + // Check if the knowledge status conditions indicate an error. + if meta.IsStatusConditionFalse(knowledge.Status.Conditions, v1alpha1.KnowledgeConditionReady) { + return fmt.Errorf("knowledge %s not ready", objRef.Name) + } + if knowledge.Status.RawLength == 0 { + return fmt.Errorf("knowledge %s not ready, no data available", objRef.Name) + } + } + return nil +} diff --git a/internal/scheduling/lib/weigher_monitor.go b/internal/scheduling/lib/weigher_monitor.go new file mode 100644 index 000000000..e777a2e33 --- /dev/null +++ b/internal/scheduling/lib/weigher_monitor.go @@ -0,0 +1,43 @@ +// Copyright SAP SE +// SPDX-License-Identifier: Apache-2.0 + +package lib + +import ( + "context" + "log/slog" + + "github.com/cobaltcore-dev/cortex/api/v1alpha1" + "sigs.k8s.io/controller-runtime/pkg/client" +) + +// Wraps a scheduler weigher to monitor its execution. +type WeigherMonitor[RequestType FilterWeigherPipelineRequest] struct { + // The weigher to monitor. + weigher Weigher[RequestType] + // The monitor tracking the step's execution. + monitor *FilterWeigherPipelineStepMonitor[RequestType] +} + +// Wrap the given weigher with a monitor. +func monitorWeigher[RequestType FilterWeigherPipelineRequest]( + weigher Weigher[RequestType], + stepName string, + m FilterWeigherPipelineMonitor, +) *WeigherMonitor[RequestType] { + + return &WeigherMonitor[RequestType]{ + weigher: weigher, + monitor: monitorStep[RequestType](stepName, m), + } +} + +// Initialize the wrapped weigher. +func (wm *WeigherMonitor[RequestType]) Init(ctx context.Context, client client.Client, step v1alpha1.WeigherSpec) error { + return wm.weigher.Init(ctx, client, step) +} + +// Run the weigher and observe its execution. 
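monitorWeigher keeps instrumentation out of the individual weighers: the wrapper forwards Init and Run and lets the shared step monitor observe each execution. Because the function is unexported, the wrapping presumably happens while the filter-weigher pipeline is assembled within this package; the sketch below illustrates such an assembly loop, with the supported-weigher registry and the returned error map being invented names rather than part of this patch.

```go
// Hypothetical assembly loop inside package lib: wrap every configured
// weigher in a monitor and collect per-weigher initialization errors,
// mirroring the PipelineInitResult contract. Structure is illustrative only.
package lib

import (
	"context"
	"fmt"

	"github.com/cobaltcore-dev/cortex/api/v1alpha1"
	"sigs.k8s.io/controller-runtime/pkg/client"
)

func buildWeighers[RequestType FilterWeigherPipelineRequest](
	ctx context.Context,
	cl client.Client,
	specs []v1alpha1.WeigherSpec,
	supported map[string]Weigher[RequestType],
	monitor FilterWeigherPipelineMonitor,
) (map[string]*WeigherMonitor[RequestType], map[string]error) {

	weighers := map[string]*WeigherMonitor[RequestType]{}
	errs := map[string]error{}
	for _, spec := range specs {
		weigher, ok := supported[spec.Name]
		if !ok {
			errs[spec.Name] = fmt.Errorf("no weigher implemented for name %q", spec.Name)
			continue
		}
		wrapped := monitorWeigher(weigher, spec.Name, monitor)
		if err := wrapped.Init(ctx, cl, spec); err != nil {
			errs[spec.Name] = err
			continue
		}
		weighers[spec.Name] = wrapped
	}
	return weighers, errs
}
```

The errs map would then feed WeigherErrors on the PipelineInitResult handed back to the base controller.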
+func (wm *WeigherMonitor[RequestType]) Run(traceLog *slog.Logger, request RequestType) (*FilterWeigherPipelineStepResult, error) { + return wm.monitor.RunWrapped(traceLog, request, wm.weigher) +} diff --git a/internal/scheduling/lib/weigher_monitor_test.go b/internal/scheduling/lib/weigher_monitor_test.go new file mode 100644 index 000000000..64e15d28f --- /dev/null +++ b/internal/scheduling/lib/weigher_monitor_test.go @@ -0,0 +1,118 @@ +// Copyright SAP SE +// SPDX-License-Identifier: Apache-2.0 + +package lib + +import ( + "context" + "log/slog" + "testing" + + "github.com/cobaltcore-dev/cortex/api/v1alpha1" + "k8s.io/apimachinery/pkg/runtime" + "sigs.k8s.io/controller-runtime/pkg/client" + "sigs.k8s.io/controller-runtime/pkg/client/fake" +) + +func TestMonitorWeigher(t *testing.T) { + monitor := FilterWeigherPipelineMonitor{ + PipelineName: "test-pipeline", + } + + mockWeigher := &mockWeigher[mockFilterWeigherPipelineRequest]{ + InitFunc: func(ctx context.Context, cl client.Client, step v1alpha1.WeigherSpec) error { + return nil + }, + RunFunc: func(traceLog *slog.Logger, request mockFilterWeigherPipelineRequest) (*FilterWeigherPipelineStepResult, error) { + return &FilterWeigherPipelineStepResult{ + Activations: map[string]float64{"host1": 0.5, "host2": 1.0}, + }, nil + }, + } + + wm := monitorWeigher(mockWeigher, "test-weigher", monitor) + if wm == nil { + t.Fatal("expected weigher monitor, got nil") + } + if wm.weigher == nil { + t.Error("expected weigher to be set") + } + if wm.monitor == nil { + t.Error("expected monitor to be set") + } + if wm.monitor.stepName != "test-weigher" { + t.Errorf("expected step name 'test-weigher', got '%s'", wm.monitor.stepName) + } +} + +func TestWeigherMonitor_Init(t *testing.T) { + initCalled := false + mockWeigher := &mockWeigher[mockFilterWeigherPipelineRequest]{ + InitFunc: func(ctx context.Context, cl client.Client, step v1alpha1.WeigherSpec) error { + initCalled = true + return nil + }, + } + + monitor := FilterWeigherPipelineMonitor{ + PipelineName: "test-pipeline", + } + wm := monitorWeigher(mockWeigher, "test-weigher", monitor) + + scheme := runtime.NewScheme() + cl := fake.NewClientBuilder().WithScheme(scheme).Build() + + err := wm.Init(t.Context(), cl, v1alpha1.WeigherSpec{ + Name: "test-weigher", + Params: runtime.RawExtension{ + Raw: []byte(`{}`), + }, + }) + if err != nil { + t.Errorf("expected no error, got %v", err) + } + if !initCalled { + t.Error("expected Init to be called on wrapped weigher") + } +} + +func TestWeigherMonitor_Run(t *testing.T) { + runCalled := false + mockWeigher := &mockWeigher[mockFilterWeigherPipelineRequest]{ + RunFunc: func(traceLog *slog.Logger, request mockFilterWeigherPipelineRequest) (*FilterWeigherPipelineStepResult, error) { + runCalled = true + return &FilterWeigherPipelineStepResult{ + Activations: map[string]float64{"host1": 0.5, "host2": 1.0}, + }, nil + }, + } + + runTimer := &mockObserver{} + removedSubjectsObserver := &mockObserver{} + monitor := FilterWeigherPipelineMonitor{ + PipelineName: "test-pipeline", + } + wm := monitorWeigher(mockWeigher, "test-weigher", monitor) + // Manually set monitors for testing + wm.monitor.runTimer = runTimer + wm.monitor.removedSubjectsObserver = removedSubjectsObserver + + request := mockFilterWeigherPipelineRequest{ + Subjects: []string{"host1", "host2", "host3"}, + Weights: map[string]float64{"host1": 0.1, "host2": 0.2, "host3": 0.3}, + } + + result, err := wm.Run(slog.Default(), request) + if err != nil { + t.Errorf("expected no error, got %v", err) + } + 
if !runCalled { + t.Error("expected Run to be called on wrapped weigher") + } + if result == nil { + t.Fatal("expected result, got nil") + } + if len(result.Activations) != 2 { + t.Errorf("expected 2 activations, got %d", len(result.Activations)) + } +} diff --git a/internal/scheduling/lib/weigher_test.go b/internal/scheduling/lib/weigher_test.go new file mode 100644 index 000000000..b1db3ec58 --- /dev/null +++ b/internal/scheduling/lib/weigher_test.go @@ -0,0 +1,259 @@ +// Copyright SAP SE +// SPDX-License-Identifier: Apache-2.0 + +package lib + +import ( + "context" + "log/slog" + "testing" + + "github.com/cobaltcore-dev/cortex/api/v1alpha1" + corev1 "k8s.io/api/core/v1" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/runtime" + "sigs.k8s.io/controller-runtime/pkg/client" + "sigs.k8s.io/controller-runtime/pkg/client/fake" +) + +type mockWeigher[RequestType FilterWeigherPipelineRequest] struct { + InitFunc func(ctx context.Context, client client.Client, step v1alpha1.WeigherSpec) error + RunFunc func(traceLog *slog.Logger, request RequestType) (*FilterWeigherPipelineStepResult, error) +} + +func (m *mockWeigher[RequestType]) Init(ctx context.Context, client client.Client, step v1alpha1.WeigherSpec) error { + if m.InitFunc == nil { + return nil + } + return m.InitFunc(ctx, client, step) +} +func (m *mockWeigher[RequestType]) Run(traceLog *slog.Logger, request RequestType) (*FilterWeigherPipelineStepResult, error) { + if m.RunFunc == nil { + return &FilterWeigherPipelineStepResult{}, nil + } + return m.RunFunc(traceLog, request) +} + +// weigherTestOptions implements FilterWeigherPipelineStepOpts for testing. +type weigherTestOptions struct{} + +func (o weigherTestOptions) Validate() error { return nil } + +func TestBaseWeigher_Init(t *testing.T) { + tests := []struct { + name string + weigherSpec v1alpha1.WeigherSpec + expectError bool + }{ + { + name: "successful initialization with valid params", + weigherSpec: v1alpha1.WeigherSpec{ + Name: "test-weigher", + Params: runtime.RawExtension{ + Raw: []byte(`{}`), + }, + }, + expectError: false, + }, + { + name: "successful initialization with empty params", + weigherSpec: v1alpha1.WeigherSpec{ + Name: "test-weigher", + Params: runtime.RawExtension{ + Raw: []byte(`{}`), + }, + }, + expectError: false, + }, + { + name: "error on invalid JSON params", + weigherSpec: v1alpha1.WeigherSpec{ + Name: "test-weigher", + Params: runtime.RawExtension{ + Raw: []byte(`{invalid json}`), + }, + }, + expectError: true, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + weigher := &BaseWeigher[mockFilterWeigherPipelineRequest, weigherTestOptions]{} + cl := fake.NewClientBuilder().Build() + + err := weigher.Init(t.Context(), cl, tt.weigherSpec) + + if tt.expectError && err == nil { + t.Error("expected error but got nil") + } + if !tt.expectError && err != nil { + t.Errorf("expected no error but got: %v", err) + } + if !tt.expectError && weigher.Client == nil { + t.Error("expected client to be set but it was nil") + } + }) + } +} + +func TestBaseFilterWeigherPipelineStep_CheckKnowledges(t *testing.T) { + scheme := runtime.NewScheme() + if err := v1alpha1.AddToScheme(scheme); err != nil { + t.Fatalf("failed to add scheme: %v", err) + } + + tests := []struct { + name string + knowledges []v1alpha1.Knowledge + refs []corev1.ObjectReference + expectError bool + errorMsg string + }{ + { + name: "all knowledges ready", + knowledges: []v1alpha1.Knowledge{ + { + ObjectMeta: metav1.ObjectMeta{ + Name: "knowledge1", 
+ Namespace: "default", + }, + Status: v1alpha1.KnowledgeStatus{ + RawLength: 10, + Conditions: []metav1.Condition{ + { + Type: v1alpha1.KnowledgeConditionReady, + Status: metav1.ConditionTrue, + }, + }, + }, + }, + }, + refs: []corev1.ObjectReference{ + {Name: "knowledge1", Namespace: "default"}, + }, + expectError: false, + }, + { + name: "knowledge not found", + knowledges: []v1alpha1.Knowledge{}, + refs: []corev1.ObjectReference{ + {Name: "missing-knowledge", Namespace: "default"}, + }, + expectError: true, + errorMsg: "failed to get knowledge", + }, + { + name: "knowledge not ready - condition false", + knowledges: []v1alpha1.Knowledge{ + { + ObjectMeta: metav1.ObjectMeta{ + Name: "knowledge1", + Namespace: "default", + }, + Status: v1alpha1.KnowledgeStatus{ + RawLength: 10, + Conditions: []metav1.Condition{ + { + Type: v1alpha1.KnowledgeConditionReady, + Status: metav1.ConditionFalse, + }, + }, + }, + }, + }, + refs: []corev1.ObjectReference{ + {Name: "knowledge1", Namespace: "default"}, + }, + expectError: true, + errorMsg: "not ready", + }, + { + name: "knowledge not ready - no data", + knowledges: []v1alpha1.Knowledge{ + { + ObjectMeta: metav1.ObjectMeta{ + Name: "knowledge1", + Namespace: "default", + }, + Status: v1alpha1.KnowledgeStatus{ + RawLength: 0, + Conditions: []metav1.Condition{ + { + Type: v1alpha1.KnowledgeConditionReady, + Status: metav1.ConditionTrue, + }, + }, + }, + }, + }, + refs: []corev1.ObjectReference{ + {Name: "knowledge1", Namespace: "default"}, + }, + expectError: true, + errorMsg: "no data available", + }, + { + name: "empty knowledge list", + knowledges: []v1alpha1.Knowledge{}, + refs: []corev1.ObjectReference{}, + expectError: false, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + clientBuilder := fake.NewClientBuilder().WithScheme(scheme) + for i := range tt.knowledges { + clientBuilder = clientBuilder.WithObjects(&tt.knowledges[i]) + } + cl := clientBuilder.Build() + + step := &BaseFilterWeigherPipelineStep[mockFilterWeigherPipelineRequest, weigherTestOptions]{ + Client: cl, + } + + err := step.CheckKnowledges(t.Context(), tt.refs...) 
+
+	if tt.expectError && err == nil {
+		t.Error("expected error but got nil")
+	}
+	if !tt.expectError && err != nil {
+		t.Errorf("expected no error but got: %v", err)
+	}
+	if tt.expectError && err != nil && tt.errorMsg != "" {
+		if !containsString(err.Error(), tt.errorMsg) {
+			t.Errorf("expected error message to contain %q, got %q", tt.errorMsg, err.Error())
+		}
+	}
+		})
+	}
+}
+
+func TestBaseFilterWeigherPipelineStep_CheckKnowledges_NilClient(t *testing.T) {
+	step := &BaseFilterWeigherPipelineStep[mockFilterWeigherPipelineRequest, weigherTestOptions]{
+		Client: nil,
+	}
+
+	err := step.CheckKnowledges(t.Context(), corev1.ObjectReference{Name: "test", Namespace: "default"})
+
+	if err == nil {
+		t.Error("expected error for nil client but got nil")
+	}
+	if !containsString(err.Error(), "client not initialized") {
+		t.Errorf("expected error message about client not initialized, got %q", err.Error())
+	}
+}
+
+func containsString(s, substr string) bool {
+	return len(s) >= len(substr) && (s == substr || s != "" && containsSubstring(s, substr))
+}
+
+func containsSubstring(s, substr string) bool {
+	for i := 0; i <= len(s)-len(substr); i++ {
+		if s[i:i+len(substr)] == substr {
+			return true
+		}
+	}
+	return false
+}
diff --git a/internal/scheduling/lib/weigher_validation.go b/internal/scheduling/lib/weigher_validation.go
new file mode 100644
index 000000000..8c398eb44
--- /dev/null
+++ b/internal/scheduling/lib/weigher_validation.go
@@ -0,0 +1,54 @@
+// Copyright SAP SE
+// SPDX-License-Identifier: Apache-2.0
+
+package lib
+
+import (
+	"context"
+	"errors"
+	"log/slog"
+
+	"github.com/cobaltcore-dev/cortex/api/v1alpha1"
+	"sigs.k8s.io/controller-runtime/pkg/client"
+)
+
+// Wrapper for scheduler weighers that validates their results after execution.
+type WeigherValidator[RequestType FilterWeigherPipelineRequest] struct {
+	// The wrapped weigher to validate.
+	Weigher Weigher[RequestType]
+}
+
+// Initialize the wrapped weigher with the kubernetes client and step spec.
+func (s *WeigherValidator[RequestType]) Init(ctx context.Context, client client.Client, step v1alpha1.WeigherSpec) error {
+	slog.Info("scheduler: init validation for step", "name", step.Name)
+	return s.Weigher.Init(ctx, client, step)
+}
+
+// Wrap the given weigher with a validator.
+func validateWeigher[RequestType FilterWeigherPipelineRequest](weigher Weigher[RequestType]) *WeigherValidator[RequestType] {
+	return &WeigherValidator[RequestType]{Weigher: weigher}
+}
+
+// Run the weigher and validate what happens.
+func (s *WeigherValidator[RequestType]) Run(traceLog *slog.Logger, request RequestType) (*FilterWeigherPipelineStepResult, error) {
+	result, err := s.Weigher.Run(traceLog, request)
+	if err != nil {
+		return nil, err
+	}
+	// Note that for some schedulers the same subject (e.g. compute host) may
+	// appear multiple times if there is a substruct (e.g. hypervisor hostname).
+	// Since cortex will only schedule on the subject level and not below,
+	// we need to deduplicate the subjects first before the validation.
+	deduplicated := map[string]struct{}{}
+	for _, subject := range request.GetSubjects() {
+		deduplicated[subject] = struct{}{}
+	}
+	if len(result.Activations) != len(deduplicated) {
+		return nil, errors.New("safety: number of (deduplicated) subjects changed during step execution")
+	}
+	// Validate that some subjects remain.
+ if len(result.Activations) == 0 { + return nil, errors.New("safety: no subjects remain after step execution") + } + return result, nil +} diff --git a/internal/scheduling/lib/weigher_validation_test.go b/internal/scheduling/lib/weigher_validation_test.go new file mode 100644 index 000000000..697c3c1c9 --- /dev/null +++ b/internal/scheduling/lib/weigher_validation_test.go @@ -0,0 +1,151 @@ +// Copyright SAP SE +// SPDX-License-Identifier: Apache-2.0 + +package lib + +import ( + "context" + "errors" + "log/slog" + "reflect" + "testing" + + "github.com/cobaltcore-dev/cortex/api/v1alpha1" + "k8s.io/apimachinery/pkg/runtime" + "sigs.k8s.io/controller-runtime/pkg/client" + "sigs.k8s.io/controller-runtime/pkg/client/fake" +) + +func TestValidateWeigher(t *testing.T) { + weigher := &mockWeigher[mockFilterWeigherPipelineRequest]{} + validator := validateWeigher(weigher) + + if validator == nil { + t.Fatal("expected validator but got nil") + } + if validator.Weigher != weigher { + t.Error("expected weigher to be set in validator") + } +} + +func TestWeigherValidator_Init(t *testing.T) { + tests := []struct { + name string + weigherSpec v1alpha1.WeigherSpec + initError error + expectError bool + }{ + { + name: "successful initialization", + weigherSpec: v1alpha1.WeigherSpec{ + Name: "test-weigher", + Params: runtime.RawExtension{ + Raw: []byte(`{}`), + }, + }, + initError: nil, + expectError: false, + }, + { + name: "initialization error", + weigherSpec: v1alpha1.WeigherSpec{ + Name: "test-weigher", + Params: runtime.RawExtension{ + Raw: []byte(`{}`), + }, + }, + initError: errors.New("init error"), + expectError: true, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + weigher := &mockWeigher[mockFilterWeigherPipelineRequest]{ + InitFunc: func(_ context.Context, _ client.Client, _ v1alpha1.WeigherSpec) error { + return tt.initError + }, + } + validator := validateWeigher(weigher) + cl := fake.NewClientBuilder().Build() + + err := validator.Init(t.Context(), cl, tt.weigherSpec) + + if tt.expectError && err == nil { + t.Error("expected error but got nil") + } + if !tt.expectError && err != nil { + t.Errorf("expected no error but got: %v", err) + } + }) + } +} + +func TestWeigherValidator_Run_ValidHosts(t *testing.T) { + mockStep := &mockWeigher[mockFilterWeigherPipelineRequest]{ + RunFunc: func(traceLog *slog.Logger, request mockFilterWeigherPipelineRequest) (*FilterWeigherPipelineStepResult, error) { + return &FilterWeigherPipelineStepResult{ + Activations: map[string]float64{ + "host1": 1.0, + "host2": 1.0, + }, + }, nil + }, + } + + request := mockFilterWeigherPipelineRequest{ + Subjects: []string{"subject1", "subject2"}, + } + + validator := WeigherValidator[mockFilterWeigherPipelineRequest]{ + Weigher: mockStep, + } + + result, err := validator.Run(slog.Default(), request) + if err != nil { + t.Errorf("Run() error = %v, want nil", err) + } + + expectedWeights := map[string]float64{ + "host1": 1.0, + "host2": 1.0, + } + + if !reflect.DeepEqual(result.Activations, expectedWeights) { + t.Errorf("Run() weights = %v, want %v", result.Activations, expectedWeights) + } +} + +func TestWeigherValidator_Run_HostNumberMismatch(t *testing.T) { + mockStep := &mockWeigher[mockFilterWeigherPipelineRequest]{ + RunFunc: func(traceLog *slog.Logger, request mockFilterWeigherPipelineRequest) (*FilterWeigherPipelineStepResult, error) { + return &FilterWeigherPipelineStepResult{ + Activations: map[string]float64{ + "host1": 1.0, + }, + }, nil + }, + } + + request := 
mockFilterWeigherPipelineRequest{ + Subjects: []string{"subject1", "subject2"}, + } + + validator := WeigherValidator[mockFilterWeigherPipelineRequest]{ + Weigher: mockStep, + } + + result, err := validator.Run(slog.Default(), request) + if err == nil { + t.Errorf("Run() error = nil, want error") + } + + if result != nil { + t.Errorf("Run() weights = %v, want nil", result.Activations) + } + + expectedError := "safety: number of (deduplicated) subjects changed during step execution" + if err.Error() != expectedError { + t.Errorf("Run() error = %v, want %v", err.Error(), expectedError) + } +} diff --git a/internal/scheduling/decisions/machines/pipeline_controller.go b/internal/scheduling/machines/filter_weigher_pipeline_controller.go similarity index 89% rename from internal/scheduling/decisions/machines/pipeline_controller.go rename to internal/scheduling/machines/filter_weigher_pipeline_controller.go index 8da6ed3dc..f7b4a0134 100644 --- a/internal/scheduling/decisions/machines/pipeline_controller.go +++ b/internal/scheduling/machines/filter_weigher_pipeline_controller.go @@ -37,9 +37,9 @@ import ( // // Additionally, the controller watches for pipeline and step changes to // reconfigure the pipelines as needed. -type DecisionPipelineController struct { +type FilterWeigherPipelineController struct { // Toolbox shared between all pipeline controllers. - lib.BasePipelineController[lib.Pipeline[ironcore.MachinePipelineRequest]] + lib.BasePipelineController[lib.FilterWeigherPipeline[ironcore.MachinePipelineRequest]] // Mutex to only allow one process at a time processMu sync.Mutex @@ -47,15 +47,15 @@ type DecisionPipelineController struct { // Config for the scheduling operator. Conf conf.Config // Monitor to pass down to all pipelines. - Monitor lib.PipelineMonitor + Monitor lib.FilterWeigherPipelineMonitor } // The type of pipeline this controller manages. -func (c *DecisionPipelineController) PipelineType() v1alpha1.PipelineType { +func (c *FilterWeigherPipelineController) PipelineType() v1alpha1.PipelineType { return v1alpha1.PipelineTypeFilterWeigher } -func (c *DecisionPipelineController) Reconcile(ctx context.Context, req ctrl.Request) (ctrl.Result, error) { +func (c *FilterWeigherPipelineController) Reconcile(ctx context.Context, req ctrl.Request) (ctrl.Result, error) { c.processMu.Lock() defer c.processMu.Unlock() @@ -75,7 +75,7 @@ func (c *DecisionPipelineController) Reconcile(ctx context.Context, req ctrl.Req return ctrl.Result{}, nil } -func (c *DecisionPipelineController) ProcessNewMachine(ctx context.Context, machine *ironcorev1alpha1.Machine) error { +func (c *FilterWeigherPipelineController) ProcessNewMachine(ctx context.Context, machine *ironcorev1alpha1.Machine) error { c.processMu.Lock() defer c.processMu.Unlock() @@ -132,7 +132,7 @@ func (c *DecisionPipelineController) ProcessNewMachine(ctx context.Context, mach return err } -func (c *DecisionPipelineController) process(ctx context.Context, decision *v1alpha1.Decision) error { +func (c *FilterWeigherPipelineController) process(ctx context.Context, decision *v1alpha1.Decision) error { log := ctrl.LoggerFrom(ctx) startedAt := time.Now() // So we can measure sync duration. @@ -183,15 +183,20 @@ func (c *DecisionPipelineController) process(ctx context.Context, decision *v1al } // The base controller will delegate the pipeline creation down to this method. 
-func (c *DecisionPipelineController) InitPipeline( +func (c *FilterWeigherPipelineController) InitPipeline( ctx context.Context, p v1alpha1.Pipeline, -) (lib.Pipeline[ironcore.MachinePipelineRequest], error) { +) lib.PipelineInitResult[lib.FilterWeigherPipeline[ironcore.MachinePipelineRequest]] { - return lib.NewPipeline(ctx, c.Client, p.Name, supportedSteps, p.Spec.Steps, c.Monitor) + return lib.InitNewFilterWeigherPipeline( + ctx, c.Client, p.Name, + supportedFilters, p.Spec.Filters, + supportedWeighers, p.Spec.Weighers, + c.Monitor, + ) } -func (c *DecisionPipelineController) handleMachine() handler.EventHandler { +func (c *FilterWeigherPipelineController) handleMachine() handler.EventHandler { return handler.Funcs{ CreateFunc: func(ctx context.Context, evt event.CreateEvent, queue workqueue.TypedRateLimitingInterface[reconcile.Request]) { machine := evt.Object.(*ironcorev1alpha1.Machine) @@ -231,7 +236,7 @@ func (c *DecisionPipelineController) handleMachine() handler.EventHandler { } } -func (c *DecisionPipelineController) SetupWithManager(mgr manager.Manager, mcl *multicluster.Client) error { +func (c *FilterWeigherPipelineController) SetupWithManager(mgr manager.Manager, mcl *multicluster.Client) error { c.Initializer = c c.SchedulingDomain = v1alpha1.SchedulingDomainMachines if err := mgr.Add(manager.RunnableFunc(c.InitAllPipelines)); err != nil { diff --git a/internal/scheduling/decisions/machines/pipeline_controller_test.go b/internal/scheduling/machines/filter_weigher_pipeline_controller_test.go similarity index 84% rename from internal/scheduling/decisions/machines/pipeline_controller_test.go rename to internal/scheduling/machines/filter_weigher_pipeline_controller_test.go index 821639c6f..7df66091b 100644 --- a/internal/scheduling/decisions/machines/pipeline_controller_test.go +++ b/internal/scheduling/machines/filter_weigher_pipeline_controller_test.go @@ -21,7 +21,7 @@ import ( "sigs.k8s.io/controller-runtime/pkg/client/fake" ) -func TestDecisionPipelineController_Reconcile(t *testing.T) { +func TestFilterWeigherPipelineController_Reconcile(t *testing.T) { scheme := runtime.NewScheme() if err := v1alpha1.AddToScheme(scheme); err != nil { t.Fatalf("Failed to add scheduling scheme: %v", err) @@ -120,16 +120,16 @@ func TestDecisionPipelineController_Reconcile(t *testing.T) { WithStatusSubresource(&v1alpha1.Decision{}). 
Build() - controller := &DecisionPipelineController{ - BasePipelineController: lib.BasePipelineController[lib.Pipeline[ironcore.MachinePipelineRequest]]{ - Pipelines: map[string]lib.Pipeline[ironcore.MachinePipelineRequest]{ + controller := &FilterWeigherPipelineController{ + BasePipelineController: lib.BasePipelineController[lib.FilterWeigherPipeline[ironcore.MachinePipelineRequest]]{ + Pipelines: map[string]lib.FilterWeigherPipeline[ironcore.MachinePipelineRequest]{ "machines-scheduler": createMockPipeline(), }, }, Conf: conf.Config{ SchedulingDomain: v1alpha1.SchedulingDomainMachines, }, - Monitor: lib.PipelineMonitor{}, + Monitor: lib.FilterWeigherPipelineMonitor{}, } controller.Client = client @@ -204,74 +204,74 @@ func TestDecisionPipelineController_Reconcile(t *testing.T) { } } -func TestDecisionPipelineController_InitPipeline(t *testing.T) { - controller := &DecisionPipelineController{ - Monitor: lib.PipelineMonitor{}, +func TestFilterWeigherPipelineController_InitPipeline(t *testing.T) { + controller := &FilterWeigherPipelineController{ + Monitor: lib.FilterWeigherPipelineMonitor{}, } tests := []struct { - name string - steps []v1alpha1.StepSpec - expectError bool + name string + filters []v1alpha1.FilterSpec + weighers []v1alpha1.WeigherSpec + expectNonCriticalError bool + expectCriticalError bool }{ { - name: "empty steps", - steps: []v1alpha1.StepSpec{}, - expectError: false, + name: "empty steps", + filters: []v1alpha1.FilterSpec{}, + weighers: []v1alpha1.WeigherSpec{}, + expectNonCriticalError: false, + expectCriticalError: false, }, { name: "noop step", - steps: []v1alpha1.StepSpec{ - { - Impl: "noop", - Type: v1alpha1.StepTypeFilter, - }, + filters: []v1alpha1.FilterSpec{ + {Name: "noop"}, }, - expectError: false, + expectNonCriticalError: false, + expectCriticalError: false, }, { name: "unsupported step", - steps: []v1alpha1.StepSpec{ - { - Impl: "unsupported", - Type: v1alpha1.StepTypeFilter, - }, + filters: []v1alpha1.FilterSpec{ + {Name: "unsupported"}, }, - expectError: true, + expectNonCriticalError: false, + expectCriticalError: true, }, } for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { - pipeline, err := controller.InitPipeline(t.Context(), v1alpha1.Pipeline{ + initResult := controller.InitPipeline(t.Context(), v1alpha1.Pipeline{ ObjectMeta: metav1.ObjectMeta{ Name: "test-pipeline", }, Spec: v1alpha1.PipelineSpec{ Type: v1alpha1.PipelineTypeFilterWeigher, SchedulingDomain: v1alpha1.SchedulingDomainMachines, - Steps: tt.steps, + Filters: tt.filters, + Weighers: tt.weighers, }, }) - if tt.expectError && err == nil { - t.Error("expected error but got none") - return + if tt.expectCriticalError && len(initResult.FilterErrors) == 0 { + t.Error("Expected critical error but got none") } - - if !tt.expectError && err != nil { - t.Errorf("expected no error, got: %v", err) - return + if !tt.expectCriticalError && len(initResult.FilterErrors) > 0 { + t.Errorf("Expected no critical error but got: %v", initResult.FilterErrors) } - - if !tt.expectError && pipeline == nil { - t.Error("expected pipeline to be non-nil") + if tt.expectNonCriticalError && len(initResult.WeigherErrors) == 0 { + t.Error("Expected non-critical error but got none") + } + if !tt.expectNonCriticalError && len(initResult.WeigherErrors) > 0 { + t.Errorf("Expected no non-critical error but got: %v", initResult.WeigherErrors) } }) } } -func TestDecisionPipelineController_ProcessNewMachine(t *testing.T) { +func TestFilterWeigherPipelineController_ProcessNewMachine(t *testing.T) { scheme := 
runtime.NewScheme() if err := v1alpha1.AddToScheme(scheme); err != nil { t.Fatalf("Failed to add scheduling scheme: %v", err) @@ -318,7 +318,8 @@ func TestDecisionPipelineController_ProcessNewMachine(t *testing.T) { Type: v1alpha1.PipelineTypeFilterWeigher, SchedulingDomain: v1alpha1.SchedulingDomainMachines, CreateDecisions: true, - Steps: []v1alpha1.StepSpec{}, + Filters: []v1alpha1.FilterSpec{}, + Weighers: []v1alpha1.WeigherSpec{}, }, }, createDecisions: true, @@ -351,7 +352,8 @@ func TestDecisionPipelineController_ProcessNewMachine(t *testing.T) { Type: v1alpha1.PipelineTypeFilterWeigher, SchedulingDomain: v1alpha1.SchedulingDomainMachines, CreateDecisions: false, - Steps: []v1alpha1.StepSpec{}, + Filters: []v1alpha1.FilterSpec{}, + Weighers: []v1alpha1.WeigherSpec{}, }, }, createDecisions: false, @@ -397,7 +399,8 @@ func TestDecisionPipelineController_ProcessNewMachine(t *testing.T) { Type: v1alpha1.PipelineTypeFilterWeigher, SchedulingDomain: v1alpha1.SchedulingDomainMachines, CreateDecisions: true, - Steps: []v1alpha1.StepSpec{}, + Filters: []v1alpha1.FilterSpec{}, + Weighers: []v1alpha1.WeigherSpec{}, }, }, createDecisions: true, @@ -423,15 +426,15 @@ func TestDecisionPipelineController_ProcessNewMachine(t *testing.T) { WithStatusSubresource(&v1alpha1.Decision{}). Build() - controller := &DecisionPipelineController{ - BasePipelineController: lib.BasePipelineController[lib.Pipeline[ironcore.MachinePipelineRequest]]{ - Pipelines: map[string]lib.Pipeline[ironcore.MachinePipelineRequest]{}, + controller := &FilterWeigherPipelineController{ + BasePipelineController: lib.BasePipelineController[lib.FilterWeigherPipeline[ironcore.MachinePipelineRequest]]{ + Pipelines: map[string]lib.FilterWeigherPipeline[ironcore.MachinePipelineRequest]{}, PipelineConfigs: map[string]v1alpha1.Pipeline{}, }, Conf: conf.Config{ SchedulingDomain: v1alpha1.SchedulingDomainMachines, }, - Monitor: lib.PipelineMonitor{}, + Monitor: lib.FilterWeigherPipelineMonitor{}, } controller.Client = client @@ -545,7 +548,7 @@ func TestDecisionPipelineController_ProcessNewMachine(t *testing.T) { } // Helper function to create a mock pipeline that works with the ironcore types -func createMockPipeline() lib.Pipeline[ironcore.MachinePipelineRequest] { +func createMockPipeline() lib.FilterWeigherPipeline[ironcore.MachinePipelineRequest] { return &mockMachinePipeline{} } diff --git a/internal/scheduling/decisions/machines/noop.go b/internal/scheduling/machines/noop.go similarity index 78% rename from internal/scheduling/decisions/machines/noop.go rename to internal/scheduling/machines/noop.go index 3b0104aa6..ce4d52226 100644 --- a/internal/scheduling/decisions/machines/noop.go +++ b/internal/scheduling/machines/noop.go @@ -15,7 +15,7 @@ type NoopFilter struct { Alias string } -func (f *NoopFilter) Init(ctx context.Context, client client.Client, step v1alpha1.StepSpec) error { +func (f *NoopFilter) Init(ctx context.Context, client client.Client, filter v1alpha1.FilterSpec) error { return nil } @@ -24,12 +24,12 @@ func (f *NoopFilter) Init(ctx context.Context, client client.Client, step v1alph // not in the map are considered as filtered out. // Provide a traceLog that contains the global request id and should // be used to log the step's execution. 
-func (NoopFilter) Run(traceLog *slog.Logger, request ironcore.MachinePipelineRequest) (*lib.StepResult, error) { +func (NoopFilter) Run(traceLog *slog.Logger, request ironcore.MachinePipelineRequest) (*lib.FilterWeigherPipelineStepResult, error) { activations := make(map[string]float64, len(request.Pools)) - stats := make(map[string]lib.StepStatistics) + stats := make(map[string]lib.FilterWeigherPipelineStepStatistics) // Usually you would do some filtering here, or adjust the weights. for _, pool := range request.Pools { activations[pool.Name] = 1.0 } - return &lib.StepResult{Activations: activations, Statistics: stats}, nil + return &lib.FilterWeigherPipelineStepResult{Activations: activations, Statistics: stats}, nil } diff --git a/internal/scheduling/decisions/machines/noop_test.go b/internal/scheduling/machines/noop_test.go similarity index 100% rename from internal/scheduling/decisions/machines/noop_test.go rename to internal/scheduling/machines/noop_test.go diff --git a/internal/scheduling/machines/supported_filters.go b/internal/scheduling/machines/supported_filters.go new file mode 100644 index 000000000..5dcee9a54 --- /dev/null +++ b/internal/scheduling/machines/supported_filters.go @@ -0,0 +1,16 @@ +// Copyright SAP SE +// SPDX-License-Identifier: Apache-2.0 + +package machines + +import ( + "github.com/cobaltcore-dev/cortex/api/delegation/ironcore" + "github.com/cobaltcore-dev/cortex/internal/scheduling/lib" +) + +type MachineFilter = lib.Filter[ironcore.MachinePipelineRequest] + +// Configuration of filters supported by the machine scheduling. +var supportedFilters = map[string]func() MachineFilter{ + "noop": func() MachineFilter { return &NoopFilter{} }, +} diff --git a/internal/scheduling/machines/supported_weighers.go b/internal/scheduling/machines/supported_weighers.go new file mode 100644 index 000000000..329606cc1 --- /dev/null +++ b/internal/scheduling/machines/supported_weighers.go @@ -0,0 +1,14 @@ +// Copyright SAP SE +// SPDX-License-Identifier: Apache-2.0 + +package machines + +import ( + "github.com/cobaltcore-dev/cortex/api/delegation/ironcore" + "github.com/cobaltcore-dev/cortex/internal/scheduling/lib" +) + +type MachineWeigher = lib.Weigher[ironcore.MachinePipelineRequest] + +// Configuration of weighers supported by the machine scheduling. +var supportedWeighers = map[string]func() MachineWeigher{} diff --git a/internal/scheduling/decisions/manila/cleanup.go b/internal/scheduling/manila/decisions_cleanup.go similarity index 97% rename from internal/scheduling/decisions/manila/cleanup.go rename to internal/scheduling/manila/decisions_cleanup.go index 6ad87aa3d..3e0e62f23 100644 --- a/internal/scheduling/decisions/manila/cleanup.go +++ b/internal/scheduling/manila/decisions_cleanup.go @@ -22,7 +22,7 @@ import ( ) // Delete all decisions for manila shares that have been deleted. 
-func Cleanup(ctx context.Context, client client.Client, conf conf.Config) error { +func DecisionsCleanup(ctx context.Context, client client.Client, conf conf.Config) error { var authenticatedHTTP = http.DefaultClient if conf.SSOSecretRef != nil { var err error diff --git a/internal/scheduling/decisions/manila/cleanup_test.go b/internal/scheduling/manila/decisions_cleanup_test.go similarity index 98% rename from internal/scheduling/decisions/manila/cleanup_test.go rename to internal/scheduling/manila/decisions_cleanup_test.go index 6431b2d78..1786f8d1d 100644 --- a/internal/scheduling/decisions/manila/cleanup_test.go +++ b/internal/scheduling/manila/decisions_cleanup_test.go @@ -339,7 +339,7 @@ func TestCleanupManila(t *testing.T) { Namespace: "default", }, } - err := Cleanup(context.Background(), client, config) + err := DecisionsCleanup(context.Background(), client, config) if tt.expectError && err == nil { t.Error("Expected error but got none") @@ -427,7 +427,7 @@ func TestCleanupManilaDecisionsCancel(t *testing.T) { defer cancel() // This should exit quickly due to context cancellation - if err := Cleanup(ctx, client, config); err != nil { + if err := DecisionsCleanup(ctx, client, config); err != nil { if !errors.Is(err, context.DeadlineExceeded) { t.Errorf("Unexpected error during cleanup: %v", err) } diff --git a/internal/scheduling/e2e/manila/checks.go b/internal/scheduling/manila/e2e_checks.go similarity index 100% rename from internal/scheduling/e2e/manila/checks.go rename to internal/scheduling/manila/e2e_checks.go diff --git a/internal/scheduling/external/manila/api.go b/internal/scheduling/manila/external_scheduler_api.go similarity index 99% rename from internal/scheduling/external/manila/api.go rename to internal/scheduling/manila/external_scheduler_api.go index 077893118..d5fc74a63 100644 --- a/internal/scheduling/external/manila/api.go +++ b/internal/scheduling/manila/external_scheduler_api.go @@ -1,7 +1,7 @@ // Copyright SAP SE // SPDX-License-Identifier: Apache-2.0 -package http +package manila import ( "bytes" diff --git a/internal/scheduling/external/manila/api_test.go b/internal/scheduling/manila/external_scheduler_api_test.go similarity index 88% rename from internal/scheduling/external/manila/api_test.go rename to internal/scheduling/manila/external_scheduler_api_test.go index 3339f7281..334a3f1fd 100644 --- a/internal/scheduling/external/manila/api_test.go +++ b/internal/scheduling/manila/external_scheduler_api_test.go @@ -1,7 +1,7 @@ // Copyright SAP SE // SPDX-License-Identifier: Apache-2.0 -package http +package manila import ( "bytes" @@ -308,6 +308,55 @@ func TestHTTPAPI_ManilaExternalScheduler(t *testing.T) { } } +func TestHTTPAPI_inferPipelineName(t *testing.T) { + config := conf.Config{SchedulingDomain: "test-operator"} + delegate := &mockHTTPAPIDelegate{} + api := NewAPI(config, delegate).(*httpAPI) + + tests := []struct { + name string + request manilaapi.ExternalSchedulerRequest + expectedName string + expectError bool + }{ + { + name: "returns default pipeline name", + request: manilaapi.ExternalSchedulerRequest{ + Hosts: []manilaapi.ExternalSchedulerHost{ + {ShareHost: "host1"}, + }, + Weights: map[string]float64{ + "host1": 1.0, + }, + }, + expectedName: "manila-external-scheduler", + expectError: false, + }, + { + name: "returns default pipeline name for empty request", + request: manilaapi.ExternalSchedulerRequest{}, + expectedName: "manila-external-scheduler", + expectError: false, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t 
*testing.T) { + pipelineName, err := api.inferPipelineName(tt.request) + + if tt.expectError && err == nil { + t.Error("expected error, got nil") + } + if !tt.expectError && err != nil { + t.Errorf("expected no error, got %v", err) + } + if pipelineName != tt.expectedName { + t.Errorf("expected pipeline name %s, got %s", tt.expectedName, pipelineName) + } + }) + } +} + func TestHTTPAPI_ManilaExternalScheduler_DecisionCreation(t *testing.T) { config := conf.Config{SchedulingDomain: v1alpha1.SchedulingDomainManila} diff --git a/internal/scheduling/decisions/manila/pipeline_controller.go b/internal/scheduling/manila/filter_weigher_pipeline_controller.go similarity index 85% rename from internal/scheduling/decisions/manila/pipeline_controller.go rename to internal/scheduling/manila/filter_weigher_pipeline_controller.go index ac153f2d3..ceb0320f2 100644 --- a/internal/scheduling/decisions/manila/pipeline_controller.go +++ b/internal/scheduling/manila/filter_weigher_pipeline_controller.go @@ -33,26 +33,26 @@ import ( // // Additionally, the controller watches for pipeline and step changes to // reconfigure the pipelines as needed. -type DecisionPipelineController struct { +type FilterWeigherPipelineController struct { // Toolbox shared between all pipeline controllers. - lib.BasePipelineController[lib.Pipeline[api.ExternalSchedulerRequest]] + lib.BasePipelineController[lib.FilterWeigherPipeline[api.ExternalSchedulerRequest]] // Mutex to only allow one process at a time processMu sync.Mutex // Monitor to pass down to all pipelines. - Monitor lib.PipelineMonitor + Monitor lib.FilterWeigherPipelineMonitor // Config for the scheduling operator. Conf conf.Config } // The type of pipeline this controller manages. -func (c *DecisionPipelineController) PipelineType() v1alpha1.PipelineType { +func (c *FilterWeigherPipelineController) PipelineType() v1alpha1.PipelineType { return v1alpha1.PipelineTypeFilterWeigher } // Callback executed when kubernetes asks to reconcile a decision resource. -func (c *DecisionPipelineController) Reconcile(ctx context.Context, req ctrl.Request) (ctrl.Result, error) { +func (c *FilterWeigherPipelineController) Reconcile(ctx context.Context, req ctrl.Request) (ctrl.Result, error) { c.processMu.Lock() defer c.processMu.Unlock() @@ -72,7 +72,7 @@ func (c *DecisionPipelineController) Reconcile(ctx context.Context, req ctrl.Req } // Process the decision from the API. Should create and return the updated decision. -func (c *DecisionPipelineController) ProcessNewDecisionFromAPI(ctx context.Context, decision *v1alpha1.Decision) error { +func (c *FilterWeigherPipelineController) ProcessNewDecisionFromAPI(ctx context.Context, decision *v1alpha1.Decision) error { c.processMu.Lock() defer c.processMu.Unlock() @@ -111,7 +111,7 @@ func (c *DecisionPipelineController) ProcessNewDecisionFromAPI(ctx context.Conte return err } -func (c *DecisionPipelineController) process(ctx context.Context, decision *v1alpha1.Decision) error { +func (c *FilterWeigherPipelineController) process(ctx context.Context, decision *v1alpha1.Decision) error { log := ctrl.LoggerFrom(ctx) startedAt := time.Now() // So we can measure sync duration. @@ -141,15 +141,20 @@ func (c *DecisionPipelineController) process(ctx context.Context, decision *v1al } // The base controller will delegate the pipeline creation down to this method. 
-func (c *DecisionPipelineController) InitPipeline( +func (c *FilterWeigherPipelineController) InitPipeline( ctx context.Context, p v1alpha1.Pipeline, -) (lib.Pipeline[api.ExternalSchedulerRequest], error) { - - return lib.NewPipeline(ctx, c.Client, p.Name, supportedSteps, p.Spec.Steps, c.Monitor) +) lib.PipelineInitResult[lib.FilterWeigherPipeline[api.ExternalSchedulerRequest]] { + + return lib.InitNewFilterWeigherPipeline( + ctx, c.Client, p.Name, + supportedFilters, p.Spec.Filters, + supportedWeighers, p.Spec.Weighers, + c.Monitor, + ) } -func (c *DecisionPipelineController) SetupWithManager(mgr manager.Manager, mcl *multicluster.Client) error { +func (c *FilterWeigherPipelineController) SetupWithManager(mgr manager.Manager, mcl *multicluster.Client) error { c.Initializer = c c.SchedulingDomain = v1alpha1.SchedulingDomainManila if err := mgr.Add(manager.RunnableFunc(c.InitAllPipelines)); err != nil { diff --git a/internal/scheduling/decisions/manila/pipeline_controller_test.go b/internal/scheduling/manila/filter_weigher_pipeline_controller_test.go similarity index 71% rename from internal/scheduling/decisions/manila/pipeline_controller_test.go rename to internal/scheduling/manila/filter_weigher_pipeline_controller_test.go index ddfd3fce5..33f10888f 100644 --- a/internal/scheduling/decisions/manila/pipeline_controller_test.go +++ b/internal/scheduling/manila/filter_weigher_pipeline_controller_test.go @@ -17,13 +17,15 @@ import ( api "github.com/cobaltcore-dev/cortex/api/delegation/manila" "github.com/cobaltcore-dev/cortex/api/v1alpha1" + "github.com/sapcc/go-bits/must" + "github.com/cobaltcore-dev/cortex/internal/knowledge/extractor/plugins/storage" "github.com/cobaltcore-dev/cortex/internal/scheduling/lib" "github.com/cobaltcore-dev/cortex/pkg/conf" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" ) -func TestDecisionPipelineController_Reconcile(t *testing.T) { +func TestFilterWeigherPipelineController_Reconcile(t *testing.T) { scheme := runtime.NewScheme() if err := v1alpha1.AddToScheme(scheme); err != nil { t.Fatalf("Failed to add v1alpha1 scheme: %v", err) @@ -84,7 +86,8 @@ func TestDecisionPipelineController_Reconcile(t *testing.T) { Spec: v1alpha1.PipelineSpec{ Type: v1alpha1.PipelineTypeFilterWeigher, SchedulingDomain: v1alpha1.SchedulingDomainManila, - Steps: []v1alpha1.StepSpec{}, + Filters: []v1alpha1.FilterSpec{}, + Weighers: []v1alpha1.WeigherSpec{}, }, }, expectError: false, @@ -112,7 +115,8 @@ func TestDecisionPipelineController_Reconcile(t *testing.T) { Spec: v1alpha1.PipelineSpec{ Type: v1alpha1.PipelineTypeFilterWeigher, SchedulingDomain: v1alpha1.SchedulingDomainManila, - Steps: []v1alpha1.StepSpec{}, + Filters: []v1alpha1.FilterSpec{}, + Weighers: []v1alpha1.WeigherSpec{}, }, }, expectError: true, @@ -154,19 +158,19 @@ func TestDecisionPipelineController_Reconcile(t *testing.T) { WithStatusSubresource(&v1alpha1.Decision{}). 
Build() - controller := &DecisionPipelineController{ - BasePipelineController: lib.BasePipelineController[lib.Pipeline[api.ExternalSchedulerRequest]]{ + controller := &FilterWeigherPipelineController{ + BasePipelineController: lib.BasePipelineController[lib.FilterWeigherPipeline[api.ExternalSchedulerRequest]]{ Client: client, - Pipelines: make(map[string]lib.Pipeline[api.ExternalSchedulerRequest]), + Pipelines: make(map[string]lib.FilterWeigherPipeline[api.ExternalSchedulerRequest]), }, - Monitor: lib.PipelineMonitor{}, + Monitor: lib.FilterWeigherPipelineMonitor{}, Conf: conf.Config{ SchedulingDomain: v1alpha1.SchedulingDomainManila, }, } if tt.pipeline != nil { - pipeline, err := controller.InitPipeline(t.Context(), v1alpha1.Pipeline{ + initResult := controller.InitPipeline(t.Context(), v1alpha1.Pipeline{ ObjectMeta: metav1.ObjectMeta{ Name: tt.pipeline.Name, }, @@ -175,7 +179,7 @@ func TestDecisionPipelineController_Reconcile(t *testing.T) { if err != nil { t.Fatalf("Failed to init pipeline: %v", err) } - controller.Pipelines[tt.pipeline.Name] = pipeline + controller.Pipelines[tt.pipeline.Name] = initResult.Pipeline } req := ctrl.Request{ @@ -213,7 +217,7 @@ func TestDecisionPipelineController_Reconcile(t *testing.T) { } } -func TestDecisionPipelineController_ProcessNewDecisionFromAPI(t *testing.T) { +func TestFilterWeigherPipelineController_ProcessNewDecisionFromAPI(t *testing.T) { scheme := runtime.NewScheme() if err := v1alpha1.AddToScheme(scheme); err != nil { t.Fatalf("Failed to add v1alpha1 scheme: %v", err) @@ -277,7 +281,8 @@ func TestDecisionPipelineController_ProcessNewDecisionFromAPI(t *testing.T) { Type: v1alpha1.PipelineTypeFilterWeigher, SchedulingDomain: v1alpha1.SchedulingDomainManila, CreateDecisions: true, - Steps: []v1alpha1.StepSpec{}, + Filters: []v1alpha1.FilterSpec{}, + Weighers: []v1alpha1.WeigherSpec{}, }, }, createDecisions: true, @@ -310,7 +315,8 @@ func TestDecisionPipelineController_ProcessNewDecisionFromAPI(t *testing.T) { Type: v1alpha1.PipelineTypeFilterWeigher, SchedulingDomain: v1alpha1.SchedulingDomainManila, CreateDecisions: false, - Steps: []v1alpha1.StepSpec{}, + Filters: []v1alpha1.FilterSpec{}, + Weighers: []v1alpha1.WeigherSpec{}, }, }, createDecisions: false, @@ -363,7 +369,8 @@ func TestDecisionPipelineController_ProcessNewDecisionFromAPI(t *testing.T) { Type: v1alpha1.PipelineTypeFilterWeigher, SchedulingDomain: v1alpha1.SchedulingDomainManila, CreateDecisions: true, - Steps: []v1alpha1.StepSpec{}, + Filters: []v1alpha1.FilterSpec{}, + Weighers: []v1alpha1.WeigherSpec{}, }, }, createDecisions: true, @@ -386,13 +393,13 @@ func TestDecisionPipelineController_ProcessNewDecisionFromAPI(t *testing.T) { WithStatusSubresource(&v1alpha1.Decision{}). 
Build() - controller := &DecisionPipelineController{ - BasePipelineController: lib.BasePipelineController[lib.Pipeline[api.ExternalSchedulerRequest]]{ + controller := &FilterWeigherPipelineController{ + BasePipelineController: lib.BasePipelineController[lib.FilterWeigherPipeline[api.ExternalSchedulerRequest]]{ Client: client, - Pipelines: make(map[string]lib.Pipeline[api.ExternalSchedulerRequest]), + Pipelines: make(map[string]lib.FilterWeigherPipeline[api.ExternalSchedulerRequest]), PipelineConfigs: make(map[string]v1alpha1.Pipeline), }, - Monitor: lib.PipelineMonitor{}, + Monitor: lib.FilterWeigherPipelineMonitor{}, Conf: conf.Config{ SchedulingDomain: v1alpha1.SchedulingDomainManila, }, @@ -400,11 +407,11 @@ func TestDecisionPipelineController_ProcessNewDecisionFromAPI(t *testing.T) { if tt.pipelineConfig != nil { controller.PipelineConfigs[tt.pipelineConfig.Name] = *tt.pipelineConfig - pipeline, err := controller.InitPipeline(t.Context(), *tt.pipelineConfig) - if err != nil { - t.Fatalf("Failed to init pipeline: %v", err) + initResult := controller.InitPipeline(t.Context(), *tt.pipelineConfig) + if len(initResult.FilterErrors) > 0 || len(initResult.WeigherErrors) > 0 { + t.Fatalf("Failed to init pipeline: %v", initResult) } - controller.Pipelines[tt.pipelineConfig.Name] = pipeline + controller.Pipelines[tt.pipelineConfig.Name] = initResult.Pipeline } err := controller.ProcessNewDecisionFromAPI(context.Background(), tt.decision) @@ -459,67 +466,127 @@ func TestDecisionPipelineController_ProcessNewDecisionFromAPI(t *testing.T) { } } -func TestDecisionPipelineController_InitPipeline(t *testing.T) { - controller := &DecisionPipelineController{ - Monitor: lib.PipelineMonitor{}, +func TestFilterWeigherPipelineController_PipelineType(t *testing.T) { + controller := &FilterWeigherPipelineController{} + + pipelineType := controller.PipelineType() + + if pipelineType != v1alpha1.PipelineTypeFilterWeigher { + t.Errorf("expected pipeline type %s, got %s", v1alpha1.PipelineTypeFilterWeigher, pipelineType) + } +} + +func TestFilterWeigherPipelineController_InitPipeline(t *testing.T) { + scheme := runtime.NewScheme() + if err := v1alpha1.AddToScheme(scheme); err != nil { + t.Fatalf("Failed to add v1alpha1 scheme: %v", err) } tests := []struct { - name string - steps []v1alpha1.StepSpec - expectError bool + name string + filters []v1alpha1.FilterSpec + weighers []v1alpha1.WeigherSpec + knowledges []client.Object + expectNonCriticalError bool + expectCriticalError bool }{ { - name: "empty steps", - steps: []v1alpha1.StepSpec{}, - expectError: false, + name: "empty steps", + filters: []v1alpha1.FilterSpec{}, + weighers: []v1alpha1.WeigherSpec{}, + knowledges: []client.Object{}, + expectNonCriticalError: false, + expectCriticalError: false, }, { name: "supported netapp step", - steps: []v1alpha1.StepSpec{ + weighers: []v1alpha1.WeigherSpec{ { - Type: v1alpha1.StepTypeWeigher, - Impl: "netapp_cpu_usage_balancing", - Opts: runtime.RawExtension{ + Name: "netapp_cpu_usage_balancing", + Params: runtime.RawExtension{ Raw: []byte(`{"AvgCPUUsageLowerBound": 0, "AvgCPUUsageUpperBound": 90, "MaxCPUUsageLowerBound": 0, "MaxCPUUsageUpperBound": 100}`), }, }, }, - expectError: false, + knowledges: []client.Object{ + &v1alpha1.Knowledge{ + ObjectMeta: metav1.ObjectMeta{ + Name: "netapp-storage-pool-cpu-usage-manila", + }, + Status: v1alpha1.KnowledgeStatus{ + Conditions: []metav1.Condition{ + { + Type: v1alpha1.KnowledgeConditionReady, + Status: metav1.ConditionTrue, + }, + }, + Raw: 
must.Return(v1alpha1.BoxFeatureList([]storage.StoragePoolCPUUsage{ + { + StoragePoolName: "manila-share-1@backend1", + AvgCPUUsagePct: 50, + MaxCPUUsagePct: 80, + }, + { + StoragePoolName: "manila-share-2@backend2", + AvgCPUUsagePct: 20, + MaxCPUUsagePct: 40, + }, + })), + RawLength: 2, + }, + }, + }, + expectNonCriticalError: false, + expectCriticalError: false, }, { name: "unsupported step", - steps: []v1alpha1.StepSpec{ + filters: []v1alpha1.FilterSpec{ { - Type: v1alpha1.StepTypeFilter, - Impl: "unsupported-plugin", + Name: "unsupported-plugin", }, }, - expectError: true, + expectNonCriticalError: false, + expectCriticalError: true, }, } for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { - pipeline, err := controller.InitPipeline(t.Context(), v1alpha1.Pipeline{ + client := fake.NewClientBuilder(). + WithScheme(scheme). + WithObjects(tt.knowledges...). + WithStatusSubresource(&v1alpha1.Decision{}). + Build() + controller := &FilterWeigherPipelineController{ + Monitor: lib.FilterWeigherPipelineMonitor{}, + } + controller.Client = client // Through basepipelinecontroller + + initResult := controller.InitPipeline(t.Context(), v1alpha1.Pipeline{ ObjectMeta: metav1.ObjectMeta{ Name: "test-pipeline", }, Spec: v1alpha1.PipelineSpec{ Type: v1alpha1.PipelineTypeFilterWeigher, SchedulingDomain: v1alpha1.SchedulingDomainManila, - Steps: tt.steps, + Filters: tt.filters, + Weighers: tt.weighers, }, }) - if tt.expectError && err == nil { - t.Error("Expected error but got none") + if !tt.expectCriticalError && len(initResult.FilterErrors) > 0 { + t.Errorf("Expected no critical error but got: %v", initResult.FilterErrors) } - if !tt.expectError && err != nil { - t.Errorf("Expected no error but got: %v", err) + if tt.expectCriticalError && len(initResult.FilterErrors) == 0 { + t.Error("Expected critical error but got none") + } + + if !tt.expectNonCriticalError && len(initResult.WeigherErrors) > 0 { + t.Errorf("Expected no non-critical error but got: %v", initResult.WeigherErrors) } - if !tt.expectError && pipeline == nil { - t.Error("Expected pipeline but got nil") + if tt.expectNonCriticalError && len(initResult.WeigherErrors) == 0 { + t.Error("Expected non-critical error but got none") } }) } diff --git a/internal/scheduling/decisions/manila/plugins/weighers/netapp_cpu_usage_balancing.go b/internal/scheduling/manila/plugins/weighers/netapp_cpu_usage_balancing.go similarity index 80% rename from internal/scheduling/decisions/manila/plugins/weighers/netapp_cpu_usage_balancing.go rename to internal/scheduling/manila/plugins/weighers/netapp_cpu_usage_balancing.go index 539988c3a..5eb9474b5 100644 --- a/internal/scheduling/decisions/manila/plugins/weighers/netapp_cpu_usage_balancing.go +++ b/internal/scheduling/manila/plugins/weighers/netapp_cpu_usage_balancing.go @@ -11,7 +11,8 @@ import ( api "github.com/cobaltcore-dev/cortex/api/delegation/manila" "github.com/cobaltcore-dev/cortex/api/v1alpha1" "github.com/cobaltcore-dev/cortex/internal/knowledge/extractor/plugins/storage" - scheduling "github.com/cobaltcore-dev/cortex/internal/scheduling/lib" + "github.com/cobaltcore-dev/cortex/internal/scheduling/lib" + corev1 "k8s.io/api/core/v1" "sigs.k8s.io/controller-runtime/pkg/client" ) @@ -45,12 +46,23 @@ func (o NetappCPUUsageBalancingStepOpts) Validate() error { // Step to balance CPU usage by avoiding highly used storage pools. type NetappCPUUsageBalancingStep struct { // BaseStep is a helper struct that provides common functionality for all steps. 
- scheduling.BaseStep[api.ExternalSchedulerRequest, NetappCPUUsageBalancingStepOpts] + lib.BaseWeigher[api.ExternalSchedulerRequest, NetappCPUUsageBalancingStepOpts] +} + +// Initialize the step and validate that all required knowledges are ready. +func (s *NetappCPUUsageBalancingStep) Init(ctx context.Context, client client.Client, weigher v1alpha1.WeigherSpec) error { + if err := s.BaseWeigher.Init(ctx, client, weigher); err != nil { + return err + } + if err := s.CheckKnowledges(ctx, corev1.ObjectReference{Name: "netapp-storage-pool-cpu-usage-manila"}); err != nil { + return err + } + return nil } // Downvote hosts that are highly contended. -func (s *NetappCPUUsageBalancingStep) Run(traceLog *slog.Logger, request api.ExternalSchedulerRequest) (*scheduling.StepResult, error) { - result := s.PrepareResult(request) +func (s *NetappCPUUsageBalancingStep) Run(traceLog *slog.Logger, request api.ExternalSchedulerRequest) (*lib.FilterWeigherPipelineStepResult, error) { + result := s.IncludeAllHostsFromRequest(request) result.Statistics["avg cpu contention"] = s.PrepareStats(request, "%") result.Statistics["max cpu contention"] = s.PrepareStats(request, "%") @@ -74,14 +86,14 @@ func (s *NetappCPUUsageBalancingStep) Run(traceLog *slog.Logger, request api.Ext if _, ok := result.Activations[usage.StoragePoolName]; !ok { continue } - activationAvg := scheduling.MinMaxScale( + activationAvg := lib.MinMaxScale( usage.AvgCPUUsagePct, s.Options.AvgCPUUsageLowerBound, s.Options.AvgCPUUsageUpperBound, s.Options.AvgCPUUsageActivationLowerBound, s.Options.AvgCPUUsageActivationUpperBound, ) - activationMax := scheduling.MinMaxScale( + activationMax := lib.MinMaxScale( usage.MaxCPUUsagePct, s.Options.MaxCPUUsageLowerBound, s.Options.MaxCPUUsageUpperBound, diff --git a/internal/scheduling/decisions/manila/plugins/weighers/netapp_cpu_usage_balancing_test.go b/internal/scheduling/manila/plugins/weighers/netapp_cpu_usage_balancing_test.go similarity index 56% rename from internal/scheduling/decisions/manila/plugins/weighers/netapp_cpu_usage_balancing_test.go rename to internal/scheduling/manila/plugins/weighers/netapp_cpu_usage_balancing_test.go index f78559b28..839d1d2a1 100644 --- a/internal/scheduling/decisions/manila/plugins/weighers/netapp_cpu_usage_balancing_test.go +++ b/internal/scheduling/manila/plugins/weighers/netapp_cpu_usage_balancing_test.go @@ -14,6 +14,83 @@ import ( "sigs.k8s.io/controller-runtime/pkg/client/fake" ) +func TestNetappCPUUsageBalancingStepOpts_Validate(t *testing.T) { + tests := []struct { + name string + opts NetappCPUUsageBalancingStepOpts + expectError bool + }{ + { + name: "valid options with different bounds", + opts: NetappCPUUsageBalancingStepOpts{ + AvgCPUUsageLowerBound: 0.0, + AvgCPUUsageUpperBound: 100.0, + AvgCPUUsageActivationLowerBound: 0.0, + AvgCPUUsageActivationUpperBound: -1.0, + MaxCPUUsageLowerBound: 0.0, + MaxCPUUsageUpperBound: 100.0, + MaxCPUUsageActivationLowerBound: 0.0, + MaxCPUUsageActivationUpperBound: -1.0, + }, + expectError: false, + }, + { + name: "invalid - avg bounds equal", + opts: NetappCPUUsageBalancingStepOpts{ + AvgCPUUsageLowerBound: 50.0, + AvgCPUUsageUpperBound: 50.0, // Same as lower + AvgCPUUsageActivationLowerBound: 0.0, + AvgCPUUsageActivationUpperBound: -1.0, + MaxCPUUsageLowerBound: 0.0, + MaxCPUUsageUpperBound: 100.0, + MaxCPUUsageActivationLowerBound: 0.0, + MaxCPUUsageActivationUpperBound: -1.0, + }, + expectError: true, + }, + { + name: "invalid - max bounds equal", + opts: NetappCPUUsageBalancingStepOpts{ + AvgCPUUsageLowerBound: 
0.0, + AvgCPUUsageUpperBound: 100.0, + AvgCPUUsageActivationLowerBound: 0.0, + AvgCPUUsageActivationUpperBound: -1.0, + MaxCPUUsageLowerBound: 75.0, + MaxCPUUsageUpperBound: 75.0, // Same as lower + MaxCPUUsageActivationLowerBound: 0.0, + MaxCPUUsageActivationUpperBound: -1.0, + }, + expectError: true, + }, + { + name: "invalid - both bounds equal", + opts: NetappCPUUsageBalancingStepOpts{ + AvgCPUUsageLowerBound: 0.0, + AvgCPUUsageUpperBound: 0.0, // Same as lower + AvgCPUUsageActivationLowerBound: 0.0, + AvgCPUUsageActivationUpperBound: -1.0, + MaxCPUUsageLowerBound: 0.0, + MaxCPUUsageUpperBound: 0.0, // Same as lower + MaxCPUUsageActivationLowerBound: 0.0, + MaxCPUUsageActivationUpperBound: -1.0, + }, + expectError: true, // First error is for avg bounds + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + err := tt.opts.Validate() + if tt.expectError && err == nil { + t.Error("expected error, got nil") + } + if !tt.expectError && err != nil { + t.Errorf("expected no error, got %v", err) + } + }) + } +} + func TestNetappCPUUsageBalancingStep_Run(t *testing.T) { scheme, err := v1alpha1.SchemeBuilder.Build() if err != nil { diff --git a/internal/scheduling/manila/supported_filters.go b/internal/scheduling/manila/supported_filters.go new file mode 100644 index 000000000..ed86e3f5f --- /dev/null +++ b/internal/scheduling/manila/supported_filters.go @@ -0,0 +1,14 @@ +// Copyright SAP SE +// SPDX-License-Identifier: Apache-2.0 + +package manila + +import ( + api "github.com/cobaltcore-dev/cortex/api/delegation/manila" + "github.com/cobaltcore-dev/cortex/internal/scheduling/lib" +) + +type ManilaFilter = lib.Filter[api.ExternalSchedulerRequest] + +// Configuration of filters supported by the manila scheduler. +var supportedFilters = map[string]func() ManilaFilter{} diff --git a/internal/scheduling/manila/supported_weighers.go b/internal/scheduling/manila/supported_weighers.go new file mode 100644 index 000000000..3e9a5b6cb --- /dev/null +++ b/internal/scheduling/manila/supported_weighers.go @@ -0,0 +1,17 @@ +// Copyright SAP SE +// SPDX-License-Identifier: Apache-2.0 + +package manila + +import ( + api "github.com/cobaltcore-dev/cortex/api/delegation/manila" + "github.com/cobaltcore-dev/cortex/internal/scheduling/lib" + "github.com/cobaltcore-dev/cortex/internal/scheduling/manila/plugins/weighers" +) + +type ManilaWeigher = lib.Weigher[api.ExternalSchedulerRequest] + +// Configuration of weighers supported by the manila scheduler. +var supportedWeighers = map[string]func() ManilaWeigher{ + "netapp_cpu_usage_balancing": func() ManilaWeigher { return &weighers.NetappCPUUsageBalancingStep{} }, +} diff --git a/internal/scheduling/decisions/nova/cleanup.go b/internal/scheduling/nova/decisions_cleanup.go similarity index 97% rename from internal/scheduling/decisions/nova/cleanup.go rename to internal/scheduling/nova/decisions_cleanup.go index 5a6524450..57b52355d 100644 --- a/internal/scheduling/decisions/nova/cleanup.go +++ b/internal/scheduling/nova/decisions_cleanup.go @@ -20,7 +20,7 @@ import ( ) // Delete all decisions for nova servers that have been deleted. 
-func Cleanup(ctx context.Context, client client.Client, conf conf.Config) error { +func DecisionsCleanup(ctx context.Context, client client.Client, conf conf.Config) error { var authenticatedHTTP = http.DefaultClient if conf.SSOSecretRef != nil { var err error diff --git a/internal/scheduling/decisions/nova/cleanup_test.go b/internal/scheduling/nova/decisions_cleanup_test.go similarity index 98% rename from internal/scheduling/decisions/nova/cleanup_test.go rename to internal/scheduling/nova/decisions_cleanup_test.go index 6bfab7a00..414cfe0c4 100644 --- a/internal/scheduling/decisions/nova/cleanup_test.go +++ b/internal/scheduling/nova/decisions_cleanup_test.go @@ -342,7 +342,7 @@ func TestCleanupNova(t *testing.T) { Namespace: "default", }, } - err := Cleanup(context.Background(), client, config) + err := DecisionsCleanup(context.Background(), client, config) if tt.expectError && err == nil { t.Error("Expected error but got none") @@ -429,7 +429,7 @@ func TestCleanupNovaDecisionsCancel(t *testing.T) { defer cancel() // This should exit quickly due to context cancellation - if err := Cleanup(ctx, client, config); err != nil { + if err := DecisionsCleanup(ctx, client, config); err != nil { if !errors.Is(err, context.DeadlineExceeded) { t.Errorf("Unexpected error during cleanup: %v", err) } diff --git a/internal/scheduling/descheduling/nova/cleanup.go b/internal/scheduling/nova/deschedulings_cleanup.go similarity index 88% rename from internal/scheduling/descheduling/nova/cleanup.go rename to internal/scheduling/nova/deschedulings_cleanup.go index 46410ae44..bc8d09016 100644 --- a/internal/scheduling/descheduling/nova/cleanup.go +++ b/internal/scheduling/nova/deschedulings_cleanup.go @@ -17,10 +17,10 @@ import ( "sigs.k8s.io/controller-runtime/pkg/predicate" ) -type CleanupOnStartup struct{ *Cleanup } +type DeschedulingsCleanupOnStartup struct{ *DeschedulingsCleanup } // Cleanup all old deschedulings on controller startup. -func (s *CleanupOnStartup) Start(ctx context.Context) error { +func (s *DeschedulingsCleanupOnStartup) Start(ctx context.Context) error { log := logf.FromContext(ctx).WithName("ttl-startup-reconciler") log.Info("starting descheduling cleanup for existing resources") var resources v1alpha1.DeschedulingList @@ -52,14 +52,14 @@ func (s *CleanupOnStartup) Start(ctx context.Context) error { } // Removes old deschedulings. -type Cleanup struct { +type DeschedulingsCleanup struct { // Client for the kubernetes API. client.Client // Kubernetes scheme to use for the deschedulings. Scheme *runtime.Scheme } -func (r *Cleanup) Reconcile(ctx context.Context, req ctrl.Request) (ctrl.Result, error) { +func (r *DeschedulingsCleanup) Reconcile(ctx context.Context, req ctrl.Request) (ctrl.Result, error) { log := logf.FromContext(ctx).WithName("cleanup") // Fetch the descheduling object @@ -91,8 +91,8 @@ func (r *Cleanup) Reconcile(ctx context.Context, req ctrl.Request) (ctrl.Result, return ctrl.Result{}, nil } -func (r *Cleanup) SetupWithManager(mgr ctrl.Manager, mcl *multicluster.Client) error { - if err := mgr.Add(&CleanupOnStartup{r}); err != nil { +func (r *DeschedulingsCleanup) SetupWithManager(mgr ctrl.Manager, mcl *multicluster.Client) error { + if err := mgr.Add(&DeschedulingsCleanupOnStartup{r}); err != nil { return err } return multicluster.BuildController(mcl, mgr). 
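The renamed cleanup controllers are only defined in this patch; their registration with the operator lives outside this diff. A minimal sketch of the expected wiring, assuming a controller-runtime manager and multicluster client are already constructed during operator setup (the package and helper name below are illustrative, not part of this change):

    // Sketch only, not part of this patch: one possible way to register the
    // renamed nova descheduling cleanup with a controller-runtime manager.
    package operator // hypothetical wiring location

    import (
        "github.com/cobaltcore-dev/cortex/internal/scheduling/nova"
        "github.com/cobaltcore-dev/cortex/pkg/multicluster"
        ctrl "sigs.k8s.io/controller-runtime"
    )

    func setupNovaDeschedulingsCleanup(mgr ctrl.Manager, mcl *multicluster.Client) error {
        cleanup := &nova.DeschedulingsCleanup{
            Client: mgr.GetClient(),
            Scheme: mgr.GetScheme(),
        }
        // SetupWithManager adds the one-shot startup sweep
        // (DeschedulingsCleanupOnStartup) via mgr.Add and then builds the
        // TTL reconciler that removes expired Descheduling resources.
        return cleanup.SetupWithManager(mgr, mcl)
    }

The manila and nova DecisionsCleanup functions renamed above would presumably be hooked up in a similar place as periodic or startup tasks; that wiring is likewise not part of this diff.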
diff --git a/internal/scheduling/descheduling/nova/cleanup_test.go b/internal/scheduling/nova/deschedulings_cleanup_test.go similarity index 96% rename from internal/scheduling/descheduling/nova/cleanup_test.go rename to internal/scheduling/nova/deschedulings_cleanup_test.go index 6458c57a8..cf913442f 100644 --- a/internal/scheduling/descheduling/nova/cleanup_test.go +++ b/internal/scheduling/nova/deschedulings_cleanup_test.go @@ -158,7 +158,7 @@ func TestCleanup_Reconcile(t *testing.T) { WithObjects(tt.descheduling). Build() - cleanup := &Cleanup{ + cleanup := &DeschedulingsCleanup{ Client: fakeClient, Scheme: scheme, } @@ -220,7 +220,7 @@ func TestCleanup_Reconcile_NonexistentResource(t *testing.T) { fakeClient := fake.NewClientBuilder().WithScheme(scheme).Build() - cleanup := &Cleanup{ + cleanup := &DeschedulingsCleanup{ Client: fakeClient, Scheme: scheme, } @@ -305,13 +305,13 @@ func TestCleanupOnStartup_Start(t *testing.T) { WithObjects(objects...). Build() - cleanup := &Cleanup{ + cleanup := &DeschedulingsCleanup{ Client: fakeClient, Scheme: scheme, } - cleanupOnStartup := &CleanupOnStartup{ - Cleanup: cleanup, + cleanupOnStartup := &DeschedulingsCleanupOnStartup{ + DeschedulingsCleanup: cleanup, } ctx := context.Background() @@ -352,13 +352,13 @@ func TestCleanupOnStartup_Start_EmptyList(t *testing.T) { fakeClient := fake.NewClientBuilder().WithScheme(scheme).Build() - cleanup := &Cleanup{ + cleanup := &DeschedulingsCleanup{ Client: fakeClient, Scheme: scheme, } - cleanupOnStartup := &CleanupOnStartup{ - Cleanup: cleanup, + cleanupOnStartup := &DeschedulingsCleanupOnStartup{ + DeschedulingsCleanup: cleanup, } ctx := context.Background() diff --git a/internal/scheduling/descheduling/nova/executor.go b/internal/scheduling/nova/deschedulings_executor.go similarity index 95% rename from internal/scheduling/descheduling/nova/executor.go rename to internal/scheduling/nova/deschedulings_executor.go index aec78a6ee..1e1cd17e1 100644 --- a/internal/scheduling/descheduling/nova/executor.go +++ b/internal/scheduling/nova/deschedulings_executor.go @@ -9,6 +9,8 @@ import ( "time" "github.com/cobaltcore-dev/cortex/api/v1alpha1" + "github.com/cobaltcore-dev/cortex/internal/scheduling/lib" + "github.com/cobaltcore-dev/cortex/internal/scheduling/nova/plugins" "github.com/cobaltcore-dev/cortex/pkg/conf" "github.com/cobaltcore-dev/cortex/pkg/multicluster" @@ -24,7 +26,7 @@ import ( "sigs.k8s.io/controller-runtime/pkg/predicate" ) -type Executor struct { +type DeschedulingsExecutor struct { // Client for the kubernetes API. client.Client // Kubernetes scheme to use for the deschedulings. @@ -35,12 +37,12 @@ type Executor struct { // Configuration for the descheduler. Conf conf.Config // Monitor for tracking the descheduler execution. - Monitor Monitor + Monitor lib.DetectorMonitor[plugins.VMDetection] } // Reconcile is part of the main kubernetes reconciliation loop which aims to // move the current state of the cluster closer to the desired state. 
-func (e *Executor) Reconcile(ctx context.Context, req ctrl.Request) (ctrl.Result, error) { +func (e *DeschedulingsExecutor) Reconcile(ctx context.Context, req ctrl.Request) (ctrl.Result, error) { log := logf.FromContext(ctx) descheduling := &v1alpha1.Descheduling{} @@ -251,9 +253,9 @@ func (e *Executor) Reconcile(ctx context.Context, req ctrl.Request) (ctrl.Result return ctrl.Result{}, nil } -func (s *Executor) SetupWithManager(mgr manager.Manager, mcl *multicluster.Client) error { +func (s *DeschedulingsExecutor) SetupWithManager(mgr manager.Manager, mcl *multicluster.Client) error { return multicluster.BuildController(mcl, mgr). - Named("cortex-descheduler"). + Named("cortex-nova-deschedulings-executor"). For( &v1alpha1.Descheduling{}, // Only schedule machines that have the custom scheduler set. diff --git a/internal/scheduling/descheduling/nova/executor_test.go b/internal/scheduling/nova/deschedulings_executor_test.go similarity index 96% rename from internal/scheduling/descheduling/nova/executor_test.go rename to internal/scheduling/nova/deschedulings_executor_test.go index 744f8a7c8..54eace86d 100644 --- a/internal/scheduling/descheduling/nova/executor_test.go +++ b/internal/scheduling/nova/deschedulings_executor_test.go @@ -10,6 +10,8 @@ import ( "time" "github.com/cobaltcore-dev/cortex/api/v1alpha1" + "github.com/cobaltcore-dev/cortex/internal/scheduling/lib" + "github.com/cobaltcore-dev/cortex/internal/scheduling/nova/plugins" "github.com/cobaltcore-dev/cortex/pkg/conf" "k8s.io/apimachinery/pkg/api/meta" @@ -74,8 +76,8 @@ func (m *mockExecutorNovaAPI) GetServerMigrations(ctx context.Context, id string } // Create a zero-value Monitor for testing -func newMockMonitor() Monitor { - return Monitor{} +func newMockMonitor() lib.DetectorMonitor[plugins.VMDetection] { + return lib.DetectorMonitor[plugins.VMDetection]{} } func TestExecutor_Reconcile(t *testing.T) { @@ -331,7 +333,7 @@ func TestExecutor_Reconcile(t *testing.T) { WithStatusSubresource(&v1alpha1.Descheduling{}). Build() - executor := &Executor{ + executor := &DeschedulingsExecutor{ Client: client, Scheme: scheme, NovaAPI: tt.novaAPI, @@ -403,7 +405,7 @@ func TestExecutor_Reconcile(t *testing.T) { } } -func TestExecutor_ReconcileNotFound(t *testing.T) { +func TestDeschedulingsExecutor_ReconcileNotFound(t *testing.T) { scheme := runtime.NewScheme() err := v1alpha1.AddToScheme(scheme) if err != nil { @@ -411,7 +413,7 @@ func TestExecutor_ReconcileNotFound(t *testing.T) { } client := fake.NewClientBuilder().WithScheme(scheme).Build() - executor := &Executor{ + executor := &DeschedulingsExecutor{ Client: client, Scheme: scheme, NovaAPI: &mockExecutorNovaAPI{}, diff --git a/internal/scheduling/descheduling/nova/cycle_detector.go b/internal/scheduling/nova/detector_cycle_breaker.go similarity index 63% rename from internal/scheduling/descheduling/nova/cycle_detector.go rename to internal/scheduling/nova/detector_cycle_breaker.go index 1b501bddc..fd3a1bd00 100644 --- a/internal/scheduling/descheduling/nova/cycle_detector.go +++ b/internal/scheduling/nova/detector_cycle_breaker.go @@ -6,33 +6,27 @@ package nova import ( "context" - "github.com/cobaltcore-dev/cortex/internal/scheduling/descheduling/nova/plugins" + "github.com/cobaltcore-dev/cortex/internal/scheduling/lib" + "github.com/cobaltcore-dev/cortex/internal/scheduling/nova/plugins" "github.com/cobaltcore-dev/cortex/pkg/conf" "sigs.k8s.io/controller-runtime/pkg/client" ) -type CycleDetector interface { - // Initialize the cycle detector with needed clients. 
- Init(ctx context.Context, client client.Client, conf conf.Config) error - // Filter descheduling decisions to avoid cycles. - Filter(ctx context.Context, decisions []plugins.Decision) ([]plugins.Decision, error) -} - -type cycleDetector struct { +type detectorCycleBreaker struct { // Nova API to get needed information for cycle detection. novaAPI NovaAPI } -func NewCycleDetector() CycleDetector { - return &cycleDetector{novaAPI: NewNovaAPI()} +func NewDetectorCycleBreaker() lib.DetectorCycleBreaker[plugins.VMDetection] { + return &detectorCycleBreaker{novaAPI: NewNovaAPI()} } // Initialize the cycle detector. -func (c *cycleDetector) Init(ctx context.Context, client client.Client, conf conf.Config) error { +func (c *detectorCycleBreaker) Init(ctx context.Context, client client.Client, conf conf.Config) error { return c.novaAPI.Init(ctx, client, conf) } -func (c *cycleDetector) Filter(ctx context.Context, decisions []plugins.Decision) ([]plugins.Decision, error) { +func (c *detectorCycleBreaker) Filter(ctx context.Context, decisions []plugins.VMDetection) ([]plugins.VMDetection, error) { keep := make(map[string]struct{}, len(decisions)) for _, decision := range decisions { // Get the migrations for the VM. @@ -59,7 +53,7 @@ func (c *cycleDetector) Filter(ctx context.Context, decisions []plugins.Decision keep[decision.VMID] = struct{}{} } } - var output []plugins.Decision + var output []plugins.VMDetection for _, decision := range decisions { if _, ok := keep[decision.VMID]; ok { output = append(output, decision) diff --git a/internal/scheduling/descheduling/nova/cycle_detector_test.go b/internal/scheduling/nova/detector_cycle_breaker_test.go similarity index 55% rename from internal/scheduling/descheduling/nova/cycle_detector_test.go rename to internal/scheduling/nova/detector_cycle_breaker_test.go index ca343c6b6..a242ff0d0 100644 --- a/internal/scheduling/descheduling/nova/cycle_detector_test.go +++ b/internal/scheduling/nova/detector_cycle_breaker_test.go @@ -8,29 +8,30 @@ import ( "errors" "testing" - "github.com/cobaltcore-dev/cortex/internal/scheduling/descheduling/nova/plugins" + "github.com/cobaltcore-dev/cortex/internal/scheduling/nova/plugins" "github.com/cobaltcore-dev/cortex/pkg/conf" "sigs.k8s.io/controller-runtime/pkg/client" + "sigs.k8s.io/controller-runtime/pkg/client/fake" ) -type mockCycleDetectorNovaAPI struct { +type mockDetectorCycleBreakerNovaAPI struct { migrations map[string][]migration getError error } -func (m *mockCycleDetectorNovaAPI) Init(ctx context.Context, client client.Client, conf conf.Config) error { +func (m *mockDetectorCycleBreakerNovaAPI) Init(ctx context.Context, client client.Client, conf conf.Config) error { return nil } -func (m *mockCycleDetectorNovaAPI) Get(ctx context.Context, id string) (server, error) { +func (m *mockDetectorCycleBreakerNovaAPI) Get(ctx context.Context, id string) (server, error) { return server{}, errors.New("not implemented") } -func (m *mockCycleDetectorNovaAPI) LiveMigrate(ctx context.Context, id string) error { +func (m *mockDetectorCycleBreakerNovaAPI) LiveMigrate(ctx context.Context, id string) error { return errors.New("not implemented") } -func (m *mockCycleDetectorNovaAPI) GetServerMigrations(ctx context.Context, id string) ([]migration, error) { +func (m *mockDetectorCycleBreakerNovaAPI) GetServerMigrations(ctx context.Context, id string) ([]migration, error) { if m.getError != nil { return nil, m.getError } @@ -40,17 +41,17 @@ func (m *mockCycleDetectorNovaAPI) GetServerMigrations(ctx context.Context, id s return 
[]migration{}, nil } -func TestCycleDetector_Filter(t *testing.T) { +func TestDetectorCycleBreaker_Filter(t *testing.T) { tests := []struct { name string - decisions []plugins.Decision + decisions []plugins.VMDetection migrations map[string][]migration - expected []plugins.Decision + expected []plugins.VMDetection expectErr bool }{ { name: "no cycles - all decisions pass through", - decisions: []plugins.Decision{ + decisions: []plugins.VMDetection{ {VMID: "vm-1", Reason: "high CPU", Host: "host-a"}, {VMID: "vm-2", Reason: "high memory", Host: "host-b"}, }, @@ -62,14 +63,14 @@ func TestCycleDetector_Filter(t *testing.T) { {SourceCompute: "host-b", DestCompute: "host-c"}, }, }, - expected: []plugins.Decision{ + expected: []plugins.VMDetection{ {VMID: "vm-1", Reason: "high CPU", Host: "host-a"}, {VMID: "vm-2", Reason: "high memory", Host: "host-b"}, }, }, { name: "simple cycle detected - decision filtered out", - decisions: []plugins.Decision{ + decisions: []plugins.VMDetection{ {VMID: "vm-1", Reason: "high CPU", Host: "host-a"}, }, migrations: map[string][]migration{ @@ -78,11 +79,11 @@ func TestCycleDetector_Filter(t *testing.T) { {SourceCompute: "host-b", DestCompute: "host-a"}, // Cycle back to host-a }, }, - expected: []plugins.Decision{}, // Filtered out due to cycle + expected: []plugins.VMDetection{}, // Filtered out due to cycle }, { name: "three-hop cycle detected", - decisions: []plugins.Decision{ + decisions: []plugins.VMDetection{ {VMID: "vm-1", Reason: "high CPU", Host: "host-a"}, }, migrations: map[string][]migration{ @@ -92,11 +93,11 @@ func TestCycleDetector_Filter(t *testing.T) { {SourceCompute: "host-c", DestCompute: "host-a"}, // Cycle back to host-a }, }, - expected: []plugins.Decision{}, // Filtered out due to cycle + expected: []plugins.VMDetection{}, // Filtered out due to cycle }, { name: "mixed scenarios - some cycles, some not", - decisions: []plugins.Decision{ + decisions: []plugins.VMDetection{ {VMID: "vm-1", Reason: "high CPU", Host: "host-a"}, // Has cycle {VMID: "vm-2", Reason: "high memory", Host: "host-x"}, // No cycle {VMID: "vm-3", Reason: "high disk", Host: "host-y"}, // No migrations @@ -112,14 +113,14 @@ func TestCycleDetector_Filter(t *testing.T) { }, "vm-3": {}, // No migrations }, - expected: []plugins.Decision{ + expected: []plugins.VMDetection{ {VMID: "vm-2", Reason: "high memory", Host: "host-x"}, {VMID: "vm-3", Reason: "high disk", Host: "host-y"}, }, }, { name: "complex cycle with multiple hops", - decisions: []plugins.Decision{ + decisions: []plugins.VMDetection{ {VMID: "vm-1", Reason: "high CPU", Host: "host-a"}, }, migrations: map[string][]migration{ @@ -130,23 +131,23 @@ func TestCycleDetector_Filter(t *testing.T) { {SourceCompute: "host-d", DestCompute: "host-b"}, // Cycle to host-b (not host-a) }, }, - expected: []plugins.Decision{}, // Filtered out due to cycle + expected: []plugins.VMDetection{}, // Filtered out due to cycle }, { name: "no migrations - decision passes through", - decisions: []plugins.Decision{ + decisions: []plugins.VMDetection{ {VMID: "vm-1", Reason: "high CPU", Host: "host-a"}, }, migrations: map[string][]migration{ "vm-1": {}, // No migrations }, - expected: []plugins.Decision{ + expected: []plugins.VMDetection{ {VMID: "vm-1", Reason: "high CPU", Host: "host-a"}, }, }, { name: "single migration - no cycle possible", - decisions: []plugins.Decision{ + decisions: []plugins.VMDetection{ {VMID: "vm-1", Reason: "high CPU", Host: "host-a"}, }, migrations: map[string][]migration{ @@ -154,13 +155,13 @@ func 
TestCycleDetector_Filter(t *testing.T) { {SourceCompute: "host-a", DestCompute: "host-b"}, }, }, - expected: []plugins.Decision{ + expected: []plugins.VMDetection{ {VMID: "vm-1", Reason: "high CPU", Host: "host-a"}, }, }, { name: "API error when getting migrations", - decisions: []plugins.Decision{ + decisions: []plugins.VMDetection{ {VMID: "vm-1", Reason: "high CPU", Host: "host-a"}, }, migrations: map[string][]migration{}, @@ -170,7 +171,7 @@ func TestCycleDetector_Filter(t *testing.T) { for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { - mockAPI := &mockCycleDetectorNovaAPI{ + mockAPI := &mockDetectorCycleBreakerNovaAPI{ migrations: tt.migrations, } @@ -178,7 +179,7 @@ func TestCycleDetector_Filter(t *testing.T) { mockAPI.getError = errors.New("API error") } - detector := cycleDetector{novaAPI: mockAPI} + detector := detectorCycleBreaker{novaAPI: mockAPI} ctx := context.Background() result, err := detector.Filter(ctx, tt.decisions) @@ -201,41 +202,41 @@ func TestCycleDetector_Filter(t *testing.T) { } // Check if all expected decisions are present - expectedMap := make(map[string]plugins.Decision) + expectedMap := make(map[string]plugins.VMDetection) for _, d := range tt.expected { expectedMap[d.VMID] = d } - for _, resultDecision := range result { - expectedDecision, found := expectedMap[resultDecision.VMID] + for _, resultVMDetection := range result { + expectedVMDetection, found := expectedMap[resultVMDetection.VMID] if !found { - t.Errorf("unexpected decision for VM %s", resultDecision.VMID) + t.Errorf("unexpected decision for VM %s", resultVMDetection.VMID) continue } - if resultDecision.Reason != expectedDecision.Reason { + if resultVMDetection.Reason != expectedVMDetection.Reason { t.Errorf("expected reason %s for VM %s, got %s", - expectedDecision.Reason, resultDecision.VMID, resultDecision.Reason) + expectedVMDetection.Reason, resultVMDetection.VMID, resultVMDetection.Reason) } - if resultDecision.Host != expectedDecision.Host { + if resultVMDetection.Host != expectedVMDetection.Host { t.Errorf("expected host %s for VM %s, got %s", - expectedDecision.Host, resultDecision.VMID, resultDecision.Host) + expectedVMDetection.Host, resultVMDetection.VMID, resultVMDetection.Host) } } }) } } -func TestCycleDetector_Filter_EmptyDecisions(t *testing.T) { - mockAPI := &mockCycleDetectorNovaAPI{ +func TestDetectorCycleBreaker_Filter_EmptyVMDetections(t *testing.T) { + mockAPI := &mockDetectorCycleBreakerNovaAPI{ migrations: map[string][]migration{}, } - detector := cycleDetector{novaAPI: mockAPI} + detector := detectorCycleBreaker{novaAPI: mockAPI} ctx := context.Background() - result, err := detector.Filter(ctx, []plugins.Decision{}) + result, err := detector.Filter(ctx, []plugins.VMDetection{}) if err != nil { t.Errorf("unexpected error: %v", err) @@ -245,3 +246,77 @@ func TestCycleDetector_Filter_EmptyDecisions(t *testing.T) { t.Errorf("expected empty result for empty input, got %d decisions", len(result)) } } + +func TestNewDetectorCycleBreaker(t *testing.T) { + detector := NewDetectorCycleBreaker() + + if detector == nil { + t.Fatal("expected non-nil detector") + } + + // Verify it's the correct type + _, ok := detector.(*detectorCycleBreaker) + if !ok { + t.Errorf("expected *detectorCycleBreaker, got %T", detector) + } + + // Verify the novaAPI field is initialized + detectorImpl := detector.(*detectorCycleBreaker) + if detectorImpl.novaAPI == nil { + t.Error("expected novaAPI to be initialized") + } +} + +func TestDetectorCycleBreaker_Init(t *testing.T) { + tests := 
[]struct { + name string + setupMock func() NovaAPI + expectErr bool + }{ + { + name: "successful initialization", + setupMock: func() NovaAPI { + return &mockDetectorCycleBreakerNovaAPI{} + }, + expectErr: false, + }, + { + name: "initialization with error", + setupMock: func() NovaAPI { + return &mockDetectorCycleBreakerNovaAPIWithInitError{} + }, + expectErr: true, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + detector := &detectorCycleBreaker{ + novaAPI: tt.setupMock(), + } + + ctx := context.Background() + fakeClient := fake.NewClientBuilder().Build() + cfg := conf.Config{} + + err := detector.Init(ctx, fakeClient, cfg) + + if tt.expectErr && err == nil { + t.Error("expected error but got none") + } + + if !tt.expectErr && err != nil { + t.Errorf("unexpected error: %v", err) + } + }) + } +} + +// mockDetectorCycleBreakerNovaAPIWithInitError is a mock that returns an error on Init +type mockDetectorCycleBreakerNovaAPIWithInitError struct { + mockDetectorCycleBreakerNovaAPI +} + +func (m *mockDetectorCycleBreakerNovaAPIWithInitError) Init(ctx context.Context, client client.Client, conf conf.Config) error { + return errors.New("init error") +} diff --git a/internal/scheduling/descheduling/nova/pipeline_controller.go b/internal/scheduling/nova/detector_pipeline_controller.go similarity index 52% rename from internal/scheduling/descheduling/nova/pipeline_controller.go rename to internal/scheduling/nova/detector_pipeline_controller.go index 4a357a5ed..7bd773245 100644 --- a/internal/scheduling/descheduling/nova/pipeline_controller.go +++ b/internal/scheduling/nova/detector_pipeline_controller.go @@ -11,6 +11,7 @@ import ( "github.com/cobaltcore-dev/cortex/api/v1alpha1" "github.com/cobaltcore-dev/cortex/internal/scheduling/lib" + "github.com/cobaltcore-dev/cortex/internal/scheduling/nova/plugins" "github.com/cobaltcore-dev/cortex/pkg/conf" "github.com/cobaltcore-dev/cortex/pkg/multicluster" "github.com/sapcc/go-bits/jobloop" @@ -28,35 +29,42 @@ import ( // // Additionally, the controller watches for pipeline and step changes to // reconfigure the pipelines as needed. -type DeschedulingsPipelineController struct { +type DetectorPipelineController struct { // Toolbox shared between all pipeline controllers. - lib.BasePipelineController[*Pipeline] + lib.BasePipelineController[*lib.DetectorPipeline[plugins.VMDetection]] // Monitor to pass down to all pipelines. - Monitor Monitor + Monitor lib.DetectorPipelineMonitor // Config for the scheduling operator. Conf conf.Config // Cycle detector to avoid descheduling loops. - CycleDetector CycleDetector + DetectorCycleBreaker lib.DetectorCycleBreaker[plugins.VMDetection] } // The type of pipeline this controller manages. -func (c *DeschedulingsPipelineController) PipelineType() v1alpha1.PipelineType { - return v1alpha1.PipelineTypeDescheduler +func (c *DetectorPipelineController) PipelineType() v1alpha1.PipelineType { + return v1alpha1.PipelineTypeDetector } // The base controller will delegate the pipeline creation down to this method. 
-func (c *DeschedulingsPipelineController) InitPipeline(ctx context.Context, p v1alpha1.Pipeline) (*Pipeline, error) { - pipeline := &Pipeline{ - Client: c.Client, - CycleDetector: c.CycleDetector, - Monitor: c.Monitor.SubPipeline(p.Name), +func (c *DetectorPipelineController) InitPipeline( + ctx context.Context, + p v1alpha1.Pipeline, +) lib.PipelineInitResult[*lib.DetectorPipeline[plugins.VMDetection]] { + + pipeline := &lib.DetectorPipeline[plugins.VMDetection]{ + Client: c.Client, + DetectorCycleBreaker: c.DetectorCycleBreaker, + Monitor: c.Monitor.SubPipeline(p.Name), + } + errs := pipeline.Init(ctx, p.Spec.Detectors, supportedDetectors) + return lib.PipelineInitResult[*lib.DetectorPipeline[plugins.VMDetection]]{ + Pipeline: pipeline, + DetectorErrors: errs, } - err := pipeline.Init(ctx, p.Spec.Steps, supportedSteps) - return pipeline, err } -func (c *DeschedulingsPipelineController) CreateDeschedulingsPeriodically(ctx context.Context) { +func (c *DetectorPipelineController) CreateDeschedulingsPeriodically(ctx context.Context) { for { select { case <-ctx.Done(): @@ -70,25 +78,64 @@ func (c *DeschedulingsPipelineController) CreateDeschedulingsPeriodically(ctx co time.Sleep(jobloop.DefaultJitter(time.Minute)) continue } - if err := p.createDeschedulings(ctx); err != nil { - slog.Error("descheduler: failed to create deschedulings", "error", err) + decisionsByStep := p.Run() + if len(decisionsByStep) == 0 { + slog.Info("descheduler: no decisions made in this run") + time.Sleep(jobloop.DefaultJitter(time.Minute)) + continue + } + slog.Info("descheduler: decisions made", "decisionsByStep", decisionsByStep) + decisions := p.Combine(decisionsByStep) + var err error + decisions, err = p.DetectorCycleBreaker.Filter(ctx, decisions) + if err != nil { + slog.Error("descheduler: failed to filter decisions for cycles", "error", err) + time.Sleep(jobloop.DefaultJitter(time.Minute)) + continue + } + for _, decision := range decisions { + // Precaution: If a descheduling for the VM already exists, skip it. + // The TTL controller will clean up old deschedulings so the vm + // can be descheduled again later if needed, or we can manually + // delete the descheduling if we want to deschedule the VM again. + var existing v1alpha1.Descheduling + err := p.Get(ctx, client.ObjectKey{Name: decision.VMID}, &existing) + if err == nil { + slog.Info("descheduler: descheduling already exists for VM, skipping", "vmId", decision.VMID) + continue + } + + descheduling := &v1alpha1.Descheduling{} + descheduling.Name = decision.VMID + descheduling.Spec.Ref = decision.VMID + descheduling.Spec.RefType = v1alpha1.DeschedulingSpecVMReferenceNovaServerUUID + descheduling.Spec.PrevHostType = v1alpha1.DeschedulingSpecHostTypeNovaComputeHostName + descheduling.Spec.PrevHost = decision.Host + descheduling.Spec.Reason = decision.Reason + if err := p.Create(ctx, descheduling); err != nil { + slog.Error("descheduler: failed to create descheduling", "error", err) + time.Sleep(jobloop.DefaultJitter(time.Minute)) + continue + } + slog.Info("descheduler: created descheduling", "vmId", decision.VMID, "host", decision.Host, "reason", decision.Reason) } + time.Sleep(jobloop.DefaultJitter(time.Minute)) } } } -func (c *DeschedulingsPipelineController) Reconcile(ctx context.Context, req ctrl.Request) (ctrl.Result, error) { +func (c *DetectorPipelineController) Reconcile(ctx context.Context, req ctrl.Request) (ctrl.Result, error) { // This controller does not reconcile any resources directly. 
return ctrl.Result{}, nil } -func (c *DeschedulingsPipelineController) SetupWithManager(mgr ctrl.Manager, mcl *multicluster.Client) error { +func (c *DetectorPipelineController) SetupWithManager(mgr ctrl.Manager, mcl *multicluster.Client) error { c.Initializer = c c.SchedulingDomain = v1alpha1.SchedulingDomainNova if err := mgr.Add(manager.RunnableFunc(func(ctx context.Context) error { // Initialize the cycle detector. - return c.CycleDetector.Init(ctx, mgr.GetClient(), c.Conf) + return c.DetectorCycleBreaker.Init(ctx, mgr.GetClient(), c.Conf) })); err != nil { return err } diff --git a/internal/scheduling/nova/detector_pipeline_controller_test.go b/internal/scheduling/nova/detector_pipeline_controller_test.go new file mode 100644 index 000000000..29df28631 --- /dev/null +++ b/internal/scheduling/nova/detector_pipeline_controller_test.go @@ -0,0 +1,132 @@ +// Copyright SAP SE +// SPDX-License-Identifier: Apache-2.0 + +package nova + +import ( + "context" + "testing" + + "github.com/cobaltcore-dev/cortex/api/v1alpha1" + + "github.com/cobaltcore-dev/cortex/internal/scheduling/lib" + "github.com/cobaltcore-dev/cortex/internal/scheduling/nova/plugins" + "github.com/cobaltcore-dev/cortex/pkg/conf" + "k8s.io/apimachinery/pkg/runtime" + ctrl "sigs.k8s.io/controller-runtime" + "sigs.k8s.io/controller-runtime/pkg/client" + "sigs.k8s.io/controller-runtime/pkg/client/fake" +) + +type mockDetectorCycleBreaker struct{} + +func (m *mockDetectorCycleBreaker) Init(ctx context.Context, client client.Client, conf conf.Config) error { + return nil +} + +func (m *mockDetectorCycleBreaker) Filter(ctx context.Context, decisions []plugins.VMDetection) ([]plugins.VMDetection, error) { + return decisions, nil +} + +type mockControllerStep struct{} + +func (m *mockControllerStep) Run() ([]plugins.VMDetection, error) { + return nil, nil +} +func (m *mockControllerStep) Init(ctx context.Context, client client.Client, step v1alpha1.DetectorSpec) error { + return nil +} + +func TestDetectorPipelineController_InitPipeline(t *testing.T) { + tests := []struct { + name string + steps []v1alpha1.DetectorSpec + expectNonCriticalError bool + }{ + { + name: "successful pipeline initialization", + steps: []v1alpha1.DetectorSpec{ + { + Name: "mock-step", + }, + }, + expectNonCriticalError: false, + }, + { + name: "unsupported step", + steps: []v1alpha1.DetectorSpec{ + { + Name: "unsupported", + }, + }, + expectNonCriticalError: true, + }, + { + name: "empty steps", + steps: []v1alpha1.DetectorSpec{}, + expectNonCriticalError: false, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + controller := &DetectorPipelineController{ + Monitor: lib.NewDetectorPipelineMonitor(), + DetectorCycleBreaker: &mockDetectorCycleBreaker{}, + } + + pipeline := lib.DetectorPipeline[plugins.VMDetection]{ + DetectorCycleBreaker: controller.DetectorCycleBreaker, + Monitor: controller.Monitor, + } + errs := pipeline.Init(t.Context(), tt.steps, map[string]lib.Detector[plugins.VMDetection]{ + "mock-step": &mockControllerStep{}, + }) + + if tt.expectNonCriticalError { + if len(errs) == 0 { + t.Errorf("expected non-critical error, got none") + } + } else { + if len(errs) > 0 { + t.Errorf("unexpected non-critical error: %v", errs) + } + } + + if pipeline.DetectorCycleBreaker != controller.DetectorCycleBreaker { + t.Error("expected pipeline to have cycle detector set") + } + + if pipeline.Monitor != controller.Monitor { + t.Error("expected pipeline to have monitor set") + } + }) + } +} + +func 
TestDetectorPipelineController_Reconcile(t *testing.T) { + scheme := runtime.NewScheme() + err := v1alpha1.AddToScheme(scheme) + if err != nil { + t.Fatalf("Failed to add v1alpha1 scheme: %v", err) + } + + client := fake.NewClientBuilder().WithScheme(scheme).Build() + + controller := &DetectorPipelineController{ + BasePipelineController: lib.BasePipelineController[*lib.DetectorPipeline[plugins.VMDetection]]{ + Client: client, + }, + } + + req := ctrl.Request{} + result, err := controller.Reconcile(t.Context(), req) + + if err != nil { + t.Errorf("unexpected error: %v", err) + } + + if result.RequeueAfter > 0 { + t.Error("expected no requeue") + } +} diff --git a/internal/scheduling/e2e/nova/checks.go b/internal/scheduling/nova/e2e_checks.go similarity index 100% rename from internal/scheduling/e2e/nova/checks.go rename to internal/scheduling/nova/e2e_checks.go diff --git a/internal/scheduling/external/nova/api.go b/internal/scheduling/nova/external_scheduler_api.go similarity index 99% rename from internal/scheduling/external/nova/api.go rename to internal/scheduling/nova/external_scheduler_api.go index 36a6a3ca3..26083c481 100644 --- a/internal/scheduling/external/nova/api.go +++ b/internal/scheduling/nova/external_scheduler_api.go @@ -1,7 +1,7 @@ // Copyright SAP SE // SPDX-License-Identifier: Apache-2.0 -package http +package nova import ( "bytes" diff --git a/internal/scheduling/external/nova/api_test.go b/internal/scheduling/nova/external_scheduler_api_test.go similarity index 65% rename from internal/scheduling/external/nova/api_test.go rename to internal/scheduling/nova/external_scheduler_api_test.go index 41043d496..6ac7706ed 100644 --- a/internal/scheduling/external/nova/api_test.go +++ b/internal/scheduling/nova/external_scheduler_api_test.go @@ -1,7 +1,7 @@ // Copyright SAP SE // SPDX-License-Identifier: Apache-2.0 -package http +package nova import ( "bytes" @@ -393,3 +393,208 @@ func TestHTTPAPI_NovaExternalScheduler_DecisionCreation(t *testing.T) { t.Error("NovaRaw should not be nil") } } + +func TestHTTPAPI_inferPipelineName(t *testing.T) { + config := conf.Config{SchedulingDomain: "test-operator"} + delegate := &mockHTTPAPIDelegate{} + api := NewAPI(config, delegate).(*httpAPI) + + tests := []struct { + name string + requestData novaapi.ExternalSchedulerRequest + expectedResult string + expectErr bool + errContains string + }{ + { + name: "qemu hypervisor without reservation", + requestData: novaapi.ExternalSchedulerRequest{ + Spec: novaapi.NovaObject[novaapi.NovaSpec]{ + Data: novaapi.NovaSpec{ + Flavor: novaapi.NovaObject[novaapi.NovaFlavor]{ + Data: novaapi.NovaFlavor{ + ExtraSpecs: map[string]string{ + "capabilities:hypervisor_type": "qemu", + }, + }, + }, + }, + }, + Reservation: false, + }, + expectedResult: "nova-external-scheduler-kvm", + expectErr: false, + }, + { + name: "qemu hypervisor with reservation", + requestData: novaapi.ExternalSchedulerRequest{ + Spec: novaapi.NovaObject[novaapi.NovaSpec]{ + Data: novaapi.NovaSpec{ + Flavor: novaapi.NovaObject[novaapi.NovaFlavor]{ + Data: novaapi.NovaFlavor{ + ExtraSpecs: map[string]string{ + "capabilities:hypervisor_type": "qemu", + }, + }, + }, + }, + }, + Reservation: true, + }, + expectedResult: "nova-external-scheduler-kvm-all-filters-enabled", + expectErr: false, + }, + { + name: "QEMU hypervisor uppercase", + requestData: novaapi.ExternalSchedulerRequest{ + Spec: novaapi.NovaObject[novaapi.NovaSpec]{ + Data: novaapi.NovaSpec{ + Flavor: novaapi.NovaObject[novaapi.NovaFlavor]{ + Data: novaapi.NovaFlavor{ + 
ExtraSpecs: map[string]string{ + "capabilities:hypervisor_type": "QEMU", + }, + }, + }, + }, + }, + Reservation: false, + }, + expectedResult: "nova-external-scheduler-kvm", + expectErr: false, + }, + { + name: "ch hypervisor without reservation", + requestData: novaapi.ExternalSchedulerRequest{ + Spec: novaapi.NovaObject[novaapi.NovaSpec]{ + Data: novaapi.NovaSpec{ + Flavor: novaapi.NovaObject[novaapi.NovaFlavor]{ + Data: novaapi.NovaFlavor{ + ExtraSpecs: map[string]string{ + "capabilities:hypervisor_type": "ch", + }, + }, + }, + }, + }, + Reservation: false, + }, + expectedResult: "nova-external-scheduler-kvm", + expectErr: false, + }, + { + name: "ch hypervisor with reservation", + requestData: novaapi.ExternalSchedulerRequest{ + Spec: novaapi.NovaObject[novaapi.NovaSpec]{ + Data: novaapi.NovaSpec{ + Flavor: novaapi.NovaObject[novaapi.NovaFlavor]{ + Data: novaapi.NovaFlavor{ + ExtraSpecs: map[string]string{ + "capabilities:hypervisor_type": "ch", + }, + }, + }, + }, + }, + Reservation: true, + }, + expectedResult: "nova-external-scheduler-kvm-all-filters-enabled", + expectErr: false, + }, + { + name: "vmware hypervisor without reservation", + requestData: novaapi.ExternalSchedulerRequest{ + Spec: novaapi.NovaObject[novaapi.NovaSpec]{ + Data: novaapi.NovaSpec{ + Flavor: novaapi.NovaObject[novaapi.NovaFlavor]{ + Data: novaapi.NovaFlavor{ + ExtraSpecs: map[string]string{ + "capabilities:hypervisor_type": "VMware vCenter Server", + }, + }, + }, + }, + }, + Reservation: false, + }, + expectedResult: "nova-external-scheduler-vmware", + expectErr: false, + }, + { + name: "vmware hypervisor with reservation - error", + requestData: novaapi.ExternalSchedulerRequest{ + Spec: novaapi.NovaObject[novaapi.NovaSpec]{ + Data: novaapi.NovaSpec{ + Flavor: novaapi.NovaObject[novaapi.NovaFlavor]{ + Data: novaapi.NovaFlavor{ + ExtraSpecs: map[string]string{ + "capabilities:hypervisor_type": "VMware vCenter Server", + }, + }, + }, + }, + }, + Reservation: true, + }, + expectErr: true, + errContains: "reservations are not supported on vmware hypervisors", + }, + { + name: "missing hypervisor_type", + requestData: novaapi.ExternalSchedulerRequest{ + Spec: novaapi.NovaObject[novaapi.NovaSpec]{ + Data: novaapi.NovaSpec{ + Flavor: novaapi.NovaObject[novaapi.NovaFlavor]{ + Data: novaapi.NovaFlavor{ + ExtraSpecs: map[string]string{}, + }, + }, + }, + }, + Reservation: false, + }, + expectErr: true, + errContains: "missing hypervisor_type", + }, + { + name: "unsupported hypervisor_type", + requestData: novaapi.ExternalSchedulerRequest{ + Spec: novaapi.NovaObject[novaapi.NovaSpec]{ + Data: novaapi.NovaSpec{ + Flavor: novaapi.NovaObject[novaapi.NovaFlavor]{ + Data: novaapi.NovaFlavor{ + ExtraSpecs: map[string]string{ + "capabilities:hypervisor_type": "unknown-hypervisor", + }, + }, + }, + }, + }, + Reservation: false, + }, + expectErr: true, + errContains: "unsupported hypervisor_type", + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + result, err := api.inferPipelineName(tt.requestData) + + if tt.expectErr { + if err == nil { + t.Error("expected error but got none") + } else if tt.errContains != "" && !strings.Contains(err.Error(), tt.errContains) { + t.Errorf("expected error to contain '%s', got '%s'", tt.errContains, err.Error()) + } + } else { + if err != nil { + t.Errorf("unexpected error: %v", err) + } + if result != tt.expectedResult { + t.Errorf("expected pipeline name '%s', got '%s'", tt.expectedResult, result) + } + } + }) + } +} diff --git 
a/internal/scheduling/decisions/nova/pipeline_controller.go b/internal/scheduling/nova/filter_weigher_pipeline_controller.go similarity index 86% rename from internal/scheduling/decisions/nova/pipeline_controller.go rename to internal/scheduling/nova/filter_weigher_pipeline_controller.go index da088a6e8..8d4b29c6c 100644 --- a/internal/scheduling/decisions/nova/pipeline_controller.go +++ b/internal/scheduling/nova/filter_weigher_pipeline_controller.go @@ -34,26 +34,26 @@ import ( // // Additionally, the controller watches for pipeline and step changes to // reconfigure the pipelines as needed. -type DecisionPipelineController struct { +type FilterWeigherPipelineController struct { // Toolbox shared between all pipeline controllers. - lib.BasePipelineController[lib.Pipeline[api.ExternalSchedulerRequest]] + lib.BasePipelineController[lib.FilterWeigherPipeline[api.ExternalSchedulerRequest]] // Mutex to only allow one process at a time processMu sync.Mutex // Monitor to pass down to all pipelines. - Monitor lib.PipelineMonitor + Monitor lib.FilterWeigherPipelineMonitor // Config for the scheduling operator. Conf conf.Config } // The type of pipeline this controller manages. -func (c *DecisionPipelineController) PipelineType() v1alpha1.PipelineType { +func (c *FilterWeigherPipelineController) PipelineType() v1alpha1.PipelineType { return v1alpha1.PipelineTypeFilterWeigher } // Callback executed when kubernetes asks to reconcile a decision resource. -func (c *DecisionPipelineController) Reconcile(ctx context.Context, req ctrl.Request) (ctrl.Result, error) { +func (c *FilterWeigherPipelineController) Reconcile(ctx context.Context, req ctrl.Request) (ctrl.Result, error) { c.processMu.Lock() defer c.processMu.Unlock() @@ -73,7 +73,7 @@ func (c *DecisionPipelineController) Reconcile(ctx context.Context, req ctrl.Req } // Process the decision from the API. Should create and return the updated decision. -func (c *DecisionPipelineController) ProcessNewDecisionFromAPI(ctx context.Context, decision *v1alpha1.Decision) error { +func (c *FilterWeigherPipelineController) ProcessNewDecisionFromAPI(ctx context.Context, decision *v1alpha1.Decision) error { c.processMu.Lock() defer c.processMu.Unlock() @@ -112,7 +112,7 @@ func (c *DecisionPipelineController) ProcessNewDecisionFromAPI(ctx context.Conte return err } -func (c *DecisionPipelineController) process(ctx context.Context, decision *v1alpha1.Decision) error { +func (c *FilterWeigherPipelineController) process(ctx context.Context, decision *v1alpha1.Decision) error { log := ctrl.LoggerFrom(ctx) startedAt := time.Now() // So we can measure sync duration. @@ -148,15 +148,20 @@ func (c *DecisionPipelineController) process(ctx context.Context, decision *v1al } // The base controller will delegate the pipeline creation down to this method. 
-func (c *DecisionPipelineController) InitPipeline( +func (c *FilterWeigherPipelineController) InitPipeline( ctx context.Context, p v1alpha1.Pipeline, -) (lib.Pipeline[api.ExternalSchedulerRequest], error) { - - return lib.NewPipeline(ctx, c.Client, p.Name, supportedSteps, p.Spec.Steps, c.Monitor) +) lib.PipelineInitResult[lib.FilterWeigherPipeline[api.ExternalSchedulerRequest]] { + + return lib.InitNewFilterWeigherPipeline( + ctx, c.Client, p.Name, + supportedFilters, p.Spec.Filters, + supportedWeighers, p.Spec.Weighers, + c.Monitor, + ) } -func (c *DecisionPipelineController) SetupWithManager(mgr manager.Manager, mcl *multicluster.Client) error { +func (c *FilterWeigherPipelineController) SetupWithManager(mgr manager.Manager, mcl *multicluster.Client) error { c.Initializer = c c.SchedulingDomain = v1alpha1.SchedulingDomainNova if err := mgr.Add(manager.RunnableFunc(c.InitAllPipelines)); err != nil { diff --git a/internal/scheduling/decisions/nova/pipeline_controller_test.go b/internal/scheduling/nova/filter_weigher_pipeline_controller_test.go similarity index 79% rename from internal/scheduling/decisions/nova/pipeline_controller_test.go rename to internal/scheduling/nova/filter_weigher_pipeline_controller_test.go index fa8ab962c..7db3faec3 100644 --- a/internal/scheduling/decisions/nova/pipeline_controller_test.go +++ b/internal/scheduling/nova/filter_weigher_pipeline_controller_test.go @@ -24,7 +24,7 @@ import ( "github.com/cobaltcore-dev/cortex/pkg/conf" ) -func TestDecisionPipelineController_Reconcile(t *testing.T) { +func TestFilterWeigherPipelineController_Reconcile(t *testing.T) { scheme := runtime.NewScheme() if err := v1alpha1.AddToScheme(scheme); err != nil { t.Fatalf("Failed to add v1alpha1 scheme: %v", err) @@ -92,7 +92,8 @@ func TestDecisionPipelineController_Reconcile(t *testing.T) { Spec: v1alpha1.PipelineSpec{ Type: v1alpha1.PipelineTypeFilterWeigher, SchedulingDomain: v1alpha1.SchedulingDomainNova, - Steps: []v1alpha1.StepSpec{}, + Filters: []v1alpha1.FilterSpec{}, + Weighers: []v1alpha1.WeigherSpec{}, }, }, expectError: false, @@ -120,7 +121,8 @@ func TestDecisionPipelineController_Reconcile(t *testing.T) { Spec: v1alpha1.PipelineSpec{ Type: v1alpha1.PipelineTypeFilterWeigher, SchedulingDomain: v1alpha1.SchedulingDomainNova, - Steps: []v1alpha1.StepSpec{}, + Filters: []v1alpha1.FilterSpec{}, + Weighers: []v1alpha1.WeigherSpec{}, }, }, expectError: true, @@ -171,7 +173,8 @@ func TestDecisionPipelineController_Reconcile(t *testing.T) { Spec: v1alpha1.PipelineSpec{ Type: v1alpha1.PipelineTypeFilterWeigher, SchedulingDomain: v1alpha1.SchedulingDomainNova, - Steps: []v1alpha1.StepSpec{}, + Filters: []v1alpha1.FilterSpec{}, + Weighers: []v1alpha1.WeigherSpec{}, }, }, expectError: true, @@ -192,28 +195,28 @@ func TestDecisionPipelineController_Reconcile(t *testing.T) { WithStatusSubresource(&v1alpha1.Decision{}). 
Build() - controller := &DecisionPipelineController{ - BasePipelineController: lib.BasePipelineController[lib.Pipeline[api.ExternalSchedulerRequest]]{ + controller := &FilterWeigherPipelineController{ + BasePipelineController: lib.BasePipelineController[lib.FilterWeigherPipeline[api.ExternalSchedulerRequest]]{ Client: client, - Pipelines: make(map[string]lib.Pipeline[api.ExternalSchedulerRequest]), + Pipelines: make(map[string]lib.FilterWeigherPipeline[api.ExternalSchedulerRequest]), }, - Monitor: lib.PipelineMonitor{}, + Monitor: lib.FilterWeigherPipelineMonitor{}, Conf: conf.Config{ SchedulingDomain: v1alpha1.SchedulingDomainNova, }, } if tt.pipeline != nil { - pipeline, err := controller.InitPipeline(t.Context(), v1alpha1.Pipeline{ + initResult := controller.InitPipeline(t.Context(), v1alpha1.Pipeline{ ObjectMeta: metav1.ObjectMeta{ Name: tt.pipeline.Name, }, Spec: tt.pipeline.Spec, }) - if err != nil { - t.Fatalf("Failed to init pipeline: %v", err) + if len(initResult.FilterErrors) > 0 || len(initResult.WeigherErrors) > 0 { + t.Fatalf("Failed to initialize pipeline: filter errors: %v, weigher errors: %v", initResult.FilterErrors, initResult.WeigherErrors) } - controller.Pipelines[tt.pipeline.Name] = pipeline + controller.Pipelines[tt.pipeline.Name] = initResult.Pipeline } req := ctrl.Request{ @@ -254,94 +257,102 @@ func TestDecisionPipelineController_Reconcile(t *testing.T) { } } -func TestDecisionPipelineController_InitPipeline(t *testing.T) { - controller := &DecisionPipelineController{ - Monitor: lib.PipelineMonitor{}, +func TestFilterWeigherPipelineController_InitPipeline(t *testing.T) { + controller := &FilterWeigherPipelineController{ + Monitor: lib.FilterWeigherPipelineMonitor{}, } tests := []struct { - name string - steps []v1alpha1.StepSpec - expectError bool + name string + filters []v1alpha1.FilterSpec + weighers []v1alpha1.WeigherSpec + expectNonCriticalError bool + expectCriticalError bool }{ { - name: "empty steps", - steps: []v1alpha1.StepSpec{}, - expectError: false, + name: "empty steps", + filters: []v1alpha1.FilterSpec{}, + weighers: []v1alpha1.WeigherSpec{}, + expectNonCriticalError: false, + expectCriticalError: false, }, { name: "supported step", - steps: []v1alpha1.StepSpec{ + filters: []v1alpha1.FilterSpec{ { - Type: v1alpha1.StepTypeFilter, - Impl: "filter_status_conditions", + Name: "filter_status_conditions", }, }, - expectError: false, + expectNonCriticalError: false, + expectCriticalError: false, }, { name: "unsupported step", - steps: []v1alpha1.StepSpec{ + filters: []v1alpha1.FilterSpec{ { - Type: v1alpha1.StepTypeFilter, - Impl: "unsupported-plugin", + Name: "unsupported-plugin", }, }, - expectError: true, + expectNonCriticalError: false, + expectCriticalError: true, }, { name: "step with scoping options", - steps: []v1alpha1.StepSpec{ + filters: []v1alpha1.FilterSpec{ { - Type: v1alpha1.StepTypeFilter, - Impl: "filter_status_conditions", - Opts: runtime.RawExtension{ + Name: "filter_status_conditions", + Params: runtime.RawExtension{ Raw: []byte(`{"scope":{"host_capabilities":{"any_of_trait_infixes":["TEST_TRAIT"]}}}`), }, }, }, - expectError: false, + expectNonCriticalError: false, + expectCriticalError: false, }, { name: "step with invalid scoping options", - steps: []v1alpha1.StepSpec{ + filters: []v1alpha1.FilterSpec{ { - Type: v1alpha1.StepTypeFilter, - Impl: "filter_status_conditions", - Opts: runtime.RawExtension{ + Name: "filter_status_conditions", + Params: runtime.RawExtension{ Raw: []byte(`invalid json`), }, }, }, - expectError: true, + 
expectNonCriticalError: false, + expectCriticalError: true, }, } for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { - pipeline, err := controller.InitPipeline(t.Context(), v1alpha1.Pipeline{ + initResult := controller.InitPipeline(t.Context(), v1alpha1.Pipeline{ ObjectMeta: metav1.ObjectMeta{ Name: "test-pipeline", }, Spec: v1alpha1.PipelineSpec{ - Steps: tt.steps, + Filters: tt.filters, + Weighers: tt.weighers, }, }) - if tt.expectError && err == nil { - t.Error("Expected error but got none") + if tt.expectCriticalError && len(initResult.FilterErrors) == 0 { + t.Error("Expected critical error but got none") } - if !tt.expectError && err != nil { - t.Errorf("Expected no error but got: %v", err) + if !tt.expectCriticalError && len(initResult.FilterErrors) > 0 { + t.Errorf("Unexpected critical errors: %v", initResult.FilterErrors) + } + if tt.expectNonCriticalError && len(initResult.WeigherErrors) == 0 { + t.Error("Expected non-critical error but got none") } - if !tt.expectError && pipeline == nil { - t.Error("Expected pipeline but got nil") + if !tt.expectNonCriticalError && len(initResult.WeigherErrors) > 0 { + t.Errorf("Unexpected non-critical errors: %v", initResult.WeigherErrors) } }) } } -func TestDecisionPipelineController_ProcessNewDecisionFromAPI(t *testing.T) { +func TestFilterWeigherPipelineController_ProcessNewDecisionFromAPI(t *testing.T) { scheme := runtime.NewScheme() if err := v1alpha1.AddToScheme(scheme); err != nil { t.Fatalf("Failed to add v1alpha1 scheme: %v", err) @@ -416,7 +427,8 @@ func TestDecisionPipelineController_ProcessNewDecisionFromAPI(t *testing.T) { Type: v1alpha1.PipelineTypeFilterWeigher, SchedulingDomain: v1alpha1.SchedulingDomainNova, CreateDecisions: true, - Steps: []v1alpha1.StepSpec{}, + Filters: []v1alpha1.FilterSpec{}, + Weighers: []v1alpha1.WeigherSpec{}, }, }, pipelineConf: &v1alpha1.Pipeline{ @@ -427,7 +439,8 @@ func TestDecisionPipelineController_ProcessNewDecisionFromAPI(t *testing.T) { Type: v1alpha1.PipelineTypeFilterWeigher, SchedulingDomain: v1alpha1.SchedulingDomainNova, CreateDecisions: true, - Steps: []v1alpha1.StepSpec{}, + Filters: []v1alpha1.FilterSpec{}, + Weighers: []v1alpha1.WeigherSpec{}, }, }, setupPipelineConfigs: true, @@ -462,7 +475,8 @@ func TestDecisionPipelineController_ProcessNewDecisionFromAPI(t *testing.T) { Type: v1alpha1.PipelineTypeFilterWeigher, SchedulingDomain: v1alpha1.SchedulingDomainNova, CreateDecisions: false, - Steps: []v1alpha1.StepSpec{}, + Filters: []v1alpha1.FilterSpec{}, + Weighers: []v1alpha1.WeigherSpec{}, }, }, pipelineConf: &v1alpha1.Pipeline{ @@ -473,7 +487,8 @@ func TestDecisionPipelineController_ProcessNewDecisionFromAPI(t *testing.T) { Type: v1alpha1.PipelineTypeFilterWeigher, SchedulingDomain: v1alpha1.SchedulingDomainNova, CreateDecisions: false, - Steps: []v1alpha1.StepSpec{}, + Filters: []v1alpha1.FilterSpec{}, + Weighers: []v1alpha1.WeigherSpec{}, }, }, setupPipelineConfigs: true, @@ -532,7 +547,8 @@ func TestDecisionPipelineController_ProcessNewDecisionFromAPI(t *testing.T) { Type: v1alpha1.PipelineTypeFilterWeigher, SchedulingDomain: v1alpha1.SchedulingDomainNova, CreateDecisions: true, - Steps: []v1alpha1.StepSpec{}, + Filters: []v1alpha1.FilterSpec{}, + Weighers: []v1alpha1.WeigherSpec{}, }, }, pipelineConf: &v1alpha1.Pipeline{ @@ -543,7 +559,8 @@ func TestDecisionPipelineController_ProcessNewDecisionFromAPI(t *testing.T) { Type: v1alpha1.PipelineTypeFilterWeigher, SchedulingDomain: v1alpha1.SchedulingDomainNova, CreateDecisions: true, - Steps: []v1alpha1.StepSpec{}, + 
Filters: []v1alpha1.FilterSpec{}, + Weighers: []v1alpha1.WeigherSpec{}, }, }, setupPipelineConfigs: true, @@ -580,7 +597,8 @@ func TestDecisionPipelineController_ProcessNewDecisionFromAPI(t *testing.T) { Type: v1alpha1.PipelineTypeFilterWeigher, SchedulingDomain: v1alpha1.SchedulingDomainNova, CreateDecisions: true, - Steps: []v1alpha1.StepSpec{}, + Filters: []v1alpha1.FilterSpec{}, + Weighers: []v1alpha1.WeigherSpec{}, }, }, setupPipelineConfigs: true, @@ -617,7 +635,8 @@ func TestDecisionPipelineController_ProcessNewDecisionFromAPI(t *testing.T) { Type: v1alpha1.PipelineTypeFilterWeigher, SchedulingDomain: v1alpha1.SchedulingDomainNova, CreateDecisions: true, - Steps: []v1alpha1.StepSpec{}, + Filters: []v1alpha1.FilterSpec{}, + Weighers: []v1alpha1.WeigherSpec{}, }, }, setupPipelineConfigs: true, @@ -643,13 +662,13 @@ func TestDecisionPipelineController_ProcessNewDecisionFromAPI(t *testing.T) { WithStatusSubresource(&v1alpha1.Decision{}). Build() - controller := &DecisionPipelineController{ - BasePipelineController: lib.BasePipelineController[lib.Pipeline[api.ExternalSchedulerRequest]]{ + controller := &FilterWeigherPipelineController{ + BasePipelineController: lib.BasePipelineController[lib.FilterWeigherPipeline[api.ExternalSchedulerRequest]]{ Client: client, - Pipelines: make(map[string]lib.Pipeline[api.ExternalSchedulerRequest]), + Pipelines: make(map[string]lib.FilterWeigherPipeline[api.ExternalSchedulerRequest]), PipelineConfigs: make(map[string]v1alpha1.Pipeline), }, - Monitor: lib.PipelineMonitor{}, + Monitor: lib.FilterWeigherPipelineMonitor{}, Conf: conf.Config{ SchedulingDomain: v1alpha1.SchedulingDomainNova, }, @@ -662,16 +681,16 @@ func TestDecisionPipelineController_ProcessNewDecisionFromAPI(t *testing.T) { // Setup runtime pipeline if needed if tt.pipeline != nil { - pipeline, err := controller.InitPipeline(context.Background(), v1alpha1.Pipeline{ + initResult := controller.InitPipeline(context.Background(), v1alpha1.Pipeline{ ObjectMeta: metav1.ObjectMeta{ Name: tt.pipeline.Name, }, Spec: tt.pipeline.Spec, }) - if err != nil { - t.Fatalf("Failed to init pipeline: %v", err) + if len(initResult.FilterErrors) > 0 || len(initResult.WeigherErrors) > 0 { + t.Fatalf("Failed to initialize pipeline: filter errors: %v, weigher errors: %v", initResult.FilterErrors, initResult.WeigherErrors) } - controller.Pipelines[tt.pipeline.Name] = pipeline + controller.Pipelines[tt.pipeline.Name] = initResult.Pipeline } // Call the method under test diff --git a/internal/scheduling/descheduling/nova/nova_api.go b/internal/scheduling/nova/nova_api.go similarity index 100% rename from internal/scheduling/descheduling/nova/nova_api.go rename to internal/scheduling/nova/nova_api.go diff --git a/internal/scheduling/descheduling/nova/nova_api_test.go b/internal/scheduling/nova/nova_api_test.go similarity index 100% rename from internal/scheduling/descheduling/nova/nova_api_test.go rename to internal/scheduling/nova/nova_api_test.go diff --git a/internal/scheduling/descheduling/nova/plugins/kvm/avoid_high_steal_pct.go b/internal/scheduling/nova/plugins/detectors/avoid_high_steal_pct.go similarity index 63% rename from internal/scheduling/descheduling/nova/plugins/kvm/avoid_high_steal_pct.go rename to internal/scheduling/nova/plugins/detectors/avoid_high_steal_pct.go index bd8e51dae..b956fd26c 100644 --- a/internal/scheduling/descheduling/nova/plugins/kvm/avoid_high_steal_pct.go +++ b/internal/scheduling/nova/plugins/detectors/avoid_high_steal_pct.go @@ -1,7 +1,7 @@ // Copyright SAP SE // 
SPDX-License-Identifier: Apache-2.0 -package kvm +package detectors import ( "context" @@ -10,7 +10,9 @@ import ( "github.com/cobaltcore-dev/cortex/api/v1alpha1" "github.com/cobaltcore-dev/cortex/internal/knowledge/extractor/plugins/compute" - "github.com/cobaltcore-dev/cortex/internal/scheduling/descheduling/nova/plugins" + "github.com/cobaltcore-dev/cortex/internal/scheduling/lib" + "github.com/cobaltcore-dev/cortex/internal/scheduling/nova/plugins" + corev1 "k8s.io/api/core/v1" "sigs.k8s.io/controller-runtime/pkg/client" ) @@ -20,11 +22,22 @@ type AvoidHighStealPctStepOpts struct { } type AvoidHighStealPctStep struct { - // BaseStep is a helper struct that provides common functionality for all steps. - plugins.BaseStep[AvoidHighStealPctStepOpts] + // Detector is a helper struct that provides common functionality for all descheduler steps. + lib.BaseDetector[AvoidHighStealPctStepOpts] } -func (s *AvoidHighStealPctStep) Run() ([]plugins.Decision, error) { +// Initialize the step and validate that all required knowledges are ready. +func (s *AvoidHighStealPctStep) Init(ctx context.Context, client client.Client, step v1alpha1.DetectorSpec) error { + if err := s.BaseDetector.Init(ctx, client, step); err != nil { + return err + } + if err := s.CheckKnowledges(ctx, corev1.ObjectReference{Name: "kvm-libvirt-domain-cpu-steal-pct"}); err != nil { + return err + } + return nil +} + +func (s *AvoidHighStealPctStep) Run() ([]plugins.VMDetection, error) { if s.Options.MaxStealPctOverObservedTimeSpan <= 0 { slog.Info("skipping step because maxStealPctOverObservedTimeSpan is not set or <= 0") return nil, nil @@ -43,10 +56,10 @@ func (s *AvoidHighStealPctStep) Run() ([]plugins.Decision, error) { if err != nil { return nil, err } - var decisions []plugins.Decision + var decisions []plugins.VMDetection for _, f := range features { if f.MaxStealTimePct > s.Options.MaxStealPctOverObservedTimeSpan { - decisions = append(decisions, plugins.Decision{ + decisions = append(decisions, plugins.VMDetection{ VMID: f.InstanceUUID, Reason: fmt.Sprintf("kvm monitoring indicates cpu steal pct %.2f%% which is above %.2f%% threshold", f.MaxStealTimePct, s.Options.MaxStealPctOverObservedTimeSpan), Host: f.Host, diff --git a/internal/scheduling/descheduling/nova/plugins/kvm/avoid_high_steal_pct_test.go b/internal/scheduling/nova/plugins/detectors/avoid_high_steal_pct_test.go similarity index 56% rename from internal/scheduling/descheduling/nova/plugins/kvm/avoid_high_steal_pct_test.go rename to internal/scheduling/nova/plugins/detectors/avoid_high_steal_pct_test.go index 59e1a5f0b..30ec0d670 100644 --- a/internal/scheduling/descheduling/nova/plugins/kvm/avoid_high_steal_pct_test.go +++ b/internal/scheduling/nova/plugins/detectors/avoid_high_steal_pct_test.go @@ -1,24 +1,123 @@ // Copyright SAP SE // SPDX-License-Identifier: Apache-2.0 -package kvm +package detectors import ( + "context" + "strings" "testing" "github.com/cobaltcore-dev/cortex/api/v1alpha1" "github.com/cobaltcore-dev/cortex/internal/knowledge/extractor/plugins/compute" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/runtime" "sigs.k8s.io/controller-runtime/pkg/client/fake" ) -// Decision represents a descheduling decision for testing -type Decision struct { +// VMDetection represents a descheduling decision for testing +type VMDetection struct { VMID string Reason string Host string } +func TestAvoidHighStealPctStep_Init(t *testing.T) { + scheme, err := v1alpha1.SchemeBuilder.Build() + if err != nil { + t.Fatalf("expected no error, 
got %v", err) + } + + validParams := runtime.RawExtension{ + Raw: []byte(`{"maxStealPctOverObservedTimeSpan": 80.0}`), + } + + tests := []struct { + name string + knowledge *v1alpha1.Knowledge + detectorSpec v1alpha1.DetectorSpec + wantError bool + errorContains string + }{ + { + name: "successful init with valid knowledge", + knowledge: &v1alpha1.Knowledge{ + ObjectMeta: metav1.ObjectMeta{Name: "kvm-libvirt-domain-cpu-steal-pct"}, + Status: v1alpha1.KnowledgeStatus{ + Conditions: []metav1.Condition{ + { + Type: v1alpha1.KnowledgeConditionReady, + Status: metav1.ConditionTrue, + }, + }, + RawLength: 10, + }, + }, + detectorSpec: v1alpha1.DetectorSpec{ + Name: "avoid_high_steal_pct", + Params: validParams, + }, + wantError: false, + }, + { + name: "fails when knowledge doesn't exist", + knowledge: nil, + detectorSpec: v1alpha1.DetectorSpec{ + Name: "avoid_high_steal_pct", + Params: validParams, + }, + wantError: true, + errorContains: "failed to get knowledge", + }, + { + name: "fails when knowledge not ready", + knowledge: &v1alpha1.Knowledge{ + ObjectMeta: metav1.ObjectMeta{Name: "kvm-libvirt-domain-cpu-steal-pct"}, + Status: v1alpha1.KnowledgeStatus{ + Conditions: []metav1.Condition{ + { + Type: v1alpha1.KnowledgeConditionReady, + Status: metav1.ConditionFalse, + }, + }, + RawLength: 0, + }, + }, + detectorSpec: v1alpha1.DetectorSpec{ + Name: "avoid_high_steal_pct", + Params: validParams, + }, + wantError: true, + errorContains: "not ready", + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + builder := fake.NewClientBuilder().WithScheme(scheme) + if tt.knowledge != nil { + builder = builder.WithObjects(tt.knowledge) + } + client := builder.Build() + + step := &AvoidHighStealPctStep{} + err := step.Init(context.Background(), client, tt.detectorSpec) + + if tt.wantError { + if err == nil { + t.Error("expected error, got nil") + } else if tt.errorContains != "" && !strings.Contains(err.Error(), tt.errorContains) { + t.Errorf("expected error containing %q, got %q", tt.errorContains, err.Error()) + } + } else { + if err != nil { + t.Errorf("unexpected error: %v", err) + } + } + }) + } +} + func TestAvoidHighStealPctStep_Run(t *testing.T) { scheme, err := v1alpha1.SchemeBuilder.Build() if err != nil { @@ -26,12 +125,12 @@ func TestAvoidHighStealPctStep_Run(t *testing.T) { } tests := []struct { - name string - threshold float64 - features []compute.LibvirtDomainCPUStealPct - expectedDecisions int - expectedVMs []string - expectSkip bool + name string + threshold float64 + features []compute.LibvirtDomainCPUStealPct + expectedVMDetections int + expectedVMs []string + expectSkip bool }{ { name: "skip when threshold is zero", @@ -46,9 +145,9 @@ func TestAvoidHighStealPctStep_Run(t *testing.T) { expectSkip: true, }, { - name: "no VMs above threshold", - threshold: 80.0, - expectedDecisions: 0, + name: "no VMs above threshold", + threshold: 80.0, + expectedVMDetections: 0, features: []compute.LibvirtDomainCPUStealPct{ {InstanceUUID: "vm-1", Host: "host1", MaxStealTimePct: 50.0}, {InstanceUUID: "vm-2", Host: "host2", MaxStealTimePct: 75.0}, @@ -56,10 +155,10 @@ func TestAvoidHighStealPctStep_Run(t *testing.T) { }, }, { - name: "some VMs above threshold", - threshold: 70.0, - expectedDecisions: 2, - expectedVMs: []string{"vm-2", "vm-4"}, + name: "some VMs above threshold", + threshold: 70.0, + expectedVMDetections: 2, + expectedVMs: []string{"vm-2", "vm-4"}, features: []compute.LibvirtDomainCPUStealPct{ {InstanceUUID: "vm-1", Host: "host1", MaxStealTimePct: 50.0}, 
{InstanceUUID: "vm-2", Host: "host2", MaxStealTimePct: 75.0}, @@ -68,10 +167,10 @@ func TestAvoidHighStealPctStep_Run(t *testing.T) { }, }, { - name: "all VMs above threshold", - threshold: 40.0, - expectedDecisions: 3, - expectedVMs: []string{"vm-1", "vm-2", "vm-3"}, + name: "all VMs above threshold", + threshold: 40.0, + expectedVMDetections: 3, + expectedVMs: []string{"vm-1", "vm-2", "vm-3"}, features: []compute.LibvirtDomainCPUStealPct{ {InstanceUUID: "vm-1", Host: "host1", MaxStealTimePct: 50.0}, {InstanceUUID: "vm-2", Host: "host2", MaxStealTimePct: 75.0}, @@ -79,10 +178,10 @@ func TestAvoidHighStealPctStep_Run(t *testing.T) { }, }, { - name: "VM exactly at threshold (should not be selected)", - threshold: 75.0, - expectedDecisions: 1, - expectedVMs: []string{"vm-3"}, + name: "VM exactly at threshold (should not be selected)", + threshold: 75.0, + expectedVMDetections: 1, + expectedVMs: []string{"vm-3"}, features: []compute.LibvirtDomainCPUStealPct{ {InstanceUUID: "vm-1", Host: "host1", MaxStealTimePct: 50.0}, {InstanceUUID: "vm-2", Host: "host2", MaxStealTimePct: 75.0}, // exactly at threshold @@ -90,16 +189,16 @@ func TestAvoidHighStealPctStep_Run(t *testing.T) { }, }, { - name: "empty database", - threshold: 50.0, - expectedDecisions: 0, - features: []compute.LibvirtDomainCPUStealPct{}, + name: "empty database", + threshold: 50.0, + expectedVMDetections: 0, + features: []compute.LibvirtDomainCPUStealPct{}, }, { - name: "high precision values", - threshold: 75.555, - expectedDecisions: 1, - expectedVMs: []string{"vm-2"}, + name: "high precision values", + threshold: 75.555, + expectedVMDetections: 1, + expectedVMs: []string{"vm-2"}, features: []compute.LibvirtDomainCPUStealPct{ {InstanceUUID: "vm-1", Host: "host1", MaxStealTimePct: 75.554}, {InstanceUUID: "vm-2", Host: "host2", MaxStealTimePct: 75.556}, @@ -138,8 +237,8 @@ func TestAvoidHighStealPctStep_Run(t *testing.T) { } // Check number of decisions - if len(decisions) != tt.expectedDecisions { - t.Errorf("expected %d decisions, got %d", tt.expectedDecisions, len(decisions)) + if len(decisions) != tt.expectedVMDetections { + t.Errorf("expected %d decisions, got %d", tt.expectedVMDetections, len(decisions)) } // Check that the correct VMs were selected diff --git a/internal/scheduling/decisions/nova/plugins/filters/filter_allowed_projects.go b/internal/scheduling/nova/plugins/filters/filter_allowed_projects.go similarity index 85% rename from internal/scheduling/decisions/nova/plugins/filters/filter_allowed_projects.go rename to internal/scheduling/nova/plugins/filters/filter_allowed_projects.go index 215a0f6b3..16c75d5d7 100644 --- a/internal/scheduling/decisions/nova/plugins/filters/filter_allowed_projects.go +++ b/internal/scheduling/nova/plugins/filters/filter_allowed_projects.go @@ -14,13 +14,13 @@ import ( ) type FilterAllowedProjectsStep struct { - lib.BaseStep[api.ExternalSchedulerRequest, lib.EmptyStepOpts] + lib.BaseFilter[api.ExternalSchedulerRequest, lib.EmptyFilterWeigherPipelineStepOpts] } // Lock certain hosts for certain projects, based on the hypervisor spec. // Note that hosts without specified projects are still accessible. 
-func (s *FilterAllowedProjectsStep) Run(traceLog *slog.Logger, request api.ExternalSchedulerRequest) (*lib.StepResult, error) { - result := s.PrepareResult(request) +func (s *FilterAllowedProjectsStep) Run(traceLog *slog.Logger, request api.ExternalSchedulerRequest) (*lib.FilterWeigherPipelineStepResult, error) { + result := s.IncludeAllHostsFromRequest(request) if request.Spec.Data.ProjectID == "" { traceLog.Info("no project ID in request, skipping filter") return result, nil diff --git a/internal/scheduling/decisions/nova/plugins/filters/filter_allowed_projects_test.go b/internal/scheduling/nova/plugins/filters/filter_allowed_projects_test.go similarity index 100% rename from internal/scheduling/decisions/nova/plugins/filters/filter_allowed_projects_test.go rename to internal/scheduling/nova/plugins/filters/filter_allowed_projects_test.go diff --git a/internal/scheduling/decisions/nova/plugins/filters/filter_capabilities.go b/internal/scheduling/nova/plugins/filters/filter_capabilities.go similarity index 94% rename from internal/scheduling/decisions/nova/plugins/filters/filter_capabilities.go rename to internal/scheduling/nova/plugins/filters/filter_capabilities.go index ea0c86b7f..0366df41f 100644 --- a/internal/scheduling/decisions/nova/plugins/filters/filter_capabilities.go +++ b/internal/scheduling/nova/plugins/filters/filter_capabilities.go @@ -15,7 +15,7 @@ import ( ) type FilterCapabilitiesStep struct { - lib.BaseStep[api.ExternalSchedulerRequest, lib.EmptyStepOpts] + lib.BaseFilter[api.ExternalSchedulerRequest, lib.EmptyFilterWeigherPipelineStepOpts] } // Get the provided capabilities of a hypervisor resource in the format Nova expects. @@ -44,8 +44,8 @@ func hvToNovaCapabilities(hv hv1.Hypervisor) (map[string]string, error) { // Check the capabilities of each host and if they match the extra spec provided // in the request spec flavor. -func (s *FilterCapabilitiesStep) Run(traceLog *slog.Logger, request api.ExternalSchedulerRequest) (*lib.StepResult, error) { - result := s.PrepareResult(request) +func (s *FilterCapabilitiesStep) Run(traceLog *slog.Logger, request api.ExternalSchedulerRequest) (*lib.FilterWeigherPipelineStepResult, error) { + result := s.IncludeAllHostsFromRequest(request) requestedCapabilities := request.Spec.Data.Flavor.Data.ExtraSpecs if len(requestedCapabilities) == 0 { traceLog.Debug("no flavor extra spec capabilities in request, skipping filter") diff --git a/internal/scheduling/decisions/nova/plugins/filters/filter_capabilities_test.go b/internal/scheduling/nova/plugins/filters/filter_capabilities_test.go similarity index 100% rename from internal/scheduling/decisions/nova/plugins/filters/filter_capabilities_test.go rename to internal/scheduling/nova/plugins/filters/filter_capabilities_test.go diff --git a/internal/scheduling/decisions/nova/plugins/filters/filter_correct_az.go b/internal/scheduling/nova/plugins/filters/filter_correct_az.go similarity index 88% rename from internal/scheduling/decisions/nova/plugins/filters/filter_correct_az.go rename to internal/scheduling/nova/plugins/filters/filter_correct_az.go index 744edfb62..f91b57d9a 100644 --- a/internal/scheduling/decisions/nova/plugins/filters/filter_correct_az.go +++ b/internal/scheduling/nova/plugins/filters/filter_correct_az.go @@ -13,12 +13,12 @@ import ( ) type FilterCorrectAZStep struct { - lib.BaseStep[api.ExternalSchedulerRequest, lib.EmptyStepOpts] + lib.BaseFilter[api.ExternalSchedulerRequest, lib.EmptyFilterWeigherPipelineStepOpts] } // Only get hosts in the requested az. 
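// Illustrative sketch (hypothetical value): for a request with
// AvailabilityZone "az-1", only hosts located in "az-1" are kept; as the code
// below shows, an empty AvailabilityZone leaves the result untouched.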
-func (s *FilterCorrectAZStep) Run(traceLog *slog.Logger, request api.ExternalSchedulerRequest) (*lib.StepResult, error) { - result := s.PrepareResult(request) +func (s *FilterCorrectAZStep) Run(traceLog *slog.Logger, request api.ExternalSchedulerRequest) (*lib.FilterWeigherPipelineStepResult, error) { + result := s.IncludeAllHostsFromRequest(request) if request.Spec.Data.AvailabilityZone == "" { traceLog.Info("no availability zone requested, skipping filter_correct_az step") return result, nil diff --git a/internal/scheduling/decisions/nova/plugins/filters/filter_correct_az_test.go b/internal/scheduling/nova/plugins/filters/filter_correct_az_test.go similarity index 100% rename from internal/scheduling/decisions/nova/plugins/filters/filter_correct_az_test.go rename to internal/scheduling/nova/plugins/filters/filter_correct_az_test.go diff --git a/internal/scheduling/decisions/nova/plugins/filters/filter_external_customer.go b/internal/scheduling/nova/plugins/filters/filter_external_customer.go similarity index 91% rename from internal/scheduling/decisions/nova/plugins/filters/filter_external_customer.go rename to internal/scheduling/nova/plugins/filters/filter_external_customer.go index 7385063dd..49d9de046 100644 --- a/internal/scheduling/decisions/nova/plugins/filters/filter_external_customer.go +++ b/internal/scheduling/nova/plugins/filters/filter_external_customer.go @@ -28,13 +28,13 @@ func (opts FilterExternalCustomerStepOpts) Validate() error { } type FilterExternalCustomerStep struct { - lib.BaseStep[api.ExternalSchedulerRequest, FilterExternalCustomerStepOpts] + lib.BaseFilter[api.ExternalSchedulerRequest, FilterExternalCustomerStepOpts] } // Prefix-match the domain name for external customer domains and filter out hosts // that are not intended for external customers. -func (s *FilterExternalCustomerStep) Run(traceLog *slog.Logger, request api.ExternalSchedulerRequest) (*lib.StepResult, error) { - result := s.PrepareResult(request) +func (s *FilterExternalCustomerStep) Run(traceLog *slog.Logger, request api.ExternalSchedulerRequest) (*lib.FilterWeigherPipelineStepResult, error) { + result := s.IncludeAllHostsFromRequest(request) domainName, err := request.Spec.Data.GetSchedulerHintStr("domain_name") if err != nil { return nil, err diff --git a/internal/scheduling/decisions/nova/plugins/filters/filter_external_customer_test.go b/internal/scheduling/nova/plugins/filters/filter_external_customer_test.go similarity index 100% rename from internal/scheduling/decisions/nova/plugins/filters/filter_external_customer_test.go rename to internal/scheduling/nova/plugins/filters/filter_external_customer_test.go diff --git a/internal/scheduling/decisions/nova/plugins/filters/filter_has_accelerators.go b/internal/scheduling/nova/plugins/filters/filter_has_accelerators.go similarity index 86% rename from internal/scheduling/decisions/nova/plugins/filters/filter_has_accelerators.go rename to internal/scheduling/nova/plugins/filters/filter_has_accelerators.go index 04918542c..1e3bdb726 100644 --- a/internal/scheduling/decisions/nova/plugins/filters/filter_has_accelerators.go +++ b/internal/scheduling/nova/plugins/filters/filter_has_accelerators.go @@ -14,12 +14,12 @@ import ( ) type FilterHasAcceleratorsStep struct { - lib.BaseStep[api.ExternalSchedulerRequest, lib.EmptyStepOpts] + lib.BaseFilter[api.ExternalSchedulerRequest, lib.EmptyFilterWeigherPipelineStepOpts] } // If requested, only get hosts with accelerators. 
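// Illustrative sketch (hypothetical profile name): a flavor extra spec entry
// "accel:device_profile": "gpu-profile" marks the request as needing
// accelerators, so hosts without accelerators are dropped; if the key is
// absent, the check below skips the filter.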
-func (s *FilterHasAcceleratorsStep) Run(traceLog *slog.Logger, request api.ExternalSchedulerRequest) (*lib.StepResult, error) { - result := s.PrepareResult(request) +func (s *FilterHasAcceleratorsStep) Run(traceLog *slog.Logger, request api.ExternalSchedulerRequest) (*lib.FilterWeigherPipelineStepResult, error) { + result := s.IncludeAllHostsFromRequest(request) extraSpecs := request.Spec.Data.Flavor.Data.ExtraSpecs if _, ok := extraSpecs["accel:device_profile"]; !ok { traceLog.Debug("no accelerators requested") diff --git a/internal/scheduling/decisions/nova/plugins/filters/filter_has_accelerators_test.go b/internal/scheduling/nova/plugins/filters/filter_has_accelerators_test.go similarity index 100% rename from internal/scheduling/decisions/nova/plugins/filters/filter_has_accelerators_test.go rename to internal/scheduling/nova/plugins/filters/filter_has_accelerators_test.go diff --git a/internal/scheduling/decisions/nova/plugins/filters/filter_has_enough_capacity.go b/internal/scheduling/nova/plugins/filters/filter_has_enough_capacity.go similarity index 96% rename from internal/scheduling/decisions/nova/plugins/filters/filter_has_enough_capacity.go rename to internal/scheduling/nova/plugins/filters/filter_has_enough_capacity.go index 62d0f5968..7bd12c69f 100644 --- a/internal/scheduling/decisions/nova/plugins/filters/filter_has_enough_capacity.go +++ b/internal/scheduling/nova/plugins/filters/filter_has_enough_capacity.go @@ -23,7 +23,7 @@ type FilterHasEnoughCapacityOpts struct { func (FilterHasEnoughCapacityOpts) Validate() error { return nil } type FilterHasEnoughCapacity struct { - lib.BaseStep[api.ExternalSchedulerRequest, FilterHasEnoughCapacityOpts] + lib.BaseFilter[api.ExternalSchedulerRequest, FilterHasEnoughCapacityOpts] } // Filter hosts that don't have enough capacity to run the requested flavor. @@ -40,8 +40,8 @@ type FilterHasEnoughCapacity struct { // known at this point. // // Please also note that disk space is currently not considered by this filter. -func (s *FilterHasEnoughCapacity) Run(traceLog *slog.Logger, request api.ExternalSchedulerRequest) (*lib.StepResult, error) { - result := s.PrepareResult(request) +func (s *FilterHasEnoughCapacity) Run(traceLog *slog.Logger, request api.ExternalSchedulerRequest) (*lib.FilterWeigherPipelineStepResult, error) { + result := s.IncludeAllHostsFromRequest(request) // This map holds the free resources per host. 
freeResourcesByHost := make(map[string]map[string]resource.Quantity) diff --git a/internal/scheduling/nova/plugins/filters/filter_has_enough_capacity_test.go b/internal/scheduling/nova/plugins/filters/filter_has_enough_capacity_test.go new file mode 100644 index 000000000..a6e6561e9 --- /dev/null +++ b/internal/scheduling/nova/plugins/filters/filter_has_enough_capacity_test.go @@ -0,0 +1,48 @@ +// Copyright SAP SE +// SPDX-License-Identifier: Apache-2.0 + +package filters + +import ( + "testing" +) + +func TestFilterHasEnoughCapacityOpts_Validate(t *testing.T) { + tests := []struct { + name string + opts FilterHasEnoughCapacityOpts + expectError bool + }{ + { + name: "valid options with lock reserved true", + opts: FilterHasEnoughCapacityOpts{ + LockReserved: true, + }, + expectError: false, + }, + { + name: "valid options with lock reserved false", + opts: FilterHasEnoughCapacityOpts{ + LockReserved: false, + }, + expectError: false, + }, + { + name: "valid options with default values", + opts: FilterHasEnoughCapacityOpts{}, + expectError: false, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + err := tt.opts.Validate() + if tt.expectError && err == nil { + t.Error("expected error, got nil") + } + if !tt.expectError && err != nil { + t.Errorf("expected no error, got %v", err) + } + }) + } +} diff --git a/internal/scheduling/decisions/nova/plugins/filters/filter_has_requested_traits.go b/internal/scheduling/nova/plugins/filters/filter_has_requested_traits.go similarity index 91% rename from internal/scheduling/decisions/nova/plugins/filters/filter_has_requested_traits.go rename to internal/scheduling/nova/plugins/filters/filter_has_requested_traits.go index 14cf927a8..05095ac67 100644 --- a/internal/scheduling/decisions/nova/plugins/filters/filter_has_requested_traits.go +++ b/internal/scheduling/nova/plugins/filters/filter_has_requested_traits.go @@ -15,14 +15,14 @@ import ( ) type FilterHasRequestedTraits struct { - lib.BaseStep[api.ExternalSchedulerRequest, lib.EmptyStepOpts] + lib.BaseFilter[api.ExternalSchedulerRequest, lib.EmptyFilterWeigherPipelineStepOpts] } // Filter hosts that do not have the requested traits given by the extra spec: // - "trait:": "forbidden" means the host must not have the specified trait. // - "trait:": "required" means the host must have the specified trait. 
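// Illustrative sketch (hypothetical trait name): an extra spec entry
// "trait:CUSTOM_FAST_NIC": "required" keeps only hosts exposing that trait,
// while the value "forbidden" instead removes every host that has it.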
-func (s *FilterHasRequestedTraits) Run(traceLog *slog.Logger, request api.ExternalSchedulerRequest) (*lib.StepResult, error) { - result := s.PrepareResult(request) +func (s *FilterHasRequestedTraits) Run(traceLog *slog.Logger, request api.ExternalSchedulerRequest) (*lib.FilterWeigherPipelineStepResult, error) { + result := s.IncludeAllHostsFromRequest(request) var requiredTraits, forbiddenTraits []string for key, value := range request.Spec.Data.Flavor.Data.ExtraSpecs { if !strings.HasPrefix(key, "trait:") { diff --git a/internal/scheduling/decisions/nova/plugins/filters/filter_has_requested_traits_test.go b/internal/scheduling/nova/plugins/filters/filter_has_requested_traits_test.go similarity index 100% rename from internal/scheduling/decisions/nova/plugins/filters/filter_has_requested_traits_test.go rename to internal/scheduling/nova/plugins/filters/filter_has_requested_traits_test.go diff --git a/internal/scheduling/decisions/nova/plugins/filters/filter_host_instructions.go b/internal/scheduling/nova/plugins/filters/filter_host_instructions.go similarity index 82% rename from internal/scheduling/decisions/nova/plugins/filters/filter_host_instructions.go rename to internal/scheduling/nova/plugins/filters/filter_host_instructions.go index b20f041f8..42562d244 100644 --- a/internal/scheduling/decisions/nova/plugins/filters/filter_host_instructions.go +++ b/internal/scheduling/nova/plugins/filters/filter_host_instructions.go @@ -12,14 +12,14 @@ import ( ) type FilterHostInstructionsStep struct { - lib.BaseStep[api.ExternalSchedulerRequest, lib.EmptyStepOpts] + lib.BaseFilter[api.ExternalSchedulerRequest, lib.EmptyFilterWeigherPipelineStepOpts] } // Filter hosts based on instructions given in the request spec. Supported are: // - spec.ignore_hosts: Filter out all hosts in this list. // - spec.force_hosts: Include only hosts in this list. 
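// Illustrative sketch (hypothetical hosts): with hosts host1..host3,
// ignore_hosts=["host1"] removes host1 from result.Activations, and
// force_hosts=["host2"] then keeps only host2.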
-func (s *FilterHostInstructionsStep) Run(traceLog *slog.Logger, request api.ExternalSchedulerRequest) (*lib.StepResult, error) { - result := s.PrepareResult(request) +func (s *FilterHostInstructionsStep) Run(traceLog *slog.Logger, request api.ExternalSchedulerRequest) (*lib.FilterWeigherPipelineStepResult, error) { + result := s.IncludeAllHostsFromRequest(request) if request.Spec.Data.IgnoreHosts != nil { for _, host := range *request.Spec.Data.IgnoreHosts { delete(result.Activations, host) diff --git a/internal/scheduling/decisions/nova/plugins/filters/filter_host_instructions_test.go b/internal/scheduling/nova/plugins/filters/filter_host_instructions_test.go similarity index 100% rename from internal/scheduling/decisions/nova/plugins/filters/filter_host_instructions_test.go rename to internal/scheduling/nova/plugins/filters/filter_host_instructions_test.go diff --git a/internal/scheduling/decisions/nova/plugins/filters/filter_instance_group_affinity.go b/internal/scheduling/nova/plugins/filters/filter_instance_group_affinity.go similarity index 85% rename from internal/scheduling/decisions/nova/plugins/filters/filter_instance_group_affinity.go rename to internal/scheduling/nova/plugins/filters/filter_instance_group_affinity.go index f75abc596..ec5569d23 100644 --- a/internal/scheduling/decisions/nova/plugins/filters/filter_instance_group_affinity.go +++ b/internal/scheduling/nova/plugins/filters/filter_instance_group_affinity.go @@ -12,16 +12,16 @@ import ( ) type FilterInstanceGroupAffinityStep struct { - lib.BaseStep[api.ExternalSchedulerRequest, lib.EmptyStepOpts] + lib.BaseFilter[api.ExternalSchedulerRequest, lib.EmptyFilterWeigherPipelineStepOpts] } // Select hosts in spec.instance_group. func (s *FilterInstanceGroupAffinityStep) Run( traceLog *slog.Logger, request api.ExternalSchedulerRequest, -) (*lib.StepResult, error) { +) (*lib.FilterWeigherPipelineStepResult, error) { - result := s.PrepareResult(request) + result := s.IncludeAllHostsFromRequest(request) ig := request.Spec.Data.InstanceGroup if ig == nil { diff --git a/internal/scheduling/decisions/nova/plugins/filters/filter_instance_group_affinity_test.go b/internal/scheduling/nova/plugins/filters/filter_instance_group_affinity_test.go similarity index 100% rename from internal/scheduling/decisions/nova/plugins/filters/filter_instance_group_affinity_test.go rename to internal/scheduling/nova/plugins/filters/filter_instance_group_affinity_test.go diff --git a/internal/scheduling/decisions/nova/plugins/filters/filter_instance_group_anti_affinity.go b/internal/scheduling/nova/plugins/filters/filter_instance_group_anti_affinity.go similarity index 93% rename from internal/scheduling/decisions/nova/plugins/filters/filter_instance_group_anti_affinity.go rename to internal/scheduling/nova/plugins/filters/filter_instance_group_anti_affinity.go index 78fbb4c84..bdd6c0910 100644 --- a/internal/scheduling/decisions/nova/plugins/filters/filter_instance_group_anti_affinity.go +++ b/internal/scheduling/nova/plugins/filters/filter_instance_group_anti_affinity.go @@ -14,7 +14,7 @@ import ( ) type FilterInstanceGroupAntiAffinityStep struct { - lib.BaseStep[api.ExternalSchedulerRequest, lib.EmptyStepOpts] + lib.BaseFilter[api.ExternalSchedulerRequest, lib.EmptyFilterWeigherPipelineStepOpts] } // Select hosts not in spec_obj.instance_group but only until @@ -22,9 +22,9 @@ type FilterInstanceGroupAntiAffinityStep struct { func (s *FilterInstanceGroupAntiAffinityStep) Run( traceLog *slog.Logger, request api.ExternalSchedulerRequest, -) 
(*lib.StepResult, error) { +) (*lib.FilterWeigherPipelineStepResult, error) { - result := s.PrepareResult(request) + result := s.IncludeAllHostsFromRequest(request) ig := request.Spec.Data.InstanceGroup if ig == nil { diff --git a/internal/scheduling/decisions/nova/plugins/filters/filter_instance_group_anti_affinity_test.go b/internal/scheduling/nova/plugins/filters/filter_instance_group_anti_affinity_test.go similarity index 100% rename from internal/scheduling/decisions/nova/plugins/filters/filter_instance_group_anti_affinity_test.go rename to internal/scheduling/nova/plugins/filters/filter_instance_group_anti_affinity_test.go diff --git a/internal/scheduling/decisions/nova/plugins/filters/filter_live_migratable.go b/internal/scheduling/nova/plugins/filters/filter_live_migratable.go similarity index 94% rename from internal/scheduling/decisions/nova/plugins/filters/filter_live_migratable.go rename to internal/scheduling/nova/plugins/filters/filter_live_migratable.go index 7554099be..4ae1a2365 100644 --- a/internal/scheduling/decisions/nova/plugins/filters/filter_live_migratable.go +++ b/internal/scheduling/nova/plugins/filters/filter_live_migratable.go @@ -15,7 +15,7 @@ import ( ) type FilterLiveMigratableStep struct { - lib.BaseStep[api.ExternalSchedulerRequest, lib.EmptyStepOpts] + lib.BaseFilter[api.ExternalSchedulerRequest, lib.EmptyFilterWeigherPipelineStepOpts] } // Check if the encountered request spec is a live migration. @@ -64,9 +64,9 @@ func (s *FilterLiveMigratableStep) checkHasSufficientFeatures( func (s *FilterLiveMigratableStep) Run( traceLog *slog.Logger, request api.ExternalSchedulerRequest, -) (*lib.StepResult, error) { +) (*lib.FilterWeigherPipelineStepResult, error) { - result := s.PrepareResult(request) + result := s.IncludeAllHostsFromRequest(request) if !s.isLiveMigration(request) { traceLog.Debug("not a live migration request, skipping filter") diff --git a/internal/scheduling/decisions/nova/plugins/filters/filter_live_migratable_test.go b/internal/scheduling/nova/plugins/filters/filter_live_migratable_test.go similarity index 96% rename from internal/scheduling/decisions/nova/plugins/filters/filter_live_migratable_test.go rename to internal/scheduling/nova/plugins/filters/filter_live_migratable_test.go index c719a3eb6..673084901 100644 --- a/internal/scheduling/decisions/nova/plugins/filters/filter_live_migratable_test.go +++ b/internal/scheduling/nova/plugins/filters/filter_live_migratable_test.go @@ -727,8 +727,10 @@ func TestFilterLiveMigratableStep_Run(t *testing.T) { Build() step := &FilterLiveMigratableStep{ - BaseStep: lib.BaseStep[api.ExternalSchedulerRequest, lib.EmptyStepOpts]{ - Client: fakeClient, + BaseFilter: lib.BaseFilter[api.ExternalSchedulerRequest, lib.EmptyFilterWeigherPipelineStepOpts]{ + BaseFilterWeigherPipelineStep: lib.BaseFilterWeigherPipelineStep[api.ExternalSchedulerRequest, lib.EmptyFilterWeigherPipelineStepOpts]{ + Client: fakeClient, + }, }, } @@ -812,8 +814,10 @@ func TestFilterLiveMigratableStep_Run_SourceHostNotFound(t *testing.T) { Build() step := &FilterLiveMigratableStep{ - BaseStep: lib.BaseStep[api.ExternalSchedulerRequest, lib.EmptyStepOpts]{ - Client: fakeClient, + BaseFilter: lib.BaseFilter[api.ExternalSchedulerRequest, lib.EmptyFilterWeigherPipelineStepOpts]{ + BaseFilterWeigherPipelineStep: lib.BaseFilterWeigherPipelineStep[api.ExternalSchedulerRequest, lib.EmptyFilterWeigherPipelineStepOpts]{ + Client: fakeClient, + }, }, } @@ -856,8 +860,10 @@ func TestFilterLiveMigratableStep_Run_ClientError(t *testing.T) { Build() 
step := &FilterLiveMigratableStep{ - BaseStep: lib.BaseStep[api.ExternalSchedulerRequest, lib.EmptyStepOpts]{ - Client: fakeClient, + BaseFilter: lib.BaseFilter[api.ExternalSchedulerRequest, lib.EmptyFilterWeigherPipelineStepOpts]{ + BaseFilterWeigherPipelineStep: lib.BaseFilterWeigherPipelineStep[api.ExternalSchedulerRequest, lib.EmptyFilterWeigherPipelineStepOpts]{ + Client: fakeClient, + }, }, } diff --git a/internal/scheduling/decisions/nova/plugins/filters/filter_maintenance.go b/internal/scheduling/nova/plugins/filters/filter_maintenance.go similarity index 88% rename from internal/scheduling/decisions/nova/plugins/filters/filter_maintenance.go rename to internal/scheduling/nova/plugins/filters/filter_maintenance.go index 15dc4eafd..de81adefc 100644 --- a/internal/scheduling/decisions/nova/plugins/filters/filter_maintenance.go +++ b/internal/scheduling/nova/plugins/filters/filter_maintenance.go @@ -13,12 +13,12 @@ import ( ) type FilterMaintenanceStep struct { - lib.BaseStep[api.ExternalSchedulerRequest, lib.EmptyStepOpts] + lib.BaseFilter[api.ExternalSchedulerRequest, lib.EmptyFilterWeigherPipelineStepOpts] } // Check that the maintenance spec of the hypervisor doesn't prevent scheduling. -func (s *FilterMaintenanceStep) Run(traceLog *slog.Logger, request api.ExternalSchedulerRequest) (*lib.StepResult, error) { - result := s.PrepareResult(request) +func (s *FilterMaintenanceStep) Run(traceLog *slog.Logger, request api.ExternalSchedulerRequest) (*lib.FilterWeigherPipelineStepResult, error) { + result := s.IncludeAllHostsFromRequest(request) hvs := &hv1.HypervisorList{} if err := s.Client.List(context.Background(), hvs); err != nil { diff --git a/internal/scheduling/decisions/nova/plugins/filters/filter_maintenance_test.go b/internal/scheduling/nova/plugins/filters/filter_maintenance_test.go similarity index 100% rename from internal/scheduling/decisions/nova/plugins/filters/filter_maintenance_test.go rename to internal/scheduling/nova/plugins/filters/filter_maintenance_test.go diff --git a/internal/scheduling/decisions/nova/plugins/filters/filter_packed_virtqueue.go b/internal/scheduling/nova/plugins/filters/filter_packed_virtqueue.go similarity index 87% rename from internal/scheduling/decisions/nova/plugins/filters/filter_packed_virtqueue.go rename to internal/scheduling/nova/plugins/filters/filter_packed_virtqueue.go index 836ffd05e..248ebe6a8 100644 --- a/internal/scheduling/decisions/nova/plugins/filters/filter_packed_virtqueue.go +++ b/internal/scheduling/nova/plugins/filters/filter_packed_virtqueue.go @@ -14,12 +14,12 @@ import ( ) type FilterPackedVirtqueueStep struct { - lib.BaseStep[api.ExternalSchedulerRequest, lib.EmptyStepOpts] + lib.BaseFilter[api.ExternalSchedulerRequest, lib.EmptyFilterWeigherPipelineStepOpts] } // If requested, only get hosts with packed virtqueues. -func (s *FilterPackedVirtqueueStep) Run(traceLog *slog.Logger, request api.ExternalSchedulerRequest) (*lib.StepResult, error) { - result := s.PrepareResult(request) +func (s *FilterPackedVirtqueueStep) Run(traceLog *slog.Logger, request api.ExternalSchedulerRequest) (*lib.FilterWeigherPipelineStepResult, error) { + result := s.IncludeAllHostsFromRequest(request) // We don't care about the value. 
_, reqInSpecs := request.Spec.Data.Flavor.Data.ExtraSpecs["hw:virtio_packed_ring"] _, reqInProps := request.Spec.Data.Image.Data.Properties.Data["hw_virtio_packed_ring"] diff --git a/internal/scheduling/decisions/nova/plugins/filters/filter_packed_virtqueue_test.go b/internal/scheduling/nova/plugins/filters/filter_packed_virtqueue_test.go similarity index 100% rename from internal/scheduling/decisions/nova/plugins/filters/filter_packed_virtqueue_test.go rename to internal/scheduling/nova/plugins/filters/filter_packed_virtqueue_test.go diff --git a/internal/scheduling/decisions/nova/plugins/filters/filter_requested_destination.go b/internal/scheduling/nova/plugins/filters/filter_requested_destination.go similarity index 92% rename from internal/scheduling/decisions/nova/plugins/filters/filter_requested_destination.go rename to internal/scheduling/nova/plugins/filters/filter_requested_destination.go index 9a7ab3462..55e542c32 100644 --- a/internal/scheduling/decisions/nova/plugins/filters/filter_requested_destination.go +++ b/internal/scheduling/nova/plugins/filters/filter_requested_destination.go @@ -14,7 +14,7 @@ import ( ) type FilterRequestedDestinationStep struct { - lib.BaseStep[api.ExternalSchedulerRequest, lib.EmptyStepOpts] + lib.BaseFilter[api.ExternalSchedulerRequest, lib.EmptyFilterWeigherPipelineStepOpts] } // If `requested_destination` is set in the request spec, filter hosts @@ -23,9 +23,9 @@ type FilterRequestedDestinationStep struct { func (s *FilterRequestedDestinationStep) Run( traceLog *slog.Logger, request api.ExternalSchedulerRequest, -) (*lib.StepResult, error) { +) (*lib.FilterWeigherPipelineStepResult, error) { - result := s.PrepareResult(request) + result := s.IncludeAllHostsFromRequest(request) rd := request.Spec.Data.RequestedDestination if rd == nil { diff --git a/internal/scheduling/decisions/nova/plugins/filters/filter_requested_destination_test.go b/internal/scheduling/nova/plugins/filters/filter_requested_destination_test.go similarity index 96% rename from internal/scheduling/decisions/nova/plugins/filters/filter_requested_destination_test.go rename to internal/scheduling/nova/plugins/filters/filter_requested_destination_test.go index ca1faaa07..3a5f63d0e 100644 --- a/internal/scheduling/decisions/nova/plugins/filters/filter_requested_destination_test.go +++ b/internal/scheduling/nova/plugins/filters/filter_requested_destination_test.go @@ -494,8 +494,10 @@ func TestFilterRequestedDestinationStep_Run(t *testing.T) { Build() step := &FilterRequestedDestinationStep{ - BaseStep: lib.BaseStep[api.ExternalSchedulerRequest, lib.EmptyStepOpts]{ - Client: fakeClient, + BaseFilter: lib.BaseFilter[api.ExternalSchedulerRequest, lib.EmptyFilterWeigherPipelineStepOpts]{ + BaseFilterWeigherPipelineStep: lib.BaseFilterWeigherPipelineStep[api.ExternalSchedulerRequest, lib.EmptyFilterWeigherPipelineStepOpts]{ + Client: fakeClient, + }, }, } @@ -575,8 +577,10 @@ func TestFilterRequestedDestinationStep_Run_ClientError(t *testing.T) { Build() step := &FilterRequestedDestinationStep{ - BaseStep: lib.BaseStep[api.ExternalSchedulerRequest, lib.EmptyStepOpts]{ - Client: fakeClient, + BaseFilter: lib.BaseFilter[api.ExternalSchedulerRequest, lib.EmptyFilterWeigherPipelineStepOpts]{ + BaseFilterWeigherPipelineStep: lib.BaseFilterWeigherPipelineStep[api.ExternalSchedulerRequest, lib.EmptyFilterWeigherPipelineStepOpts]{ + Client: fakeClient, + }, }, } diff --git a/internal/scheduling/decisions/nova/plugins/filters/filter_status_conditions.go 
b/internal/scheduling/nova/plugins/filters/filter_status_conditions.go similarity index 91% rename from internal/scheduling/decisions/nova/plugins/filters/filter_status_conditions.go rename to internal/scheduling/nova/plugins/filters/filter_status_conditions.go index 0ea1f037b..daa1a5249 100644 --- a/internal/scheduling/decisions/nova/plugins/filters/filter_status_conditions.go +++ b/internal/scheduling/nova/plugins/filters/filter_status_conditions.go @@ -15,13 +15,13 @@ import ( ) type FilterStatusConditionsStep struct { - lib.BaseStep[api.ExternalSchedulerRequest, lib.EmptyStepOpts] + lib.BaseFilter[api.ExternalSchedulerRequest, lib.EmptyFilterWeigherPipelineStepOpts] } // Check that all status conditions meet the expected values, for example, // that the hypervisor is ready and not disabled. -func (s *FilterStatusConditionsStep) Run(traceLog *slog.Logger, request api.ExternalSchedulerRequest) (*lib.StepResult, error) { - result := s.PrepareResult(request) +func (s *FilterStatusConditionsStep) Run(traceLog *slog.Logger, request api.ExternalSchedulerRequest) (*lib.FilterWeigherPipelineStepResult, error) { + result := s.IncludeAllHostsFromRequest(request) hvs := &hv1.HypervisorList{} if err := s.Client.List(context.Background(), hvs); err != nil { diff --git a/internal/scheduling/decisions/nova/plugins/filters/filter_status_conditions_test.go b/internal/scheduling/nova/plugins/filters/filter_status_conditions_test.go similarity index 100% rename from internal/scheduling/decisions/nova/plugins/filters/filter_status_conditions_test.go rename to internal/scheduling/nova/plugins/filters/filter_status_conditions_test.go diff --git a/internal/scheduling/nova/plugins/vm_detection.go b/internal/scheduling/nova/plugins/vm_detection.go new file mode 100644 index 000000000..777dbe642 --- /dev/null +++ b/internal/scheduling/nova/plugins/vm_detection.go @@ -0,0 +1,20 @@ +// Copyright SAP SE +// SPDX-License-Identifier: Apache-2.0 + +package plugins + +import "github.com/cobaltcore-dev/cortex/internal/scheduling/lib" + +type VMDetection struct { + // Get the VM ID for which this decision applies. + VMID string + // Get a human-readable reason for this decision. + Reason string + // Get the compute host where the vm should be migrated away from. 
+ Host string +} + +func (d VMDetection) GetResource() string { return d.VMID } +func (d VMDetection) GetReason() string { return d.Reason } +func (d VMDetection) GetHost() string { return d.Host } +func (d VMDetection) WithReason(reason string) lib.Detection { d.Reason = reason; return d } diff --git a/internal/scheduling/nova/plugins/vm_detection_test.go b/internal/scheduling/nova/plugins/vm_detection_test.go new file mode 100644 index 000000000..cc8d7076e --- /dev/null +++ b/internal/scheduling/nova/plugins/vm_detection_test.go @@ -0,0 +1,196 @@ +// Copyright SAP SE +// SPDX-License-Identifier: Apache-2.0 + +package plugins + +import ( + "testing" + + "github.com/cobaltcore-dev/cortex/internal/scheduling/lib" +) + +func TestVMDetection_GetResource(t *testing.T) { + tests := []struct { + name string + vmID string + expected string + }{ + { + name: "returns VM ID", + vmID: "vm-123", + expected: "vm-123", + }, + { + name: "returns empty string when VM ID is empty", + vmID: "", + expected: "", + }, + { + name: "returns UUID format VM ID", + vmID: "550e8400-e29b-41d4-a716-446655440000", + expected: "550e8400-e29b-41d4-a716-446655440000", + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + d := VMDetection{VMID: tt.vmID} + if got := d.GetResource(); got != tt.expected { + t.Errorf("GetResource() = %v, want %v", got, tt.expected) + } + }) + } +} + +func TestVMDetection_GetReason(t *testing.T) { + tests := []struct { + name string + reason string + expected string + }{ + { + name: "returns reason", + reason: "high CPU usage", + expected: "high CPU usage", + }, + { + name: "returns empty string when reason is empty", + reason: "", + expected: "", + }, + { + name: "returns detailed reason", + reason: "kvm monitoring indicates cpu steal pct 85.50% which is above 80.00% threshold", + expected: "kvm monitoring indicates cpu steal pct 85.50% which is above 80.00% threshold", + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + d := VMDetection{Reason: tt.reason} + if got := d.GetReason(); got != tt.expected { + t.Errorf("GetReason() = %v, want %v", got, tt.expected) + } + }) + } +} + +func TestVMDetection_GetHost(t *testing.T) { + tests := []struct { + name string + host string + expected string + }{ + { + name: "returns host", + host: "compute-host-1", + expected: "compute-host-1", + }, + { + name: "returns empty string when host is empty", + host: "", + expected: "", + }, + { + name: "returns FQDN host", + host: "compute-host-1.example.com", + expected: "compute-host-1.example.com", + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + d := VMDetection{Host: tt.host} + if got := d.GetHost(); got != tt.expected { + t.Errorf("GetHost() = %v, want %v", got, tt.expected) + } + }) + } +} + +func TestVMDetection_WithReason(t *testing.T) { + tests := []struct { + name string + initialReason string + newReason string + expectedVMID string + expectedHost string + }{ + { + name: "sets new reason", + initialReason: "old reason", + newReason: "new reason", + expectedVMID: "vm-123", + expectedHost: "host-1", + }, + { + name: "sets reason from empty", + initialReason: "", + newReason: "new reason", + expectedVMID: "vm-456", + expectedHost: "host-2", + }, + { + name: "clears reason", + initialReason: "existing reason", + newReason: "", + expectedVMID: "vm-789", + expectedHost: "host-3", + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + d := VMDetection{ + VMID: tt.expectedVMID, + Reason: 
tt.initialReason, + Host: tt.expectedHost, + } + + result := d.WithReason(tt.newReason) + + // Check that the reason was updated + if got := result.GetReason(); got != tt.newReason { + t.Errorf("WithReason() reason = %v, want %v", got, tt.newReason) + } + + // Check that VMID is preserved + if got := result.GetResource(); got != tt.expectedVMID { + t.Errorf("WithReason() preserved VMID = %v, want %v", got, tt.expectedVMID) + } + + // Check that Host is preserved + if got := result.GetHost(); got != tt.expectedHost { + t.Errorf("WithReason() preserved Host = %v, want %v", got, tt.expectedHost) + } + }) + } +} + +func TestVMDetection_ImplementsDetectionInterface(t *testing.T) { + // Verify that VMDetection implements the lib.Detection interface + var _ lib.Detection = VMDetection{} + var _ lib.Detection = &VMDetection{} + + d := VMDetection{ + VMID: "test-vm", + Reason: "test reason", + Host: "test-host", + } + + // Verify interface methods work correctly + if d.GetResource() != "test-vm" { + t.Error("GetResource() interface method not working") + } + if d.GetReason() != "test reason" { + t.Error("GetReason() interface method not working") + } + if d.GetHost() != "test-host" { + t.Error("GetHost() interface method not working") + } + + updated := d.WithReason("updated reason") + if updated.GetReason() != "updated reason" { + t.Error("WithReason() interface method not working") + } +} diff --git a/internal/scheduling/decisions/nova/plugins/weighers/vmware_anti_affinity_noisy_projects.go b/internal/scheduling/nova/plugins/weighers/vmware_anti_affinity_noisy_projects.go similarity index 80% rename from internal/scheduling/decisions/nova/plugins/weighers/vmware_anti_affinity_noisy_projects.go rename to internal/scheduling/nova/plugins/weighers/vmware_anti_affinity_noisy_projects.go index 5dae93259..181cada8b 100644 --- a/internal/scheduling/decisions/nova/plugins/weighers/vmware_anti_affinity_noisy_projects.go +++ b/internal/scheduling/nova/plugins/weighers/vmware_anti_affinity_noisy_projects.go @@ -12,6 +12,7 @@ import ( "github.com/cobaltcore-dev/cortex/api/v1alpha1" "github.com/cobaltcore-dev/cortex/internal/knowledge/extractor/plugins/compute" "github.com/cobaltcore-dev/cortex/internal/scheduling/lib" + corev1 "k8s.io/api/core/v1" "sigs.k8s.io/controller-runtime/pkg/client" ) @@ -36,12 +37,23 @@ func (o VMwareAntiAffinityNoisyProjectsStepOpts) Validate() error { // Step to avoid noisy projects by downvoting the hosts they are running on. type VMwareAntiAffinityNoisyProjectsStep struct { // BaseStep is a helper struct that provides common functionality for all steps. - lib.BaseStep[api.ExternalSchedulerRequest, VMwareAntiAffinityNoisyProjectsStepOpts] + lib.BaseWeigher[api.ExternalSchedulerRequest, VMwareAntiAffinityNoisyProjectsStepOpts] +} + +// Initialize the step and validate that all required knowledges are ready. +func (s *VMwareAntiAffinityNoisyProjectsStep) Init(ctx context.Context, client client.Client, weigher v1alpha1.WeigherSpec) error { + if err := s.BaseWeigher.Init(ctx, client, weigher); err != nil { + return err + } + if err := s.CheckKnowledges(ctx, corev1.ObjectReference{Name: "vmware-project-noisiness"}); err != nil { + return err + } + return nil } // Downvote the hosts a project is currently running on if it's noisy. 
-func (s *VMwareAntiAffinityNoisyProjectsStep) Run(traceLog *slog.Logger, request api.ExternalSchedulerRequest) (*lib.StepResult, error) { - result := s.PrepareResult(request) +func (s *VMwareAntiAffinityNoisyProjectsStep) Run(traceLog *slog.Logger, request api.ExternalSchedulerRequest) (*lib.FilterWeigherPipelineStepResult, error) { + result := s.IncludeAllHostsFromRequest(request) if !request.VMware { slog.Debug("Skipping general purpose balancing step for non-VMware VM") diff --git a/internal/scheduling/nova/plugins/weighers/vmware_anti_affinity_noisy_projects_test.go b/internal/scheduling/nova/plugins/weighers/vmware_anti_affinity_noisy_projects_test.go new file mode 100644 index 000000000..bce66198d --- /dev/null +++ b/internal/scheduling/nova/plugins/weighers/vmware_anti_affinity_noisy_projects_test.go @@ -0,0 +1,288 @@ +// Copyright SAP SE +// SPDX-License-Identifier: Apache-2.0 + +package weighers + +import ( + "context" + "log/slog" + "strings" + "testing" + + api "github.com/cobaltcore-dev/cortex/api/delegation/nova" + "github.com/cobaltcore-dev/cortex/api/v1alpha1" + "github.com/cobaltcore-dev/cortex/internal/knowledge/extractor/plugins/compute" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/runtime" + "sigs.k8s.io/controller-runtime/pkg/client/fake" +) + +func TestVMwareAntiAffinityNoisyProjectsStepOpts_Validate(t *testing.T) { + tests := []struct { + name string + opts VMwareAntiAffinityNoisyProjectsStepOpts + wantError bool + }{ + { + name: "valid opts with different bounds", + opts: VMwareAntiAffinityNoisyProjectsStepOpts{ + AvgCPUUsageLowerBound: 20.0, + AvgCPUUsageUpperBound: 100.0, + AvgCPUUsageActivationLowerBound: 0.0, + AvgCPUUsageActivationUpperBound: -0.5, + }, + wantError: false, + }, + { + name: "invalid opts - equal bounds causes zero division", + opts: VMwareAntiAffinityNoisyProjectsStepOpts{ + AvgCPUUsageLowerBound: 50.0, + AvgCPUUsageUpperBound: 50.0, // Same as lower bound + AvgCPUUsageActivationLowerBound: 0.0, + AvgCPUUsageActivationUpperBound: -0.5, + }, + wantError: true, + }, + { + name: "valid opts with zero bounds", + opts: VMwareAntiAffinityNoisyProjectsStepOpts{ + AvgCPUUsageLowerBound: 0.0, + AvgCPUUsageUpperBound: 100.0, + AvgCPUUsageActivationLowerBound: 0.0, + AvgCPUUsageActivationUpperBound: 1.0, + }, + wantError: false, + }, + { + name: "valid opts with negative values", + opts: VMwareAntiAffinityNoisyProjectsStepOpts{ + AvgCPUUsageLowerBound: -10.0, + AvgCPUUsageUpperBound: 10.0, + AvgCPUUsageActivationLowerBound: -1.0, + AvgCPUUsageActivationUpperBound: 1.0, + }, + wantError: false, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + err := tt.opts.Validate() + if (err != nil) != tt.wantError { + t.Errorf("Validate() error = %v, wantError %v", err, tt.wantError) + } + }) + } +} + +func TestVMwareAntiAffinityNoisyProjectsStep_Init(t *testing.T) { + scheme, err := v1alpha1.SchemeBuilder.Build() + if err != nil { + t.Fatalf("expected no error, got %v", err) + } + + // Valid params JSON for the weigher + validParams := runtime.RawExtension{ + Raw: []byte(`{"avgCPUUsageLowerBound": 20.0, "avgCPUUsageUpperBound": 100.0, "avgCPUUsageActivationLowerBound": 0.0, "avgCPUUsageActivationUpperBound": -0.5}`), + } + + tests := []struct { + name string + knowledge *v1alpha1.Knowledge + weigherSpec v1alpha1.WeigherSpec + wantError bool + errorContains string + }{ + { + name: "successful init with valid knowledge", + knowledge: &v1alpha1.Knowledge{ + ObjectMeta: metav1.ObjectMeta{Name: 
"vmware-project-noisiness"}, + Status: v1alpha1.KnowledgeStatus{ + Conditions: []metav1.Condition{ + { + Type: v1alpha1.KnowledgeConditionReady, + Status: metav1.ConditionTrue, + }, + }, + RawLength: 10, + }, + }, + weigherSpec: v1alpha1.WeigherSpec{ + Name: "vmware_anti_affinity_noisy_projects", + Params: validParams, + }, + wantError: false, + }, + { + name: "fails when knowledge doesn't exist", + knowledge: nil, + weigherSpec: v1alpha1.WeigherSpec{ + Name: "vmware_anti_affinity_noisy_projects", + Params: validParams, + }, + wantError: true, + errorContains: "failed to get knowledge", + }, + { + name: "fails when knowledge not ready", + knowledge: &v1alpha1.Knowledge{ + ObjectMeta: metav1.ObjectMeta{Name: "vmware-project-noisiness"}, + Status: v1alpha1.KnowledgeStatus{ + Conditions: []metav1.Condition{ + { + Type: v1alpha1.KnowledgeConditionReady, + Status: metav1.ConditionFalse, + }, + }, + RawLength: 0, + }, + }, + weigherSpec: v1alpha1.WeigherSpec{ + Name: "vmware_anti_affinity_noisy_projects", + Params: validParams, + }, + wantError: true, + errorContains: "not ready", + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + builder := fake.NewClientBuilder().WithScheme(scheme) + if tt.knowledge != nil { + builder = builder.WithObjects(tt.knowledge) + } + client := builder.Build() + + step := &VMwareAntiAffinityNoisyProjectsStep{} + err := step.Init(context.Background(), client, tt.weigherSpec) + + if tt.wantError { + if err == nil { + t.Error("expected error, got nil") + } else if tt.errorContains != "" && !containsString(err.Error(), tt.errorContains) { + t.Errorf("expected error containing %q, got %q", tt.errorContains, err.Error()) + } + } else { + if err != nil { + t.Errorf("unexpected error: %v", err) + } + } + }) + } +} + +func containsString(s, substr string) bool { + return strings.Contains(s, substr) +} + +func TestVMwareAntiAffinityNoisyProjectsStep_Run(t *testing.T) { + scheme, err := v1alpha1.SchemeBuilder.Build() + if err != nil { + t.Fatalf("expected no error, got %v", err) + } + + vropsProjectNoisiness, err := v1alpha1.BoxFeatureList([]any{ + &compute.VROpsProjectNoisiness{Project: "project1", ComputeHost: "host1", AvgCPUOfProject: 25.0}, + &compute.VROpsProjectNoisiness{Project: "project1", ComputeHost: "host2", AvgCPUOfProject: 30.0}, + &compute.VROpsProjectNoisiness{Project: "project2", ComputeHost: "host3", AvgCPUOfProject: 15.0}, + }) + if err != nil { + t.Fatalf("expected no error, got %v", err) + } + + step := &VMwareAntiAffinityNoisyProjectsStep{} + step.Options.AvgCPUUsageLowerBound = 20.0 + step.Options.AvgCPUUsageUpperBound = 100.0 + step.Options.AvgCPUUsageActivationLowerBound = 0.0 + step.Options.AvgCPUUsageActivationUpperBound = -0.5 + step.Client = fake.NewClientBuilder(). + WithScheme(scheme). + WithObjects(&v1alpha1.Knowledge{ + ObjectMeta: metav1.ObjectMeta{Name: "vmware-project-noisiness"}, + Status: v1alpha1.KnowledgeStatus{Raw: vropsProjectNoisiness}, + }). 
+ Build() + + tests := []struct { + name string + request api.ExternalSchedulerRequest + downvotedHosts map[string]struct{} + }{ + { + name: "Noisy project", + request: api.ExternalSchedulerRequest{ + Spec: api.NovaObject[api.NovaSpec]{ + Data: api.NovaSpec{ + ProjectID: "project1", + }, + }, + VMware: true, + Hosts: []api.ExternalSchedulerHost{ + {ComputeHost: "host1"}, + {ComputeHost: "host2"}, + {ComputeHost: "host3"}, + }, + }, + downvotedHosts: map[string]struct{}{ + "host1": {}, + "host2": {}, + }, + }, + { + name: "Non-noisy project", + request: api.ExternalSchedulerRequest{ + Spec: api.NovaObject[api.NovaSpec]{ + Data: api.NovaSpec{ + ProjectID: "project2", + }, + }, + VMware: true, + Hosts: []api.ExternalSchedulerHost{ + {ComputeHost: "host1"}, + {ComputeHost: "host2"}, + {ComputeHost: "host3"}, + }, + }, + downvotedHosts: map[string]struct{}{}, + }, + { + name: "No noisy project data", + request: api.ExternalSchedulerRequest{ + Spec: api.NovaObject[api.NovaSpec]{ + Data: api.NovaSpec{ + ProjectID: "project3", + }, + }, + VMware: true, + Hosts: []api.ExternalSchedulerHost{ + {ComputeHost: "host1"}, + {ComputeHost: "host2"}, + {ComputeHost: "host3"}, + }, + }, + downvotedHosts: map[string]struct{}{}, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + result, err := step.Run(slog.Default(), tt.request) + if err != nil { + t.Fatalf("expected no error, got %v", err) + } + // Check that the weights have decreased + for host, weight := range result.Activations { + if _, ok := tt.downvotedHosts[host]; ok { + if weight >= 0 { + t.Errorf("expected weight for host %s to be less than 0, got %f", host, weight) + } + } else { + if weight != 0 { + t.Errorf("expected weight for host %s to be 0, got %f", host, weight) + } + } + } + }) + } +} diff --git a/internal/scheduling/decisions/nova/plugins/weighers/vmware_avoid_long_term_contended_hosts.go b/internal/scheduling/nova/plugins/weighers/vmware_avoid_long_term_contended_hosts.go similarity index 84% rename from internal/scheduling/decisions/nova/plugins/weighers/vmware_avoid_long_term_contended_hosts.go rename to internal/scheduling/nova/plugins/weighers/vmware_avoid_long_term_contended_hosts.go index f06aa49c2..e58679f55 100644 --- a/internal/scheduling/decisions/nova/plugins/weighers/vmware_avoid_long_term_contended_hosts.go +++ b/internal/scheduling/nova/plugins/weighers/vmware_avoid_long_term_contended_hosts.go @@ -12,6 +12,7 @@ import ( "github.com/cobaltcore-dev/cortex/api/v1alpha1" "github.com/cobaltcore-dev/cortex/internal/knowledge/extractor/plugins/compute" "github.com/cobaltcore-dev/cortex/internal/scheduling/lib" + corev1 "k8s.io/api/core/v1" "sigs.k8s.io/controller-runtime/pkg/client" ) @@ -45,12 +46,23 @@ func (o VMwareAvoidLongTermContendedHostsStepOpts) Validate() error { // Step to avoid long term contended hosts by downvoting them. type VMwareAvoidLongTermContendedHostsStep struct { // BaseStep is a helper struct that provides common functionality for all steps. - lib.BaseStep[api.ExternalSchedulerRequest, VMwareAvoidLongTermContendedHostsStepOpts] + lib.BaseWeigher[api.ExternalSchedulerRequest, VMwareAvoidLongTermContendedHostsStepOpts] +} + +// Initialize the step and validate that all required knowledges are ready. 
+func (s *VMwareAvoidLongTermContendedHostsStep) Init(ctx context.Context, client client.Client, weigher v1alpha1.WeigherSpec) error { + if err := s.BaseWeigher.Init(ctx, client, weigher); err != nil { + return err + } + if err := s.CheckKnowledges(ctx, corev1.ObjectReference{Name: "vmware-long-term-contended-hosts"}); err != nil { + return err + } + return nil } // Downvote hosts that are highly contended. -func (s *VMwareAvoidLongTermContendedHostsStep) Run(traceLog *slog.Logger, request api.ExternalSchedulerRequest) (*lib.StepResult, error) { - result := s.PrepareResult(request) +func (s *VMwareAvoidLongTermContendedHostsStep) Run(traceLog *slog.Logger, request api.ExternalSchedulerRequest) (*lib.FilterWeigherPipelineStepResult, error) { + result := s.IncludeAllHostsFromRequest(request) if !request.VMware { slog.Debug("Skipping general purpose balancing step for non-VMware VM") diff --git a/internal/scheduling/nova/plugins/weighers/vmware_avoid_long_term_contended_hosts_test.go b/internal/scheduling/nova/plugins/weighers/vmware_avoid_long_term_contended_hosts_test.go new file mode 100644 index 000000000..052d39a1f --- /dev/null +++ b/internal/scheduling/nova/plugins/weighers/vmware_avoid_long_term_contended_hosts_test.go @@ -0,0 +1,252 @@ +// Copyright SAP SE +// SPDX-License-Identifier: Apache-2.0 + +package weighers + +import ( + "context" + "log/slog" + "strings" + "testing" + + api "github.com/cobaltcore-dev/cortex/api/delegation/nova" + "github.com/cobaltcore-dev/cortex/api/v1alpha1" + "github.com/cobaltcore-dev/cortex/internal/knowledge/extractor/plugins/compute" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/runtime" + "sigs.k8s.io/controller-runtime/pkg/client/fake" +) + +func TestVMwareAvoidLongTermContendedHostsStepOpts_Validate(t *testing.T) { + tests := []struct { + name string + opts VMwareAvoidLongTermContendedHostsStepOpts + wantError bool + }{ + { + name: "valid opts with different bounds", + opts: VMwareAvoidLongTermContendedHostsStepOpts{ + AvgCPUContentionLowerBound: 0.0, + AvgCPUContentionUpperBound: 100.0, + AvgCPUContentionActivationLowerBound: 0.0, + AvgCPUContentionActivationUpperBound: -1.0, + MaxCPUContentionLowerBound: 0.0, + MaxCPUContentionUpperBound: 100.0, + MaxCPUContentionActivationLowerBound: 0.0, + MaxCPUContentionActivationUpperBound: -1.0, + }, + wantError: false, + }, + { + name: "invalid opts - equal avg bounds", + opts: VMwareAvoidLongTermContendedHostsStepOpts{ + AvgCPUContentionLowerBound: 50.0, + AvgCPUContentionUpperBound: 50.0, // Same as lower + AvgCPUContentionActivationLowerBound: 0.0, + AvgCPUContentionActivationUpperBound: -1.0, + MaxCPUContentionLowerBound: 0.0, + MaxCPUContentionUpperBound: 100.0, + MaxCPUContentionActivationLowerBound: 0.0, + MaxCPUContentionActivationUpperBound: -1.0, + }, + wantError: true, + }, + { + name: "invalid opts - equal max bounds", + opts: VMwareAvoidLongTermContendedHostsStepOpts{ + AvgCPUContentionLowerBound: 0.0, + AvgCPUContentionUpperBound: 100.0, + AvgCPUContentionActivationLowerBound: 0.0, + AvgCPUContentionActivationUpperBound: -1.0, + MaxCPUContentionLowerBound: 50.0, + MaxCPUContentionUpperBound: 50.0, // Same as lower + MaxCPUContentionActivationLowerBound: 0.0, + MaxCPUContentionActivationUpperBound: -1.0, + }, + wantError: true, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + err := tt.opts.Validate() + if (err != nil) != tt.wantError { + t.Errorf("Validate() error = %v, wantError %v", err, tt.wantError) + } + }) + } +} + +func 
TestVMwareAvoidLongTermContendedHostsStep_Init(t *testing.T) { + scheme, err := v1alpha1.SchemeBuilder.Build() + if err != nil { + t.Fatalf("expected no error, got %v", err) + } + + validParams := runtime.RawExtension{ + Raw: []byte(`{ + "avgCPUContentionLowerBound": 0, + "avgCPUContentionUpperBound": 100, + "avgCPUContentionActivationLowerBound": 0, + "avgCPUContentionActivationUpperBound": -1, + "maxCPUContentionLowerBound": 0, + "maxCPUContentionUpperBound": 100, + "maxCPUContentionActivationLowerBound": 0, + "maxCPUContentionActivationUpperBound": -1 + }`), + } + + tests := []struct { + name string + knowledge *v1alpha1.Knowledge + weigherSpec v1alpha1.WeigherSpec + wantError bool + errorContains string + }{ + { + name: "successful init with valid knowledge", + knowledge: &v1alpha1.Knowledge{ + ObjectMeta: metav1.ObjectMeta{Name: "vmware-long-term-contended-hosts"}, + Status: v1alpha1.KnowledgeStatus{ + Conditions: []metav1.Condition{ + { + Type: v1alpha1.KnowledgeConditionReady, + Status: metav1.ConditionTrue, + }, + }, + RawLength: 10, + }, + }, + weigherSpec: v1alpha1.WeigherSpec{ + Name: "vmware_avoid_long_term_contended_hosts", + Params: validParams, + }, + wantError: false, + }, + { + name: "fails when knowledge doesn't exist", + knowledge: nil, + weigherSpec: v1alpha1.WeigherSpec{ + Name: "vmware_avoid_long_term_contended_hosts", + Params: validParams, + }, + wantError: true, + errorContains: "failed to get knowledge", + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + builder := fake.NewClientBuilder().WithScheme(scheme) + if tt.knowledge != nil { + builder = builder.WithObjects(tt.knowledge) + } + client := builder.Build() + + step := &VMwareAvoidLongTermContendedHostsStep{} + err := step.Init(context.Background(), client, tt.weigherSpec) + + if tt.wantError { + if err == nil { + t.Error("expected error, got nil") + } else if tt.errorContains != "" && !strings.Contains(err.Error(), tt.errorContains) { + t.Errorf("expected error containing %q, got %q", tt.errorContains, err.Error()) + } + } else { + if err != nil { + t.Errorf("unexpected error: %v", err) + } + } + }) + } +} + +func TestVMwareAvoidLongTermContendedHostsStep_Run(t *testing.T) { + scheme, err := v1alpha1.SchemeBuilder.Build() + if err != nil { + t.Fatalf("expected no error, got %v", err) + } + + vropsHostsystemContentionLongTerm, err := v1alpha1.BoxFeatureList([]any{ + &compute.VROpsHostsystemContentionLongTerm{ComputeHost: "host1", AvgCPUContention: 0.0, MaxCPUContention: 0.0}, + &compute.VROpsHostsystemContentionLongTerm{ComputeHost: "host2", AvgCPUContention: 100.0, MaxCPUContention: 0.0}, + &compute.VROpsHostsystemContentionLongTerm{ComputeHost: "host3", AvgCPUContention: 0.0, MaxCPUContention: 100.0}, + &compute.VROpsHostsystemContentionLongTerm{ComputeHost: "host4", AvgCPUContention: 100.0, MaxCPUContention: 100.0}, + }) + if err != nil { + t.Fatalf("expected no error, got %v", err) + } + + // Create an instance of the step + step := &VMwareAvoidLongTermContendedHostsStep{} + step.Options.AvgCPUContentionLowerBound = 0 + step.Options.AvgCPUContentionUpperBound = 100 + step.Options.AvgCPUContentionActivationLowerBound = 0.0 + step.Options.AvgCPUContentionActivationUpperBound = -1.0 + step.Options.MaxCPUContentionLowerBound = 0 + step.Options.MaxCPUContentionUpperBound = 100 + step.Options.MaxCPUContentionActivationLowerBound = 0.0 + step.Options.MaxCPUContentionActivationUpperBound = -1.0 + step.Client = fake.NewClientBuilder(). + WithScheme(scheme). 
+ WithObjects(&v1alpha1.Knowledge{ + ObjectMeta: metav1.ObjectMeta{Name: "vmware-long-term-contended-hosts"}, + Status: v1alpha1.KnowledgeStatus{Raw: vropsHostsystemContentionLongTerm}, + }). + Build() + + tests := []struct { + name string + request api.ExternalSchedulerRequest + expected map[string]float64 + }{ + { + name: "Avoid contended hosts", + request: api.ExternalSchedulerRequest{ + VMware: true, + Hosts: []api.ExternalSchedulerHost{ + {ComputeHost: "host1"}, + {ComputeHost: "host2"}, + {ComputeHost: "host3"}, + {ComputeHost: "host4"}, + }, + }, + expected: map[string]float64{ + "host1": 0, + "host2": -1, + "host3": -1, + "host4": -2, // Max and avg contention stack up. + }, + }, + { + name: "Missing data", + request: api.ExternalSchedulerRequest{ + VMware: true, + Hosts: []api.ExternalSchedulerHost{ + {ComputeHost: "host4"}, + {ComputeHost: "host5"}, + }, + }, + expected: map[string]float64{ + "host4": -2, + "host5": 0, // No data but still contained in the result. + }, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + result, err := step.Run(slog.Default(), tt.request) + if err != nil { + t.Fatalf("expected no error, got %v", err) + } + // Check that the weights have decreased + for host, weight := range result.Activations { + expected := tt.expected[host] + if weight != expected { + t.Errorf("expected weight for host %s to be %f, got %f", host, expected, weight) + } + } + }) + } +} diff --git a/internal/scheduling/decisions/nova/plugins/weighers/vmware_avoid_short_term_contended_hosts.go b/internal/scheduling/nova/plugins/weighers/vmware_avoid_short_term_contended_hosts.go similarity index 84% rename from internal/scheduling/decisions/nova/plugins/weighers/vmware_avoid_short_term_contended_hosts.go rename to internal/scheduling/nova/plugins/weighers/vmware_avoid_short_term_contended_hosts.go index f584765b6..21a55ced6 100644 --- a/internal/scheduling/decisions/nova/plugins/weighers/vmware_avoid_short_term_contended_hosts.go +++ b/internal/scheduling/nova/plugins/weighers/vmware_avoid_short_term_contended_hosts.go @@ -12,6 +12,7 @@ import ( "github.com/cobaltcore-dev/cortex/api/v1alpha1" "github.com/cobaltcore-dev/cortex/internal/knowledge/extractor/plugins/compute" "github.com/cobaltcore-dev/cortex/internal/scheduling/lib" + corev1 "k8s.io/api/core/v1" "sigs.k8s.io/controller-runtime/pkg/client" ) @@ -45,12 +46,23 @@ func (o VMwareAvoidShortTermContendedHostsStepOpts) Validate() error { // Step to avoid recently contended hosts by downvoting them. type VMwareAvoidShortTermContendedHostsStep struct { // BaseStep is a helper struct that provides common functionality for all steps. - lib.BaseStep[api.ExternalSchedulerRequest, VMwareAvoidShortTermContendedHostsStepOpts] + lib.BaseWeigher[api.ExternalSchedulerRequest, VMwareAvoidShortTermContendedHostsStepOpts] +} + +// Initialize the step and validate that all required knowledges are ready. +func (s *VMwareAvoidShortTermContendedHostsStep) Init(ctx context.Context, client client.Client, weigher v1alpha1.WeigherSpec) error { + if err := s.BaseWeigher.Init(ctx, client, weigher); err != nil { + return err + } + if err := s.CheckKnowledges(ctx, corev1.ObjectReference{Name: "vmware-short-term-contended-hosts"}); err != nil { + return err + } + return nil } // Downvote hosts that are highly contended. 
-func (s *VMwareAvoidShortTermContendedHostsStep) Run(traceLog *slog.Logger, request api.ExternalSchedulerRequest) (*lib.StepResult, error) { - result := s.PrepareResult(request) +func (s *VMwareAvoidShortTermContendedHostsStep) Run(traceLog *slog.Logger, request api.ExternalSchedulerRequest) (*lib.FilterWeigherPipelineStepResult, error) { + result := s.IncludeAllHostsFromRequest(request) if !request.VMware { slog.Debug("Skipping general purpose balancing step for non-VMware VM") diff --git a/internal/scheduling/nova/plugins/weighers/vmware_avoid_short_term_contended_hosts_test.go b/internal/scheduling/nova/plugins/weighers/vmware_avoid_short_term_contended_hosts_test.go new file mode 100644 index 000000000..13bd1cbd6 --- /dev/null +++ b/internal/scheduling/nova/plugins/weighers/vmware_avoid_short_term_contended_hosts_test.go @@ -0,0 +1,252 @@ +// Copyright SAP SE +// SPDX-License-Identifier: Apache-2.0 + +package weighers + +import ( + "context" + "log/slog" + "strings" + "testing" + + api "github.com/cobaltcore-dev/cortex/api/delegation/nova" + "github.com/cobaltcore-dev/cortex/api/v1alpha1" + "github.com/cobaltcore-dev/cortex/internal/knowledge/extractor/plugins/compute" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/runtime" + "sigs.k8s.io/controller-runtime/pkg/client/fake" +) + +func TestVMwareAvoidShortTermContendedHostsStepOpts_Validate(t *testing.T) { + tests := []struct { + name string + opts VMwareAvoidShortTermContendedHostsStepOpts + wantError bool + }{ + { + name: "valid opts with different bounds", + opts: VMwareAvoidShortTermContendedHostsStepOpts{ + AvgCPUContentionLowerBound: 0.0, + AvgCPUContentionUpperBound: 100.0, + AvgCPUContentionActivationLowerBound: 0.0, + AvgCPUContentionActivationUpperBound: -1.0, + MaxCPUContentionLowerBound: 0.0, + MaxCPUContentionUpperBound: 100.0, + MaxCPUContentionActivationLowerBound: 0.0, + MaxCPUContentionActivationUpperBound: -1.0, + }, + wantError: false, + }, + { + name: "invalid opts - equal avg bounds", + opts: VMwareAvoidShortTermContendedHostsStepOpts{ + AvgCPUContentionLowerBound: 50.0, + AvgCPUContentionUpperBound: 50.0, // Same as lower + AvgCPUContentionActivationLowerBound: 0.0, + AvgCPUContentionActivationUpperBound: -1.0, + MaxCPUContentionLowerBound: 0.0, + MaxCPUContentionUpperBound: 100.0, + MaxCPUContentionActivationLowerBound: 0.0, + MaxCPUContentionActivationUpperBound: -1.0, + }, + wantError: true, + }, + { + name: "invalid opts - equal max bounds", + opts: VMwareAvoidShortTermContendedHostsStepOpts{ + AvgCPUContentionLowerBound: 0.0, + AvgCPUContentionUpperBound: 100.0, + AvgCPUContentionActivationLowerBound: 0.0, + AvgCPUContentionActivationUpperBound: -1.0, + MaxCPUContentionLowerBound: 50.0, + MaxCPUContentionUpperBound: 50.0, // Same as lower + MaxCPUContentionActivationLowerBound: 0.0, + MaxCPUContentionActivationUpperBound: -1.0, + }, + wantError: true, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + err := tt.opts.Validate() + if (err != nil) != tt.wantError { + t.Errorf("Validate() error = %v, wantError %v", err, tt.wantError) + } + }) + } +} + +func TestVMwareAvoidShortTermContendedHostsStep_Init(t *testing.T) { + scheme, err := v1alpha1.SchemeBuilder.Build() + if err != nil { + t.Fatalf("expected no error, got %v", err) + } + + validParams := runtime.RawExtension{ + Raw: []byte(`{ + "avgCPUContentionLowerBound": 0, + "avgCPUContentionUpperBound": 100, + "avgCPUContentionActivationLowerBound": 0, + "avgCPUContentionActivationUpperBound": -1, + 
"maxCPUContentionLowerBound": 0, + "maxCPUContentionUpperBound": 100, + "maxCPUContentionActivationLowerBound": 0, + "maxCPUContentionActivationUpperBound": -1 + }`), + } + + tests := []struct { + name string + knowledge *v1alpha1.Knowledge + weigherSpec v1alpha1.WeigherSpec + wantError bool + errorContains string + }{ + { + name: "successful init with valid knowledge", + knowledge: &v1alpha1.Knowledge{ + ObjectMeta: metav1.ObjectMeta{Name: "vmware-short-term-contended-hosts"}, + Status: v1alpha1.KnowledgeStatus{ + Conditions: []metav1.Condition{ + { + Type: v1alpha1.KnowledgeConditionReady, + Status: metav1.ConditionTrue, + }, + }, + RawLength: 10, + }, + }, + weigherSpec: v1alpha1.WeigherSpec{ + Name: "vmware_avoid_short_term_contended_hosts", + Params: validParams, + }, + wantError: false, + }, + { + name: "fails when knowledge doesn't exist", + knowledge: nil, + weigherSpec: v1alpha1.WeigherSpec{ + Name: "vmware_avoid_short_term_contended_hosts", + Params: validParams, + }, + wantError: true, + errorContains: "failed to get knowledge", + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + builder := fake.NewClientBuilder().WithScheme(scheme) + if tt.knowledge != nil { + builder = builder.WithObjects(tt.knowledge) + } + client := builder.Build() + + step := &VMwareAvoidShortTermContendedHostsStep{} + err := step.Init(context.Background(), client, tt.weigherSpec) + + if tt.wantError { + if err == nil { + t.Error("expected error, got nil") + } else if tt.errorContains != "" && !strings.Contains(err.Error(), tt.errorContains) { + t.Errorf("expected error containing %q, got %q", tt.errorContains, err.Error()) + } + } else { + if err != nil { + t.Errorf("unexpected error: %v", err) + } + } + }) + } +} + +func TestVMwareAvoidShortTermContendedHostsStep_Run(t *testing.T) { + scheme, err := v1alpha1.SchemeBuilder.Build() + if err != nil { + t.Fatalf("expected no error, got %v", err) + } + + vropsHostsystemContentionShortTerm, err := v1alpha1.BoxFeatureList([]any{ + &compute.VROpsHostsystemContentionShortTerm{ComputeHost: "host1", AvgCPUContention: 0.0, MaxCPUContention: 0.0}, + &compute.VROpsHostsystemContentionShortTerm{ComputeHost: "host2", AvgCPUContention: 100.0, MaxCPUContention: 0.0}, + &compute.VROpsHostsystemContentionShortTerm{ComputeHost: "host3", AvgCPUContention: 0.0, MaxCPUContention: 100.0}, + &compute.VROpsHostsystemContentionShortTerm{ComputeHost: "host4", AvgCPUContention: 100.0, MaxCPUContention: 100.0}, + }) + if err != nil { + t.Fatalf("expected no error, got %v", err) + } + + // Create an instance of the step + step := &VMwareAvoidShortTermContendedHostsStep{} + step.Options.AvgCPUContentionLowerBound = 0 + step.Options.AvgCPUContentionUpperBound = 100 + step.Options.AvgCPUContentionActivationLowerBound = 0.0 + step.Options.AvgCPUContentionActivationUpperBound = -1.0 + step.Options.MaxCPUContentionLowerBound = 0 + step.Options.MaxCPUContentionUpperBound = 100 + step.Options.MaxCPUContentionActivationLowerBound = 0.0 + step.Options.MaxCPUContentionActivationUpperBound = -1.0 + step.Client = fake.NewClientBuilder(). + WithScheme(scheme). + WithObjects(&v1alpha1.Knowledge{ + ObjectMeta: metav1.ObjectMeta{Name: "vmware-short-term-contended-hosts"}, + Status: v1alpha1.KnowledgeStatus{Raw: vropsHostsystemContentionShortTerm}, + }). 
+ Build() + + tests := []struct { + name string + request api.ExternalSchedulerRequest + expected map[string]float64 + }{ + { + name: "Avoid contended hosts", + request: api.ExternalSchedulerRequest{ + VMware: true, + Hosts: []api.ExternalSchedulerHost{ + {ComputeHost: "host1"}, + {ComputeHost: "host2"}, + {ComputeHost: "host3"}, + {ComputeHost: "host4"}, + }, + }, + expected: map[string]float64{ + "host1": 0, + "host2": -1, + "host3": -1, + "host4": -2, // Max and avg contention stack up. + }, + }, + { + name: "Missing data", + request: api.ExternalSchedulerRequest{ + VMware: true, + Hosts: []api.ExternalSchedulerHost{ + {ComputeHost: "host4"}, + {ComputeHost: "host5"}, // No data for host5 + }, + }, + expected: map[string]float64{ + "host4": -2, + "host5": 0, // No data but still contained in the result. + }, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + result, err := step.Run(slog.Default(), tt.request) + if err != nil { + t.Fatalf("expected no error, got %v", err) + } + // Check that the weights have decreased + for host, weight := range result.Activations { + expected := tt.expected[host] + if weight != expected { + t.Errorf("expected weight for host %s to be %f, got %f", host, expected, weight) + } + } + }) + } +} diff --git a/internal/scheduling/decisions/nova/plugins/weighers/vmware_general_purpose_balancing.go b/internal/scheduling/nova/plugins/weighers/vmware_general_purpose_balancing.go similarity index 82% rename from internal/scheduling/decisions/nova/plugins/weighers/vmware_general_purpose_balancing.go rename to internal/scheduling/nova/plugins/weighers/vmware_general_purpose_balancing.go index ef97caae5..db2086a86 100644 --- a/internal/scheduling/decisions/nova/plugins/weighers/vmware_general_purpose_balancing.go +++ b/internal/scheduling/nova/plugins/weighers/vmware_general_purpose_balancing.go @@ -12,7 +12,8 @@ import ( api "github.com/cobaltcore-dev/cortex/api/delegation/nova" "github.com/cobaltcore-dev/cortex/api/v1alpha1" "github.com/cobaltcore-dev/cortex/internal/knowledge/extractor/plugins/compute" - scheduling "github.com/cobaltcore-dev/cortex/internal/scheduling/lib" + "github.com/cobaltcore-dev/cortex/internal/scheduling/lib" + corev1 "k8s.io/api/core/v1" "sigs.k8s.io/controller-runtime/pkg/client" ) @@ -35,12 +36,26 @@ func (o VMwareGeneralPurposeBalancingStepOpts) Validate() error { // Step to balance VMs on hosts based on the host's available resources. type VMwareGeneralPurposeBalancingStep struct { // BaseStep is a helper struct that provides common functionality for all steps. - scheduling.BaseStep[api.ExternalSchedulerRequest, VMwareGeneralPurposeBalancingStepOpts] + lib.BaseWeigher[api.ExternalSchedulerRequest, VMwareGeneralPurposeBalancingStepOpts] +} + +// Initialize the step and validate that all required knowledges are ready. +func (s *VMwareGeneralPurposeBalancingStep) Init(ctx context.Context, client client.Client, weigher v1alpha1.WeigherSpec) error { + if err := s.BaseWeigher.Init(ctx, client, weigher); err != nil { + return err + } + if err := s.CheckKnowledges(ctx, + corev1.ObjectReference{Name: "host-utilization"}, + corev1.ObjectReference{Name: "host-capabilities"}, + ); err != nil { + return err + } + return nil } // Pack VMs on hosts based on their flavor. 
-func (s *VMwareGeneralPurposeBalancingStep) Run(traceLog *slog.Logger, request api.ExternalSchedulerRequest) (*scheduling.StepResult, error) { - result := s.PrepareResult(request) +func (s *VMwareGeneralPurposeBalancingStep) Run(traceLog *slog.Logger, request api.ExternalSchedulerRequest) (*lib.FilterWeigherPipelineStepResult, error) { + result := s.IncludeAllHostsFromRequest(request) // Don't execute the step for non-hana flavors. if strings.Contains(request.Spec.Data.Flavor.Data.Name, "hana") { slog.Debug("Skipping general purpose balancing step for HANA flavor", "flavor", request.Spec.Data.Flavor.Data.Name) @@ -74,7 +89,7 @@ func (s *VMwareGeneralPurposeBalancingStep) Run(traceLog *slog.Logger, request a result. Statistics["ram utilized"]. Subjects[hostUtilization.ComputeHost] = hostUtilization.RAMUtilizedPct - result.Activations[hostUtilization.ComputeHost] = scheduling.MinMaxScale( + result.Activations[hostUtilization.ComputeHost] = lib.MinMaxScale( hostUtilization.RAMUtilizedPct, s.Options.RAMUtilizedLowerBoundPct, s.Options.RAMUtilizedUpperBoundPct, diff --git a/internal/scheduling/decisions/nova/plugins/weighers/vmware_general_purpose_balancing_test.go b/internal/scheduling/nova/plugins/weighers/vmware_general_purpose_balancing_test.go similarity index 72% rename from internal/scheduling/decisions/nova/plugins/weighers/vmware_general_purpose_balancing_test.go rename to internal/scheduling/nova/plugins/weighers/vmware_general_purpose_balancing_test.go index 446a0ec9e..6a33c2762 100644 --- a/internal/scheduling/decisions/nova/plugins/weighers/vmware_general_purpose_balancing_test.go +++ b/internal/scheduling/nova/plugins/weighers/vmware_general_purpose_balancing_test.go @@ -4,13 +4,16 @@ package weighers import ( + "context" "log/slog" + "strings" "testing" api "github.com/cobaltcore-dev/cortex/api/delegation/nova" "github.com/cobaltcore-dev/cortex/api/v1alpha1" "github.com/cobaltcore-dev/cortex/internal/knowledge/extractor/plugins/compute" - v1 "k8s.io/apimachinery/pkg/apis/meta/v1" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/runtime" "sigs.k8s.io/controller-runtime/pkg/client/fake" ) @@ -48,6 +51,94 @@ func TestVMwareGeneralPurposeBalancingStepOpts_Validate(t *testing.T) { } } +func TestVMwareGeneralPurposeBalancingStep_Init(t *testing.T) { + scheme, err := v1alpha1.SchemeBuilder.Build() + if err != nil { + t.Fatalf("expected no error, got %v", err) + } + + validParams := runtime.RawExtension{ + Raw: []byte(`{ + "ramUtilizedLowerBoundPct": 20.0, + "ramUtilizedUpperBoundPct": 80.0, + "ramUtilizedActivationLowerBound": 0.0, + "ramUtilizedActivationUpperBound": 1.0 + }`), + } + + tests := []struct { + name string + knowledges []*v1alpha1.Knowledge + weigherSpec v1alpha1.WeigherSpec + wantError bool + errorContains string + }{ + { + name: "successful init with valid knowledges", + knowledges: []*v1alpha1.Knowledge{ + { + ObjectMeta: metav1.ObjectMeta{Name: "host-utilization"}, + Status: v1alpha1.KnowledgeStatus{ + Conditions: []metav1.Condition{ + {Type: v1alpha1.KnowledgeConditionReady, Status: metav1.ConditionTrue}, + }, + RawLength: 10, + }, + }, + { + ObjectMeta: metav1.ObjectMeta{Name: "host-capabilities"}, + Status: v1alpha1.KnowledgeStatus{ + Conditions: []metav1.Condition{ + {Type: v1alpha1.KnowledgeConditionReady, Status: metav1.ConditionTrue}, + }, + RawLength: 10, + }, + }, + }, + weigherSpec: v1alpha1.WeigherSpec{ + Name: "vmware_general_purpose_balancing", + Params: validParams, + }, + wantError: false, + }, + { + name: "fails when 
host-utilization knowledge doesn't exist", + knowledges: nil, + weigherSpec: v1alpha1.WeigherSpec{ + Name: "vmware_general_purpose_balancing", + Params: validParams, + }, + wantError: true, + errorContains: "failed to get knowledge", + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + builder := fake.NewClientBuilder().WithScheme(scheme) + for _, k := range tt.knowledges { + builder = builder.WithObjects(k) + } + client := builder.Build() + + step := &VMwareGeneralPurposeBalancingStep{} + err := step.Init(context.Background(), client, tt.weigherSpec) + + if tt.wantError { + if err == nil { + t.Error("expected error, got nil") + } else if tt.errorContains != "" && !strings.Contains(err.Error(), tt.errorContains) { + t.Errorf("expected error containing %q, got %q", tt.errorContains, err.Error()) + } + } else { + if err != nil { + t.Errorf("unexpected error: %v", err) + } + } + }) + } +} + func TestVMwareGeneralPurposeBalancingStep_Run(t *testing.T) { scheme, err := v1alpha1.SchemeBuilder.Build() if err != nil { @@ -105,11 +196,11 @@ func TestVMwareGeneralPurposeBalancingStep_Run(t *testing.T) { step.Client = fake.NewClientBuilder(). WithScheme(scheme). WithObjects(&v1alpha1.Knowledge{ - ObjectMeta: v1.ObjectMeta{Name: "host-utilization"}, + ObjectMeta: metav1.ObjectMeta{Name: "host-utilization"}, Status: v1alpha1.KnowledgeStatus{Raw: hostUtilizations}, }). WithObjects(&v1alpha1.Knowledge{ - ObjectMeta: v1.ObjectMeta{Name: "host-capabilities"}, + ObjectMeta: metav1.ObjectMeta{Name: "host-capabilities"}, Status: v1alpha1.KnowledgeStatus{Raw: hostCapabilities}, }). Build() diff --git a/internal/scheduling/decisions/nova/plugins/weighers/vmware_hana_binpacking.go b/internal/scheduling/nova/plugins/weighers/vmware_hana_binpacking.go similarity index 85% rename from internal/scheduling/decisions/nova/plugins/weighers/vmware_hana_binpacking.go rename to internal/scheduling/nova/plugins/weighers/vmware_hana_binpacking.go index 962b639c7..704d66ba6 100644 --- a/internal/scheduling/decisions/nova/plugins/weighers/vmware_hana_binpacking.go +++ b/internal/scheduling/nova/plugins/weighers/vmware_hana_binpacking.go @@ -12,7 +12,8 @@ import ( api "github.com/cobaltcore-dev/cortex/api/delegation/nova" "github.com/cobaltcore-dev/cortex/api/v1alpha1" "github.com/cobaltcore-dev/cortex/internal/knowledge/extractor/plugins/compute" - scheduling "github.com/cobaltcore-dev/cortex/internal/scheduling/lib" + "github.com/cobaltcore-dev/cortex/internal/scheduling/lib" + corev1 "k8s.io/api/core/v1" "sigs.k8s.io/controller-runtime/pkg/client" ) @@ -35,12 +36,26 @@ func (o VMwareHanaBinpackingStepOpts) Validate() error { // Step to balance VMs on hosts based on the host's available resources. type VMwareHanaBinpackingStep struct { // BaseStep is a helper struct that provides common functionality for all steps. - scheduling.BaseStep[api.ExternalSchedulerRequest, VMwareHanaBinpackingStepOpts] + lib.BaseWeigher[api.ExternalSchedulerRequest, VMwareHanaBinpackingStepOpts] +} + +// Initialize the step and validate that all required knowledges are ready. +func (s *VMwareHanaBinpackingStep) Init(ctx context.Context, client client.Client, weigher v1alpha1.WeigherSpec) error { + if err := s.BaseWeigher.Init(ctx, client, weigher); err != nil { + return err + } + if err := s.CheckKnowledges(ctx, + corev1.ObjectReference{Name: "host-utilization"}, + corev1.ObjectReference{Name: "host-capabilities"}, + ); err != nil { + return err + } + return nil } // Pack VMs on hosts based on their flavor. 
-func (s *VMwareHanaBinpackingStep) Run(traceLog *slog.Logger, request api.ExternalSchedulerRequest) (*scheduling.StepResult, error) { - result := s.PrepareResult(request) +func (s *VMwareHanaBinpackingStep) Run(traceLog *slog.Logger, request api.ExternalSchedulerRequest) (*lib.FilterWeigherPipelineStepResult, error) { + result := s.IncludeAllHostsFromRequest(request) // Don't execute the step for non-hana flavors. if !strings.Contains(request.Spec.Data.Flavor.Data.Name, "hana") { slog.Debug("Skipping hana binpacking step for non-HANA flavor", "flavor", request.Spec.Data.Flavor.Data.Name) @@ -124,7 +139,7 @@ func (s *VMwareHanaBinpackingStep) Run(traceLog *slog.Logger, request api.Extern if after < s.Options.RAMUtilizedAfterLowerBoundPct || after > s.Options.RAMUtilizedAfterUpperBoundPct { result.Activations[hostUtilization.ComputeHost] = s.NoEffect() } else { - result.Activations[hostUtilization.ComputeHost] = scheduling.MinMaxScale( + result.Activations[hostUtilization.ComputeHost] = lib.MinMaxScale( after, s.Options.RAMUtilizedAfterLowerBoundPct, s.Options.RAMUtilizedAfterUpperBoundPct, diff --git a/internal/scheduling/decisions/nova/plugins/weighers/vmware_hana_binpacking_test.go b/internal/scheduling/nova/plugins/weighers/vmware_hana_binpacking_test.go similarity index 70% rename from internal/scheduling/decisions/nova/plugins/weighers/vmware_hana_binpacking_test.go rename to internal/scheduling/nova/plugins/weighers/vmware_hana_binpacking_test.go index 15066c044..72612a05b 100644 --- a/internal/scheduling/decisions/nova/plugins/weighers/vmware_hana_binpacking_test.go +++ b/internal/scheduling/nova/plugins/weighers/vmware_hana_binpacking_test.go @@ -4,13 +4,16 @@ package weighers import ( + "context" "log/slog" + "strings" "testing" api "github.com/cobaltcore-dev/cortex/api/delegation/nova" "github.com/cobaltcore-dev/cortex/api/v1alpha1" "github.com/cobaltcore-dev/cortex/internal/knowledge/extractor/plugins/compute" - v1 "k8s.io/apimachinery/pkg/apis/meta/v1" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/runtime" "sigs.k8s.io/controller-runtime/pkg/client/fake" ) @@ -48,6 +51,94 @@ func TestVMwareHanaBinpackingStepOpts_Validate(t *testing.T) { } } +func TestVMwareHanaBinpackingStep_Init(t *testing.T) { + scheme, err := v1alpha1.SchemeBuilder.Build() + if err != nil { + t.Fatalf("expected no error, got %v", err) + } + + validParams := runtime.RawExtension{ + Raw: []byte(`{ + "ramUtilizedAfterLowerBoundPct": 30.0, + "ramUtilizedAfterUpperBoundPct": 80.0, + "ramUtilizedAfterActivationLowerBound": 0.0, + "ramUtilizedAfterActivationUpperBound": 1.0 + }`), + } + + tests := []struct { + name string + knowledges []*v1alpha1.Knowledge + weigherSpec v1alpha1.WeigherSpec + wantError bool + errorContains string + }{ + { + name: "successful init with valid knowledges", + knowledges: []*v1alpha1.Knowledge{ + { + ObjectMeta: metav1.ObjectMeta{Name: "host-utilization"}, + Status: v1alpha1.KnowledgeStatus{ + Conditions: []metav1.Condition{ + {Type: v1alpha1.KnowledgeConditionReady, Status: metav1.ConditionTrue}, + }, + RawLength: 10, + }, + }, + { + ObjectMeta: metav1.ObjectMeta{Name: "host-capabilities"}, + Status: v1alpha1.KnowledgeStatus{ + Conditions: []metav1.Condition{ + {Type: v1alpha1.KnowledgeConditionReady, Status: metav1.ConditionTrue}, + }, + RawLength: 10, + }, + }, + }, + weigherSpec: v1alpha1.WeigherSpec{ + Name: "vmware_hana_binpacking", + Params: validParams, + }, + wantError: false, + }, + { + name: "fails when host-utilization knowledge doesn't exist", + 
knowledges: nil, + weigherSpec: v1alpha1.WeigherSpec{ + Name: "vmware_hana_binpacking", + Params: validParams, + }, + wantError: true, + errorContains: "failed to get knowledge", + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + builder := fake.NewClientBuilder().WithScheme(scheme) + for _, k := range tt.knowledges { + builder = builder.WithObjects(k) + } + client := builder.Build() + + step := &VMwareHanaBinpackingStep{} + err := step.Init(context.Background(), client, tt.weigherSpec) + + if tt.wantError { + if err == nil { + t.Error("expected error, got nil") + } else if tt.errorContains != "" && !strings.Contains(err.Error(), tt.errorContains) { + t.Errorf("expected error containing %q, got %q", tt.errorContains, err.Error()) + } + } else { + if err != nil { + t.Errorf("unexpected error: %v", err) + } + } + }) + } +} + func TestVMwareHanaBinpackingStep_Run(t *testing.T) { scheme, err := v1alpha1.SchemeBuilder.Build() if err != nil { @@ -102,11 +193,11 @@ func TestVMwareHanaBinpackingStep_Run(t *testing.T) { step.Client = fake.NewClientBuilder(). WithScheme(scheme). WithObjects(&v1alpha1.Knowledge{ - ObjectMeta: v1.ObjectMeta{Name: "host-utilization"}, + ObjectMeta: metav1.ObjectMeta{Name: "host-utilization"}, Status: v1alpha1.KnowledgeStatus{Raw: hostUtilizations}, }). WithObjects(&v1alpha1.Knowledge{ - ObjectMeta: v1.ObjectMeta{Name: "host-capabilities"}, + ObjectMeta: metav1.ObjectMeta{Name: "host-capabilities"}, Status: v1alpha1.KnowledgeStatus{Raw: hostCapabilities}, }). Build() diff --git a/internal/scheduling/nova/supported_detectors.go b/internal/scheduling/nova/supported_detectors.go new file mode 100644 index 000000000..680437962 --- /dev/null +++ b/internal/scheduling/nova/supported_detectors.go @@ -0,0 +1,16 @@ +// Copyright SAP SE +// SPDX-License-Identifier: Apache-2.0 + +package nova + +import ( + "github.com/cobaltcore-dev/cortex/internal/scheduling/lib" + "github.com/cobaltcore-dev/cortex/internal/scheduling/nova/plugins" + "github.com/cobaltcore-dev/cortex/internal/scheduling/nova/plugins/detectors" +) + +// Configuration of steps supported by the descheduler. +// The steps actually used by the scheduler are defined through the configuration file. +var supportedDetectors = map[string]lib.Detector[plugins.VMDetection]{ + "avoid_high_steal_pct": &detectors.AvoidHighStealPctStep{}, +} diff --git a/internal/scheduling/nova/supported_filters.go b/internal/scheduling/nova/supported_filters.go new file mode 100644 index 000000000..2b5554b1f --- /dev/null +++ b/internal/scheduling/nova/supported_filters.go @@ -0,0 +1,31 @@ +// Copyright SAP SE +// SPDX-License-Identifier: Apache-2.0 + +package nova + +import ( + api "github.com/cobaltcore-dev/cortex/api/delegation/nova" + "github.com/cobaltcore-dev/cortex/internal/scheduling/lib" + "github.com/cobaltcore-dev/cortex/internal/scheduling/nova/plugins/filters" +) + +type NovaFilter = lib.Filter[api.ExternalSchedulerRequest] + +// Configuration of filters supported by the nova scheduler. 
+var supportedFilters = map[string]func() NovaFilter{ + "filter_has_accelerators": func() NovaFilter { return &filters.FilterHasAcceleratorsStep{} }, + "filter_correct_az": func() NovaFilter { return &filters.FilterCorrectAZStep{} }, + "filter_status_conditions": func() NovaFilter { return &filters.FilterStatusConditionsStep{} }, + "filter_maintenance": func() NovaFilter { return &filters.FilterMaintenanceStep{} }, + "filter_packed_virtqueue": func() NovaFilter { return &filters.FilterPackedVirtqueueStep{} }, + "filter_external_customer": func() NovaFilter { return &filters.FilterExternalCustomerStep{} }, + "filter_allowed_projects": func() NovaFilter { return &filters.FilterAllowedProjectsStep{} }, + "filter_capabilities": func() NovaFilter { return &filters.FilterCapabilitiesStep{} }, + "filter_has_requested_traits": func() NovaFilter { return &filters.FilterHasRequestedTraits{} }, + "filter_has_enough_capacity": func() NovaFilter { return &filters.FilterHasEnoughCapacity{} }, + "filter_host_instructions": func() NovaFilter { return &filters.FilterHostInstructionsStep{} }, + "filter_instance_group_affinity": func() NovaFilter { return &filters.FilterInstanceGroupAffinityStep{} }, + "filter_instance_group_anti_affinity": func() NovaFilter { return &filters.FilterInstanceGroupAntiAffinityStep{} }, + "filter_live_migratable": func() NovaFilter { return &filters.FilterLiveMigratableStep{} }, + "filter_requested_destination": func() NovaFilter { return &filters.FilterRequestedDestinationStep{} }, +} diff --git a/internal/scheduling/nova/supported_weighers.go b/internal/scheduling/nova/supported_weighers.go new file mode 100644 index 000000000..11bfb28eb --- /dev/null +++ b/internal/scheduling/nova/supported_weighers.go @@ -0,0 +1,21 @@ +// Copyright SAP SE +// SPDX-License-Identifier: Apache-2.0 + +package nova + +import ( + api "github.com/cobaltcore-dev/cortex/api/delegation/nova" + "github.com/cobaltcore-dev/cortex/internal/scheduling/lib" + "github.com/cobaltcore-dev/cortex/internal/scheduling/nova/plugins/weighers" +) + +type NovaWeigher = lib.Weigher[api.ExternalSchedulerRequest] + +// Configuration of weighers supported by the nova scheduler. +var supportedWeighers = map[string]func() NovaWeigher{ + "vmware_anti_affinity_noisy_projects": func() NovaWeigher { return &weighers.VMwareAntiAffinityNoisyProjectsStep{} }, + "vmware_avoid_long_term_contended_hosts": func() NovaWeigher { return &weighers.VMwareAvoidLongTermContendedHostsStep{} }, + "vmware_avoid_short_term_contended_hosts": func() NovaWeigher { return &weighers.VMwareAvoidShortTermContendedHostsStep{} }, + "vmware_hana_binpacking": func() NovaWeigher { return &weighers.VMwareHanaBinpackingStep{} }, + "vmware_general_purpose_balancing": func() NovaWeigher { return &weighers.VMwareGeneralPurposeBalancingStep{} }, +} diff --git a/internal/scheduling/decisions/pods/pipeline_controller.go b/internal/scheduling/pods/filter_weigher_pipeline_controller.go similarity index 89% rename from internal/scheduling/decisions/pods/pipeline_controller.go rename to internal/scheduling/pods/filter_weigher_pipeline_controller.go index 63a143d2c..888fcabcc 100644 --- a/internal/scheduling/decisions/pods/pipeline_controller.go +++ b/internal/scheduling/pods/filter_weigher_pipeline_controller.go @@ -36,9 +36,9 @@ import ( // // Additionally, the controller watches for pipeline and step changes to // reconfigure the pipelines as needed. 
-type DecisionPipelineController struct { +type FilterWeigherPipelineController struct { // Toolbox shared between all pipeline controllers. - lib.BasePipelineController[lib.Pipeline[pods.PodPipelineRequest]] + lib.BasePipelineController[lib.FilterWeigherPipeline[pods.PodPipelineRequest]] // Mutex to only allow one process at a time processMu sync.Mutex @@ -46,15 +46,15 @@ type DecisionPipelineController struct { // Config for the scheduling operator. Conf conf.Config // Monitor to pass down to all pipelines. - Monitor lib.PipelineMonitor + Monitor lib.FilterWeigherPipelineMonitor } // The type of pipeline this controller manages. -func (c *DecisionPipelineController) PipelineType() v1alpha1.PipelineType { +func (c *FilterWeigherPipelineController) PipelineType() v1alpha1.PipelineType { return v1alpha1.PipelineTypeFilterWeigher } -func (c *DecisionPipelineController) Reconcile(ctx context.Context, req ctrl.Request) (ctrl.Result, error) { +func (c *FilterWeigherPipelineController) Reconcile(ctx context.Context, req ctrl.Request) (ctrl.Result, error) { c.processMu.Lock() defer c.processMu.Unlock() @@ -74,7 +74,7 @@ func (c *DecisionPipelineController) Reconcile(ctx context.Context, req ctrl.Req return ctrl.Result{}, nil } -func (c *DecisionPipelineController) ProcessNewPod(ctx context.Context, pod *corev1.Pod) error { +func (c *FilterWeigherPipelineController) ProcessNewPod(ctx context.Context, pod *corev1.Pod) error { c.processMu.Lock() defer c.processMu.Unlock() @@ -131,7 +131,7 @@ func (c *DecisionPipelineController) ProcessNewPod(ctx context.Context, pod *cor return err } -func (c *DecisionPipelineController) process(ctx context.Context, decision *v1alpha1.Decision) error { +func (c *FilterWeigherPipelineController) process(ctx context.Context, decision *v1alpha1.Decision) error { log := ctrl.LoggerFrom(ctx) startedAt := time.Now() // So we can measure sync duration. @@ -194,15 +194,20 @@ func (c *DecisionPipelineController) process(ctx context.Context, decision *v1al } // The base controller will delegate the pipeline creation down to this method. 
-func (c *DecisionPipelineController) InitPipeline( +func (c *FilterWeigherPipelineController) InitPipeline( ctx context.Context, p v1alpha1.Pipeline, -) (lib.Pipeline[pods.PodPipelineRequest], error) { +) lib.PipelineInitResult[lib.FilterWeigherPipeline[pods.PodPipelineRequest]] { - return lib.NewPipeline(ctx, c.Client, p.Name, supportedSteps, p.Spec.Steps, c.Monitor) + return lib.InitNewFilterWeigherPipeline( + ctx, c.Client, p.Name, + supportedFilters, p.Spec.Filters, + supportedWeighers, p.Spec.Weighers, + c.Monitor, + ) } -func (c *DecisionPipelineController) handlePod() handler.EventHandler { +func (c *FilterWeigherPipelineController) handlePod() handler.EventHandler { return handler.Funcs{ CreateFunc: func(ctx context.Context, evt event.CreateEvent, queue workqueue.TypedRateLimitingInterface[reconcile.Request]) { pod := evt.Object.(*corev1.Pod) @@ -242,7 +247,7 @@ func (c *DecisionPipelineController) handlePod() handler.EventHandler { } } -func (c *DecisionPipelineController) SetupWithManager(mgr manager.Manager, mcl *multicluster.Client) error { +func (c *FilterWeigherPipelineController) SetupWithManager(mgr manager.Manager, mcl *multicluster.Client) error { c.Initializer = c c.SchedulingDomain = v1alpha1.SchedulingDomainPods if err := mgr.Add(manager.RunnableFunc(c.InitAllPipelines)); err != nil { diff --git a/internal/scheduling/decisions/pods/pipeline_controller_test.go b/internal/scheduling/pods/filter_weigher_pipeline_controller_test.go similarity index 82% rename from internal/scheduling/decisions/pods/pipeline_controller_test.go rename to internal/scheduling/pods/filter_weigher_pipeline_controller_test.go index 0c57fd1e3..9e4fffed1 100644 --- a/internal/scheduling/decisions/pods/pipeline_controller_test.go +++ b/internal/scheduling/pods/filter_weigher_pipeline_controller_test.go @@ -20,7 +20,7 @@ import ( "sigs.k8s.io/controller-runtime/pkg/client/fake" ) -func TestDecisionPipelineController_Reconcile(t *testing.T) { +func TestFilterWeigherPipelineController_Reconcile(t *testing.T) { scheme := runtime.NewScheme() if err := v1alpha1.AddToScheme(scheme); err != nil { t.Fatalf("Failed to add scheduling scheme: %v", err) @@ -117,16 +117,16 @@ func TestDecisionPipelineController_Reconcile(t *testing.T) { WithStatusSubresource(&v1alpha1.Decision{}). 
Build() - controller := &DecisionPipelineController{ - BasePipelineController: lib.BasePipelineController[lib.Pipeline[pods.PodPipelineRequest]]{ - Pipelines: map[string]lib.Pipeline[pods.PodPipelineRequest]{ + controller := &FilterWeigherPipelineController{ + BasePipelineController: lib.BasePipelineController[lib.FilterWeigherPipeline[pods.PodPipelineRequest]]{ + Pipelines: map[string]lib.FilterWeigherPipeline[pods.PodPipelineRequest]{ "pods-scheduler": createMockPodPipeline(), }, }, Conf: conf.Config{ SchedulingDomain: v1alpha1.SchedulingDomainPods, }, - Monitor: lib.PipelineMonitor{}, + Monitor: lib.FilterWeigherPipelineMonitor{}, } controller.Client = client @@ -179,72 +179,77 @@ func TestDecisionPipelineController_Reconcile(t *testing.T) { } } -func TestDecisionPipelineController_InitPipeline(t *testing.T) { - controller := &DecisionPipelineController{ - Monitor: lib.PipelineMonitor{}, +func TestFilterWeigherPipelineController_InitPipeline(t *testing.T) { + controller := &FilterWeigherPipelineController{ + Monitor: lib.FilterWeigherPipelineMonitor{}, } tests := []struct { - name string - steps []v1alpha1.StepSpec - expectError bool + name string + filters []v1alpha1.FilterSpec + weighers []v1alpha1.WeigherSpec + expectNonCriticalError bool + expectCriticalError bool }{ { - name: "empty steps", - steps: []v1alpha1.StepSpec{}, - expectError: false, + name: "empty steps", + filters: []v1alpha1.FilterSpec{}, + weighers: []v1alpha1.WeigherSpec{}, + expectNonCriticalError: false, + expectCriticalError: false, }, { name: "noop step", - steps: []v1alpha1.StepSpec{ + filters: []v1alpha1.FilterSpec{ { - Impl: "noop", - Type: v1alpha1.StepTypeFilter, + Name: "noop", }, }, - expectError: false, + expectNonCriticalError: false, + expectCriticalError: false, }, { name: "unsupported step", - steps: []v1alpha1.StepSpec{ + filters: []v1alpha1.FilterSpec{ { - Impl: "unsupported", - Type: v1alpha1.StepTypeFilter, + Name: "unsupported", }, }, - expectError: true, + expectNonCriticalError: false, + expectCriticalError: true, }, } for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { - pipeline, err := controller.InitPipeline(t.Context(), v1alpha1.Pipeline{ + initResult := controller.InitPipeline(t.Context(), v1alpha1.Pipeline{ ObjectMeta: metav1.ObjectMeta{ Name: "test-pipeline", }, Spec: v1alpha1.PipelineSpec{ - Steps: tt.steps, + Filters: tt.filters, + Weighers: tt.weighers, }, }) - if tt.expectError && err == nil { - t.Error("expected error but got none") - return + if tt.expectCriticalError && len(initResult.FilterErrors) == 0 { + t.Error("expected critical error but got none") } - - if !tt.expectError && err != nil { - t.Errorf("expected no error, got: %v", err) - return + if !tt.expectCriticalError && len(initResult.FilterErrors) > 0 { + t.Errorf("unexpected critical error: %v", initResult.FilterErrors) } - if !tt.expectError && pipeline == nil { - t.Error("expected pipeline to be non-nil") + if tt.expectNonCriticalError && len(initResult.WeigherErrors) == 0 { + t.Error("expected non-critical error but got none") + } + if !tt.expectNonCriticalError && len(initResult.WeigherErrors) > 0 { + t.Errorf("unexpected non-critical error: %v", initResult.WeigherErrors) } }) } } -func TestDecisionPipelineController_ProcessNewPod(t *testing.T) { +func TestFilterWeigherPipelineController_ProcessNewPod(t *testing.T) { scheme := runtime.NewScheme() if err := v1alpha1.AddToScheme(scheme); err != nil { t.Fatalf("Failed to add scheduling scheme: %v", err) @@ -291,7 +296,8 @@ func 
TestDecisionPipelineController_ProcessNewPod(t *testing.T) { Type: v1alpha1.PipelineTypeFilterWeigher, SchedulingDomain: v1alpha1.SchedulingDomainPods, CreateDecisions: true, - Steps: []v1alpha1.StepSpec{}, + Filters: []v1alpha1.FilterSpec{}, + Weighers: []v1alpha1.WeigherSpec{}, }, }, createDecisions: true, @@ -324,7 +330,8 @@ func TestDecisionPipelineController_ProcessNewPod(t *testing.T) { Type: v1alpha1.PipelineTypeFilterWeigher, SchedulingDomain: v1alpha1.SchedulingDomainPods, CreateDecisions: false, - Steps: []v1alpha1.StepSpec{}, + Filters: []v1alpha1.FilterSpec{}, + Weighers: []v1alpha1.WeigherSpec{}, }, }, createDecisions: false, @@ -370,7 +377,8 @@ func TestDecisionPipelineController_ProcessNewPod(t *testing.T) { Type: v1alpha1.PipelineTypeFilterWeigher, SchedulingDomain: v1alpha1.SchedulingDomainPods, CreateDecisions: true, - Steps: []v1alpha1.StepSpec{}, + Filters: []v1alpha1.FilterSpec{}, + Weighers: []v1alpha1.WeigherSpec{}, }, }, createDecisions: true, @@ -396,15 +404,15 @@ func TestDecisionPipelineController_ProcessNewPod(t *testing.T) { WithStatusSubresource(&v1alpha1.Decision{}). Build() - controller := &DecisionPipelineController{ - BasePipelineController: lib.BasePipelineController[lib.Pipeline[pods.PodPipelineRequest]]{ - Pipelines: map[string]lib.Pipeline[pods.PodPipelineRequest]{}, + controller := &FilterWeigherPipelineController{ + BasePipelineController: lib.BasePipelineController[lib.FilterWeigherPipeline[pods.PodPipelineRequest]]{ + Pipelines: map[string]lib.FilterWeigherPipeline[pods.PodPipelineRequest]{}, PipelineConfigs: map[string]v1alpha1.Pipeline{}, }, Conf: conf.Config{ SchedulingDomain: v1alpha1.SchedulingDomainPods, }, - Monitor: lib.PipelineMonitor{}, + Monitor: lib.FilterWeigherPipelineMonitor{}, } controller.Client = client @@ -516,7 +524,7 @@ func TestDecisionPipelineController_ProcessNewPod(t *testing.T) { } // Helper function to create a mock pipeline that works with the pod types -func createMockPodPipeline() lib.Pipeline[pods.PodPipelineRequest] { +func createMockPodPipeline() lib.FilterWeigherPipeline[pods.PodPipelineRequest] { return &mockPodPipeline{} } diff --git a/internal/scheduling/decisions/pods/helpers/resources.go b/internal/scheduling/pods/helpers/resources.go similarity index 100% rename from internal/scheduling/decisions/pods/helpers/resources.go rename to internal/scheduling/pods/helpers/resources.go diff --git a/internal/scheduling/decisions/pods/helpers/resources_test.go b/internal/scheduling/pods/helpers/resources_test.go similarity index 100% rename from internal/scheduling/decisions/pods/helpers/resources_test.go rename to internal/scheduling/pods/helpers/resources_test.go diff --git a/internal/scheduling/decisions/pods/plugins/filters/filter_node_affinity.go b/internal/scheduling/pods/plugins/filters/filter_node_affinity.go similarity index 90% rename from internal/scheduling/decisions/pods/plugins/filters/filter_node_affinity.go rename to internal/scheduling/pods/plugins/filters/filter_node_affinity.go index 265bffa24..c378fe6c5 100644 --- a/internal/scheduling/decisions/pods/plugins/filters/filter_node_affinity.go +++ b/internal/scheduling/pods/plugins/filters/filter_node_affinity.go @@ -19,13 +19,13 @@ type NodeAffinityFilter struct { Alias string } -func (f *NodeAffinityFilter) Init(ctx context.Context, client client.Client, step v1alpha1.StepSpec) error { +func (f *NodeAffinityFilter) Init(ctx context.Context, client client.Client, step v1alpha1.FilterSpec) error { return nil } -func (NodeAffinityFilter) Run(traceLog 
*slog.Logger, request pods.PodPipelineRequest) (*lib.StepResult, error) { +func (NodeAffinityFilter) Run(traceLog *slog.Logger, request pods.PodPipelineRequest) (*lib.FilterWeigherPipelineStepResult, error) { activations := make(map[string]float64) - stats := make(map[string]lib.StepStatistics) + stats := make(map[string]lib.FilterWeigherPipelineStepStatistics) for _, node := range request.Nodes { if matchesNodeAffinity(node, request.Pod) { @@ -33,7 +33,7 @@ func (NodeAffinityFilter) Run(traceLog *slog.Logger, request pods.PodPipelineReq } } - return &lib.StepResult{Activations: activations, Statistics: stats}, nil + return &lib.FilterWeigherPipelineStepResult{Activations: activations, Statistics: stats}, nil } func matchesNodeAffinity(node corev1.Node, pod corev1.Pod) bool { diff --git a/internal/scheduling/decisions/pods/plugins/filters/filter_node_affinity_test.go b/internal/scheduling/pods/plugins/filters/filter_node_affinity_test.go similarity index 97% rename from internal/scheduling/decisions/pods/plugins/filters/filter_node_affinity_test.go rename to internal/scheduling/pods/plugins/filters/filter_node_affinity_test.go index f8f08da3a..93070523b 100644 --- a/internal/scheduling/decisions/pods/plugins/filters/filter_node_affinity_test.go +++ b/internal/scheduling/pods/plugins/filters/filter_node_affinity_test.go @@ -8,10 +8,30 @@ import ( "testing" "github.com/cobaltcore-dev/cortex/api/delegation/pods" + "github.com/cobaltcore-dev/cortex/api/v1alpha1" corev1 "k8s.io/api/core/v1" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/runtime" + "sigs.k8s.io/controller-runtime/pkg/client/fake" ) +func TestNodeAffinityFilter_Init(t *testing.T) { + filter := &NodeAffinityFilter{} + scheme := runtime.NewScheme() + cl := fake.NewClientBuilder().WithScheme(scheme).Build() + + err := filter.Init(t.Context(), cl, v1alpha1.FilterSpec{ + Name: "node-affinity", + Params: runtime.RawExtension{ + Raw: []byte(`{}`), + }, + }) + + if err != nil { + t.Errorf("expected no error, got %v", err) + } +} + func TestNodeAffinityFilter_Run(t *testing.T) { tests := []struct { name string diff --git a/internal/scheduling/decisions/pods/plugins/filters/filter_node_available.go b/internal/scheduling/pods/plugins/filters/filter_node_available.go similarity index 83% rename from internal/scheduling/decisions/pods/plugins/filters/filter_node_available.go rename to internal/scheduling/pods/plugins/filters/filter_node_available.go index 45ae98067..02aca3896 100644 --- a/internal/scheduling/decisions/pods/plugins/filters/filter_node_available.go +++ b/internal/scheduling/pods/plugins/filters/filter_node_available.go @@ -18,13 +18,13 @@ type NodeAvailableFilter struct { Alias string } -func (f *NodeAvailableFilter) Init(ctx context.Context, client client.Client, step v1alpha1.StepSpec) error { +func (f *NodeAvailableFilter) Init(ctx context.Context, client client.Client, step v1alpha1.FilterSpec) error { return nil } -func (NodeAvailableFilter) Run(traceLog *slog.Logger, request pods.PodPipelineRequest) (*lib.StepResult, error) { +func (NodeAvailableFilter) Run(traceLog *slog.Logger, request pods.PodPipelineRequest) (*lib.FilterWeigherPipelineStepResult, error) { activations := make(map[string]float64) - stats := make(map[string]lib.StepStatistics) + stats := make(map[string]lib.FilterWeigherPipelineStepStatistics) for _, node := range request.Nodes { if isNodeHealthy(node) && isNodeSchedulable(node) { @@ -32,7 +32,7 @@ func (NodeAvailableFilter) Run(traceLog *slog.Logger, request pods.PodPipelineRe 
} } - return &lib.StepResult{Activations: activations, Statistics: stats}, nil + return &lib.FilterWeigherPipelineStepResult{Activations: activations, Statistics: stats}, nil } func isNodeHealthy(node corev1.Node) bool { diff --git a/internal/scheduling/decisions/pods/plugins/filters/filter_node_available_test.go b/internal/scheduling/pods/plugins/filters/filter_node_available_test.go similarity index 95% rename from internal/scheduling/decisions/pods/plugins/filters/filter_node_available_test.go rename to internal/scheduling/pods/plugins/filters/filter_node_available_test.go index 3649da5de..0dba64bf0 100644 --- a/internal/scheduling/decisions/pods/plugins/filters/filter_node_available_test.go +++ b/internal/scheduling/pods/plugins/filters/filter_node_available_test.go @@ -8,10 +8,30 @@ import ( "testing" "github.com/cobaltcore-dev/cortex/api/delegation/pods" + "github.com/cobaltcore-dev/cortex/api/v1alpha1" corev1 "k8s.io/api/core/v1" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/runtime" + "sigs.k8s.io/controller-runtime/pkg/client/fake" ) +func TestNodeAvailableFilter_Init(t *testing.T) { + filter := &NodeAvailableFilter{} + scheme := runtime.NewScheme() + cl := fake.NewClientBuilder().WithScheme(scheme).Build() + + err := filter.Init(t.Context(), cl, v1alpha1.FilterSpec{ + Name: "node-available", + Params: runtime.RawExtension{ + Raw: []byte(`{}`), + }, + }) + + if err != nil { + t.Errorf("expected no error, got %v", err) + } +} + func TestNodeAvailableFilter_Run(t *testing.T) { tests := []struct { name string diff --git a/internal/scheduling/decisions/pods/plugins/filters/filter_node_capacity.go b/internal/scheduling/pods/plugins/filters/filter_node_capacity.go similarity index 76% rename from internal/scheduling/decisions/pods/plugins/filters/filter_node_capacity.go rename to internal/scheduling/pods/plugins/filters/filter_node_capacity.go index 44d185580..f148aaecf 100644 --- a/internal/scheduling/decisions/pods/plugins/filters/filter_node_capacity.go +++ b/internal/scheduling/pods/plugins/filters/filter_node_capacity.go @@ -9,8 +9,8 @@ import ( "github.com/cobaltcore-dev/cortex/api/delegation/pods" "github.com/cobaltcore-dev/cortex/api/v1alpha1" - "github.com/cobaltcore-dev/cortex/internal/scheduling/decisions/pods/helpers" "github.com/cobaltcore-dev/cortex/internal/scheduling/lib" + "github.com/cobaltcore-dev/cortex/internal/scheduling/pods/helpers" corev1 "k8s.io/api/core/v1" "sigs.k8s.io/controller-runtime/pkg/client" ) @@ -19,13 +19,13 @@ type NodeCapacityFilter struct { Alias string } -func (f *NodeCapacityFilter) Init(ctx context.Context, client client.Client, step v1alpha1.StepSpec) error { +func (f *NodeCapacityFilter) Init(ctx context.Context, client client.Client, step v1alpha1.FilterSpec) error { return nil } -func (NodeCapacityFilter) Run(traceLog *slog.Logger, request pods.PodPipelineRequest) (*lib.StepResult, error) { +func (NodeCapacityFilter) Run(traceLog *slog.Logger, request pods.PodPipelineRequest) (*lib.FilterWeigherPipelineStepResult, error) { activations := make(map[string]float64) - stats := make(map[string]lib.StepStatistics) + stats := make(map[string]lib.FilterWeigherPipelineStepStatistics) podRequests := helpers.GetPodResourceRequests(request.Pod) @@ -35,7 +35,7 @@ func (NodeCapacityFilter) Run(traceLog *slog.Logger, request pods.PodPipelineReq } } - return &lib.StepResult{Activations: activations, Statistics: stats}, nil + return &lib.FilterWeigherPipelineStepResult{Activations: activations, Statistics: stats}, nil } func 
hasCapacityForPod(node corev1.Node, podRequests corev1.ResourceList) bool { diff --git a/internal/scheduling/decisions/pods/plugins/filters/filter_node_capacity_test.go b/internal/scheduling/pods/plugins/filters/filter_node_capacity_test.go similarity index 96% rename from internal/scheduling/decisions/pods/plugins/filters/filter_node_capacity_test.go rename to internal/scheduling/pods/plugins/filters/filter_node_capacity_test.go index 790459deb..04f2d2085 100644 --- a/internal/scheduling/decisions/pods/plugins/filters/filter_node_capacity_test.go +++ b/internal/scheduling/pods/plugins/filters/filter_node_capacity_test.go @@ -8,11 +8,31 @@ import ( "testing" "github.com/cobaltcore-dev/cortex/api/delegation/pods" + "github.com/cobaltcore-dev/cortex/api/v1alpha1" corev1 "k8s.io/api/core/v1" "k8s.io/apimachinery/pkg/api/resource" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/runtime" + "sigs.k8s.io/controller-runtime/pkg/client/fake" ) +func TestNodeCapacityFilter_Init(t *testing.T) { + filter := &NodeCapacityFilter{} + scheme := runtime.NewScheme() + cl := fake.NewClientBuilder().WithScheme(scheme).Build() + + err := filter.Init(t.Context(), cl, v1alpha1.FilterSpec{ + Name: "node-capacity", + Params: runtime.RawExtension{ + Raw: []byte(`{}`), + }, + }) + + if err != nil { + t.Errorf("expected no error, got %v", err) + } +} + func TestNodeCapacityFilter_Run(t *testing.T) { tests := []struct { name string diff --git a/internal/scheduling/decisions/pods/plugins/filters/filter_noop.go b/internal/scheduling/pods/plugins/filters/filter_noop.go similarity index 80% rename from internal/scheduling/decisions/pods/plugins/filters/filter_noop.go rename to internal/scheduling/pods/plugins/filters/filter_noop.go index 3cd328a50..006c2d868 100644 --- a/internal/scheduling/decisions/pods/plugins/filters/filter_noop.go +++ b/internal/scheduling/pods/plugins/filters/filter_noop.go @@ -18,7 +18,7 @@ type NoopFilter struct { Alias string } -func (f *NoopFilter) Init(ctx context.Context, client client.Client, step v1alpha1.StepSpec) error { +func (f *NoopFilter) Init(ctx context.Context, client client.Client, step v1alpha1.FilterSpec) error { return nil } @@ -27,12 +27,12 @@ func (f *NoopFilter) Init(ctx context.Context, client client.Client, step v1alph // not in the map are considered as filtered out. // Provide a traceLog that contains the global request id and should // be used to log the step's execution. -func (NoopFilter) Run(traceLog *slog.Logger, request pods.PodPipelineRequest) (*lib.StepResult, error) { +func (NoopFilter) Run(traceLog *slog.Logger, request pods.PodPipelineRequest) (*lib.FilterWeigherPipelineStepResult, error) { activations := make(map[string]float64, len(request.Nodes)) - stats := make(map[string]lib.StepStatistics) + stats := make(map[string]lib.FilterWeigherPipelineStepStatistics) // Usually you would do some filtering here, or adjust the weights. 
for _, node := range request.Nodes { activations[node.Name] = 0.0 } - return &lib.StepResult{Activations: activations, Statistics: stats}, nil + return &lib.FilterWeigherPipelineStepResult{Activations: activations, Statistics: stats}, nil } diff --git a/internal/scheduling/decisions/pods/plugins/filters/filter_noop_test.go b/internal/scheduling/pods/plugins/filters/filter_noop_test.go similarity index 81% rename from internal/scheduling/decisions/pods/plugins/filters/filter_noop_test.go rename to internal/scheduling/pods/plugins/filters/filter_noop_test.go index e42ae9f23..de3396b56 100644 --- a/internal/scheduling/decisions/pods/plugins/filters/filter_noop_test.go +++ b/internal/scheduling/pods/plugins/filters/filter_noop_test.go @@ -8,10 +8,30 @@ import ( "testing" "github.com/cobaltcore-dev/cortex/api/delegation/pods" + "github.com/cobaltcore-dev/cortex/api/v1alpha1" corev1 "k8s.io/api/core/v1" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/runtime" + "sigs.k8s.io/controller-runtime/pkg/client/fake" ) +func TestNoopFilter_Init(t *testing.T) { + filter := &NoopFilter{} + scheme := runtime.NewScheme() + cl := fake.NewClientBuilder().WithScheme(scheme).Build() + + err := filter.Init(t.Context(), cl, v1alpha1.FilterSpec{ + Name: "noop", + Params: runtime.RawExtension{ + Raw: []byte(`{}`), + }, + }) + + if err != nil { + t.Errorf("expected no error, got %v", err) + } +} + func TestNoopFilter_Run(t *testing.T) { tests := []struct { name string diff --git a/internal/scheduling/decisions/pods/plugins/filters/filter_taint.go b/internal/scheduling/pods/plugins/filters/filter_taint.go similarity index 83% rename from internal/scheduling/decisions/pods/plugins/filters/filter_taint.go rename to internal/scheduling/pods/plugins/filters/filter_taint.go index 82135b161..d02af1849 100644 --- a/internal/scheduling/decisions/pods/plugins/filters/filter_taint.go +++ b/internal/scheduling/pods/plugins/filters/filter_taint.go @@ -18,13 +18,13 @@ type TaintFilter struct { Alias string } -func (f *TaintFilter) Init(ctx context.Context, client client.Client, step v1alpha1.StepSpec) error { +func (f *TaintFilter) Init(ctx context.Context, client client.Client, step v1alpha1.FilterSpec) error { return nil } -func (TaintFilter) Run(traceLog *slog.Logger, request pods.PodPipelineRequest) (*lib.StepResult, error) { +func (TaintFilter) Run(traceLog *slog.Logger, request pods.PodPipelineRequest) (*lib.FilterWeigherPipelineStepResult, error) { activations := make(map[string]float64) - stats := make(map[string]lib.StepStatistics) + stats := make(map[string]lib.FilterWeigherPipelineStepStatistics) for _, node := range request.Nodes { if canScheduleOnNode(node, request.Pod) { @@ -32,7 +32,7 @@ func (TaintFilter) Run(traceLog *slog.Logger, request pods.PodPipelineRequest) ( } } - return &lib.StepResult{Activations: activations, Statistics: stats}, nil + return &lib.FilterWeigherPipelineStepResult{Activations: activations, Statistics: stats}, nil } func canScheduleOnNode(node corev1.Node, pod corev1.Pod) bool { diff --git a/internal/scheduling/decisions/pods/plugins/filters/filter_taint_test.go b/internal/scheduling/pods/plugins/filters/filter_taint_test.go similarity index 94% rename from internal/scheduling/decisions/pods/plugins/filters/filter_taint_test.go rename to internal/scheduling/pods/plugins/filters/filter_taint_test.go index 605b96114..97d5b2323 100644 --- a/internal/scheduling/decisions/pods/plugins/filters/filter_taint_test.go +++ 
b/internal/scheduling/pods/plugins/filters/filter_taint_test.go @@ -8,10 +8,30 @@ import ( "testing" "github.com/cobaltcore-dev/cortex/api/delegation/pods" + "github.com/cobaltcore-dev/cortex/api/v1alpha1" corev1 "k8s.io/api/core/v1" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/runtime" + "sigs.k8s.io/controller-runtime/pkg/client/fake" ) +func TestTaintFilter_Init(t *testing.T) { + filter := &TaintFilter{} + scheme := runtime.NewScheme() + cl := fake.NewClientBuilder().WithScheme(scheme).Build() + + err := filter.Init(t.Context(), cl, v1alpha1.FilterSpec{ + Name: "taint", + Params: runtime.RawExtension{ + Raw: []byte(`{}`), + }, + }) + + if err != nil { + t.Errorf("expected no error, got %v", err) + } +} + func TestTaintFilter_Run(t *testing.T) { tests := []struct { name string diff --git a/internal/scheduling/decisions/pods/plugins/weighers/binpack.go b/internal/scheduling/pods/plugins/weighers/binpack.go similarity index 85% rename from internal/scheduling/decisions/pods/plugins/weighers/binpack.go rename to internal/scheduling/pods/plugins/weighers/binpack.go index 80ae3b7d8..62a345694 100644 --- a/internal/scheduling/decisions/pods/plugins/weighers/binpack.go +++ b/internal/scheduling/pods/plugins/weighers/binpack.go @@ -9,8 +9,8 @@ import ( "math" api "github.com/cobaltcore-dev/cortex/api/delegation/pods" - "github.com/cobaltcore-dev/cortex/internal/scheduling/decisions/pods/helpers" - scheduling "github.com/cobaltcore-dev/cortex/internal/scheduling/lib" + "github.com/cobaltcore-dev/cortex/internal/scheduling/lib" + "github.com/cobaltcore-dev/cortex/internal/scheduling/pods/helpers" corev1 "k8s.io/api/core/v1" ) @@ -28,11 +28,11 @@ func (o BinpackingStepOpts) Validate() error { } type BinpackingStep struct { - scheduling.BaseStep[api.PodPipelineRequest, BinpackingStepOpts] + lib.BaseWeigher[api.PodPipelineRequest, BinpackingStepOpts] } -func (s *BinpackingStep) Run(traceLog *slog.Logger, request api.PodPipelineRequest) (*scheduling.StepResult, error) { - result := s.PrepareResult(request) +func (s *BinpackingStep) Run(traceLog *slog.Logger, request api.PodPipelineRequest) (*lib.FilterWeigherPipelineStepResult, error) { + result := s.IncludeAllHostsFromRequest(request) podResources := helpers.GetPodResourceRequests(request.Pod) diff --git a/internal/scheduling/decisions/pods/plugins/weighers/binpack_test.go b/internal/scheduling/pods/plugins/weighers/binpack_test.go similarity index 82% rename from internal/scheduling/decisions/pods/plugins/weighers/binpack_test.go rename to internal/scheduling/pods/plugins/weighers/binpack_test.go index 7f82be8c7..7d7eea9b5 100644 --- a/internal/scheduling/decisions/pods/plugins/weighers/binpack_test.go +++ b/internal/scheduling/pods/plugins/weighers/binpack_test.go @@ -14,6 +14,64 @@ import ( metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" ) +func TestBinpackingStepOpts_Validate(t *testing.T) { + tests := []struct { + name string + opts BinpackingStepOpts + expectError bool + }{ + { + name: "valid options with positive weights", + opts: BinpackingStepOpts{ + ResourceWeights: map[corev1.ResourceName]float64{ + corev1.ResourceCPU: 2.0, + corev1.ResourceMemory: 1.0, + }, + }, + expectError: false, + }, + { + name: "valid options with zero weights", + opts: BinpackingStepOpts{ + ResourceWeights: map[corev1.ResourceName]float64{ + corev1.ResourceCPU: 0.0, + corev1.ResourceMemory: 0.0, + }, + }, + expectError: false, + }, + { + name: "valid options with empty weights", + opts: BinpackingStepOpts{ + ResourceWeights: 
map[corev1.ResourceName]float64{}, + }, + expectError: false, + }, + { + name: "invalid options with negative weight", + opts: BinpackingStepOpts{ + ResourceWeights: map[corev1.ResourceName]float64{ + corev1.ResourceCPU: -1.0, + corev1.ResourceMemory: 1.0, + }, + }, + expectError: true, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + err := tt.opts.Validate() + if tt.expectError && err == nil { + t.Error("expected error, got nil") + } + if !tt.expectError && err != nil { + t.Errorf("expected no error, got %v", err) + } + }) + } +} + func TestBinpackingStep_Run(t *testing.T) { tests := []struct { name string diff --git a/internal/scheduling/pods/supported_filters.go b/internal/scheduling/pods/supported_filters.go new file mode 100644 index 000000000..4c39652d3 --- /dev/null +++ b/internal/scheduling/pods/supported_filters.go @@ -0,0 +1,20 @@ +// Copyright SAP SE +// SPDX-License-Identifier: Apache-2.0 + +package pods + +import ( + "github.com/cobaltcore-dev/cortex/api/delegation/pods" + "github.com/cobaltcore-dev/cortex/internal/scheduling/lib" + "github.com/cobaltcore-dev/cortex/internal/scheduling/pods/plugins/filters" +) + +type PodFilter = lib.Filter[pods.PodPipelineRequest] + +// Configuration of filters supported by the pods scheduler. +var supportedFilters = map[string]func() PodFilter{ + "noop": func() PodFilter { return &filters.NoopFilter{} }, + "taint": func() PodFilter { return &filters.TaintFilter{} }, + "nodeaffinity": func() PodFilter { return &filters.NodeAffinityFilter{} }, + "nodecapacity": func() PodFilter { return &filters.NodeCapacityFilter{} }, +} diff --git a/internal/scheduling/pods/supported_weighers.go b/internal/scheduling/pods/supported_weighers.go new file mode 100644 index 000000000..7fae5b073 --- /dev/null +++ b/internal/scheduling/pods/supported_weighers.go @@ -0,0 +1,17 @@ +// Copyright SAP SE +// SPDX-License-Identifier: Apache-2.0 + +package pods + +import ( + "github.com/cobaltcore-dev/cortex/api/delegation/pods" + "github.com/cobaltcore-dev/cortex/internal/scheduling/lib" + "github.com/cobaltcore-dev/cortex/internal/scheduling/pods/plugins/weighers" +) + +type PodWeigher = lib.Weigher[pods.PodPipelineRequest] + +// Configuration of weighers supported by the pods scheduler. 
+var supportedWeighers = map[string]func() PodWeigher{ + "binpack": func() PodWeigher { return &weighers.BinpackingStep{} }, +} diff --git a/tools/plutono/provisioning/dashboards/cortex-status.json b/tools/plutono/provisioning/dashboards/cortex-status.json index fee633efc..efb4b4588 100644 --- a/tools/plutono/provisioning/dashboards/cortex-status.json +++ b/tools/plutono/provisioning/dashboards/cortex-status.json @@ -571,7 +571,7 @@ "targets": [ { "exemplar": false, - "expr": "sum(delta(cortex_scheduler_pipeline_step_shift_origin_bucket{outidx=\"0\",pipeline=~\"nova-external-scheduler-.*\"}[2m]) / 2) by (le)", + "expr": "sum(delta(cortex_filter_weigher_pipeline_step_shift_origin_bucket{outidx=\"0\",pipeline=~\"nova-external-scheduler-.*\"}[2m]) / 2) by (le)", "format": "heatmap", "instant": false, "interval": "", @@ -647,7 +647,7 @@ "targets": [ { "exemplar": false, - "expr": "sum(delta(cortex_scheduler_pipeline_step_shift_origin_bucket{outidx=\"0\",pipeline=\"manila-external-scheduler\"}[2m]) / 2) by (le)", + "expr": "sum(delta(cortex_filter_weigher_pipeline_step_shift_origin_bucket{outidx=\"0\",pipeline=\"manila-external-scheduler\"}[2m]) / 2) by (le)", "format": "heatmap", "instant": false, "interval": "", @@ -733,7 +733,7 @@ "targets": [ { "exemplar": true, - "expr": "sum by (pipeline, subject, step, alias) (delta(cortex_scheduler_pipeline_step_weight_modification[2m]))", + "expr": "sum by (pipeline, subject, step, alias) (delta(cortex_filter_weigher_pipeline_step_weight_modification[2m]))", "format": "time_series", "instant": false, "interval": "", @@ -1265,7 +1265,7 @@ "targets": [ { "exemplar": true, - "expr": "histogram_quantile(0.95, sum(rate(cortex_scheduler_pipeline_step_run_duration_seconds_bucket[2m])) by (le, step, alias, pipeline))", + "expr": "histogram_quantile(0.95, sum(rate(cortex_filter_weigher_pipeline_step_run_duration_seconds_bucket[2m])) by (le, step, alias, pipeline))", "interval": "", "legendFormat": "{{pipeline}} {{step}}", "refId": "A" @@ -1372,7 +1372,7 @@ "targets": [ { "exemplar": true, - "expr": "sum by(pipeline) (rate(cortex_scheduler_pipeline_requests_total{}[2m]))", + "expr": "sum by(pipeline) (rate(cortex_filter_weigher_pipeline_requests_total{}[2m]))", "interval": "", "legendFormat": "Pipeline: {{pipeline}}", "refId": "A" @@ -2222,7 +2222,7 @@ "targets": [ { "exemplar": true, - "expr": "histogram_quantile(0.95, sum(rate(cortex_descheduler_pipeline_step_run_duration_seconds_bucket[2m])) by (le, step))", + "expr": "histogram_quantile(0.95, sum(rate(cortex_detector_pipeline_step_run_duration_seconds_bucket[2m])) by (le, step))", "interval": "", "legendFormat": "{{step}}", "refId": "A"
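For reference, the sketch below shows how an additional pod weigher could plug into the registries introduced in this change. It is a minimal illustration only, assuming the interfaces visible in this diff behave as they do for the binpack weigher (lib.BaseWeigher, IncludeAllHostsFromRequest, FilterWeigherPipelineStepResult, and an Options field populated from WeigherSpec.Params); the "spread_taints" step, SpreadTaintsStepOpts, and the penaltyPerTaint parameter are hypothetical and not part of this change.

// Copyright SAP SE
// SPDX-License-Identifier: Apache-2.0

package weighers

import (
	"log/slog"

	api "github.com/cobaltcore-dev/cortex/api/delegation/pods"
	"github.com/cobaltcore-dev/cortex/internal/scheduling/lib"
)

// Hypothetical options for this sketch; assumed to be decoded from
// WeigherSpec.Params by the base weigher, like the other step options above.
type SpreadTaintsStepOpts struct {
	// Activation subtracted per taint present on a node (assumed parameter).
	PenaltyPerTaint float64 `json:"penaltyPerTaint"`
}

func (o SpreadTaintsStepOpts) Validate() error {
	return nil
}

// Hypothetical weigher that downvotes nodes carrying many taints.
type SpreadTaintsStep struct {
	lib.BaseWeigher[api.PodPipelineRequest, SpreadTaintsStepOpts]
}

func (s *SpreadTaintsStep) Run(traceLog *slog.Logger, request api.PodPipelineRequest) (*lib.FilterWeigherPipelineStepResult, error) {
	// Start from a neutral activation for every node in the request.
	result := s.IncludeAllHostsFromRequest(request)
	for _, node := range request.Nodes {
		// Subtract the configured penalty once per taint on the node.
		result.Activations[node.Name] = -float64(len(node.Spec.Taints)) * s.Options.PenaltyPerTaint
	}
	return result, nil
}

If such a step existed, it would be registered the same way as the binpack weigher, e.g. by adding "spread_taints": func() PodWeigher { return &weighers.SpreadTaintsStep{} } to supported_weighers.go, and then referenced by name from a Pipeline resource's weighers list.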