diff --git a/api/bases/dataplane.openstack.org_openstackdataplanedeployments.yaml b/api/bases/dataplane.openstack.org_openstackdataplanedeployments.yaml index b5f3c3a4e1..a655cf08ce 100644 --- a/api/bases/dataplane.openstack.org_openstackdataplanedeployments.yaml +++ b/api/bases/dataplane.openstack.org_openstackdataplanedeployments.yaml @@ -96,6 +96,12 @@ spec: description: Time before the deployment is requeued in seconds minimum: 1 type: integer + fallbackToListOrder: + default: true + description: |- + FallbackToListOrder preserves service list order for services + without explicit dependsOn. Defaults to true. + type: boolean nodeSets: description: NodeSets is the list of NodeSets deployed items: diff --git a/api/bases/dataplane.openstack.org_openstackdataplaneservices.yaml b/api/bases/dataplane.openstack.org_openstackdataplaneservices.yaml index 917bae442f..66704d0171 100644 --- a/api/bases/dataplane.openstack.org_openstackdataplaneservices.yaml +++ b/api/bases/dataplane.openstack.org_openstackdataplaneservices.yaml @@ -116,6 +116,14 @@ spec: x-kubernetes-map-type: atomic type: object type: array + dependsOn: + description: |- + DependsOn is a list of EDPMServiceType or CR name values that must + complete before this service runs. If empty, the service depends on + the preceding entry in the NodeSet service list. + items: + type: string + type: array deployOnAllNodeSets: description: |- DeployOnAllNodeSets - should the service be deploy across all nodesets diff --git a/api/dataplane/v1beta1/openstackdataplanedeployment_types.go b/api/dataplane/v1beta1/openstackdataplanedeployment_types.go index 9c87d7e2fa..6e3eda579a 100644 --- a/api/dataplane/v1beta1/openstackdataplanedeployment_types.go +++ b/api/dataplane/v1beta1/openstackdataplanedeployment_types.go @@ -78,6 +78,13 @@ type OpenStackDataPlaneDeploymentSpec struct { // variables to inject into the Ansible Execution Environment pod. // If not specified, defaults to "openstack-aee-default-env". AnsibleEEEnvConfigMapName string `json:"ansibleEEEnvConfigMapName,omitempty"` + + // FallbackToListOrder preserves service list order for services + // without explicit dependsOn. Defaults to true. + // +kubebuilder:validation:Optional + // +kubebuilder:default=true + // +operator-sdk:csv:customresourcedefinitions:type=spec,xDescriptors={"urn:alm:descriptor:com.tectonic.ui:booleanSwitch"} + FallbackToListOrder *bool `json:"fallbackToListOrder"` } // AnsibleExecutionSummary captures the final ansible-runner execution result diff --git a/api/dataplane/v1beta1/openstackdataplaneservice_types.go b/api/dataplane/v1beta1/openstackdataplaneservice_types.go index d613f4fab6..1ae3b9d242 100644 --- a/api/dataplane/v1beta1/openstackdataplaneservice_types.go +++ b/api/dataplane/v1beta1/openstackdataplaneservice_types.go @@ -117,6 +117,12 @@ type OpenStackDataPlaneServiceSpec struct { // to manage the service. If not set, will default to the // OpenStackDataPlaneService name. EDPMServiceType string `json:"edpmServiceType,omitempty" yaml:"edpmServiceType,omitempty"` + + // DependsOn is a list of EDPMServiceType or CR name values that must + // complete before this service runs. If empty, the service depends on + // the preceding entry in the NodeSet service list. + // +kubebuilder:validation:Optional + DependsOn []string `json:"dependsOn,omitempty" yaml:"dependsOn,omitempty"` } // OpenStackDataPlaneServiceStatus defines the observed state of OpenStackDataPlaneService diff --git a/api/dataplane/v1beta1/zz_generated.deepcopy.go b/api/dataplane/v1beta1/zz_generated.deepcopy.go index 476cd1ce1f..b83ec685ef 100644 --- a/api/dataplane/v1beta1/zz_generated.deepcopy.go +++ b/api/dataplane/v1beta1/zz_generated.deepcopy.go @@ -418,6 +418,11 @@ func (in *OpenStackDataPlaneDeploymentSpec) DeepCopyInto(out *OpenStackDataPlane (*out)[key] = val } } + if in.FallbackToListOrder != nil { + in, out := &in.FallbackToListOrder, &out.FallbackToListOrder + *out = new(bool) + **out = **in + } } // DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new OpenStackDataPlaneDeploymentSpec. @@ -829,6 +834,11 @@ func (in *OpenStackDataPlaneServiceSpec) DeepCopyInto(out *OpenStackDataPlaneSer *out = make([]string, len(*in)) copy(*out, *in) } + if in.DependsOn != nil { + in, out := &in.DependsOn, &out.DependsOn + *out = make([]string, len(*in)) + copy(*out, *in) + } } // DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new OpenStackDataPlaneServiceSpec. diff --git a/bindata/crds/crds.yaml b/bindata/crds/crds.yaml index b532c4a914..eda60f938d 100644 --- a/bindata/crds/crds.yaml +++ b/bindata/crds/crds.yaml @@ -19035,6 +19035,12 @@ spec: description: Time before the deployment is requeued in seconds minimum: 1 type: integer + fallbackToListOrder: + default: true + description: |- + FallbackToListOrder preserves service list order for services + without explicit dependsOn. Defaults to true. + type: boolean nodeSets: description: NodeSets is the list of NodeSets deployed items: @@ -21399,6 +21405,14 @@ spec: x-kubernetes-map-type: atomic type: object type: array + dependsOn: + description: |- + DependsOn is a list of EDPMServiceType or CR name values that must + complete before this service runs. If empty, the service depends on + the preceding entry in the NodeSet service list. + items: + type: string + type: array deployOnAllNodeSets: description: |- DeployOnAllNodeSets - should the service be deploy across all nodesets diff --git a/config/crd/bases/dataplane.openstack.org_openstackdataplanedeployments.yaml b/config/crd/bases/dataplane.openstack.org_openstackdataplanedeployments.yaml index b5f3c3a4e1..a655cf08ce 100644 --- a/config/crd/bases/dataplane.openstack.org_openstackdataplanedeployments.yaml +++ b/config/crd/bases/dataplane.openstack.org_openstackdataplanedeployments.yaml @@ -96,6 +96,12 @@ spec: description: Time before the deployment is requeued in seconds minimum: 1 type: integer + fallbackToListOrder: + default: true + description: |- + FallbackToListOrder preserves service list order for services + without explicit dependsOn. Defaults to true. + type: boolean nodeSets: description: NodeSets is the list of NodeSets deployed items: diff --git a/config/crd/bases/dataplane.openstack.org_openstackdataplaneservices.yaml b/config/crd/bases/dataplane.openstack.org_openstackdataplaneservices.yaml index 917bae442f..66704d0171 100644 --- a/config/crd/bases/dataplane.openstack.org_openstackdataplaneservices.yaml +++ b/config/crd/bases/dataplane.openstack.org_openstackdataplaneservices.yaml @@ -116,6 +116,14 @@ spec: x-kubernetes-map-type: atomic type: object type: array + dependsOn: + description: |- + DependsOn is a list of EDPMServiceType or CR name values that must + complete before this service runs. If empty, the service depends on + the preceding entry in the NodeSet service list. + items: + type: string + type: array deployOnAllNodeSets: description: |- DeployOnAllNodeSets - should the service be deploy across all nodesets diff --git a/config/services/dataplane_v1beta1_openstackdataplaneservice_bootstrap.yaml b/config/services/dataplane_v1beta1_openstackdataplaneservice_bootstrap.yaml index d9cec33bb0..022eea874c 100644 --- a/config/services/dataplane_v1beta1_openstackdataplaneservice_bootstrap.yaml +++ b/config/services/dataplane_v1beta1_openstackdataplaneservice_bootstrap.yaml @@ -6,3 +6,6 @@ spec: playbook: osp.edpm.bootstrap edpmServiceType: bootstrap caCerts: combined-ca-bundle + dependsOn: + - redhat + - download-cache diff --git a/config/services/dataplane_v1beta1_openstackdataplaneservice_configure_network.yaml b/config/services/dataplane_v1beta1_openstackdataplaneservice_configure_network.yaml index 11bfe1e86a..1811a2ac54 100644 --- a/config/services/dataplane_v1beta1_openstackdataplaneservice_configure_network.yaml +++ b/config/services/dataplane_v1beta1_openstackdataplaneservice_configure_network.yaml @@ -5,3 +5,5 @@ metadata: spec: playbook: osp.edpm.configure_network edpmServiceType: configure-network + dependsOn: + - bootstrap diff --git a/config/services/dataplane_v1beta1_openstackdataplaneservice_configure_os.yaml b/config/services/dataplane_v1beta1_openstackdataplaneservice_configure_os.yaml index 6e05c430c9..df8f96e471 100644 --- a/config/services/dataplane_v1beta1_openstackdataplaneservice_configure_os.yaml +++ b/config/services/dataplane_v1beta1_openstackdataplaneservice_configure_os.yaml @@ -5,3 +5,5 @@ metadata: spec: playbook: osp.edpm.configure_os edpmServiceType: configure-os + dependsOn: + - install-os diff --git a/config/services/dataplane_v1beta1_openstackdataplaneservice_download_cache.yaml b/config/services/dataplane_v1beta1_openstackdataplaneservice_download_cache.yaml index 14c0bb630e..5fd7193c5b 100644 --- a/config/services/dataplane_v1beta1_openstackdataplaneservice_download_cache.yaml +++ b/config/services/dataplane_v1beta1_openstackdataplaneservice_download_cache.yaml @@ -5,3 +5,5 @@ metadata: spec: playbook: osp.edpm.download_cache edpmServiceType: download-cache + dependsOn: + - redhat diff --git a/config/services/dataplane_v1beta1_openstackdataplaneservice_frr.yaml b/config/services/dataplane_v1beta1_openstackdataplaneservice_frr.yaml index c6ac1a5031..f094145303 100644 --- a/config/services/dataplane_v1beta1_openstackdataplaneservice_frr.yaml +++ b/config/services/dataplane_v1beta1_openstackdataplaneservice_frr.yaml @@ -4,6 +4,9 @@ metadata: name: frr spec: playbook: osp.edpm.frr + dependsOn: + - run-os + - validate-network containerImageFields: - EdpmFrrImage edpmServiceType: frr diff --git a/config/services/dataplane_v1beta1_openstackdataplaneservice_install_certs.yaml b/config/services/dataplane_v1beta1_openstackdataplaneservice_install_certs.yaml index e9efab000d..7481170ffb 100644 --- a/config/services/dataplane_v1beta1_openstackdataplaneservice_install_certs.yaml +++ b/config/services/dataplane_v1beta1_openstackdataplaneservice_install_certs.yaml @@ -6,3 +6,5 @@ spec: playbook: osp.edpm.install_certs addCertMounts: True edpmServiceType: install-certs + dependsOn: + - bootstrap diff --git a/config/services/dataplane_v1beta1_openstackdataplaneservice_install_os.yaml b/config/services/dataplane_v1beta1_openstackdataplaneservice_install_os.yaml index e7f92632fc..63dbe2347e 100644 --- a/config/services/dataplane_v1beta1_openstackdataplaneservice_install_os.yaml +++ b/config/services/dataplane_v1beta1_openstackdataplaneservice_install_os.yaml @@ -5,3 +5,5 @@ metadata: spec: playbook: osp.edpm.install_os edpmServiceType: install-os + dependsOn: + - bootstrap diff --git a/config/services/dataplane_v1beta1_openstackdataplaneservice_libvirt.yaml b/config/services/dataplane_v1beta1_openstackdataplaneservice_libvirt.yaml index 1878e0e088..1716b492f2 100644 --- a/config/services/dataplane_v1beta1_openstackdataplaneservice_libvirt.yaml +++ b/config/services/dataplane_v1beta1_openstackdataplaneservice_libvirt.yaml @@ -4,6 +4,10 @@ metadata: name: libvirt spec: playbook: osp.edpm.libvirt + dependsOn: + - run-os + - validate-network + - install-certs dataSources: # NOTE: this Secret needs to be created before deploying the data plane. # It should contain the libvirt sasl auth password using the key LibvirtPassword diff --git a/config/services/dataplane_v1beta1_openstackdataplaneservice_logging.yaml b/config/services/dataplane_v1beta1_openstackdataplaneservice_logging.yaml index 9f6f23aac7..50a8bb66d5 100644 --- a/config/services/dataplane_v1beta1_openstackdataplaneservice_logging.yaml +++ b/config/services/dataplane_v1beta1_openstackdataplaneservice_logging.yaml @@ -3,6 +3,9 @@ kind: OpenStackDataPlaneService metadata: name: logging spec: + dependsOn: + - run-os + - validate-network dataSources: - secretRef: name: logging-compute-config-data diff --git a/config/services/dataplane_v1beta1_openstackdataplaneservice_neutron_dhcp.yaml b/config/services/dataplane_v1beta1_openstackdataplaneservice_neutron_dhcp.yaml index 9c4b1d0091..5170b3cdc2 100644 --- a/config/services/dataplane_v1beta1_openstackdataplaneservice_neutron_dhcp.yaml +++ b/config/services/dataplane_v1beta1_openstackdataplaneservice_neutron_dhcp.yaml @@ -4,6 +4,9 @@ metadata: name: neutron-dhcp spec: playbook: osp.edpm.neutron_dhcp + dependsOn: + - run-os + - validate-network dataSources: - secretRef: name: neutron-dhcp-agent-neutron-config diff --git a/config/services/dataplane_v1beta1_openstackdataplaneservice_neutron_metadata.yaml b/config/services/dataplane_v1beta1_openstackdataplaneservice_neutron_metadata.yaml index 8a2aee102c..f90862dd6a 100644 --- a/config/services/dataplane_v1beta1_openstackdataplaneservice_neutron_metadata.yaml +++ b/config/services/dataplane_v1beta1_openstackdataplaneservice_neutron_metadata.yaml @@ -4,6 +4,8 @@ metadata: name: neutron-metadata spec: playbook: osp.edpm.neutron_metadata + dependsOn: + - ovn dataSources: - secretRef: name: neutron-ovn-metadata-agent-neutron-config diff --git a/config/services/dataplane_v1beta1_openstackdataplaneservice_neutron_ovn.yaml b/config/services/dataplane_v1beta1_openstackdataplaneservice_neutron_ovn.yaml index 5b570a34bb..305dd3d345 100644 --- a/config/services/dataplane_v1beta1_openstackdataplaneservice_neutron_ovn.yaml +++ b/config/services/dataplane_v1beta1_openstackdataplaneservice_neutron_ovn.yaml @@ -4,6 +4,8 @@ metadata: name: neutron-ovn spec: playbook: osp.edpm.neutron_ovn + dependsOn: + - ovn dataSources: - secretRef: name: neutron-ovn-agent-neutron-config diff --git a/config/services/dataplane_v1beta1_openstackdataplaneservice_neutron_sriov.yaml b/config/services/dataplane_v1beta1_openstackdataplaneservice_neutron_sriov.yaml index af37cc7995..2518a371eb 100644 --- a/config/services/dataplane_v1beta1_openstackdataplaneservice_neutron_sriov.yaml +++ b/config/services/dataplane_v1beta1_openstackdataplaneservice_neutron_sriov.yaml @@ -4,6 +4,9 @@ metadata: name: neutron-sriov spec: playbook: osp.edpm.neutron_sriov + dependsOn: + - run-os + - validate-network dataSources: - secretRef: name: neutron-sriov-agent-neutron-config diff --git a/config/services/dataplane_v1beta1_openstackdataplaneservice_nova.yaml b/config/services/dataplane_v1beta1_openstackdataplaneservice_nova.yaml index cbb029fa40..06794c380d 100644 --- a/config/services/dataplane_v1beta1_openstackdataplaneservice_nova.yaml +++ b/config/services/dataplane_v1beta1_openstackdataplaneservice_nova.yaml @@ -3,6 +3,13 @@ kind: OpenStackDataPlaneService metadata: name: nova spec: + dependsOn: + - libvirt + - ssh-known-hosts + - neutron-ovn + - neutron-sriov + - neutron-dhcp + - neutron-metadata dataSources: - secretRef: name: nova-cell1-compute-config diff --git a/config/services/dataplane_v1beta1_openstackdataplaneservice_os_reboot.yaml b/config/services/dataplane_v1beta1_openstackdataplaneservice_os_reboot.yaml index 37abb6c903..35990d6c0d 100644 --- a/config/services/dataplane_v1beta1_openstackdataplaneservice_os_reboot.yaml +++ b/config/services/dataplane_v1beta1_openstackdataplaneservice_os_reboot.yaml @@ -5,3 +5,5 @@ metadata: spec: playbook: osp.edpm.reboot edpmServiceType: reboot-os + dependsOn: + - run-os diff --git a/config/services/dataplane_v1beta1_openstackdataplaneservice_ovn.yaml b/config/services/dataplane_v1beta1_openstackdataplaneservice_ovn.yaml index 7dbad97a9c..5bc83ed965 100644 --- a/config/services/dataplane_v1beta1_openstackdataplaneservice_ovn.yaml +++ b/config/services/dataplane_v1beta1_openstackdataplaneservice_ovn.yaml @@ -4,6 +4,10 @@ metadata: name: ovn spec: playbook: osp.edpm.ovn + dependsOn: + - run-os + - validate-network + - install-certs dataSources: - configMapRef: name: ovncontroller-config diff --git a/config/services/dataplane_v1beta1_openstackdataplaneservice_ovn_bgp_agent.yaml b/config/services/dataplane_v1beta1_openstackdataplaneservice_ovn_bgp_agent.yaml index 30af41db37..32d5026121 100644 --- a/config/services/dataplane_v1beta1_openstackdataplaneservice_ovn_bgp_agent.yaml +++ b/config/services/dataplane_v1beta1_openstackdataplaneservice_ovn_bgp_agent.yaml @@ -4,6 +4,8 @@ metadata: name: ovn-bgp-agent spec: playbook: osp.edpm.ovn_bgp_agent + dependsOn: + - ovn dataSources: - secretRef: name: neutron-ovn-agent-neutron-config diff --git a/config/services/dataplane_v1beta1_openstackdataplaneservice_run_os.yaml b/config/services/dataplane_v1beta1_openstackdataplaneservice_run_os.yaml index 26698cd2c0..069cbcc1e4 100644 --- a/config/services/dataplane_v1beta1_openstackdataplaneservice_run_os.yaml +++ b/config/services/dataplane_v1beta1_openstackdataplaneservice_run_os.yaml @@ -8,3 +8,5 @@ spec: - EdpmLogrotateCrondImage - EdpmIscsidImage edpmServiceType: run-os + dependsOn: + - configure-os diff --git a/config/services/dataplane_v1beta1_openstackdataplaneservice_ssh_known_hosts.yaml b/config/services/dataplane_v1beta1_openstackdataplaneservice_ssh_known_hosts.yaml index 1b38aac681..157dba1632 100644 --- a/config/services/dataplane_v1beta1_openstackdataplaneservice_ssh_known_hosts.yaml +++ b/config/services/dataplane_v1beta1_openstackdataplaneservice_ssh_known_hosts.yaml @@ -6,3 +6,6 @@ spec: playbook: osp.edpm.ssh_known_hosts deployOnAllNodeSets: true edpmServiceType: ssh-known-hosts + dependsOn: + - install-os + - configure-network diff --git a/config/services/dataplane_v1beta1_openstackdataplaneservice_swift.yaml b/config/services/dataplane_v1beta1_openstackdataplaneservice_swift.yaml index 31a0bb1487..492b7c0857 100644 --- a/config/services/dataplane_v1beta1_openstackdataplaneservice_swift.yaml +++ b/config/services/dataplane_v1beta1_openstackdataplaneservice_swift.yaml @@ -4,6 +4,9 @@ metadata: name: swift spec: playbook: osp.edpm.swift + dependsOn: + - run-os + - validate-network dataSources: - secretRef: name: swift-conf diff --git a/config/services/dataplane_v1beta1_openstackdataplaneservice_telemetry.yaml b/config/services/dataplane_v1beta1_openstackdataplaneservice_telemetry.yaml index 9798a0630a..f061b056da 100644 --- a/config/services/dataplane_v1beta1_openstackdataplaneservice_telemetry.yaml +++ b/config/services/dataplane_v1beta1_openstackdataplaneservice_telemetry.yaml @@ -3,6 +3,8 @@ kind: OpenStackDataPlaneService metadata: name: telemetry spec: + dependsOn: + - nova dataSources: - secretRef: name: ceilometer-compute-config-data diff --git a/config/services/dataplane_v1beta1_openstackdataplaneservice_telemetry_power_monitoring.yaml b/config/services/dataplane_v1beta1_openstackdataplaneservice_telemetry_power_monitoring.yaml index 1a0a886dfb..fc6afbfe85 100644 --- a/config/services/dataplane_v1beta1_openstackdataplaneservice_telemetry_power_monitoring.yaml +++ b/config/services/dataplane_v1beta1_openstackdataplaneservice_telemetry_power_monitoring.yaml @@ -3,6 +3,8 @@ kind: OpenStackDataPlaneService metadata: name: telemetry-power-monitoring spec: + dependsOn: + - nova dataSources: - secretRef: name: ceilometer-ipmi-config-data diff --git a/config/services/dataplane_v1beta1_openstackdataplaneservice_validate_network.yaml b/config/services/dataplane_v1beta1_openstackdataplaneservice_validate_network.yaml index ef8c48cb6e..1870d1c84b 100644 --- a/config/services/dataplane_v1beta1_openstackdataplaneservice_validate_network.yaml +++ b/config/services/dataplane_v1beta1_openstackdataplaneservice_validate_network.yaml @@ -5,3 +5,5 @@ metadata: spec: playbook: osp.edpm.validate_network edpmServiceType: validate-network + dependsOn: + - configure-network diff --git a/internal/controller/dataplane/openstackdataplanedeployment_controller.go b/internal/controller/dataplane/openstackdataplanedeployment_controller.go index 3fad901114..0a9c4c9cdb 100644 --- a/internal/controller/dataplane/openstackdataplanedeployment_controller.go +++ b/internal/controller/dataplane/openstackdataplanedeployment_controller.go @@ -19,16 +19,19 @@ package dataplane import ( "context" + "errors" "fmt" "time" "github.com/go-playground/validator/v10" + "github.com/iancoleman/strcase" batchv1 "k8s.io/api/batch/v1" corev1 "k8s.io/api/core/v1" k8s_errors "k8s.io/apimachinery/pkg/api/errors" "k8s.io/apimachinery/pkg/runtime" "k8s.io/apimachinery/pkg/types" "k8s.io/client-go/kubernetes" + "k8s.io/utils/ptr" ctrl "sigs.k8s.io/controller-runtime" "sigs.k8s.io/controller-runtime/pkg/builder" "sigs.k8s.io/controller-runtime/pkg/client" @@ -146,7 +149,7 @@ func (r *OpenStackDataPlaneDeploymentReconciler) Reconcile(ctx context.Context, defer func() { // update the Ready condition based on the sub conditions // Don't update the status, if reconciler Panics if r := recover(); r != nil { - Log.Info(fmt.Sprintf("panic during reconcile %v\n", r)) + Log.Info("panic during reconcile", "panic", r) panic(r) } if instance.Status.Conditions.AllSubConditionIsTrue() { @@ -250,12 +253,45 @@ func (r *OpenStackDataPlaneDeploymentReconciler) Reconcile(ctx context.Context, shouldRequeue := false haveError := false deploymentErrMsg := "" - var nodesetServiceMap map[string][]string + var nodesetServiceMap map[string][][]string + var serviceCache *deployment.ServiceCache backoffLimitReached := false - if nodesetServiceMap, err = deployment.DedupeServices(ctx, helper, nodeSets.Items, - instance.Spec.ServicesOverride); err != nil { + fallbackToListOrder := ptr.Deref(instance.Spec.FallbackToListOrder, true) + + if nodesetServiceMap, serviceCache, err = deployment.DedupeServices(ctx, helper, nodeSets.Items, + instance.Spec.ServicesOverride, fallbackToListOrder); err != nil { util.LogErrorForObject(helper, err, "OpenStackDeployment error for deployment", instance) + var missingServiceErr *deployment.MissingServiceError + if errors.As(err, &missingServiceErr) { + nsConditions := instance.Status.NodeSetConditions[missingServiceErr.NodeSet] + nodeSetServiceErrorMessage := fmt.Sprintf( + dataplanev1.NodeSetServiceDeploymentErrorMessage, + missingServiceErr.Service) + " error %s" + nsConditions.MarkFalse( + dataplanev1.NodeSetDeploymentReadyCondition, + condition.ErrorReason, + condition.SeverityError, + nodeSetServiceErrorMessage, + err.Error()) + serviceCondition := condition.Type(fmt.Sprintf( + "Service%sDeploymentReady", + strcase.ToCamel(missingServiceErr.Service))) + nsConditions.MarkFalse( + serviceCondition, + condition.ErrorReason, + condition.SeverityError, + nodeSetServiceErrorMessage, + err.Error()) + instance.Status.NodeSetConditions[missingServiceErr.NodeSet] = nsConditions + instance.Status.Conditions.MarkFalse( + condition.DeploymentReadyCondition, + condition.ErrorReason, + condition.SeverityWarning, + condition.DeploymentReadyErrorMessage, + fmt.Sprintf("nodeSet: %s error: %s", missingServiceErr.NodeSet, err.Error())) + return ctrl.Result{}, err + } instance.Status.Conditions.MarkFalse( condition.DeploymentReadyCondition, condition.ErrorReason, @@ -276,7 +312,7 @@ func (r *OpenStackDataPlaneDeploymentReconciler) Reconcile(ctx context.Context, // for the first started NodeSet to finish before starting the next. for _, nodeSet := range nodeSets.Items { - Log.Info(fmt.Sprintf("Deploying NodeSet: %s", nodeSet.Name)) + Log.Info("Deploying NodeSet", "nodeSet", nodeSet.Name) Log.Info("Set Status.Deployed to false", "instance", instance) instance.Status.Deployed = false Log.Info("Set DeploymentReadyCondition false") @@ -306,14 +342,13 @@ func (r *OpenStackDataPlaneDeploymentReconciler) Reconcile(ctx context.Context, AeeSpec: &ansibleEESpec, InventorySecrets: globalInventorySecrets, AnsibleSSHPrivateKeySecrets: globalSSHKeySecrets, + ServiceCache: serviceCache, Version: version, } - // When ServicesOverride is set on the OpenStackDataPlaneDeployment, - // deploy those services for each OpenStackDataPlaneNodeSet. Otherwise, - // deploy with the OpenStackDataPlaneNodeSet's Services. var deployResult *ctrl.Result - deployResult, err = deployer.Deploy(nodesetServiceMap[nodeSet.Name]) + deployResult, err = deployer.Deploy( + nodesetServiceMap[nodeSet.Name]) nsConditions := instance.Status.NodeSetConditions[nodeSet.Name] nsConditions.Set(nsConditions.Mirror(dataplanev1.NodeSetDeploymentReadyCondition)) @@ -494,7 +529,7 @@ func (r *OpenStackDataPlaneDeploymentReconciler) SetupWithManager(mgr ctrl.Manag Namespace: dep.GetNamespace(), Name: dep.GetName(), } - Log.Info(fmt.Sprintf("Cert %s is used by deployment %s", obj.GetName(), dep.GetName())) + Log.Info("Cert is used by deployment", "cert", obj.GetName(), "deployment", dep.GetName()) result = append(result, reconcile.Request{NamespacedName: name}) } } diff --git a/internal/dataplane/depgraph.go b/internal/dataplane/depgraph.go new file mode 100644 index 0000000000..796c3e6df2 --- /dev/null +++ b/internal/dataplane/depgraph.go @@ -0,0 +1,205 @@ +/* +Copyright 2025. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package deployment + +import ( + "context" + "fmt" + + "github.com/openstack-k8s-operators/lib-common/modules/common/helper" + dataplanev1 "github.com/openstack-k8s-operators/openstack-operator/api/dataplane/v1beta1" +) + +// BuildServiceLevels takes a flat list of service names, looks up each +// service's DependsOn field, and returns a list of execution levels. +// Services within the same level have all their dependencies satisfied by +// earlier levels and may run in parallel. +// +// When fallbackToListOrder is true, services without an explicit dependsOn +// inherit an implicit dependency on the preceding service in the list. +// When false, only explicit dependsOn entries affect the DAG. +// Dependencies referencing services not present in the current list are +// silently skipped. +func BuildServiceLevels(ctx context.Context, helper *helper.Helper, + serviceCache *ServiceCache, + services []string, + fallbackToListOrder bool, +) ([][]string, error) { + + if len(services) == 0 { + return nil, nil + } + + deps, err := loadDependencies(ctx, helper, serviceCache, services, fallbackToListOrder) + if err != nil { + return nil, err + } + + return topoLevelSort(services, deps) +} + +// loadDependencies fetches every service CR and collects its DependsOn +// list. Each service has an effective service type (EDPMServiceType, or +// CR name as fallback). The dependsOn entries are resolved against these +// service types; references to services not in the list are skipped. +func loadDependencies(ctx context.Context, helper *helper.Helper, + serviceCache *ServiceCache, + services []string, + fallbackToListOrder bool, +) (map[string][]string, error) { + + // Build effective serviceType <-> CR name lookups from the service list. + typeToName := make(map[string]string, len(services)) + nameToType := make(map[string]string, len(services)) + serviceObjects := make(map[string]dataplanev1.OpenStackDataPlaneService, len(services)) + + for _, svc := range services { + service, err := serviceCache.Get(ctx, helper, svc) + if err != nil { + return nil, err + } + serviceObjects[svc] = service + serviceType := service.Spec.EDPMServiceType + typeToName[serviceType] = svc + nameToType[svc] = serviceType + } + + deps := make(map[string][]string, len(services)) + + // With fallback enabled, services without dependsOn follow predecessor + // order. Otherwise only explicit dependsOn entries participate in the DAG. + for i, svc := range services { + service := serviceObjects[svc] + seen := make(map[string]struct{}) + var resolved []string + addDep := func(dep string) { + if _, ok := seen[dep]; ok { + return + } + seen[dep] = struct{}{} + resolved = append(resolved, dep) + } + + if len(service.Spec.DependsOn) > 0 { + for _, dep := range service.Spec.DependsOn { + target, err := resolveDependency(dep, svc, nameToType[svc], typeToName, nameToType) + if err != nil { + return nil, err + } + if target != "" { + addDep(target) + } + } + } else if fallbackToListOrder && i > 0 { + addDep(services[i-1]) + } + + deps[svc] = resolved + } + + return deps, nil +} + +// resolveDependency resolves a single dependsOn entry (service type or +// CR name) to a CR name in the current list. Dependencies referencing +// services not in the list are silently skipped (returns empty string). +func resolveDependency( + dep string, + sourceSvc string, + sourceType string, + typeToName map[string]string, + nameToType map[string]string, +) (string, error) { + if _, ok := typeToName[dep]; !ok { + if resolvedType, ok := nameToType[dep]; ok { + dep = resolvedType + } + } + + if dep == sourceType { + return "", fmt.Errorf("service %q has a self-dependency", sourceSvc) + } + + if target, ok := typeToName[dep]; ok { + if target == sourceSvc { + return "", fmt.Errorf("service %q has a self-dependency", sourceSvc) + } + return target, nil + } + + return "", nil +} + +// topoLevelSort performs a Kahn-style topological sort that groups nodes +// into parallel execution levels. Returns an error on cycles. +func topoLevelSort(services []string, deps map[string][]string) ([][]string, error) { + inDegree := make(map[string]int, len(services)) + dependents := make(map[string][]string, len(services)) + + for _, svc := range services { + if _, ok := inDegree[svc]; !ok { + inDegree[svc] = 0 + } + for _, dep := range deps[svc] { + inDegree[svc]++ + dependents[dep] = append(dependents[dep], svc) + } + } + + // Seed level 0 with all services that have no dependencies, in original + // list order for determinism. + var current []string + for _, svc := range services { + if inDegree[svc] == 0 { + current = append(current, svc) + } + } + + var levels [][]string + visited := 0 + + for len(current) > 0 { + levels = append(levels, current) + visited += len(current) + + // Build the next level: services whose in-degree drops to 0. + // Collect into a set first, then emit in original list order. + ready := make(map[string]struct{}) + for _, svc := range current { + for _, dep := range dependents[svc] { + inDegree[dep]-- + if inDegree[dep] == 0 { + ready[dep] = struct{}{} + } + } + } + + var next []string + for _, svc := range services { + if _, ok := ready[svc]; ok { + next = append(next, svc) + } + } + current = next + } + + if visited != len(services) { + return nil, fmt.Errorf("circular dependency detected among services") + } + + return levels, nil +} diff --git a/internal/dataplane/depgraph_test.go b/internal/dataplane/depgraph_test.go new file mode 100644 index 0000000000..2c237dc26e --- /dev/null +++ b/internal/dataplane/depgraph_test.go @@ -0,0 +1,168 @@ +package deployment + +import ( + "testing" + + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" +) + +func TestTopoLevelSort(t *testing.T) { + tests := []struct { + name string + services []string + deps map[string][]string + want [][]string + wantErr string + }{ + { + name: "no deps - single level", + services: []string{"a", "b", "c"}, + deps: map[string][]string{"a": nil, "b": nil, "c": nil}, + want: [][]string{{"a", "b", "c"}}, + }, + { + name: "linear chain", + services: []string{"a", "b", "c"}, + deps: map[string][]string{"a": nil, "b": {"a"}, "c": {"b"}}, + want: [][]string{{"a"}, {"b"}, {"c"}}, + }, + { + name: "diamond", + services: []string{"a", "b", "c", "d"}, + deps: map[string][]string{"a": nil, "b": {"a"}, "c": {"a"}, "d": {"b", "c"}}, + want: [][]string{{"a"}, {"b", "c"}, {"d"}}, + }, + { + name: "multiple roots", + services: []string{"x", "y", "z", "w"}, + deps: map[string][]string{"x": nil, "y": nil, "z": {"x"}, "w": {"y"}}, + want: [][]string{{"x", "y"}, {"z", "w"}}, + }, + { + name: "single service", + services: []string{"only"}, + deps: map[string][]string{"only": nil}, + want: [][]string{{"only"}}, + }, + { + name: "preserves order within level", + services: []string{"c", "b", "a", "d"}, + deps: map[string][]string{"c": nil, "b": nil, "a": nil, "d": {"c", "b", "a"}}, + want: [][]string{{"c", "b", "a"}, {"d"}}, + }, + { + name: "complex DAG", + services: []string{"bootstrap", "configure-network", "install-os", "ovn", "libvirt", "nova"}, + deps: map[string][]string{ + "bootstrap": nil, "configure-network": {"bootstrap"}, "install-os": {"bootstrap"}, + "ovn": {"configure-network"}, "libvirt": {"install-os"}, "nova": {"ovn", "libvirt"}, + }, + want: [][]string{{"bootstrap"}, {"configure-network", "install-os"}, {"ovn", "libvirt"}, {"nova"}}, + }, + { + name: "cycle detected", + services: []string{"a", "b", "c"}, + deps: map[string][]string{"a": {"c"}, "b": {"a"}, "c": {"b"}}, + wantErr: "circular dependency", + }, + { + name: "two-node cycle", + services: []string{"a", "b"}, + deps: map[string][]string{"a": {"b"}, "b": {"a"}}, + wantErr: "circular dependency", + }, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + levels, err := topoLevelSort(tt.services, tt.deps) + if tt.wantErr != "" { + require.Error(t, err) + assert.Contains(t, err.Error(), tt.wantErr) + return + } + require.NoError(t, err) + require.Len(t, levels, len(tt.want)) + for i := range tt.want { + assert.ElementsMatch(t, tt.want[i], levels[i], "level %d", i) + } + }) + } +} + +func TestResolveDependency(t *testing.T) { + typeToName := map[string]string{"ovn": "custom-ovn", "nova": "nova"} + nameToType := map[string]string{"custom-ovn": "ovn", "nova": "nova"} + + target, err := resolveDependency("ovn", "nova", "nova", typeToName, nameToType) + require.NoError(t, err) + assert.Equal(t, "custom-ovn", target, "should resolve by type") + + target, err = resolveDependency("custom-ovn", "nova", "nova", typeToName, nameToType) + require.NoError(t, err) + assert.Equal(t, "custom-ovn", target, "should resolve by CR name") + + target, err = resolveDependency("missing", "nova", "nova", typeToName, nameToType) + require.NoError(t, err) + assert.Equal(t, "", target, "missing dep should be skipped") + + _, err = resolveDependency("nova", "nova", "nova", typeToName, nameToType) + require.Error(t, err) + assert.Contains(t, err.Error(), "self-dependency") + + typeToName = map[string]string{"self-service": "self-service"} + nameToType = map[string]string{"self-service": "self-service"} + _, err = resolveDependency("self-service", "self-service", "", typeToName, nameToType) + require.Error(t, err) + assert.Contains(t, err.Error(), "self-dependency") + + typeToName = map[string]string{"ovn": "custom-ovn"} + nameToType = map[string]string{"custom-ovn": "ovn"} + _, err = resolveDependency("custom-ovn", "custom-ovn", "ovn", typeToName, nameToType) + require.Error(t, err) + assert.Contains(t, err.Error(), "self-dependency") +} + +func TestTopoLevelSortDependencySemantics(t *testing.T) { + tests := []struct { + name string + deps map[string][]string + svcs []string + want [][]string + }{ + { + name: "fallback order forms chain", + svcs: []string{"a", "b", "c"}, + deps: map[string][]string{"a": nil, "b": {"a"}, "c": {"b"}}, + want: [][]string{{"a"}, {"b"}, {"c"}}, + }, + { + name: "explicit deps only keep multiple roots together", + svcs: []string{"b", "c", "d", "e"}, + deps: map[string][]string{"b": nil, "c": nil, "d": {"c"}, "e": {"c"}}, + want: [][]string{{"b", "c"}, {"d", "e"}}, + }, + { + name: "explicit deps do not inherit predecessor order", + svcs: []string{"a", "b", "c"}, + deps: map[string][]string{"a": nil, "b": nil, "c": {"a"}}, + want: [][]string{{"a", "b"}, {"c"}}, + }, + { + name: "explicit deps can chain independently", + svcs: []string{"a", "b", "c", "d1"}, + deps: map[string][]string{"a": nil, "b": nil, "c": {"a"}, "d1": {"c"}}, + want: [][]string{{"a", "b"}, {"c"}, {"d1"}}, + }, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + levels, err := topoLevelSort(tt.svcs, tt.deps) + require.NoError(t, err) + require.Len(t, levels, len(tt.want)) + for i := range tt.want { + assert.ElementsMatch(t, tt.want[i], levels[i], "level %d", i) + } + }) + } +} diff --git a/internal/dataplane/deployment.go b/internal/dataplane/deployment.go index 6b2f476cf5..283dedf2d5 100644 --- a/internal/dataplane/deployment.go +++ b/internal/dataplane/deployment.go @@ -55,127 +55,112 @@ type Deployer struct { AeeSpec *dataplanev1.AnsibleEESpec InventorySecrets map[string]string AnsibleSSHPrivateKeySecrets map[string]string + ServiceCache *ServiceCache Version *openstackv1.OpenStackVersion } -// Deploy function encapsulating primary deloyment handling -func (d *Deployer) Deploy(services []string) (*ctrl.Result, error) { +// Deploy function encapsulating primary deployment handling. +// It accepts a leveled execution plan: each level is a group of services +// whose dependencies are all satisfied by earlier levels. +func (d *Deployer) Deploy(serviceLevels [][]string) (*ctrl.Result, error) { log := d.Helper.GetLogger() + if d.ServiceCache == nil { + d.ServiceCache = NewServiceCache() + } - var readyCondition condition.Type - var readyMessage string - var readyWaitingMessage string - var readyErrorMessage string - var deployName string - - // Save a copy of the original ExtraMounts so it can be reset after each - // service deployment - aeeSpecMounts := make([]storage.VolMounts, len(d.AeeSpec.ExtraMounts)) - copy(aeeSpecMounts, d.AeeSpec.ExtraMounts) - // Deploy the composable services - for _, service := range services { - deployName = service - readyCondition = condition.Type(fmt.Sprintf("Service%sDeploymentReady", strcase.ToCamel(service))) - readyWaitingMessage = fmt.Sprintf(dataplanev1.NodeSetServiceDeploymentReadyWaitingMessage, deployName) - readyMessage = fmt.Sprintf(dataplanev1.NodeSetServiceDeploymentReadyMessage, deployName) - readyErrorMessage = fmt.Sprintf(dataplanev1.NodeSetServiceDeploymentErrorMessage, deployName) + " error %s" - - nsConditions := d.Status.NodeSetConditions[d.NodeSet.Name] - log.Info("Deploying service", "service", service) - foundService, err := GetService(d.Ctx, d.Helper, service) - if err != nil { - nsConditions.Set(condition.FalseCondition( - readyCondition, - condition.ErrorReason, - condition.SeverityError, - readyErrorMessage, - err.Error())) - d.Status.NodeSetConditions[d.NodeSet.Name] = nsConditions - return &ctrl.Result{}, err - } + // Flatten levels into a single list for cert-mount lookups. + var allServices []string + for _, level := range serviceLevels { + allServices = append(allServices, level...) + } - containerImages := dataplaneutil.GetContainerImages(d.Version) - if containerImages.AnsibleeeImage != nil { - d.AeeSpec.OpenStackAnsibleEERunnerImage = *containerImages.AnsibleeeImage - } - if len(foundService.Spec.OpenStackAnsibleEERunnerImage) > 0 { - d.AeeSpec.OpenStackAnsibleEERunnerImage = foundService.Spec.OpenStackAnsibleEERunnerImage - } + for levelIdx, level := range serviceLevels { + log.Info("Deploying service level", "level", levelIdx, "services", level) + levelServices := make(map[string]dataplanev1.OpenStackDataPlaneService, len(level)) - // Reset ExtraMounts to its original value, and then add in service - // specific mounts. - d.AeeSpec.ExtraMounts = make([]storage.VolMounts, len(aeeSpecMounts)) - copy(d.AeeSpec.ExtraMounts, aeeSpecMounts) - d.AeeSpec, err = d.addServiceExtraMounts(foundService) - if err != nil { - nsConditions.Set(condition.FalseCondition( - readyCondition, - condition.ErrorReason, - condition.SeverityError, - readyErrorMessage, - err.Error())) - d.Status.NodeSetConditions[d.NodeSet.Name] = nsConditions - return &ctrl.Result{}, err - } + // Dispatch all services in this level + for _, service := range level { + deployName := service + readyCondition := condition.Type(fmt.Sprintf("Service%sDeploymentReady", strcase.ToCamel(service))) + readyWaitingMessage := fmt.Sprintf(dataplanev1.NodeSetServiceDeploymentReadyWaitingMessage, deployName) + readyMessage := fmt.Sprintf(dataplanev1.NodeSetServiceDeploymentReadyMessage, deployName) + readyErrorMessage := fmt.Sprintf(dataplanev1.NodeSetServiceDeploymentErrorMessage, deployName) + " error %s" - // Add certMounts - if foundService.Spec.AddCertMounts { - d.AeeSpec, err = d.addCertMounts(services) + log.Info("Deploying service", "service", service, "level", levelIdx) + foundService, err := d.ServiceCache.Get(d.Ctx, d.Helper, service) if err != nil { - nsConditions.Set(condition.FalseCondition( - readyCondition, - condition.ErrorReason, - condition.SeverityError, - readyErrorMessage, - err.Error())) - d.Status.NodeSetConditions[d.NodeSet.Name] = nsConditions + d.setServiceError(readyCondition, readyErrorMessage, err) return &ctrl.Result{}, err } - } else if len(foundService.Spec.CACerts) > 0 { - // In general, we use the install-certs service (which has AddCertMounts=true - // to copy the cacerts over. This may not be early enough for some services - // though. In particular, we want the bootstrap service to copy the cacerts - // to the default location on the compute node. - d.AeeSpec, err = d.addCACertMount(foundService) + levelServices[service] = foundService + + serviceAeeSpec := d.AeeSpec.DeepCopy() + containerImages := dataplaneutil.GetContainerImages(d.Version) + if containerImages.AnsibleeeImage != nil { + serviceAeeSpec.OpenStackAnsibleEERunnerImage = *containerImages.AnsibleeeImage + } + if len(foundService.Spec.OpenStackAnsibleEERunnerImage) > 0 { + serviceAeeSpec.OpenStackAnsibleEERunnerImage = foundService.Spec.OpenStackAnsibleEERunnerImage + } + + err = d.addServiceExtraMounts(serviceAeeSpec, foundService) if err != nil { - nsConditions.Set(condition.FalseCondition( - readyCondition, - condition.ErrorReason, - condition.SeverityError, - readyErrorMessage, - err.Error())) - d.Status.NodeSetConditions[d.NodeSet.Name] = nsConditions + d.setServiceError(readyCondition, readyErrorMessage, err) return &ctrl.Result{}, err } - } - err = d.ConditionalDeploy( - readyCondition, - readyMessage, - readyWaitingMessage, - readyErrorMessage, - deployName, - foundService, - ) - - nsConditions = d.Status.NodeSetConditions[d.NodeSet.Name] - if err != nil || !nsConditions.IsTrue(readyCondition) { - log.Info(fmt.Sprintf("Condition %s not ready", readyCondition)) - return &ctrl.Result{}, err - } + // Add certMounts + if foundService.Spec.AddCertMounts { + err = d.addCertMounts(serviceAeeSpec, allServices) + if err != nil { + d.setServiceError(readyCondition, readyErrorMessage, err) + return &ctrl.Result{}, err + } + } else if len(foundService.Spec.CACerts) > 0 { + err = d.addCACertMount(serviceAeeSpec, foundService) + if err != nil { + d.setServiceError(readyCondition, readyErrorMessage, err) + return &ctrl.Result{}, err + } + } - log.Info(fmt.Sprintf("Condition %s ready", readyCondition)) + err = d.ConditionalDeploy( + readyCondition, + readyMessage, + readyWaitingMessage, + readyErrorMessage, + deployName, + foundService, + serviceAeeSpec, + ) - // (TODO) Only considers the container image values from the Version - // for the time being. Can be expanded later to look at the actual - // values used from the inventory, etc. - if d.Version != nil { - vContainerImages := reflect.ValueOf(d.Version.Status.ContainerImages) - for _, cif := range foundService.Spec.ContainerImageFields { - d.Deployment.Status.ContainerImages[cif] = reflect.Indirect(vContainerImages.FieldByName(cif)).String() + if err != nil { + log.Info("Condition error in service level", "condition", readyCondition, "level", levelIdx) + return &ctrl.Result{}, err } } + // After dispatching all services in this level, check that every + // service in the level is ready before advancing to the next level. + for _, service := range level { + readyCondition := condition.Type(fmt.Sprintf("Service%sDeploymentReady", strcase.ToCamel(service))) + nsConditions := d.Status.NodeSetConditions[d.NodeSet.Name] + if !nsConditions.IsTrue(readyCondition) { + log.Info("Condition not ready in service level, waiting", "condition", readyCondition, "level", levelIdx) + return &ctrl.Result{}, nil + } + log.Info("Condition ready", "condition", readyCondition) + + foundService := levelServices[service] + // (TODO) Only considers the container image values from the Version + // for the time being. + if d.Version != nil { + vContainerImages := reflect.ValueOf(d.Version.Status.ContainerImages) + for _, cif := range foundService.Spec.ContainerImageFields { + d.Deployment.Status.ContainerImages[cif] = reflect.Indirect(vContainerImages.FieldByName(cif)).String() + } + } + } } return nil, nil @@ -190,15 +175,16 @@ func (d *Deployer) ConditionalDeploy( readyErrorMessage string, deployName string, foundService dataplanev1.OpenStackDataPlaneService, + aeeSpec *dataplanev1.AnsibleEESpec, ) error { var err error log := d.Helper.GetLogger() nsConditions := d.Status.NodeSetConditions[d.NodeSet.Name] if nsConditions.IsUnknown(readyCondition) { - log.Info(fmt.Sprintf("%s Unknown, starting %s", readyCondition, deployName)) + log.Info("Condition unknown, starting deploy", "condition", readyCondition, "service", deployName) err = d.DeployService( - foundService) + foundService, aeeSpec) if err != nil { util.LogErrorForObject(d.Helper, err, fmt.Sprintf("Unable to %s for %s", deployName, d.NodeSet.Name), d.NodeSet) return err @@ -219,10 +205,10 @@ func (d *Deployer) ConditionalDeploy( if err != nil { // Return nil if we don't have AnsibleEE available yet if k8s_errors.IsNotFound(err) { - log.Info(fmt.Sprintf("%s AnsibleEE job is not yet found", readyCondition)) + log.Info("AnsibleEE job not yet found", "condition", readyCondition) return nil } - log.Error(err, fmt.Sprintf("Error getting ansibleJob job for %s", deployName)) + log.Error(err, "Error getting ansibleJob job", "service", deployName) nsConditions.Set(condition.FalseCondition( readyCondition, condition.ErrorReason, @@ -232,7 +218,7 @@ func (d *Deployer) ConditionalDeploy( } if ansibleJob.Status.Succeeded > 0 { d.storeExecutionSummary(ansibleJob) - log.Info(fmt.Sprintf("Condition %s ready", readyCondition)) + log.Info("Condition ready", "condition", readyCondition) nsConditions.Set(condition.TrueCondition( readyCondition, "%s", readyMessage)) @@ -247,7 +233,7 @@ func (d *Deployer) ConditionalDeploy( errorMsg = fmt.Sprintf("backoff limit reached for execution.name %s execution.namespace %s execution.condition.message: %s", ansibleJob.Name, ansibleJob.Namespace, ansibleCondition.Message) } d.storeExecutionSummary(ansibleJob) - log.Info(fmt.Sprintf("Condition %s error", readyCondition)) + log.Info("Condition error", "condition", readyCondition) err = fmt.Errorf("%s", errorMsg) nsConditions.Set(condition.FalseCondition( readyCondition, @@ -256,7 +242,10 @@ func (d *Deployer) ConditionalDeploy( readyErrorMessage, err.Error())) } else { - log.Info(fmt.Sprintf("AnsibleEE job is not yet completed: Execution: %s, Active pods: %d, Failed pods: %d", ansibleJob.Name, ansibleJob.Status.Active, ansibleJob.Status.Failed)) + log.Info("AnsibleEE job not yet completed", + "execution", ansibleJob.Name, + "activePods", ansibleJob.Status.Active, + "failedPods", ansibleJob.Status.Failed) nsConditions.Set(condition.FalseCondition( readyCondition, condition.RequestedReason, @@ -287,25 +276,41 @@ func (d *Deployer) storeExecutionSummary(ansibleJob *batchv1.Job) { d.Status.AnsibleExecutionSummaries[ansibleJob.Name] = *summary } +func (d *Deployer) setServiceError( + readyCondition condition.Type, + readyErrorMessage string, + err error, +) { + nsConditions := d.Status.NodeSetConditions[d.NodeSet.Name] + nsConditions.Set(condition.FalseCondition( + readyCondition, + condition.ErrorReason, + condition.SeverityError, + readyErrorMessage, + err.Error())) + d.Status.NodeSetConditions[d.NodeSet.Name] = nsConditions +} + // addCertMounts adds the cert mounts to the aeeSpec for the install-certs service func (d *Deployer) addCertMounts( + aeeSpec *dataplanev1.AnsibleEESpec, services []string, -) (*dataplanev1.AnsibleEESpec, error) { +) error { log := d.Helper.GetLogger() client := d.Helper.GetClient() for _, svc := range services { - service, err := GetService(d.Ctx, d.Helper, svc) + service, err := d.ServiceCache.Get(d.Ctx, d.Helper, svc) if err != nil { - return nil, err + return err } if service.Spec.CertsFrom != "" && service.Spec.TLSCerts == nil && service.Spec.CACerts == "" { if slices.Contains(services, service.Spec.CertsFrom) { continue } - service, err = GetService(d.Ctx, d.Helper, service.Spec.CertsFrom) + service, err = d.ServiceCache.Get(d.Ctx, d.Helper, service.Spec.CertsFrom) if err != nil { - return nil, err + return err } } @@ -313,9 +318,9 @@ func (d *Deployer) addCertMounts( if slices.Contains(services, service.Spec.EDPMServiceType) { continue } - service, err = GetService(d.Ctx, d.Helper, service.Spec.EDPMServiceType) + service, err = d.ServiceCache.Get(d.Ctx, d.Helper, service.Spec.EDPMServiceType) if err != nil { - return nil, err + return err } } @@ -336,7 +341,7 @@ func (d *Deployer) addCertMounts( certSecret := &corev1.Secret{} err := client.Get(d.Ctx, types.NamespacedName{Name: secretName, Namespace: service.Namespace}, certSecret) if err != nil { - return d.AeeSpec, err + return err } numberOfSecrets, _ := strconv.Atoi(certSecret.Labels["numberOfSecrets"]) projectedVolumeSource := corev1.ProjectedVolumeSource{ @@ -347,7 +352,7 @@ func (d *Deployer) addCertMounts( certSecret := &corev1.Secret{} err := client.Get(d.Ctx, types.NamespacedName{Name: secretName, Namespace: service.Namespace}, certSecret) if err != nil { - return d.AeeSpec, err + return err } volumeProjection := corev1.VolumeProjection{ Secret: &corev1.SecretProjection{ @@ -381,25 +386,26 @@ func (d *Deployer) addCertMounts( } volMounts.Volumes = append(volMounts.Volumes, certVolume) volMounts.Mounts = append(volMounts.Mounts, certVolumeMount) - d.AeeSpec.ExtraMounts = append(d.AeeSpec.ExtraMounts, volMounts) + aeeSpec.ExtraMounts = append(aeeSpec.ExtraMounts, volMounts) } } // add mount for cacert bundle, even if TLS-E is not enabled if len(service.Spec.CACerts) > 0 { - d.AeeSpec, err = d.addCACertMount(service) + err = d.addCACertMount(aeeSpec, service) if err != nil { - return d.AeeSpec, err + return err } } } - return d.AeeSpec, nil + return nil } func (d *Deployer) addCACertMount( + aeeSpec *dataplanev1.AnsibleEESpec, service dataplanev1.OpenStackDataPlaneService, -) (*dataplanev1.AnsibleEESpec, error) { +) error { log := d.Helper.GetLogger() client := d.Helper.GetClient() @@ -408,7 +414,7 @@ func (d *Deployer) addCACertMount( cacertSecret := &corev1.Secret{} err := client.Get(d.Ctx, types.NamespacedName{Name: service.Spec.CACerts, Namespace: service.Namespace}, cacertSecret) if err != nil { - return d.AeeSpec, err + return err } volumeName := fmt.Sprintf("%s-%s", service.Name, service.Spec.CACerts) if len(volumeName) > apimachineryvalidation.DNS1123LabelMaxLength { @@ -431,14 +437,15 @@ func (d *Deployer) addCACertMount( volMounts.Volumes = append(volMounts.Volumes, cacertVolume) volMounts.Mounts = append(volMounts.Mounts, cacertVolumeMount) - d.AeeSpec.ExtraMounts = append(d.AeeSpec.ExtraMounts, volMounts) - return d.AeeSpec, nil + aeeSpec.ExtraMounts = append(aeeSpec.ExtraMounts, volMounts) + return nil } // addServiceExtraMounts adds the service configs as ExtraMounts to aeeSpec func (d *Deployer) addServiceExtraMounts( + aeeSpec *dataplanev1.AnsibleEESpec, service dataplanev1.OpenStackDataPlaneService, -) (*dataplanev1.AnsibleEESpec, error) { +) error { baseMountPath := path.Join(ConfigPaths, service.Spec.EDPMServiceType) var configMaps []*corev1.ConfigMap @@ -447,7 +454,7 @@ func (d *Deployer) addServiceExtraMounts( for _, dataSource := range service.Spec.DataSources { _cm, _secret, err := dataplaneutil.GetDataSourceCmSecret(d.Ctx, d.Helper, service.Namespace, dataSource) if err != nil { - return nil, err + return err } if _cm != nil { @@ -505,7 +512,7 @@ func (d *Deployer) addServiceExtraMounts( } - d.AeeSpec.ExtraMounts = append(d.AeeSpec.ExtraMounts, volMounts) + aeeSpec.ExtraMounts = append(aeeSpec.ExtraMounts, volMounts) } for _, sec := range secrets { @@ -549,8 +556,8 @@ func (d *Deployer) addServiceExtraMounts( } - d.AeeSpec.ExtraMounts = append(d.AeeSpec.ExtraMounts, volMounts) + aeeSpec.ExtraMounts = append(aeeSpec.ExtraMounts, volMounts) } - return d.AeeSpec, nil + return nil } diff --git a/internal/dataplane/service.go b/internal/dataplane/service.go index 0383a95721..af2ca3f343 100644 --- a/internal/dataplane/service.go +++ b/internal/dataplane/service.go @@ -44,8 +44,50 @@ type ServiceYAML struct { Spec yaml.Node } +// MissingServiceError indicates that a requested service does not exist. +type MissingServiceError struct { + NodeSet string + Service string + Err error +} + +func (e *MissingServiceError) Error() string { + return e.Err.Error() +} + +func (e *MissingServiceError) Unwrap() error { + return e.Err +} + +// ServiceCache stores reconcile-local service objects to avoid repeated API lookups. +type ServiceCache struct { + services map[string]dataplanev1.OpenStackDataPlaneService +} + +// NewServiceCache returns an empty reconcile-local service cache. +func NewServiceCache() *ServiceCache { + return &ServiceCache{ + services: map[string]dataplanev1.OpenStackDataPlaneService{}, + } +} + +// Get returns a cached service or fetches it once from the API. +func (c *ServiceCache) Get(ctx context.Context, helper *helper.Helper, service string) (dataplanev1.OpenStackDataPlaneService, error) { + if cached, ok := c.services[service]; ok { + return cached, nil + } + + foundService, err := GetService(ctx, helper, service) + if err != nil { + return dataplanev1.OpenStackDataPlaneService{}, err + } + + c.services[service] = foundService + return foundService, nil +} + // DeployService service deployment -func (d *Deployer) DeployService(foundService dataplanev1.OpenStackDataPlaneService) error { +func (d *Deployer) DeployService(foundService dataplanev1.OpenStackDataPlaneService, aeeSpec *dataplanev1.AnsibleEESpec) error { err := dataplaneutil.AnsibleExecution( d.Ctx, d.Helper, @@ -53,10 +95,10 @@ func (d *Deployer) DeployService(foundService dataplanev1.OpenStackDataPlaneServ &foundService, d.AnsibleSSHPrivateKeySecrets, d.InventorySecrets, - d.AeeSpec, + aeeSpec, d.NodeSet) if err != nil { - d.Helper.GetLogger().Error(err, fmt.Sprintf("Unable to execute Ansible for %s", foundService.Name)) + d.Helper.GetLogger().Error(err, "Unable to execute Ansible", "service", foundService.Name) return err } @@ -152,51 +194,61 @@ func EnsureServices(ctx context.Context, helper *helper.Helper, instance *datapl return nil } -// DedupeServices deduplicates services to deploy. -// Multiple Services of same ServiceType/ServiceName in a nodeset -// Global Services in multiple NodeSets for a deployment +// DedupeServices deduplicates services to deploy and builds execution +// levels based on service dependencies. The returned map contains a +// leveled execution plan for each nodeset: each level is a list of +// services whose dependencies are all in earlier levels. func DedupeServices(ctx context.Context, helper *helper.Helper, nodesets []dataplanev1.OpenStackDataPlaneNodeSet, - serviceOverride []string) (map[string][]string, error) { - var nodeSetServiceMap = make(map[string][]string, 0) + serviceOverride []string, + fallbackToListOrder bool, +) (map[string][][]string, *ServiceCache, error) { + nodeSetServiceMap := make(map[string][][]string) var globalServices []string var services []string var err error + serviceCache := NewServiceCache() - for _, nodeset := range nodesets { + for i := range nodesets { + nodeset := &nodesets[i] var nodeSetServices []string if len(serviceOverride) != 0 { nodeSetServices = serviceOverride } else { nodeSetServices = nodeset.Spec.Services } - services, globalServices, err = dedupe(ctx, helper, nodeSetServices, globalServices) + services, globalServices, err = dedupe(ctx, helper, serviceCache, nodeset.Name, nodeSetServices, globalServices) + if err != nil { + return nil, nil, err + } + levels, err := BuildServiceLevels(ctx, helper, serviceCache, services, fallbackToListOrder) if err != nil { - return nil, err + return nil, nil, fmt.Errorf("error building service dependency graph for nodeset %s: %w", nodeset.Name, err) } - nodeSetServiceMap[nodeset.Name] = services + nodeSetServiceMap[nodeset.Name] = levels } - helper.GetLogger().Info(fmt.Sprintf("Current Global Services %v", globalServices)) - return nodeSetServiceMap, nil + helper.GetLogger().Info("Current global services", "services", globalServices) + return nodeSetServiceMap, serviceCache, nil } func dedupe(ctx context.Context, helper *helper.Helper, + serviceCache *ServiceCache, + nodeSetName string, services []string, globalServices []string) ([]string, []string, error) { var dedupedServices []string var nodeSetServiceTypes []string updatedglobalServices := globalServices for _, svc := range services { - service, err := GetService(ctx, helper, svc) + service, err := serviceCache.Get(ctx, helper, svc) if err != nil { - if k8s_errors.IsNotFound(err) && !slices.Contains(dedupedServices, svc) { - helper.GetLogger().Error(err, fmt.Sprintf("Configured service %s does not exist", svc)) - // Add the service to the service list as it would fail later when deploying and - // Update the statuses accordingly. - dedupedServices = append(dedupedServices, svc) - continue + if !k8s_errors.IsNotFound(err) { + return dedupedServices, updatedglobalServices, err + } + return dedupedServices, updatedglobalServices, &MissingServiceError{ + NodeSet: nodeSetName, + Service: svc, + Err: err, } - return dedupedServices, updatedglobalServices, err - } serviceType := service.Spec.EDPMServiceType diff --git a/test/functional/dataplane/openstackdataplanedeployment_controller_test.go b/test/functional/dataplane/openstackdataplanedeployment_controller_test.go index ec5cd209b6..2ebbf04195 100644 --- a/test/functional/dataplane/openstackdataplanedeployment_controller_test.go +++ b/test/functional/dataplane/openstackdataplanedeployment_controller_test.go @@ -165,6 +165,7 @@ var _ = Describe("Dataplane Deployment Test", func() { DeploymentRequeueTime: 15, ServicesOverride: nil, AnsibleEEEnvConfigMapName: "openstack-aee-default-env", + FallbackToListOrder: ptr.To(true), } Expect(dataplaneDeploymentInstance.Spec).Should(Equal(expectedSpec)) }) @@ -362,6 +363,7 @@ var _ = Describe("Dataplane Deployment Test", func() { DeploymentRequeueTime: 15, ServicesOverride: nil, AnsibleEEEnvConfigMapName: "openstack-aee-default-env", + FallbackToListOrder: ptr.To(true), } Expect(dataplaneDeploymentInstance.Spec).Should(Equal(expectedSpec)) }) @@ -573,6 +575,7 @@ var _ = Describe("Dataplane Deployment Test", func() { DeploymentRequeueTime: 15, ServicesOverride: nil, AnsibleEEEnvConfigMapName: "openstack-aee-default-env", + FallbackToListOrder: ptr.To(true), } Expect(dataplaneDeploymentInstance.Spec).Should(Equal(expectedSpec)) }) @@ -688,6 +691,7 @@ var _ = Describe("Dataplane Deployment Test", func() { DeploymentRequeueTime: 15, ServicesOverride: nil, AnsibleEEEnvConfigMapName: "openstack-aee-default-env", + FallbackToListOrder: ptr.To(true), } Expect(dataplaneDeploymentInstance.Spec).Should(Equal(expectedSpec)) }) @@ -841,6 +845,7 @@ var _ = Describe("Dataplane Deployment Test", func() { DeploymentRequeueTime: 15, ServicesOverride: nil, AnsibleEEEnvConfigMapName: "openstack-aee-default-env", + FallbackToListOrder: ptr.To(true), } Expect(dataplaneDeploymentInstance.Spec).Should(Equal(expectedSpec)) }) @@ -1005,6 +1010,7 @@ var _ = Describe("Dataplane Deployment Test", func() { DeploymentRequeueTime: 15, ServicesOverride: []string{dataplaneServiceName.Name, "duplicate-service"}, AnsibleEEEnvConfigMapName: "openstack-aee-default-env", + FallbackToListOrder: ptr.To(true), } Expect(dataplaneDeploymentInstance.Spec).Should(Equal(expectedSpec)) }) @@ -1168,6 +1174,7 @@ var _ = Describe("Dataplane Deployment Test", func() { DeploymentRequeueTime: 15, ServicesOverride: []string{"global-service"}, AnsibleEEEnvConfigMapName: "openstack-aee-default-env", + FallbackToListOrder: ptr.To(true), } Expect(dataplaneDeploymentInstance.Spec).Should(Equal(expectedSpec)) }) @@ -1377,6 +1384,7 @@ var _ = Describe("Dataplane Deployment Test", func() { DeploymentRequeueTime: 15, ServicesOverride: []string{"foo-service", "global-service", "foo-update-service"}, AnsibleEEEnvConfigMapName: "openstack-aee-default-env", + FallbackToListOrder: ptr.To(true), } Expect(dataplaneDeploymentInstance.Spec).Should(Equal(expectedSpec)) }) @@ -1541,6 +1549,7 @@ var _ = Describe("Dataplane Deployment Test", func() { DeploymentRequeueTime: 15, ServicesOverride: nil, AnsibleEEEnvConfigMapName: "openstack-aee-default-env", + FallbackToListOrder: ptr.To(true), } Expect(dataplaneDeploymentInstance.Spec).Should(Equal(expectedSpec)) }) @@ -1644,6 +1653,7 @@ var _ = Describe("Dataplane Deployment Test", func() { BackoffLimit: ptr.To(int32(6)), PreserveJobs: true, AnsibleEEEnvConfigMapName: "openstack-aee-default-env", + FallbackToListOrder: ptr.To(true), } Expect(dataplaneDeploymentInstance.Spec).Should(Equal(expectedSpec)) }) @@ -1746,6 +1756,7 @@ var _ = Describe("Dataplane Deployment Test", func() { BackoffLimit: ptr.To(int32(6)), PreserveJobs: true, AnsibleEEEnvConfigMapName: "openstack-aee-default-env", + FallbackToListOrder: ptr.To(true), } Expect(dataplaneDeploymentInstance.Spec).Should(Equal(expectedSpec)) }) diff --git a/test/kuttl/tests/dataplane-deploy-global-service-test/01-assert.yaml b/test/kuttl/tests/dataplane-deploy-global-service-test/01-assert.yaml index 7b83c65a14..71fa3be8d4 100644 --- a/test/kuttl/tests/dataplane-deploy-global-service-test/01-assert.yaml +++ b/test/kuttl/tests/dataplane-deploy-global-service-test/01-assert.yaml @@ -743,16 +743,16 @@ spec: name: bootstrap-combined-ca-bundle - mountPath: /var/lib/openstack/cacerts/ovn name: ovn-combined-ca-bundle - - mountPath: /var/lib/openstack/cacerts/neutron-metadata - name: neutron-metadata-combined-ca-bundle - - mountPath: /var/lib/openstack/cacerts/neutron-ovn - name: neutron-ovn-combined-ca-bundle - mountPath: /var/lib/openstack/cacerts/neutron-sriov name: neutron-sriov-combined-ca-bundle - mountPath: /var/lib/openstack/cacerts/neutron-dhcp name: neutron-dhcp-combined-ca-bundle - mountPath: /var/lib/openstack/cacerts/libvirt name: libvirt-combined-ca-bundle + - mountPath: /var/lib/openstack/cacerts/neutron-metadata + name: neutron-metadata-combined-ca-bundle + - mountPath: /var/lib/openstack/cacerts/neutron-ovn + name: neutron-ovn-combined-ca-bundle - mountPath: /var/lib/openstack/cacerts/nova name: nova-combined-ca-bundle - mountPath: /var/lib/openstack/cacerts/custom-global-service @@ -778,23 +778,23 @@ spec: secret: defaultMode: 420 secretName: combined-ca-bundle - - name: neutron-metadata-combined-ca-bundle + - name: neutron-sriov-combined-ca-bundle secret: defaultMode: 420 secretName: combined-ca-bundle - - name: neutron-ovn-combined-ca-bundle + - name: neutron-dhcp-combined-ca-bundle secret: defaultMode: 420 secretName: combined-ca-bundle - - name: neutron-sriov-combined-ca-bundle + - name: libvirt-combined-ca-bundle secret: defaultMode: 420 secretName: combined-ca-bundle - - name: neutron-dhcp-combined-ca-bundle + - name: neutron-metadata-combined-ca-bundle secret: defaultMode: 420 secretName: combined-ca-bundle - - name: libvirt-combined-ca-bundle + - name: neutron-ovn-combined-ca-bundle secret: defaultMode: 420 secretName: combined-ca-bundle diff --git a/test/kuttl/tests/dataplane-deploy-no-nodes-test/01-assert.yaml b/test/kuttl/tests/dataplane-deploy-no-nodes-test/01-assert.yaml index b10ef96481..ca68ab94f6 100644 --- a/test/kuttl/tests/dataplane-deploy-no-nodes-test/01-assert.yaml +++ b/test/kuttl/tests/dataplane-deploy-no-nodes-test/01-assert.yaml @@ -641,16 +641,16 @@ spec: name: bootstrap-combined-ca-bundle - mountPath: /var/lib/openstack/cacerts/ovn name: ovn-combined-ca-bundle - - mountPath: /var/lib/openstack/cacerts/neutron-metadata - name: neutron-metadata-combined-ca-bundle - - mountPath: /var/lib/openstack/cacerts/neutron-ovn - name: neutron-ovn-combined-ca-bundle - mountPath: /var/lib/openstack/cacerts/neutron-sriov name: neutron-sriov-combined-ca-bundle - mountPath: /var/lib/openstack/cacerts/neutron-dhcp name: neutron-dhcp-combined-ca-bundle - mountPath: /var/lib/openstack/cacerts/libvirt name: libvirt-combined-ca-bundle + - mountPath: /var/lib/openstack/cacerts/neutron-metadata + name: neutron-metadata-combined-ca-bundle + - mountPath: /var/lib/openstack/cacerts/neutron-ovn + name: neutron-ovn-combined-ca-bundle - mountPath: /var/lib/openstack/cacerts/nova name: nova-combined-ca-bundle - mountPath: /runner/env/ssh_key/ssh_key_edpm-compute-no-nodes @@ -674,23 +674,23 @@ spec: secret: defaultMode: 420 secretName: combined-ca-bundle - - name: neutron-metadata-combined-ca-bundle + - name: neutron-sriov-combined-ca-bundle secret: defaultMode: 420 secretName: combined-ca-bundle - - name: neutron-ovn-combined-ca-bundle + - name: neutron-dhcp-combined-ca-bundle secret: defaultMode: 420 secretName: combined-ca-bundle - - name: neutron-sriov-combined-ca-bundle + - name: libvirt-combined-ca-bundle secret: defaultMode: 420 secretName: combined-ca-bundle - - name: neutron-dhcp-combined-ca-bundle + - name: neutron-metadata-combined-ca-bundle secret: defaultMode: 420 secretName: combined-ca-bundle - - name: libvirt-combined-ca-bundle + - name: neutron-ovn-combined-ca-bundle secret: defaultMode: 420 secretName: combined-ca-bundle