From 32100c16fa092ff2effe6507add6638ced20a74f Mon Sep 17 00:00:00 2001 From: Luca Miccini Date: Mon, 20 Apr 2026 16:40:24 +0200 Subject: [PATCH 1/4] Add OVNDBBackup and OVNDBRestore CRDs for managed backup/restore MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Introduces two new Custom Resource Definitions for automated OVN database backup and restore operations: OVNDBBackup: - Scheduled backups via CronJob using ovsdb-client backup - Configurable retention policy - TLS support for database connections - Persistent storage for backup files (survives CR deletion) OVNDBRestore: - Phase-based state machine: Validating → ScalingDown → Restoring → ScalingUp → Completed - Annotation-based replica override prevents higher-level operators from interfering during restore - Force-deletes pods during scale-down (preStop hooks hang when all RAFT members terminate simultaneously) - Deletes non-pod-0 PVCs to prevent stale RAFT membership state - Copies standalone backup to pod-0 PVC and lets ovn-ctl handle the standalone-to-clustered conversion on startup - Controlled scale-up: pod-0 first, then remaining replicas - Post-restore DB verification via exec Also includes: - Functional tests for both controllers - Webhooks with validation - Documentation Co-Authored-By: Claude Opus 4.6 --- PROJECT | 26 + api/bases/ovn.openstack.org_ovndbbackups.yaml | 134 ++++ .../ovn.openstack.org_ovndbrestores.yaml | 128 ++++ api/test/helpers/crd.go | 113 +++ api/v1beta1/conditions.go | 60 ++ api/v1beta1/ovndbbackup_types.go | 104 +++ api/v1beta1/ovndbbackup_webhook.go | 82 +++ api/v1beta1/ovndbcluster_types.go | 5 + api/v1beta1/ovndbrestore_types.go | 107 +++ api/v1beta1/ovndbrestore_webhook.go | 82 +++ api/v1beta1/zz_generated.deepcopy.go | 217 ++++++ cmd/main.go | 25 + .../bases/ovn.openstack.org_ovndbbackups.yaml | 134 ++++ .../ovn.openstack.org_ovndbrestores.yaml | 128 ++++ config/crd/kustomization.yaml | 2 + 
.../ovn-operator.clusterserviceversion.yaml | 10 + config/rbac/role.yaml | 9 + config/samples/ovn_v1beta1_ovndbbackup.yaml | 9 + config/samples/ovn_v1beta1_ovndbrestore.yaml | 6 + config/webhook/manifests.yaml | 80 +++ docs/backup-restore.md | 141 ++++ go.mod | 4 + go.sum | 10 + hack/crd-schema-checker.sh | 5 +- internal/controller/ovndbbackup_controller.go | 358 ++++++++++ .../controller/ovndbcluster_controller.go | 13 + .../controller/ovndbrestore_controller.go | 647 ++++++++++++++++++ internal/controller/ovnnorthd_controller.go | 2 +- internal/ovndbbackup/const.go | 22 + internal/ovndbbackup/cronjob.go | 110 +++ internal/ovndbbackup/pvc.go | 57 ++ internal/ovndbbackup/restorejob.go | 102 +++ internal/ovndbbackup/utils.go | 74 ++ internal/ovndbbackup/volumes.go | 56 ++ .../webhook/v1beta1/ovndbbackup_webhook.go | 108 +++ .../webhook/v1beta1/ovndbrestore_webhook.go | 108 +++ templates/ovndbbackup/bin/backup_ovndb | 43 ++ templates/ovndbrestore/bin/restore_ovndb | 24 + test/functional/base_test.go | 43 ++ .../functional/ovndbbackup_controller_test.go | 281 ++++++++ .../ovndbrestore_controller_test.go | 141 ++++ test/functional/suite_test.go | 21 + 42 files changed, 3829 insertions(+), 2 deletions(-) create mode 100644 api/bases/ovn.openstack.org_ovndbbackups.yaml create mode 100644 api/bases/ovn.openstack.org_ovndbrestores.yaml create mode 100644 api/v1beta1/ovndbbackup_types.go create mode 100644 api/v1beta1/ovndbbackup_webhook.go create mode 100644 api/v1beta1/ovndbrestore_types.go create mode 100644 api/v1beta1/ovndbrestore_webhook.go create mode 100644 config/crd/bases/ovn.openstack.org_ovndbbackups.yaml create mode 100644 config/crd/bases/ovn.openstack.org_ovndbrestores.yaml create mode 100644 config/samples/ovn_v1beta1_ovndbbackup.yaml create mode 100644 config/samples/ovn_v1beta1_ovndbrestore.yaml create mode 100644 docs/backup-restore.md create mode 100644 internal/controller/ovndbbackup_controller.go create mode 100644 
internal/controller/ovndbrestore_controller.go create mode 100644 internal/ovndbbackup/const.go create mode 100644 internal/ovndbbackup/cronjob.go create mode 100644 internal/ovndbbackup/pvc.go create mode 100644 internal/ovndbbackup/restorejob.go create mode 100644 internal/ovndbbackup/utils.go create mode 100644 internal/ovndbbackup/volumes.go create mode 100644 internal/webhook/v1beta1/ovndbbackup_webhook.go create mode 100644 internal/webhook/v1beta1/ovndbrestore_webhook.go create mode 100755 templates/ovndbbackup/bin/backup_ovndb create mode 100755 templates/ovndbrestore/bin/restore_ovndb create mode 100644 test/functional/ovndbbackup_controller_test.go create mode 100644 test/functional/ovndbrestore_controller_test.go diff --git a/PROJECT b/PROJECT index 585d91b4..eadf8744 100644 --- a/PROJECT +++ b/PROJECT @@ -50,4 +50,30 @@ resources: defaulting: true validation: true webhookVersion: v1 +- api: + crdVersion: v1 + namespaced: true + controller: true + domain: openstack.org + group: ovn + kind: OVNDBBackup + path: github.com/openstack-k8s-operators/ovn-operator/api/v1beta1 + version: v1beta1 + webhooks: + defaulting: true + validation: true + webhookVersion: v1 +- api: + crdVersion: v1 + namespaced: true + controller: true + domain: openstack.org + group: ovn + kind: OVNDBRestore + path: github.com/openstack-k8s-operators/ovn-operator/api/v1beta1 + version: v1beta1 + webhooks: + defaulting: true + validation: true + webhookVersion: v1 version: "3" diff --git a/api/bases/ovn.openstack.org_ovndbbackups.yaml b/api/bases/ovn.openstack.org_ovndbbackups.yaml new file mode 100644 index 00000000..aa74e7d7 --- /dev/null +++ b/api/bases/ovn.openstack.org_ovndbbackups.yaml @@ -0,0 +1,134 @@ +--- +apiVersion: apiextensions.k8s.io/v1 +kind: CustomResourceDefinition +metadata: + annotations: + controller-gen.kubebuilder.io/version: v0.18.0 + name: ovndbbackups.ovn.openstack.org +spec: + group: ovn.openstack.org + names: + kind: OVNDBBackup + listKind: OVNDBBackupList + 
plural: ovndbbackups + singular: ovndbbackup + scope: Namespaced + versions: + - additionalPrinterColumns: + - description: Status + jsonPath: .status.conditions[0].status + name: Status + type: string + - description: Message + jsonPath: .status.conditions[0].message + name: Message + type: string + name: v1beta1 + schema: + openAPIV3Schema: + description: OVNDBBackup is the Schema for the ovndbbackups API + properties: + apiVersion: + description: |- + APIVersion defines the versioned schema of this representation of an object. + Servers should convert recognized schemas to the latest internal value, and + may reject unrecognized values. + More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#resources + type: string + kind: + description: |- + Kind is a string value representing the REST resource this object represents. + Servers may infer this from the endpoint the client submits requests to. + Cannot be updated. + In CamelCase. + More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds + type: string + metadata: + type: object + spec: + description: OVNDBBackupSpec defines the desired state of OVNDBBackup + properties: + databaseInstance: + description: DatabaseInstance - Name of the OVNDBCluster CR to back + up + type: string + retention: + description: Retention - duration after which old backups are cleaned + up from disk + type: string + schedule: + default: '@daily' + description: Schedule in Cron format for periodic backups + type: string + storageClass: + description: StorageClass for the backup PVC (defaults to the OVNDBCluster's + StorageClass) + type: string + storageRequest: + description: StorageRequest for the backup PVC (defaults to the OVNDBCluster's + StorageRequest) + type: string + required: + - databaseInstance + - schedule + type: object + status: + description: OVNDBBackupStatus defines the observed state of OVNDBBackup + properties: + conditions: + 
description: Conditions + items: + description: Condition defines an observation of a API resource + operational state. + properties: + lastTransitionTime: + description: |- + Last time the condition transitioned from one status to another. + This should be when the underlying condition changed. If that is not known, then using the time when + the API field changed is acceptable. + format: date-time + type: string + message: + description: A human readable message indicating details about + the transition. + type: string + reason: + description: The reason for the condition's last transition + in CamelCase. + type: string + severity: + description: |- + Severity provides a classification of Reason code, so the current situation is immediately + understandable and could act accordingly. + It is meant for situations where Status=False and it should be indicated if it is just + informational, warning (next reconciliation might fix it) or an error (e.g. DB create issue + and no actions to automatically resolve the issue can/should be done). + For conditions where Status=Unknown or Status=True the Severity should be SeverityNone. + type: string + status: + description: Status of the condition, one of True, False, Unknown. + type: string + type: + description: Type of condition in CamelCase. 
+ type: string + required: + - lastTransitionTime + - status + - type + type: object + type: array + hash: + additionalProperties: + type: string + description: Map of hashes to track input changes + type: object + observedGeneration: + description: ObservedGeneration - the most recent generation observed + format: int64 + type: integer + type: object + type: object + served: true + storage: true + subresources: + status: {} diff --git a/api/bases/ovn.openstack.org_ovndbrestores.yaml b/api/bases/ovn.openstack.org_ovndbrestores.yaml new file mode 100644 index 00000000..b9975130 --- /dev/null +++ b/api/bases/ovn.openstack.org_ovndbrestores.yaml @@ -0,0 +1,128 @@ +--- +apiVersion: apiextensions.k8s.io/v1 +kind: CustomResourceDefinition +metadata: + annotations: + controller-gen.kubebuilder.io/version: v0.18.0 + name: ovndbrestores.ovn.openstack.org +spec: + group: ovn.openstack.org + names: + kind: OVNDBRestore + listKind: OVNDBRestoreList + plural: ovndbrestores + singular: ovndbrestore + scope: Namespaced + versions: + - additionalPrinterColumns: + - description: Phase + jsonPath: .status.phase + name: Phase + type: string + - description: Status + jsonPath: .status.conditions[0].status + name: Status + type: string + - description: Message + jsonPath: .status.conditions[0].message + name: Message + type: string + name: v1beta1 + schema: + openAPIV3Schema: + description: OVNDBRestore is the Schema for the ovndbrestores API + properties: + apiVersion: + description: |- + APIVersion defines the versioned schema of this representation of an object. + Servers should convert recognized schemas to the latest internal value, and + may reject unrecognized values. + More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#resources + type: string + kind: + description: |- + Kind is a string value representing the REST resource this object represents. + Servers may infer this from the endpoint the client submits requests to. 
+ Cannot be updated. + In CamelCase. + More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds + type: string + metadata: + type: object + spec: + description: OVNDBRestoreSpec defines the desired state of OVNDBRestore + properties: + backupSource: + description: BackupSource - Name of the OVNDBBackup CR to restore + from + type: string + required: + - backupSource + type: object + status: + description: OVNDBRestoreStatus defines the observed state of OVNDBRestore + properties: + conditions: + description: Conditions + items: + description: Condition defines an observation of a API resource + operational state. + properties: + lastTransitionTime: + description: |- + Last time the condition transitioned from one status to another. + This should be when the underlying condition changed. If that is not known, then using the time when + the API field changed is acceptable. + format: date-time + type: string + message: + description: A human readable message indicating details about + the transition. + type: string + reason: + description: The reason for the condition's last transition + in CamelCase. + type: string + severity: + description: |- + Severity provides a classification of Reason code, so the current situation is immediately + understandable and could act accordingly. + It is meant for situations where Status=False and it should be indicated if it is just + informational, warning (next reconciliation might fix it) or an error (e.g. DB create issue + and no actions to automatically resolve the issue can/should be done). + For conditions where Status=Unknown or Status=True the Severity should be SeverityNone. + type: string + status: + description: Status of the condition, one of True, False, Unknown. + type: string + type: + description: Type of condition in CamelCase. 
+ type: string + required: + - lastTransitionTime + - status + - type + type: object + type: array + hash: + additionalProperties: + type: string + description: Map of hashes to track input changes + type: object + observedGeneration: + description: ObservedGeneration - the most recent generation observed + format: int64 + type: integer + originalReplicas: + description: OriginalReplicas - the replica count saved before scale-down + format: int32 + type: integer + phase: + description: Phase - current phase of the restore operation + type: string + type: object + type: object + served: true + storage: true + subresources: + status: {} diff --git a/api/test/helpers/crd.go b/api/test/helpers/crd.go index bb9eb023..0dcf5b1c 100644 --- a/api/test/helpers/crd.go +++ b/api/test/helpers/crd.go @@ -309,3 +309,116 @@ func (th *TestHelper) SimulateOVNControllerReady(name types.NamespacedName) { }, th.Timeout, th.Interval).Should(gomega.Succeed()) th.Logger.Info("Simulated GetOVNController ready", "on", name) } + +// CreateOVNDBBackup creates a new OVNDBBackup instance with the specified +// name and namespace in the Kubernetes cluster. If name is not passed (nil), +// it will be autogenerated. +func (th *TestHelper) CreateOVNDBBackup(backupName *string, namespace string, spec ovnv1.OVNDBBackupSpec) types.NamespacedName { + name := "ovndbbackup-" + uuid.New().String() + if backupName != nil { + name = *backupName + } + ovnDBBackup := &ovnv1.OVNDBBackup{ + TypeMeta: metav1.TypeMeta{ + APIVersion: "ovn.openstack.org/v1beta1", + Kind: "OVNDBBackup", + }, + ObjectMeta: metav1.ObjectMeta{ + Name: name, + Namespace: namespace, + }, + Spec: spec, + } + + gomega.Expect(th.K8sClient.Create(th.Ctx, ovnDBBackup)).Should(gomega.Succeed()) + th.Logger.Info("OVNDBBackup created", "OVNDBBackup", name) + return types.NamespacedName{Namespace: namespace, Name: name} +} + +// DeleteOVNDBBackup deletes an OVNDBBackup resource from the Kubernetes cluster. 
+func (th *TestHelper) DeleteOVNDBBackup(name types.NamespacedName) {
+	gomega.Eventually(func(g gomega.Gomega) {
+		ovnDBBackup := &ovnv1.OVNDBBackup{}
+		err := th.K8sClient.Get(th.Ctx, name, ovnDBBackup)
+		if k8s_errors.IsNotFound(err) {
+			return
+		}
+		g.Expect(err).NotTo(gomega.HaveOccurred())
+		// Request deletion, then require the follow-up Get to report NotFound.
+		g.Expect(th.K8sClient.Delete(th.Ctx, ovnDBBackup)).Should(gomega.Succeed())
+
+		err = th.K8sClient.Get(th.Ctx, name, ovnDBBackup)
+		g.Expect(k8s_errors.IsNotFound(err)).To(gomega.BeTrue())
+	}, th.Timeout, th.Interval).Should(gomega.Succeed())
+}
+
+// GetOVNDBBackup polls until the OVNDBBackup identified by name can be read and returns it.
+func (th *TestHelper) GetOVNDBBackup(name types.NamespacedName) *ovnv1.OVNDBBackup {
+	instance := &ovnv1.OVNDBBackup{}
+	gomega.Eventually(func(g gomega.Gomega) {
+		g.Expect(th.K8sClient.Get(th.Ctx, name, instance)).Should(gomega.Succeed())
+	}, th.Timeout, th.Interval).Should(gomega.Succeed())
+	return instance
+}
+
+// SimulateOVNDBBackupReady copies Generation into Status.ObservedGeneration and marks ReadyCondition True via a status update.
+func (th *TestHelper) SimulateOVNDBBackupReady(name types.NamespacedName) {
+	gomega.Eventually(func(g gomega.Gomega) {
+		backup := th.GetOVNDBBackup(name)
+		backup.Status.ObservedGeneration = backup.Generation
+		backup.Status.Conditions.MarkTrue(condition.ReadyCondition, "Ready")
+		g.Expect(th.K8sClient.Status().Update(th.Ctx, backup)).To(gomega.Succeed())
+	}, th.Timeout, th.Interval).Should(gomega.Succeed())
+	th.Logger.Info("Simulated OVNDBBackup ready", "on", name)
+}
+
+// CreateOVNDBRestore creates a new OVNDBRestore instance with the specified
+// name and namespace in the Kubernetes cluster. If name is not passed (nil),
+// it will be autogenerated.
+func (th *TestHelper) CreateOVNDBRestore(restoreName *string, namespace string, spec ovnv1.OVNDBRestoreSpec) types.NamespacedName {
+	name := "ovndbrestore-" + uuid.New().String()
+	if restoreName != nil {
+		name = *restoreName
+	}
+	ovnDBRestore := &ovnv1.OVNDBRestore{
+		TypeMeta: metav1.TypeMeta{
+			APIVersion: "ovn.openstack.org/v1beta1",
+			Kind:       "OVNDBRestore",
+		},
+		ObjectMeta: metav1.ObjectMeta{
+			Name:      name,
+			Namespace: namespace,
+		},
+		Spec: spec,
+	}
+
+	gomega.Expect(th.K8sClient.Create(th.Ctx, ovnDBRestore)).Should(gomega.Succeed())
+	th.Logger.Info("OVNDBRestore created", "OVNDBRestore", name)
+	return types.NamespacedName{Namespace: namespace, Name: name}
+}
+
+// DeleteOVNDBRestore deletes the named OVNDBRestore and waits until a Get reports it as NotFound.
+func (th *TestHelper) DeleteOVNDBRestore(name types.NamespacedName) {
+	gomega.Eventually(func(g gomega.Gomega) {
+		ovnDBRestore := &ovnv1.OVNDBRestore{}
+		err := th.K8sClient.Get(th.Ctx, name, ovnDBRestore)
+		if k8s_errors.IsNotFound(err) {
+			return
+		}
+		g.Expect(err).NotTo(gomega.HaveOccurred())
+		// Request deletion, then require the follow-up Get to report NotFound.
+		g.Expect(th.K8sClient.Delete(th.Ctx, ovnDBRestore)).Should(gomega.Succeed())
+
+		err = th.K8sClient.Get(th.Ctx, name, ovnDBRestore)
+		g.Expect(k8s_errors.IsNotFound(err)).To(gomega.BeTrue())
+	}, th.Timeout, th.Interval).Should(gomega.Succeed())
+}
+
+// GetOVNDBRestore polls until the OVNDBRestore identified by name can be read and returns it.
+func (th *TestHelper) GetOVNDBRestore(name types.NamespacedName) *ovnv1.OVNDBRestore {
+	instance := &ovnv1.OVNDBRestore{}
+	gomega.Eventually(func(g gomega.Gomega) {
+		g.Expect(th.K8sClient.Get(th.Ctx, name, instance)).Should(gomega.Succeed())
+	}, th.Timeout, th.Interval).Should(gomega.Succeed())
+	return instance
+}
diff --git a/api/v1beta1/conditions.go b/api/v1beta1/conditions.go
index 86deeb99..c792be28 100644
--- a/api/v1beta1/conditions.go
+++ b/api/v1beta1/conditions.go
@@ -24,6 +24,21 @@ import (
 const (
 	// ExternalConfigReadyCondition indicates when the external config (e.g. ovncontroller-config ConfigMap) is ready
 	ExternalConfigReadyCondition condition.Type = "External Config Ready"
+
+	// OVNDBClusterReadyCondition indicates when the referenced OVNDBCluster exists and is available
+	OVNDBClusterReadyCondition condition.Type = "OVNDBClusterReady"
+
+	// PersistentVolumeClaimReadyCondition indicates when the backup PVC is ready
+	PersistentVolumeClaimReadyCondition condition.Type = "PersistentVolumeClaimReady"
+
+	// CronJobReadyCondition indicates when the backup CronJob is ready
+	CronJobReadyCondition condition.Type = "CronJobReady"
+
+	// OVNDBBackupReadyCondition indicates when the referenced OVNDBBackup is ready (used by restore)
+	OVNDBBackupReadyCondition condition.Type = "OVNDBBackupReady"
+
+	// RestoreJobReadyCondition indicates when the restore Job has completed
+	RestoreJobReadyCondition condition.Type = "RestoreJobReady"
 )
 
 // Common messages used by API objects.
@@ -33,4 +48,49 @@ const (
 
 	// ExternalConfigErrorMessage is the error message format for ExternalConfigReadyCondition
 	ExternalConfigErrorMessage = "External config generation error: %s"
+
+	// OVNDBClusterReadyInitMessage is the message text stating that the OVNDBCluster is not yet available
+	OVNDBClusterReadyInitMessage = "OVNDBCluster not yet available"
+
+	// OVNDBClusterReadyMessage is the message text stating that the OVNDBCluster is available
+	OVNDBClusterReadyMessage = "OVNDBCluster is available"
+
+	// OVNDBClusterReadyErrorMessage is the OVNDBCluster error message format; it takes one %s argument
+	OVNDBClusterReadyErrorMessage = "OVNDBCluster error occurred %s"
+
+	// PersistentVolumeClaimReadyInitMessage is the message text stating that the PersistentVolumeClaim is not yet created
+	PersistentVolumeClaimReadyInitMessage = "PersistentVolumeClaim not yet created"
+
+	// PersistentVolumeClaimReadyMessage is the message text stating that the PersistentVolumeClaim was created
+	PersistentVolumeClaimReadyMessage = "PersistentVolumeClaim created"
+
+	// PersistentVolumeClaimReadyErrorMessage is the PersistentVolumeClaim error message format; it takes one %s argument
+	PersistentVolumeClaimReadyErrorMessage = "PersistentVolumeClaim error occurred %s"
+
+	// CronJobReadyInitMessage is the message text stating that the CronJob is not yet created
+	CronJobReadyInitMessage = "CronJob not yet created"
+
+	// CronJobReadyMessage is the message text stating that the CronJob was created
+	CronJobReadyMessage = "CronJob created"
+
+	// CronJobReadyErrorMessage is the CronJob error message format; it takes one %s argument
+	CronJobReadyErrorMessage = "CronJob error occurred %s"
+
+	// OVNDBBackupReadyInitMessage is the message text stating that the OVNDBBackup is not yet available
+	OVNDBBackupReadyInitMessage = "OVNDBBackup not yet available"
+
+	// OVNDBBackupReadyMessage is the message text stating that the OVNDBBackup is available
+	OVNDBBackupReadyMessage = "OVNDBBackup is available"
+
+	// OVNDBBackupReadyErrorMessage is the OVNDBBackup error message format; it takes one %s argument
+	OVNDBBackupReadyErrorMessage = "OVNDBBackup error occurred %s"
+
+	// RestoreJobReadyInitMessage is the message text stating that the restore has not yet started
+	RestoreJobReadyInitMessage = "Restore not yet started"
+
+	// RestoreJobReadyMessage is the message text stating that the restore completed
+	RestoreJobReadyMessage = "Restore completed"
+
+	// RestoreJobReadyErrorMessage is the restore error message format; it takes one %s argument
+	RestoreJobReadyErrorMessage = "Restore error occurred %s"
 )
diff --git a/api/v1beta1/ovndbbackup_types.go b/api/v1beta1/ovndbbackup_types.go
new file mode 100644
index 00000000..3a6d8bef
--- /dev/null
+++ b/api/v1beta1/ovndbbackup_types.go
@@ -0,0 +1,104 @@
+/*
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+
+package v1beta1
+
+import (
+	"github.com/openstack-k8s-operators/lib-common/modules/common/condition"
+	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
+)
+
+// OVNDBBackupSpec defines the desired state of OVNDBBackup
+type OVNDBBackupSpec struct {
+	// +kubebuilder:validation:Required
+	// DatabaseInstance - Name of the OVNDBCluster CR to back up
+	DatabaseInstance string `json:"databaseInstance"`
+
+	// +kubebuilder:validation:Optional
+	// StorageClass for the backup PVC (defaults to the OVNDBCluster's StorageClass)
+	StorageClass string `json:"storageClass,omitempty"`
+
+	// +kubebuilder:validation:Optional
+	// StorageRequest for the backup PVC (defaults to the OVNDBCluster's StorageRequest)
+	StorageRequest string `json:"storageRequest,omitempty"`
+
+	// +kubebuilder:validation:Required
+	// +kubebuilder:default="@daily"
+	// Schedule in Cron format for periodic backups
+	Schedule string `json:"schedule"`
+
+	// +kubebuilder:validation:Optional
+	// Retention - duration after which old backups are cleaned up from disk
+	Retention *metav1.Duration `json:"retention,omitempty"`
+}
+
+// OVNDBBackupStatus defines the observed state of OVNDBBackup
+type OVNDBBackupStatus struct {
+	// Map of hashes to track input changes
+	Hash map[string]string `json:"hash,omitempty"`
+
+	// Conditions
+	Conditions condition.Conditions `json:"conditions,omitempty" optional:"true"`
+
+	// ObservedGeneration - the most recent generation observed
+	ObservedGeneration int64 `json:"observedGeneration,omitempty"`
+}
+
+//+kubebuilder:object:root=true
+//+kubebuilder:subresource:status
+//+kubebuilder:printcolumn:name="Status",type="string",JSONPath=".status.conditions[0].status",description="Status"
+//+kubebuilder:printcolumn:name="Message",type="string",JSONPath=".status.conditions[0].message",description="Message"
+
+// OVNDBBackup is the Schema for the ovndbbackups API
+type OVNDBBackup struct {
+	metav1.TypeMeta   `json:",inline"`
+	metav1.ObjectMeta `json:"metadata,omitempty"`
+
+	Spec   OVNDBBackupSpec   `json:"spec,omitempty"`
+	Status OVNDBBackupStatus `json:"status,omitempty"`
+}
+
+//+kubebuilder:object:root=true
+
+// OVNDBBackupList contains a list of OVNDBBackup
+type OVNDBBackupList struct {
+	metav1.TypeMeta `json:",inline"`
+	metav1.ListMeta `json:"metadata,omitempty"`
+	Items           []OVNDBBackup `json:"items"`
+}
+
+func init() {
+	SchemeBuilder.Register(&OVNDBBackup{}, &OVNDBBackupList{})
+}
+
+// IsReady reports whether ReadyCondition is True in the backup's status conditions
+func (instance OVNDBBackup) IsReady() bool {
+	return instance.Status.Conditions.IsTrue(condition.ReadyCondition)
+}
+
+// RbacConditionsSet sets condition c on the status conditions. NOTE(review): value receiver, so Set acts on a copy of instance - confirm callers see the update.
+func (instance OVNDBBackup) RbacConditionsSet(c *condition.Condition) {
+	instance.Status.Conditions.Set(c)
+}
+
+// RbacNamespace returns the namespace of the instance
+func (instance OVNDBBackup) RbacNamespace() string {
+	return instance.Namespace
+}
+
+// RbacResourceName returns "ovndbbackup-" plus the instance name, the name to be used for rbac objects (serviceaccount, role, rolebinding)
+func (instance OVNDBBackup) RbacResourceName() string {
+	return "ovndbbackup-" + instance.Name
+}
diff --git a/api/v1beta1/ovndbbackup_webhook.go b/api/v1beta1/ovndbbackup_webhook.go
new file mode 100644
index 00000000..95b72542
--- /dev/null
+++ b/api/v1beta1/ovndbbackup_webhook.go
@@ -0,0 +1,82 @@
+/*
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+
+package v1beta1
+
+import (
+	apierrors "k8s.io/apimachinery/pkg/api/errors"
+	"k8s.io/apimachinery/pkg/runtime"
+	"k8s.io/apimachinery/pkg/runtime/schema"
+	"k8s.io/apimachinery/pkg/util/validation/field"
+	logf "sigs.k8s.io/controller-runtime/pkg/log"
+	"sigs.k8s.io/controller-runtime/pkg/webhook"
+	"sigs.k8s.io/controller-runtime/pkg/webhook/admission"
+)
+
+var ovndbbackuplog = logf.Log.WithName("ovndbbackup-resource")
+
+var _ webhook.Defaulter = &OVNDBBackup{}
+
+// Default implements webhook.Defaulter so a webhook will be registered for the type; it only logs and sets no defaults
+func (r *OVNDBBackup) Default() {
+	ovndbbackuplog.Info("default", "name", r.Name)
+}
+
+var _ webhook.Validator = &OVNDBBackup{}
+
+// ValidateCreate implements webhook.Validator; it returns an Invalid error when spec.databaseInstance is empty
+func (r *OVNDBBackup) ValidateCreate() (admission.Warnings, error) {
+	ovndbbackuplog.Info("validate create", "name", r.Name)
+
+	errors := field.ErrorList{}
+	basePath := field.NewPath("spec")
+
+	if r.Spec.DatabaseInstance == "" {
+		errors = append(errors, field.Required(basePath.Child("databaseInstance"), "databaseInstance is required"))
+	}
+
+	if len(errors) != 0 {
+		return nil, apierrors.NewInvalid(
+			schema.GroupKind{Group: "ovn.openstack.org", Kind: "OVNDBBackup"},
+			r.Name, errors)
+	}
+	return nil, nil
+}
+
+// ValidateUpdate implements webhook.Validator; it forbids changing spec.databaseInstance and rejects an old object that is nil or not an *OVNDBBackup
+func (r *OVNDBBackup) ValidateUpdate(old runtime.Object) (admission.Warnings, error) {
+	ovndbbackuplog.Info("validate update", "name", r.Name)
+
+	// Reject, rather than silently skip the immutability check, when old is not a usable OVNDBBackup.
+	oldBackup, ok := old.(*OVNDBBackup)
+	if !ok || oldBackup == nil {
+		return nil, apierrors.NewBadRequest("unable to convert existing object to OVNDBBackup")
+	}
+
+	if oldBackup.Spec.DatabaseInstance != r.Spec.DatabaseInstance {
+		return nil, apierrors.NewInvalid(
+			schema.GroupKind{Group: "ovn.openstack.org", Kind: "OVNDBBackup"},
+			r.Name, field.ErrorList{
+				field.Forbidden(field.NewPath("spec").Child("databaseInstance"), "databaseInstance is immutable"),
+			})
+	}
+	return nil, nil
+}
+
+// ValidateDelete implements webhook.Validator so a webhook will be registered for the type; it only logs and never rejects
+func (r *OVNDBBackup) ValidateDelete() (admission.Warnings, error) {
+	ovndbbackuplog.Info("validate delete", "name", r.Name)
+	return nil, nil
+}
diff --git a/api/v1beta1/ovndbcluster_types.go b/api/v1beta1/ovndbcluster_types.go
index 16a7e89d..1958e16b 100644
--- a/api/v1beta1/ovndbcluster_types.go
+++ b/api/v1beta1/ovndbcluster_types.go
@@ -37,6 +37,11 @@ const (
 	SBDBType      = "SB"
 	ServiceNameSB = "ovsdbserver-sb"
 
+	// RestoreInProgressAnnotation is set on an OVNDBCluster during a restore
+	// to prevent the controller and higher-level operators from overriding
+	// the replica count while the StatefulSet is scaled down.
+	RestoreInProgressAnnotation = "ovn.openstack.org/restore-in-progress"
+
 	// ServiceHeadlessType - Constant to identify Headless services
 	ServiceHeadlessType = "headless"
 	// ServiceClusterType - Constant to identify Cluster services
diff --git a/api/v1beta1/ovndbrestore_types.go b/api/v1beta1/ovndbrestore_types.go
new file mode 100644
index 00000000..f8f33b76
--- /dev/null
+++ b/api/v1beta1/ovndbrestore_types.go
@@ -0,0 +1,107 @@
+/*
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+
+package v1beta1
+
+import (
+	"github.com/openstack-k8s-operators/lib-common/modules/common/condition"
+	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
+)
+
+// OVNDBRestorePhase represents the current phase of the restore operation
+type OVNDBRestorePhase string
+
+const (
+	// OVNDBRestorePhaseValidating - validating backup source and cluster
+	OVNDBRestorePhaseValidating OVNDBRestorePhase = "Validating"
+	// OVNDBRestorePhaseScalingDown - scaling the OVNDBCluster to 0
+	OVNDBRestorePhaseScalingDown OVNDBRestorePhase = "ScalingDown"
+	// OVNDBRestorePhaseRestoring - running the restore Job
+	OVNDBRestorePhaseRestoring OVNDBRestorePhase = "Restoring"
+	// OVNDBRestorePhaseScalingUp - scaling the OVNDBCluster back up
+	OVNDBRestorePhaseScalingUp OVNDBRestorePhase = "ScalingUp"
+	// OVNDBRestorePhaseCompleted - restore completed successfully
+	OVNDBRestorePhaseCompleted OVNDBRestorePhase = "Completed"
+	// OVNDBRestorePhaseFailed - restore failed
+	OVNDBRestorePhaseFailed OVNDBRestorePhase = "Failed"
+)
+
+// OVNDBRestoreSpec defines the desired state of OVNDBRestore
+type OVNDBRestoreSpec struct {
+	// +kubebuilder:validation:Required
+	// BackupSource - Name of the OVNDBBackup CR to restore from
+	BackupSource string `json:"backupSource"`
+}
+
+// OVNDBRestoreStatus defines the observed state of OVNDBRestore
+type OVNDBRestoreStatus struct {
+	// Map of hashes to track input changes
+	Hash map[string]string `json:"hash,omitempty"`
+
+	// Conditions
+	Conditions condition.Conditions `json:"conditions,omitempty" optional:"true"`
+
+	// ObservedGeneration - the most recent generation observed
+	ObservedGeneration int64 `json:"observedGeneration,omitempty"`
+
+	// OriginalReplicas - the replica count saved before scale-down
+	OriginalReplicas *int32 `json:"originalReplicas,omitempty"`
+
+	// Phase - current phase of the restore operation
+	Phase OVNDBRestorePhase `json:"phase,omitempty"`
+}
+
+//+kubebuilder:object:root=true
+//+kubebuilder:subresource:status
+//+kubebuilder:printcolumn:name="Phase",type="string",JSONPath=".status.phase",description="Phase"
+//+kubebuilder:printcolumn:name="Status",type="string",JSONPath=".status.conditions[0].status",description="Status"
+//+kubebuilder:printcolumn:name="Message",type="string",JSONPath=".status.conditions[0].message",description="Message"
+
+// OVNDBRestore is the Schema for the ovndbrestores API
+type OVNDBRestore struct {
+	metav1.TypeMeta   `json:",inline"`
+	metav1.ObjectMeta `json:"metadata,omitempty"`
+
+	Spec   OVNDBRestoreSpec   `json:"spec,omitempty"`
+	Status OVNDBRestoreStatus `json:"status,omitempty"`
+}
+
+//+kubebuilder:object:root=true
+
+// OVNDBRestoreList contains a list of OVNDBRestore
+type OVNDBRestoreList struct {
+	metav1.TypeMeta `json:",inline"`
+	metav1.ListMeta `json:"metadata,omitempty"`
+	Items           []OVNDBRestore `json:"items"`
+}
+
+func init() {
+	SchemeBuilder.Register(&OVNDBRestore{}, &OVNDBRestoreList{})
+}
+
+// RbacConditionsSet sets condition c on the status conditions. NOTE(review): value receiver, so Set acts on a copy of instance - confirm callers see the update.
+func (instance OVNDBRestore) RbacConditionsSet(c *condition.Condition) {
+	instance.Status.Conditions.Set(c)
+}
+
+// RbacNamespace returns the namespace of the instance
+func (instance OVNDBRestore) RbacNamespace() string {
+	return instance.Namespace
+}
+
+// RbacResourceName returns "ovndbrestore-" plus the instance name, the name to be used for rbac objects (serviceaccount, role, rolebinding)
+func (instance OVNDBRestore) RbacResourceName() string {
+	return "ovndbrestore-" + instance.Name
+}
diff --git a/api/v1beta1/ovndbrestore_webhook.go b/api/v1beta1/ovndbrestore_webhook.go
new file mode 100644
index 00000000..685238d2
--- /dev/null
+++ b/api/v1beta1/ovndbrestore_webhook.go
@@ -0,0 +1,82 @@
+/*
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package v1beta1 + +import ( + apierrors "k8s.io/apimachinery/pkg/api/errors" + "k8s.io/apimachinery/pkg/runtime" + "k8s.io/apimachinery/pkg/runtime/schema" + "k8s.io/apimachinery/pkg/util/validation/field" + logf "sigs.k8s.io/controller-runtime/pkg/log" + "sigs.k8s.io/controller-runtime/pkg/webhook" + "sigs.k8s.io/controller-runtime/pkg/webhook/admission" +) + +var ovndbrestorelog = logf.Log.WithName("ovndbrestore-resource") + +var _ webhook.Defaulter = &OVNDBRestore{} + +// Default implements webhook.Defaulter; it only logs the call and sets no default values +func (r *OVNDBRestore) Default() { + ovndbrestorelog.Info("default", "name", r.Name) +} + +var _ webhook.Validator = &OVNDBRestore{} + +// ValidateCreate implements webhook.Validator; it returns an Invalid API error when spec.backupSource is empty +func (r *OVNDBRestore) ValidateCreate() (admission.Warnings, error) { + ovndbrestorelog.Info("validate create", "name", r.Name) + + errors := field.ErrorList{} + basePath := field.NewPath("spec") + + if r.Spec.BackupSource == "" { + errors = append(errors, field.Required(basePath.Child("backupSource"), "backupSource is required")) + } + + if len(errors) != 0 { + return nil, apierrors.NewInvalid( + schema.GroupKind{Group: "ovn.openstack.org", Kind: "OVNDBRestore"}, + r.Name, errors) + } + return nil, nil +} + +// ValidateUpdate implements webhook.Validator; it rejects any change to spec.backupSource. NOTE(review): when old is not an *OVNDBRestore the comparison is skipped and the update is admitted +func (r *OVNDBRestore) ValidateUpdate(old runtime.Object) (admission.Warnings, error) { + ovndbrestorelog.Info("validate update", "name", r.Name) + + errors := field.ErrorList{} + basePath := 
field.NewPath("spec") + + oldRestore, ok := old.(*OVNDBRestore) + if ok && oldRestore.Spec.BackupSource != r.Spec.BackupSource { + errors = append(errors, field.Forbidden(basePath.Child("backupSource"), "backupSource is immutable")) + } + + if len(errors) != 0 { + return nil, apierrors.NewInvalid( + schema.GroupKind{Group: "ovn.openstack.org", Kind: "OVNDBRestore"}, + r.Name, errors) + } + return nil, nil +} + +// ValidateDelete implements webhook.Validator; it only logs the call and always admits the delete +func (r *OVNDBRestore) ValidateDelete() (admission.Warnings, error) { + ovndbrestorelog.Info("validate delete", "name", r.Name) + return nil, nil +} diff --git a/api/v1beta1/zz_generated.deepcopy.go b/api/v1beta1/zz_generated.deepcopy.go index 04e7bfed..9be53b62 100644 --- a/api/v1beta1/zz_generated.deepcopy.go +++ b/api/v1beta1/zz_generated.deepcopy.go @@ -24,6 +24,7 @@ import ( topologyv1beta1 "github.com/openstack-k8s-operators/infra-operator/apis/topology/v1beta1" "github.com/openstack-k8s-operators/lib-common/modules/common/condition" "github.com/openstack-k8s-operators/lib-common/modules/common/service" + "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/runtime" ) @@ -241,6 +242,114 @@ func (in *OVNControllerStatus) DeepCopy() *OVNControllerStatus { return out } +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. +func (in *OVNDBBackup) DeepCopyInto(out *OVNDBBackup) { + *out = *in + out.TypeMeta = in.TypeMeta + in.ObjectMeta.DeepCopyInto(&out.ObjectMeta) + in.Spec.DeepCopyInto(&out.Spec) + in.Status.DeepCopyInto(&out.Status) +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new OVNDBBackup.
+func (in *OVNDBBackup) DeepCopy() *OVNDBBackup { + if in == nil { + return nil + } + out := new(OVNDBBackup) + in.DeepCopyInto(out) + return out +} + +// DeepCopyObject is an autogenerated deepcopy function, copying the receiver, creating a new runtime.Object. +func (in *OVNDBBackup) DeepCopyObject() runtime.Object { + if c := in.DeepCopy(); c != nil { + return c + } + return nil +} + +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. +func (in *OVNDBBackupList) DeepCopyInto(out *OVNDBBackupList) { + *out = *in + out.TypeMeta = in.TypeMeta + in.ListMeta.DeepCopyInto(&out.ListMeta) + if in.Items != nil { + in, out := &in.Items, &out.Items + *out = make([]OVNDBBackup, len(*in)) + for i := range *in { + (*in)[i].DeepCopyInto(&(*out)[i]) + } + } +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new OVNDBBackupList. +func (in *OVNDBBackupList) DeepCopy() *OVNDBBackupList { + if in == nil { + return nil + } + out := new(OVNDBBackupList) + in.DeepCopyInto(out) + return out +} + +// DeepCopyObject is an autogenerated deepcopy function, copying the receiver, creating a new runtime.Object. +func (in *OVNDBBackupList) DeepCopyObject() runtime.Object { + if c := in.DeepCopy(); c != nil { + return c + } + return nil +} + +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. +func (in *OVNDBBackupSpec) DeepCopyInto(out *OVNDBBackupSpec) { + *out = *in + if in.Retention != nil { + in, out := &in.Retention, &out.Retention + *out = new(v1.Duration) + **out = **in + } +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new OVNDBBackupSpec. 
+func (in *OVNDBBackupSpec) DeepCopy() *OVNDBBackupSpec { + if in == nil { + return nil + } + out := new(OVNDBBackupSpec) + in.DeepCopyInto(out) + return out +} + +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. +func (in *OVNDBBackupStatus) DeepCopyInto(out *OVNDBBackupStatus) { + *out = *in + if in.Hash != nil { + in, out := &in.Hash, &out.Hash + *out = make(map[string]string, len(*in)) + for key, val := range *in { + (*out)[key] = val + } + } + if in.Conditions != nil { + in, out := &in.Conditions, &out.Conditions + *out = make(condition.Conditions, len(*in)) + for i := range *in { + (*in)[i].DeepCopyInto(&(*out)[i]) + } + } +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new OVNDBBackupStatus. +func (in *OVNDBBackupStatus) DeepCopy() *OVNDBBackupStatus { + if in == nil { + return nil + } + out := new(OVNDBBackupStatus) + in.DeepCopyInto(out) + return out +} + // DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. func (in *OVNDBCluster) DeepCopyInto(out *OVNDBCluster) { *out = *in @@ -446,6 +555,114 @@ func (in *OVNDBClusterStatus) DeepCopy() *OVNDBClusterStatus { return out } +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. +func (in *OVNDBRestore) DeepCopyInto(out *OVNDBRestore) { + *out = *in + out.TypeMeta = in.TypeMeta + in.ObjectMeta.DeepCopyInto(&out.ObjectMeta) + out.Spec = in.Spec + in.Status.DeepCopyInto(&out.Status) +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new OVNDBRestore. +func (in *OVNDBRestore) DeepCopy() *OVNDBRestore { + if in == nil { + return nil + } + out := new(OVNDBRestore) + in.DeepCopyInto(out) + return out +} + +// DeepCopyObject is an autogenerated deepcopy function, copying the receiver, creating a new runtime.Object. 
+func (in *OVNDBRestore) DeepCopyObject() runtime.Object { + if c := in.DeepCopy(); c != nil { + return c + } + return nil +} + +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. +func (in *OVNDBRestoreList) DeepCopyInto(out *OVNDBRestoreList) { + *out = *in + out.TypeMeta = in.TypeMeta + in.ListMeta.DeepCopyInto(&out.ListMeta) + if in.Items != nil { + in, out := &in.Items, &out.Items + *out = make([]OVNDBRestore, len(*in)) + for i := range *in { + (*in)[i].DeepCopyInto(&(*out)[i]) + } + } +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new OVNDBRestoreList. +func (in *OVNDBRestoreList) DeepCopy() *OVNDBRestoreList { + if in == nil { + return nil + } + out := new(OVNDBRestoreList) + in.DeepCopyInto(out) + return out +} + +// DeepCopyObject is an autogenerated deepcopy function, copying the receiver, creating a new runtime.Object. +func (in *OVNDBRestoreList) DeepCopyObject() runtime.Object { + if c := in.DeepCopy(); c != nil { + return c + } + return nil +} + +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. +func (in *OVNDBRestoreSpec) DeepCopyInto(out *OVNDBRestoreSpec) { + *out = *in +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new OVNDBRestoreSpec. +func (in *OVNDBRestoreSpec) DeepCopy() *OVNDBRestoreSpec { + if in == nil { + return nil + } + out := new(OVNDBRestoreSpec) + in.DeepCopyInto(out) + return out +} + +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. 
+func (in *OVNDBRestoreStatus) DeepCopyInto(out *OVNDBRestoreStatus) { + *out = *in + if in.Hash != nil { + in, out := &in.Hash, &out.Hash + *out = make(map[string]string, len(*in)) + for key, val := range *in { + (*out)[key] = val + } + } + if in.Conditions != nil { + in, out := &in.Conditions, &out.Conditions + *out = make(condition.Conditions, len(*in)) + for i := range *in { + (*in)[i].DeepCopyInto(&(*out)[i]) + } + } + if in.OriginalReplicas != nil { + in, out := &in.OriginalReplicas, &out.OriginalReplicas + *out = new(int32) + **out = **in + } +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new OVNDBRestoreStatus. +func (in *OVNDBRestoreStatus) DeepCopy() *OVNDBRestoreStatus { + if in == nil { + return nil + } + out := new(OVNDBRestoreStatus) + in.DeepCopyInto(out) + return out +} + // DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. func (in *OVNNorthd) DeepCopyInto(out *OVNNorthd) { *out = *in diff --git a/cmd/main.go b/cmd/main.go index 62a05372..8297d8eb 100644 --- a/cmd/main.go +++ b/cmd/main.go @@ -269,6 +269,23 @@ func main() { setupLog.Error(err, "unable to create controller", "controller", "OVNController") os.Exit(1) } + if err := (&controller.OVNDBBackupReconciler{ + Client: mgr.GetClient(), + Scheme: mgr.GetScheme(), + Kclient: kclient, + }).SetupWithManager(mgr); err != nil { + setupLog.Error(err, "unable to create controller", "controller", "OVNDBBackup") + os.Exit(1) + } + if err := (&controller.OVNDBRestoreReconciler{ + Client: mgr.GetClient(), + Scheme: mgr.GetScheme(), + Kclient: kclient, + RestConfig: cfg, + }).SetupWithManager(mgr); err != nil { + setupLog.Error(err, "unable to create controller", "controller", "OVNDBRestore") + os.Exit(1) + } // Acquire environmental defaults and initialize operator defaults with them ovnv1.SetupDefaults() @@ -288,6 +305,14 @@ func main() { setupLog.Error(err, "unable to create webhook", "webhook", 
"OVNController") os.Exit(1) } + if err := webhookv1beta1.SetupOVNDBBackupWebhookWithManager(mgr); err != nil { + setupLog.Error(err, "unable to create webhook", "webhook", "OVNDBBackup") + os.Exit(1) + } + if err := webhookv1beta1.SetupOVNDBRestoreWebhookWithManager(mgr); err != nil { + setupLog.Error(err, "unable to create webhook", "webhook", "OVNDBRestore") + os.Exit(1) + } checker = mgr.GetWebhookServer().StartedChecker() } // +kubebuilder:scaffold:builder diff --git a/config/crd/bases/ovn.openstack.org_ovndbbackups.yaml b/config/crd/bases/ovn.openstack.org_ovndbbackups.yaml new file mode 100644 index 00000000..aa74e7d7 --- /dev/null +++ b/config/crd/bases/ovn.openstack.org_ovndbbackups.yaml @@ -0,0 +1,134 @@ +--- +apiVersion: apiextensions.k8s.io/v1 +kind: CustomResourceDefinition +metadata: + annotations: + controller-gen.kubebuilder.io/version: v0.18.0 + name: ovndbbackups.ovn.openstack.org +spec: + group: ovn.openstack.org + names: + kind: OVNDBBackup + listKind: OVNDBBackupList + plural: ovndbbackups + singular: ovndbbackup + scope: Namespaced + versions: + - additionalPrinterColumns: + - description: Status + jsonPath: .status.conditions[0].status + name: Status + type: string + - description: Message + jsonPath: .status.conditions[0].message + name: Message + type: string + name: v1beta1 + schema: + openAPIV3Schema: + description: OVNDBBackup is the Schema for the ovndbbackups API + properties: + apiVersion: + description: |- + APIVersion defines the versioned schema of this representation of an object. + Servers should convert recognized schemas to the latest internal value, and + may reject unrecognized values. + More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#resources + type: string + kind: + description: |- + Kind is a string value representing the REST resource this object represents. + Servers may infer this from the endpoint the client submits requests to. + Cannot be updated. + In CamelCase. 
+ More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds + type: string + metadata: + type: object + spec: + description: OVNDBBackupSpec defines the desired state of OVNDBBackup + properties: + databaseInstance: + description: DatabaseInstance - Name of the OVNDBCluster CR to back + up + type: string + retention: + description: Retention - duration after which old backups are cleaned + up from disk + type: string + schedule: + default: '@daily' + description: Schedule in Cron format for periodic backups + type: string + storageClass: + description: StorageClass for the backup PVC (defaults to the OVNDBCluster's + StorageClass) + type: string + storageRequest: + description: StorageRequest for the backup PVC (defaults to the OVNDBCluster's + StorageRequest) + type: string + required: + - databaseInstance + - schedule + type: object + status: + description: OVNDBBackupStatus defines the observed state of OVNDBBackup + properties: + conditions: + description: Conditions + items: + description: Condition defines an observation of a API resource + operational state. + properties: + lastTransitionTime: + description: |- + Last time the condition transitioned from one status to another. + This should be when the underlying condition changed. If that is not known, then using the time when + the API field changed is acceptable. + format: date-time + type: string + message: + description: A human readable message indicating details about + the transition. + type: string + reason: + description: The reason for the condition's last transition + in CamelCase. + type: string + severity: + description: |- + Severity provides a classification of Reason code, so the current situation is immediately + understandable and could act accordingly. + It is meant for situations where Status=False and it should be indicated if it is just + informational, warning (next reconciliation might fix it) or an error (e.g. 
DB create issue + and no actions to automatically resolve the issue can/should be done). + For conditions where Status=Unknown or Status=True the Severity should be SeverityNone. + type: string + status: + description: Status of the condition, one of True, False, Unknown. + type: string + type: + description: Type of condition in CamelCase. + type: string + required: + - lastTransitionTime + - status + - type + type: object + type: array + hash: + additionalProperties: + type: string + description: Map of hashes to track input changes + type: object + observedGeneration: + description: ObservedGeneration - the most recent generation observed + format: int64 + type: integer + type: object + type: object + served: true + storage: true + subresources: + status: {} diff --git a/config/crd/bases/ovn.openstack.org_ovndbrestores.yaml b/config/crd/bases/ovn.openstack.org_ovndbrestores.yaml new file mode 100644 index 00000000..b9975130 --- /dev/null +++ b/config/crd/bases/ovn.openstack.org_ovndbrestores.yaml @@ -0,0 +1,128 @@ +--- +apiVersion: apiextensions.k8s.io/v1 +kind: CustomResourceDefinition +metadata: + annotations: + controller-gen.kubebuilder.io/version: v0.18.0 + name: ovndbrestores.ovn.openstack.org +spec: + group: ovn.openstack.org + names: + kind: OVNDBRestore + listKind: OVNDBRestoreList + plural: ovndbrestores + singular: ovndbrestore + scope: Namespaced + versions: + - additionalPrinterColumns: + - description: Phase + jsonPath: .status.phase + name: Phase + type: string + - description: Status + jsonPath: .status.conditions[0].status + name: Status + type: string + - description: Message + jsonPath: .status.conditions[0].message + name: Message + type: string + name: v1beta1 + schema: + openAPIV3Schema: + description: OVNDBRestore is the Schema for the ovndbrestores API + properties: + apiVersion: + description: |- + APIVersion defines the versioned schema of this representation of an object. 
+ Servers should convert recognized schemas to the latest internal value, and + may reject unrecognized values. + More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#resources + type: string + kind: + description: |- + Kind is a string value representing the REST resource this object represents. + Servers may infer this from the endpoint the client submits requests to. + Cannot be updated. + In CamelCase. + More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds + type: string + metadata: + type: object + spec: + description: OVNDBRestoreSpec defines the desired state of OVNDBRestore + properties: + backupSource: + description: BackupSource - Name of the OVNDBBackup CR to restore + from + type: string + required: + - backupSource + type: object + status: + description: OVNDBRestoreStatus defines the observed state of OVNDBRestore + properties: + conditions: + description: Conditions + items: + description: Condition defines an observation of a API resource + operational state. + properties: + lastTransitionTime: + description: |- + Last time the condition transitioned from one status to another. + This should be when the underlying condition changed. If that is not known, then using the time when + the API field changed is acceptable. + format: date-time + type: string + message: + description: A human readable message indicating details about + the transition. + type: string + reason: + description: The reason for the condition's last transition + in CamelCase. + type: string + severity: + description: |- + Severity provides a classification of Reason code, so the current situation is immediately + understandable and could act accordingly. + It is meant for situations where Status=False and it should be indicated if it is just + informational, warning (next reconciliation might fix it) or an error (e.g. 
DB create issue + and no actions to automatically resolve the issue can/should be done). + For conditions where Status=Unknown or Status=True the Severity should be SeverityNone. + type: string + status: + description: Status of the condition, one of True, False, Unknown. + type: string + type: + description: Type of condition in CamelCase. + type: string + required: + - lastTransitionTime + - status + - type + type: object + type: array + hash: + additionalProperties: + type: string + description: Map of hashes to track input changes + type: object + observedGeneration: + description: ObservedGeneration - the most recent generation observed + format: int64 + type: integer + originalReplicas: + description: OriginalReplicas - the replica count saved before scale-down + format: int32 + type: integer + phase: + description: Phase - current phase of the restore operation + type: string + type: object + type: object + served: true + storage: true + subresources: + status: {} diff --git a/config/crd/kustomization.yaml b/config/crd/kustomization.yaml index bdcfa0c7..cadfaea9 100644 --- a/config/crd/kustomization.yaml +++ b/config/crd/kustomization.yaml @@ -5,6 +5,8 @@ resources: - bases/ovn.openstack.org_ovnnorthds.yaml - bases/ovn.openstack.org_ovndbclusters.yaml - bases/ovn.openstack.org_ovncontrollers.yaml +- bases/ovn.openstack.org_ovndbbackups.yaml +- bases/ovn.openstack.org_ovndbrestores.yaml # +kubebuilder:scaffold:crdkustomizeresource patches: diff --git a/config/manifests/bases/ovn-operator.clusterserviceversion.yaml b/config/manifests/bases/ovn-operator.clusterserviceversion.yaml index ac8c33c0..c9ed156d 100644 --- a/config/manifests/bases/ovn-operator.clusterserviceversion.yaml +++ b/config/manifests/bases/ovn-operator.clusterserviceversion.yaml @@ -27,6 +27,11 @@ spec: displayName: TLS path: tls version: v1beta1 + - description: OVNDBBackup is the Schema for the ovndbbackups API + displayName: OVNDBBackup + kind: OVNDBBackup + name: 
ovndbbackups.ovn.openstack.org + version: v1beta1 - description: OVNDBCluster is the Schema for the ovndbclusters API displayName: OVNDBCluster kind: OVNDBCluster @@ -36,6 +41,11 @@ spec: displayName: TLS path: tls version: v1beta1 + - description: OVNDBRestore is the Schema for the ovndbrestores API + displayName: OVNDBRestore + kind: OVNDBRestore + name: ovndbrestores.ovn.openstack.org + version: v1beta1 - description: OVNNorthd is the Schema for the ovnnorthds API displayName: OVNNorthd kind: OVNNorthd diff --git a/config/rbac/role.yaml b/config/rbac/role.yaml index 4e6e5293..4cf077b5 100644 --- a/config/rbac/role.yaml +++ b/config/rbac/role.yaml @@ -8,6 +8,7 @@ rules: - "" resources: - configmaps + - persistentvolumeclaims - pods - secrets - services @@ -53,9 +54,11 @@ rules: - list - patch - update + - watch - apiGroups: - batch resources: + - cronjobs - jobs verbs: - create @@ -99,7 +102,9 @@ rules: - ovn.openstack.org resources: - ovncontrollers + - ovndbbackups - ovndbclusters + - ovndbrestores - ovnnorthds verbs: - create @@ -113,7 +118,9 @@ rules: - ovn.openstack.org resources: - ovncontrollers/finalizers + - ovndbbackups/finalizers - ovndbclusters/finalizers + - ovndbrestores/finalizers - ovnnorthds/finalizers verbs: - patch @@ -122,6 +129,8 @@ rules: - ovn.openstack.org resources: - ovncontrollers/status + - ovndbbackups/status + - ovndbrestores/status - ovnnorthds/status verbs: - get diff --git a/config/samples/ovn_v1beta1_ovndbbackup.yaml b/config/samples/ovn_v1beta1_ovndbbackup.yaml new file mode 100644 index 00000000..ee8fefb0 --- /dev/null +++ b/config/samples/ovn_v1beta1_ovndbbackup.yaml @@ -0,0 +1,9 @@ +apiVersion: ovn.openstack.org/v1beta1 +kind: OVNDBBackup +metadata: + name: ovndbbackup-nb-sample +spec: + databaseInstance: ovndbcluster-nb + schedule: "@daily" + storageRequest: "10G" + retention: "168h" diff --git a/config/samples/ovn_v1beta1_ovndbrestore.yaml b/config/samples/ovn_v1beta1_ovndbrestore.yaml new file mode 100644 index 
00000000..e604a1c1 --- /dev/null +++ b/config/samples/ovn_v1beta1_ovndbrestore.yaml @@ -0,0 +1,6 @@ +apiVersion: ovn.openstack.org/v1beta1 +kind: OVNDBRestore +metadata: + name: ovndbrestore-nb-sample +spec: + backupSource: ovndbbackup-nb-sample diff --git a/config/webhook/manifests.yaml b/config/webhook/manifests.yaml index c89b3d63..aa4a3991 100644 --- a/config/webhook/manifests.yaml +++ b/config/webhook/manifests.yaml @@ -24,6 +24,26 @@ webhooks: resources: - ovncontrollers sideEffects: None +- admissionReviewVersions: + - v1 + clientConfig: + service: + name: webhook-service + namespace: system + path: /mutate-ovn-openstack-org-v1beta1-ovndbbackup + failurePolicy: Fail + name: movndbbackup-v1beta1.kb.io + rules: + - apiGroups: + - ovn.openstack.org + apiVersions: + - v1beta1 + operations: + - CREATE + - UPDATE + resources: + - ovndbbackups + sideEffects: None - admissionReviewVersions: - v1 clientConfig: @@ -44,6 +64,26 @@ webhooks: resources: - ovndbclusters sideEffects: None +- admissionReviewVersions: + - v1 + clientConfig: + service: + name: webhook-service + namespace: system + path: /mutate-ovn-openstack-org-v1beta1-ovndbrestore + failurePolicy: Fail + name: movndbrestore-v1beta1.kb.io + rules: + - apiGroups: + - ovn.openstack.org + apiVersions: + - v1beta1 + operations: + - CREATE + - UPDATE + resources: + - ovndbrestores + sideEffects: None - admissionReviewVersions: - v1 clientConfig: @@ -90,6 +130,26 @@ webhooks: resources: - ovncontrollers sideEffects: None +- admissionReviewVersions: + - v1 + clientConfig: + service: + name: webhook-service + namespace: system + path: /validate-ovn-openstack-org-v1beta1-ovndbbackup + failurePolicy: Fail + name: vovndbbackup-v1beta1.kb.io + rules: + - apiGroups: + - ovn.openstack.org + apiVersions: + - v1beta1 + operations: + - CREATE + - UPDATE + resources: + - ovndbbackups + sideEffects: None - admissionReviewVersions: - v1 clientConfig: @@ -110,6 +170,26 @@ webhooks: resources: - ovndbclusters sideEffects: None +- 
admissionReviewVersions: + - v1 + clientConfig: + service: + name: webhook-service + namespace: system + path: /validate-ovn-openstack-org-v1beta1-ovndbrestore + failurePolicy: Fail + name: vovndbrestore-v1beta1.kb.io + rules: + - apiGroups: + - ovn.openstack.org + apiVersions: + - v1beta1 + operations: + - CREATE + - UPDATE + resources: + - ovndbrestores + sideEffects: None - admissionReviewVersions: - v1 clientConfig: diff --git a/docs/backup-restore.md b/docs/backup-restore.md new file mode 100644 index 00000000..9d581dd6 --- /dev/null +++ b/docs/backup-restore.md @@ -0,0 +1,141 @@ +# OVN Database Backup and Restore + +This document describes how to back up and restore OVN Northbound and Southbound databases using the `OVNDBBackup` and `OVNDBRestore` custom resources. + +## Backup + +### Creating a Scheduled Backup + +Create an `OVNDBBackup` CR to set up automated, periodic backups of an OVN database: + +```yaml +apiVersion: ovn.openstack.org/v1beta1 +kind: OVNDBBackup +metadata: + name: ovndbbackup-nb +spec: + databaseInstance: ovndbcluster-nb + schedule: "@daily" + storageRequest: "10G" + retention: "168h" +``` + +| Field | Required | Description | +|--------------------|----------|-------------| +| `databaseInstance` | Yes | Name of the `OVNDBCluster` CR to back up (e.g. `ovndbcluster-nb` or `ovndbcluster-sb`). | +| `schedule` | Yes | Cron schedule expression (default: `@daily`). Examples: `@hourly`, `0 */6 * * *`. | +| `storageRequest` | No | Size of the backup PVC. Defaults to the `OVNDBCluster`'s `storageRequest`. | +| `storageClass` | No | Storage class for the backup PVC. Defaults to the `OVNDBCluster`'s `storageClass`. | +| `retention` | No | Duration after which old backups are deleted from disk (e.g. `168h` = 7 days). If unset, backups are kept indefinitely. | + +### How It Works + +The controller creates: + +1. **A PersistentVolumeClaim** to store backup files. 
This PVC is intentionally *not* owned by the CR, so backup data survives if the `OVNDBBackup` resource is deleted. +2. **A CronJob** that runs on the specified schedule. Each job connects to the OVN database service endpoint and runs `ovsdb-client backup` to produce a standalone OVSDB file. If `retention` is set, old backup files are cleaned up afterward. + +### Backing Up Both Databases + +To back up both NB and SB databases, create two `OVNDBBackup` resources: + +```yaml +apiVersion: ovn.openstack.org/v1beta1 +kind: OVNDBBackup +metadata: + name: ovndbbackup-nb +spec: + databaseInstance: ovndbcluster-nb + schedule: "@daily" + storageRequest: "10G" + retention: "168h" +--- +apiVersion: ovn.openstack.org/v1beta1 +kind: OVNDBBackup +metadata: + name: ovndbbackup-sb +spec: + databaseInstance: ovndbcluster-sb + schedule: "@daily" + storageRequest: "10G" + retention: "168h" +``` + +### Monitoring Backup Status + +```bash +oc get ovndbbackup +oc describe ovndbbackup ovndbbackup-nb +``` + +The CR is `Ready` when the CronJob, PVC, and ConfigMap are all created successfully. To check recent backup job runs: + +```bash +oc get jobs -l app=ovndbbackup +``` + +## Restore + +> **Warning**: Restoring a database is a disruptive operation. The OVN DB cluster will be scaled down to zero during the process, causing a temporary loss of OVN control plane availability. + +### Creating a Restore + +Create an `OVNDBRestore` CR referencing the backup to restore from: + +```yaml +apiVersion: ovn.openstack.org/v1beta1 +kind: OVNDBRestore +metadata: + name: ovndbrestore-nb +spec: + backupSource: ovndbbackup-nb +``` + +| Field | Required | Description | +|----------------|----------|-------------| +| `backupSource` | Yes | Name of the `OVNDBBackup` CR to restore from. The backup must be in `Ready` state. 
| + +### Restore Phases + +The restore proceeds through a state machine: + +| Phase | Description | +|--------------|-------------| +| `Validating` | Validates the backup source and saves the current replica count. | +| `ScalingDown`| Sets a restore annotation on the `OVNDBCluster` to override replicas to 0, force-deletes all pods (preStop hooks hang when all RAFT members terminate simultaneously), and deletes non-pod-0 PVCs to prevent stale RAFT state. | +| `Restoring` | Runs a Job that copies the latest standalone backup onto pod-0's PVC. When pod-0 starts, `ovn-ctl` automatically converts the standalone file to a RAFT cluster. | +| `ScalingUp` | Scales to 1 replica first (pod-0 bootstraps the restored DB), verifies the DB, then removes the restore annotation so the cluster scales to the original replica count. Remaining pods join the cluster with fresh PVCs. | +| `Completed` | Restore finished successfully. | +| `Failed` | Restore job failed. Check the job logs for details. | + +### Monitoring Restore Progress + +```bash +oc get ovndbrestore +oc describe ovndbrestore ovndbrestore-nb +``` + +The `Phase` field shows the current step. To check the restore job, list the jobs and then fetch the logs of the one shown (replace `RESTORE_JOB_NAME` with its name): + +```bash +oc get jobs -l app=ovndbrestore +oc logs job/RESTORE_JOB_NAME +``` + +### What Happens During Restore + +1. A finalizer is added to the `OVNDBBackup` CR to prevent its deletion during the restore. +2. A restore-in-progress annotation is set on the `OVNDBCluster` to override the StatefulSet replica count to 0, preventing higher-level operators (e.g. OpenStackControlPlane) from interfering. +3. All pods are force-deleted (graceful RAFT shutdown hangs when all members terminate simultaneously). Non-pod-0 PVCs are deleted to prevent stale RAFT membership state on restart. +4. A Job mounts pod-0's PVC and the backup PVC, removes the old database file, and copies the standalone backup in its place. +5. The annotation is updated to allow 1 replica.
When pod-0 starts, `ovn-ctl` detects the standalone database file and automatically converts it to a RAFT cluster. After pod-0 is ready, the DB schema version is verified via `ovsdb-client get-schema-version`. +6. The annotation is removed, allowing the cluster to scale to its original replica count. The remaining pods start with fresh PVCs and join the RAFT cluster. +7. The finalizer on the `OVNDBBackup` CR is removed when the `OVNDBRestore` is deleted. + +### Cleanup After Restore + +The `OVNDBRestore` CR can be deleted after the restore completes. Deleting it removes the finalizer from the referenced `OVNDBBackup` CR: + +```bash +oc delete ovndbrestore ovndbrestore-nb +``` diff --git a/go.mod b/go.mod index 3631fde0..0a26b61d 100644 --- a/go.mod +++ b/go.mod @@ -48,15 +48,18 @@ require ( github.com/google/go-cmp v0.7.0 // indirect github.com/google/gofuzz v1.2.0 // indirect github.com/google/pprof v0.0.0-20260115054156-294ebfa9ad83 // indirect + github.com/gorilla/websocket v1.5.1 // indirect github.com/grpc-ecosystem/grpc-gateway/v2 v2.20.0 // indirect github.com/imdario/mergo v0.3.16 // indirect github.com/inconshreveable/mousetrap v1.1.0 // indirect github.com/josharian/intern v1.0.0 // indirect github.com/json-iterator/go v1.1.12 // indirect github.com/mailru/easyjson v0.9.0 // indirect + github.com/moby/spdystream v0.4.0 // indirect github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd // indirect github.com/modern-go/reflect2 v1.0.3-0.20250322232337-35a7c28c31ee // indirect github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822 // indirect + github.com/mxk/go-flowrate v0.0.0-20140419014527-cca7078d478f // indirect github.com/openshift/api v3.9.0+incompatible // indirect github.com/pkg/errors v0.9.1 // indirect github.com/prometheus/client_golang v1.22.0 // indirect @@ -102,6 +105,7 @@ require ( k8s.io/component-base v0.33.2 // indirect k8s.io/klog/v2 v2.130.1 // indirect k8s.io/kube-openapi v0.0.0-20250902184714-7fc278399c7f // 
indirect + k8s.io/kubectl v0.31.14 // indirect sigs.k8s.io/apiserver-network-proxy/konnectivity-client v0.30.3 // indirect sigs.k8s.io/json v0.0.0-20241014173422-cfa47c3a1cc8 // indirect sigs.k8s.io/randfill v1.0.0 // indirect diff --git a/go.sum b/go.sum index 8cd351a5..3f332972 100644 --- a/go.sum +++ b/go.sum @@ -2,6 +2,8 @@ github.com/Masterminds/semver/v3 v3.4.0 h1:Zog+i5UMtVoCU8oKka5P7i9q9HgrJeGzI9SA1 github.com/Masterminds/semver/v3 v3.4.0/go.mod h1:4V+yj/TJE1HU9XfppCwVMZq3I84lprf4nC11bSS5beM= github.com/antlr4-go/antlr/v4 v4.13.0 h1:lxCg3LAv+EUK6t1i0y1V6/SLeUi0eKEKdhQAlS8TVTI= github.com/antlr4-go/antlr/v4 v4.13.0/go.mod h1:pfChB/xh/Unjila75QW7+VU4TSnWnnk9UTnmpPaOR2g= +github.com/armon/go-socks5 v0.0.0-20160902184237-e75332964ef5 h1:0CwZNZbxp69SHPdPJAN/hZIm0C4OItdklCFmMRWYpio= +github.com/armon/go-socks5 v0.0.0-20160902184237-e75332964ef5/go.mod h1:wHh0iHkYZB8zMSxRWpUBQtwG5a7fFgvEO+odwuTv2gs= github.com/asaskevich/govalidator v0.0.0-20230301143203-a9d515a09cc2 h1:DklsrG3dyBCFEj5IhUbnKptjxatkF07cF2ak3yi77so= github.com/asaskevich/govalidator v0.0.0-20230301143203-a9d515a09cc2/go.mod h1:WaHUgvxTVq04UNunO+XhnAqY/wQc+bxr74GqbsZ/Jqw= github.com/beorn7/perks v1.0.1 h1:VlbKKnNfV8bJzeqoa4cOKqO6bYr3WgKZxO8Z16+hsOM= @@ -72,6 +74,8 @@ github.com/google/pprof v0.0.0-20260115054156-294ebfa9ad83 h1:z2ogiKUYzX5Is6zr/v github.com/google/pprof v0.0.0-20260115054156-294ebfa9ad83/go.mod h1:MxpfABSjhmINe3F1It9d+8exIHFvUqtLIRCdOGNXqiI= github.com/google/uuid v1.6.0 h1:NIvaJDMOsjHA8n1jAhLSgzrAzy1Hgr+hNrb57e+94F0= github.com/google/uuid v1.6.0/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo= +github.com/gorilla/websocket v1.5.1 h1:gmztn0JnHVt9JZquRuzLw3g4wouNVzKL15iLr/zn/QY= +github.com/gorilla/websocket v1.5.1/go.mod h1:x3kM2JMyaluk02fnUJpQuwD2dCS5NDG2ZHL0uE0tcaY= github.com/grpc-ecosystem/grpc-gateway/v2 v2.20.0 h1:bkypFPDjIYGfCYD5mRBvpqxfYX1YCS1PXdKYWi8FsN0= github.com/grpc-ecosystem/grpc-gateway/v2 v2.20.0/go.mod h1:P+Lt/0by1T8bfcF3z737NnSbmxQAppXMRziHUxPOC8k= 
github.com/imdario/mergo v0.3.16 h1:wwQJbIsHYGMUyLSPrEq1CT16AhnhNJQ51+4fdHUnCl4= @@ -102,6 +106,8 @@ github.com/maruel/natural v1.1.1 h1:Hja7XhhmvEFhcByqDoHz9QZbkWey+COd9xWfCfn1ioo= github.com/maruel/natural v1.1.1/go.mod h1:v+Rfd79xlw1AgVBjbO0BEQmptqb5HvL/k9GRHB7ZKEg= github.com/mfridman/tparse v0.18.0 h1:wh6dzOKaIwkUGyKgOntDW4liXSo37qg5AXbIhkMV3vE= github.com/mfridman/tparse v0.18.0/go.mod h1:gEvqZTuCgEhPbYk/2lS3Kcxg1GmTxxU7kTC8DvP0i/A= +github.com/moby/spdystream v0.4.0 h1:Vy79D6mHeJJjiPdFEL2yku1kl0chZpJfZcPpb16BRl8= +github.com/moby/spdystream v0.4.0/go.mod h1:xBAYlnt/ay+11ShkdFKNAG7LsyK/tmNBVvVOwrfMgdI= github.com/modern-go/concurrent v0.0.0-20180228061459-e0a39a4cb421/go.mod h1:6dJC0mAP4ikYIbvyc7fijjWJddQyLn8Ig3JB5CqoB9Q= github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd h1:TRLaZ9cD/w8PVh93nsPXa1VrQ6jlwL5oN8l14QlcNfg= github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd/go.mod h1:6dJC0mAP4ikYIbvyc7fijjWJddQyLn8Ig3JB5CqoB9Q= @@ -110,6 +116,8 @@ github.com/modern-go/reflect2 v1.0.3-0.20250322232337-35a7c28c31ee h1:W5t00kpgFd github.com/modern-go/reflect2 v1.0.3-0.20250322232337-35a7c28c31ee/go.mod h1:yWuevngMOJpCy52FWWMvUC8ws7m/LJsjYzDa0/r8luk= github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822 h1:C3w9PqII01/Oq1c1nUAm88MOHcQC9l5mIlSMApZMrHA= github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822/go.mod h1:+n7T8mK8HuQTcFwEeznm/DIxMOiR9yIdICNftLE1DvQ= +github.com/mxk/go-flowrate v0.0.0-20140419014527-cca7078d478f h1:y5//uYreIhSUg3J1GEMiLbxo1LJaP8RfCpH6pymGZus= +github.com/mxk/go-flowrate v0.0.0-20140419014527-cca7078d478f/go.mod h1:ZdcZmHo+o7JKHSa8/e818NopupXU1YMK5fe1lsApnBw= github.com/onsi/ginkgo/v2 v2.28.1 h1:S4hj+HbZp40fNKuLUQOYLDgZLwNUVn19N3Atb98NCyI= github.com/onsi/ginkgo/v2 v2.28.1/go.mod h1:CLtbVInNckU3/+gC8LzkGUb9oF+e8W8TdUsxPwvdOgE= github.com/onsi/gomega v1.39.1 h1:1IJLAad4zjPn2PsnhH70V4DKRFlrCzGBNrNaru+Vf28= @@ -275,6 +283,8 @@ k8s.io/klog/v2 v2.130.1 h1:n9Xl7H1Xvksem4KFG4PYbdQCQxqc/tTUyrgXaOhHSzk= 
k8s.io/klog/v2 v2.130.1/go.mod h1:3Jpz1GvMt720eyJH1ckRHK1EDfpxISzJ7I9OYgaDtPE= k8s.io/kube-openapi v0.0.0-20250627150254-e9823e99808e h1:UGI9rv1A2cV87NhXr4s+AUBxIuoo/SME/IyJ3b6KztE= k8s.io/kube-openapi v0.0.0-20250627150254-e9823e99808e/go.mod h1:GLOk5B+hDbRROvt0X2+hqX64v/zO3vXN7J78OUmBSKw= +k8s.io/kubectl v0.31.14 h1:3SsqtFmv6TwI7p0IjJ0C/HwfIrKscLQjhuE8Bdmo+FY= +k8s.io/kubectl v0.31.14/go.mod h1:OUUYe8E7IjiiM/rR6CLCm4Ix2Fq1cY+DpE+BXfRbwAo= k8s.io/utils v0.0.0-20250820121507-0af2bda4dd1d h1:wAhiDyZ4Tdtt7e46e9M5ZSAJ/MnPGPs+Ki1gHw4w1R0= k8s.io/utils v0.0.0-20250820121507-0af2bda4dd1d/go.mod h1:OLgZIPagt7ERELqWJFomSt595RzquPNLL48iOWgYOg0= sigs.k8s.io/apiserver-network-proxy/konnectivity-client v0.30.3 h1:2770sDpzrjjsAtVhSeUFseziht227YAWYHLGNM8QPwY= diff --git a/hack/crd-schema-checker.sh b/hack/crd-schema-checker.sh index 84d34f0e..34200d7a 100755 --- a/hack/crd-schema-checker.sh +++ b/hack/crd-schema-checker.sh @@ -24,7 +24,10 @@ trap cleanup EXIT for crd in config/crd/bases/*.yaml; do mkdir -p "$(dirname "$TMP_DIR/$crd")" - git show "$BASE_REF:$crd" > "$TMP_DIR/$crd" + if ! git show "$BASE_REF:$crd" > "$TMP_DIR/$crd" 2>/dev/null; then + echo "Skipping $crd (new CRD, not present in $BASE_REF)" + continue + fi $CHECKER check-manifests \ $CHECKER_ARGS \ --existing-crd-filename="$TMP_DIR/$crd" \ diff --git a/internal/controller/ovndbbackup_controller.go b/internal/controller/ovndbbackup_controller.go new file mode 100644 index 00000000..4b3d23fe --- /dev/null +++ b/internal/controller/ovndbbackup_controller.go @@ -0,0 +1,358 @@ +/* + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+See the License for the specific language governing permissions and +limitations under the License. +*/ + +package controller + +import ( + "context" + "fmt" + "strings" + "time" + + "github.com/go-logr/logr" + "k8s.io/apimachinery/pkg/runtime" + "k8s.io/apimachinery/pkg/types" + "k8s.io/client-go/kubernetes" + ctrl "sigs.k8s.io/controller-runtime" + "sigs.k8s.io/controller-runtime/pkg/client" + "sigs.k8s.io/controller-runtime/pkg/controller/controllerutil" + "sigs.k8s.io/controller-runtime/pkg/log" + + "github.com/openstack-k8s-operators/lib-common/modules/common/condition" + "github.com/openstack-k8s-operators/lib-common/modules/common/configmap" + "github.com/openstack-k8s-operators/lib-common/modules/common/env" + "github.com/openstack-k8s-operators/lib-common/modules/common/helper" + common_rbac "github.com/openstack-k8s-operators/lib-common/modules/common/rbac" + "github.com/openstack-k8s-operators/lib-common/modules/common/util" + ovnv1 "github.com/openstack-k8s-operators/ovn-operator/api/v1beta1" + ovn_common "github.com/openstack-k8s-operators/ovn-operator/internal/common" + "github.com/openstack-k8s-operators/ovn-operator/internal/ovndbbackup" + "github.com/openstack-k8s-operators/ovn-operator/internal/ovndbcluster" + + batchv1 "k8s.io/api/batch/v1" + corev1 "k8s.io/api/core/v1" + rbacv1 "k8s.io/api/rbac/v1" + k8s_errors "k8s.io/apimachinery/pkg/api/errors" +) + +// OVNDBBackupReconciler reconciles a OVNDBBackup object +type OVNDBBackupReconciler struct { + client.Client + Kclient kubernetes.Interface + Scheme *runtime.Scheme +} + +// GetClient - +func (r *OVNDBBackupReconciler) GetClient() client.Client { + return r.Client +} + +// GetKClient - +func (r *OVNDBBackupReconciler) GetKClient() kubernetes.Interface { + return r.Kclient +} + +// GetScheme - +func (r *OVNDBBackupReconciler) GetScheme() *runtime.Scheme { + return r.Scheme +} + +// GetLogger returns a logger object with a prefix of "controller.name" and additional controller context fields +func 
(r *OVNDBBackupReconciler) GetLogger(ctx context.Context) logr.Logger { + return log.FromContext(ctx).WithName("Controllers").WithName("OVNDBBackup") +} + +//+kubebuilder:rbac:groups=ovn.openstack.org,resources=ovndbbackups,verbs=get;list;watch;create;update;patch;delete +//+kubebuilder:rbac:groups=ovn.openstack.org,resources=ovndbbackups/status,verbs=get;update;patch +//+kubebuilder:rbac:groups=ovn.openstack.org,resources=ovndbbackups/finalizers,verbs=update;patch +//+kubebuilder:rbac:groups=ovn.openstack.org,resources=ovndbclusters,verbs=get;list;watch +//+kubebuilder:rbac:groups=core,resources=persistentvolumeclaims,verbs=get;list;watch;create;update;patch +//+kubebuilder:rbac:groups=batch,resources=cronjobs,verbs=get;list;watch;create;update;patch;delete +//+kubebuilder:rbac:groups=core,resources=configmaps,verbs=get;list;watch;create;update;patch;delete; + +// Reconcile - OVN DB Backup +func (r *OVNDBBackupReconciler) Reconcile(ctx context.Context, req ctrl.Request) (result ctrl.Result, _err error) { + Log := r.GetLogger(ctx) + + instance := &ovnv1.OVNDBBackup{} + err := r.Get(ctx, req.NamespacedName, instance) + if err != nil { + if k8s_errors.IsNotFound(err) { + return ctrl.Result{}, nil + } + return ctrl.Result{}, err + } + + helper, err := helper.NewHelper( + instance, + r.Client, + r.Kclient, + r.Scheme, + Log, + ) + if err != nil { + return ctrl.Result{}, err + } + + if instance.Status.Conditions == nil { + instance.Status.Conditions = condition.Conditions{} + } + + savedConditions := instance.Status.Conditions.DeepCopy() + + cl := condition.CreateList( + condition.UnknownCondition(ovnv1.OVNDBClusterReadyCondition, condition.InitReason, ovnv1.OVNDBClusterReadyInitMessage), + condition.UnknownCondition(condition.ServiceConfigReadyCondition, condition.InitReason, condition.ServiceConfigReadyInitMessage), + condition.UnknownCondition(ovnv1.PersistentVolumeClaimReadyCondition, condition.InitReason, ovnv1.PersistentVolumeClaimReadyInitMessage), + 
condition.UnknownCondition(ovnv1.CronJobReadyCondition, condition.InitReason, ovnv1.CronJobReadyInitMessage), + condition.UnknownCondition(condition.ServiceAccountReadyCondition, condition.InitReason, condition.ServiceAccountReadyInitMessage), + condition.UnknownCondition(condition.RoleReadyCondition, condition.InitReason, condition.RoleReadyInitMessage), + condition.UnknownCondition(condition.RoleBindingReadyCondition, condition.InitReason, condition.RoleBindingReadyInitMessage), + ) + instance.Status.Conditions.Init(&cl) + instance.Status.ObservedGeneration = instance.Generation + + if instance.Status.Hash == nil { + instance.Status.Hash = map[string]string{} + } + + defer func() { + if r := recover(); r != nil { + Log.Info(fmt.Sprintf("panic during reconcile %v\n", r)) + panic(r) + } + if instance.Status.Conditions.AllSubConditionIsTrue() { + instance.Status.Conditions.MarkTrue( + condition.ReadyCondition, condition.ReadyMessage) + } else { + instance.Status.Conditions.MarkUnknown( + condition.ReadyCondition, condition.InitReason, condition.ReadyInitMessage) + instance.Status.Conditions.Set( + instance.Status.Conditions.Mirror(condition.ReadyCondition)) + } + condition.RestoreLastTransitionTimes(&instance.Status.Conditions, savedConditions) + err := helper.PatchInstance(ctx, instance) + if err != nil { + _err = err + return + } + }() + + if instance.DeletionTimestamp.IsZero() && controllerutil.AddFinalizer(instance, helper.GetFinalizer()) { + return ctrl.Result{}, nil + } + + if !instance.DeletionTimestamp.IsZero() { + return r.reconcileDelete(ctx, instance, helper) + } + + return r.reconcileNormal(ctx, instance, helper) +} + +func (r *OVNDBBackupReconciler) reconcileDelete(ctx context.Context, instance *ovnv1.OVNDBBackup, helper *helper.Helper) (ctrl.Result, error) { + Log := r.GetLogger(ctx) + Log.Info("Reconciling OVNDBBackup delete") + + controllerutil.RemoveFinalizer(instance, helper.GetFinalizer()) + Log.Info("Reconciled OVNDBBackup delete
successfully") + + return ctrl.Result{}, nil +} + +func (r *OVNDBBackupReconciler) reconcileNormal(ctx context.Context, instance *ovnv1.OVNDBBackup, helper *helper.Helper) (ctrl.Result, error) { + Log := r.GetLogger(ctx) + Log.Info("Reconciling OVNDBBackup") + + // RBAC + rbacRules := []rbacv1.PolicyRule{ + { + APIGroups: []string{"security.openshift.io"}, + ResourceNames: []string{"restricted-v2"}, + Resources: []string{"securitycontextconstraints"}, + Verbs: []string{"use"}, + }, + } + rbacResult, err := common_rbac.ReconcileRbac(ctx, helper, instance, rbacRules) + if err != nil { + return rbacResult, err + } else if (rbacResult != ctrl.Result{}) { + return rbacResult, nil + } + + // Lookup referenced OVNDBCluster + cluster := &ovnv1.OVNDBCluster{} + err = r.Get(ctx, types.NamespacedName{ + Name: instance.Spec.DatabaseInstance, + Namespace: instance.Namespace, + }, cluster) + if err != nil { + if k8s_errors.IsNotFound(err) { + instance.Status.Conditions.Set(condition.FalseCondition( + ovnv1.OVNDBClusterReadyCondition, + condition.RequestedReason, + condition.SeverityWarning, + ovnv1.OVNDBClusterReadyErrorMessage, + fmt.Sprintf("OVNDBCluster %s not found", instance.Spec.DatabaseInstance))) + return ctrl.Result{RequeueAfter: time.Second * 10}, nil + } + return ctrl.Result{}, err + } + instance.Status.Conditions.MarkTrue(ovnv1.OVNDBClusterReadyCondition, ovnv1.OVNDBClusterReadyMessage) + + serviceName := ovnv1.ServiceNameNB + if cluster.Spec.DBType == ovnv1.SBDBType { + serviceName = ovnv1.ServiceNameSB + } + serviceLabels := map[string]string{ + "app": "ovndbbackup", + "ovndbbackup": instance.Name, + "service": serviceName, + } + + // Generate backup scripts ConfigMap + configMapVars := make(map[string]env.Setter) + err = r.generateBackupConfigMaps(ctx, helper, instance, cluster, &configMapVars, serviceLabels) + if err != nil { + instance.Status.Conditions.Set(condition.FalseCondition( + condition.ServiceConfigReadyCondition, + condition.ErrorReason, + 
condition.SeverityWarning, + condition.ServiceConfigReadyErrorMessage, + err.Error())) + return ctrl.Result{}, err + } + + inputHash, err := r.createHashOfInputHashes(instance, configMapVars) + if err != nil { + instance.Status.Conditions.Set(condition.FalseCondition( + condition.ServiceConfigReadyCondition, + condition.ErrorReason, + condition.SeverityWarning, + condition.ServiceConfigReadyErrorMessage, + err.Error())) + return ctrl.Result{}, err + } + instance.Status.Conditions.MarkTrue(condition.ServiceConfigReadyCondition, condition.ServiceConfigReadyMessage) + + // Create backup PVC (NOT owned by the CR) + backupPVC := ovndbbackup.BackupPVC(instance, cluster) + foundPVC := &corev1.PersistentVolumeClaim{} + err = r.Get(ctx, types.NamespacedName{Name: backupPVC.Name, Namespace: backupPVC.Namespace}, foundPVC) + if err != nil { + if k8s_errors.IsNotFound(err) { + Log.Info("Creating backup PVC", "PVC.Name", backupPVC.Name) + err = r.Create(ctx, backupPVC) + if err != nil { + instance.Status.Conditions.Set(condition.FalseCondition( + ovnv1.PersistentVolumeClaimReadyCondition, + condition.ErrorReason, + condition.SeverityWarning, + ovnv1.PersistentVolumeClaimReadyErrorMessage, + err.Error())) + return ctrl.Result{}, err + } + } else { + return ctrl.Result{}, err + } + } + instance.Status.Conditions.MarkTrue(ovnv1.PersistentVolumeClaimReadyCondition, ovnv1.PersistentVolumeClaimReadyMessage) + + // Create or update CronJob (owned by the CR) + cronJob := ovndbbackup.BackupCronJob(instance, cluster, serviceLabels, inputHash) + op, err := controllerutil.CreateOrPatch(ctx, r.Client, cronJob, func() error { + err := controllerutil.SetControllerReference(instance, cronJob, r.Scheme) + if err != nil { + return err + } + cronJob.Spec.Schedule = instance.Spec.Schedule + cronJob.Spec.JobTemplate.Spec.Template.Spec.Containers[0].Image = cluster.Spec.ContainerImage + return nil + }) + if err != nil { + instance.Status.Conditions.Set(condition.FalseCondition( + 
ovnv1.CronJobReadyCondition, + condition.ErrorReason, + condition.SeverityWarning, + ovnv1.CronJobReadyErrorMessage, + err.Error())) + return ctrl.Result{}, err + } + if op != controllerutil.OperationResultNone { + Log.Info("CronJob operationResult", "CronJob.Name", cronJob.Name, "result", op) + } + instance.Status.Conditions.MarkTrue(ovnv1.CronJobReadyCondition, ovnv1.CronJobReadyMessage) + + Log.Info("Reconciled OVNDBBackup successfully") + return ctrl.Result{}, nil +} + +func (r *OVNDBBackupReconciler) generateBackupConfigMaps( + ctx context.Context, + h *helper.Helper, + instance *ovnv1.OVNDBBackup, + cluster *ovnv1.OVNDBCluster, + envVars *map[string]env.Setter, + cmLabels map[string]string, +) error { + templateParameters := make(map[string]any) + templateParameters["TLS"] = cluster.Spec.TLS.Enabled() + templateParameters["OVNDB_CERT_PATH"] = ovn_common.OVNDbCertPath + templateParameters["OVNDB_KEY_PATH"] = ovn_common.OVNDbKeyPath + templateParameters["OVNDB_CACERT_PATH"] = ovn_common.OVNDbCaCertPath + templateParameters["SERVICE_NAME"] = ovndbbackup.ServiceName(cluster) + templateParameters["NAMESPACE"] = instance.GetNamespace() + templateParameters["DB_TYPE"] = strings.ToLower(cluster.Spec.DBType) + templateParameters["DB_PORT"] = ovndbcluster.DbPortNB + if cluster.Spec.DBType == ovnv1.SBDBType { + templateParameters["DB_PORT"] = ovndbcluster.DbPortSB + } + + cms := []util.Template{ + { + Name: ovndbbackup.BackupScriptsConfigMapName(instance), + Namespace: instance.Namespace, + Type: util.TemplateTypeScripts, + InstanceType: "OVNDBBackup", + Labels: cmLabels, + ConfigOptions: templateParameters, + }, + } + return configmap.EnsureConfigMaps(ctx, h, instance, cms, envVars) +} + +func (r *OVNDBBackupReconciler) createHashOfInputHashes( + instance *ovnv1.OVNDBBackup, + envVars map[string]env.Setter, +) (string, error) { + mergedMapVars := env.MergeEnvs([]corev1.EnvVar{}, envVars) + hash, err := util.ObjectHash(mergedMapVars) + if err != nil { + return hash, err 
+ } + instance.Status.Hash[string(condition.InputReadyCondition)] = hash + return hash, nil +} + +// SetupWithManager sets up the controller with the Manager. +func (r *OVNDBBackupReconciler) SetupWithManager(mgr ctrl.Manager) error { + return ctrl.NewControllerManagedBy(mgr). + For(&ovnv1.OVNDBBackup{}). + Owns(&batchv1.CronJob{}). + Owns(&corev1.ConfigMap{}). + Owns(&corev1.ServiceAccount{}). + Owns(&rbacv1.Role{}). + Owns(&rbacv1.RoleBinding{}). + Complete(r) +} diff --git a/internal/controller/ovndbcluster_controller.go b/internal/controller/ovndbcluster_controller.go index 69f6ce8b..de35146a 100644 --- a/internal/controller/ovndbcluster_controller.go +++ b/internal/controller/ovndbcluster_controller.go @@ -19,6 +19,7 @@ package controller import ( "context" "fmt" + "strconv" "strings" "time" @@ -639,6 +640,18 @@ func (r *OVNDBClusterReconciler) reconcileNormal(ctx context.Context, instance * if err != nil { return ctrl.Result{}, err } + + // During a restore, the annotation carries the desired replica count. + // This overrides whatever higher-level operators may have set in the spec. 
+ if restoreReplicas, restoring := instance.Annotations[ovnv1.RestoreInProgressAnnotation]; restoring { + replicas, err := strconv.ParseInt(restoreReplicas, 10, 32) + if err == nil { + replicaOverride := int32(replicas) + stsSpec.Spec.Replicas = &replicaOverride + Log.Info("Restore in progress, overriding StatefulSet replicas", "replicas", replicaOverride) + } + } + // Define a new Statefulset object sfset := statefulset.NewStatefulSet(stsSpec, time.Duration(5)*time.Second) ctrlResult, err = sfset.CreateOrPatch(ctx, helper) diff --git a/internal/controller/ovndbrestore_controller.go b/internal/controller/ovndbrestore_controller.go new file mode 100644 index 00000000..a9190129 --- /dev/null +++ b/internal/controller/ovndbrestore_controller.go @@ -0,0 +1,647 @@ +/* + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+*/ + +package controller + +import ( + "bytes" + "context" + "fmt" + "strings" + "time" + + "github.com/go-logr/logr" + "k8s.io/apimachinery/pkg/runtime" + "k8s.io/apimachinery/pkg/types" + "k8s.io/client-go/kubernetes" + "k8s.io/client-go/rest" + ctrl "sigs.k8s.io/controller-runtime" + "sigs.k8s.io/controller-runtime/pkg/client" + "sigs.k8s.io/controller-runtime/pkg/controller/controllerutil" + "sigs.k8s.io/controller-runtime/pkg/log" + + "github.com/openstack-k8s-operators/lib-common/modules/common" + "github.com/openstack-k8s-operators/lib-common/modules/common/condition" + "github.com/openstack-k8s-operators/lib-common/modules/common/configmap" + "github.com/openstack-k8s-operators/lib-common/modules/common/env" + "github.com/openstack-k8s-operators/lib-common/modules/common/helper" + common_rbac "github.com/openstack-k8s-operators/lib-common/modules/common/rbac" + "github.com/openstack-k8s-operators/lib-common/modules/common/rsh" + "github.com/openstack-k8s-operators/lib-common/modules/common/util" + ovnv1 "github.com/openstack-k8s-operators/ovn-operator/api/v1beta1" + "github.com/openstack-k8s-operators/ovn-operator/internal/ovndbbackup" + "github.com/openstack-k8s-operators/ovn-operator/internal/ovndbcluster" + + appsv1 "k8s.io/api/apps/v1" + batchv1 "k8s.io/api/batch/v1" + corev1 "k8s.io/api/core/v1" + rbacv1 "k8s.io/api/rbac/v1" + k8s_errors "k8s.io/apimachinery/pkg/api/errors" +) + +// OVNDBRestoreReconciler reconciles a OVNDBRestore object +type OVNDBRestoreReconciler struct { + client.Client + Kclient kubernetes.Interface + RestConfig *rest.Config + Scheme *runtime.Scheme +} + +// GetClient - +func (r *OVNDBRestoreReconciler) GetClient() client.Client { + return r.Client +} + +// GetKClient - +func (r *OVNDBRestoreReconciler) GetKClient() kubernetes.Interface { + return r.Kclient +} + +// GetScheme - +func (r *OVNDBRestoreReconciler) GetScheme() *runtime.Scheme { + return r.Scheme +} + +// GetLogger returns a logger object with a prefix of 
"controller.name" and additional controller context fields +func (r *OVNDBRestoreReconciler) GetLogger(ctx context.Context) logr.Logger { + return log.FromContext(ctx).WithName("Controllers").WithName("OVNDBRestore") +} + +//+kubebuilder:rbac:groups=ovn.openstack.org,resources=ovndbrestores,verbs=get;list;watch;create;update;patch;delete +//+kubebuilder:rbac:groups=ovn.openstack.org,resources=ovndbrestores/status,verbs=get;update;patch +//+kubebuilder:rbac:groups=ovn.openstack.org,resources=ovndbrestores/finalizers,verbs=update;patch +//+kubebuilder:rbac:groups=ovn.openstack.org,resources=ovndbbackups,verbs=get;list;watch;update;patch +//+kubebuilder:rbac:groups=ovn.openstack.org,resources=ovndbclusters,verbs=get;list;watch;update;patch +//+kubebuilder:rbac:groups=apps,resources=statefulsets,verbs=get;list;watch +//+kubebuilder:rbac:groups=batch,resources=jobs,verbs=get;list;watch;create;update;patch;delete +//+kubebuilder:rbac:groups=core,resources=pods,verbs=get;list;watch;create;delete +//+kubebuilder:rbac:groups=core,resources=persistentvolumeclaims,verbs=get;list;delete + +// Reconcile - OVN DB Restore +func (r *OVNDBRestoreReconciler) Reconcile(ctx context.Context, req ctrl.Request) (result ctrl.Result, _err error) { + Log := r.GetLogger(ctx) + + instance := &ovnv1.OVNDBRestore{} + err := r.Get(ctx, req.NamespacedName, instance) + if err != nil { + if k8s_errors.IsNotFound(err) { + return ctrl.Result{}, nil + } + return ctrl.Result{}, err + } + + helper, err := helper.NewHelper( + instance, + r.Client, + r.Kclient, + r.Scheme, + Log, + ) + if err != nil { + return ctrl.Result{}, err + } + + if instance.Status.Conditions == nil { + instance.Status.Conditions = condition.Conditions{} + } + + savedConditions := instance.Status.Conditions.DeepCopy() + + cl := condition.CreateList( + condition.UnknownCondition(ovnv1.OVNDBBackupReadyCondition, condition.InitReason, ovnv1.OVNDBBackupReadyInitMessage), + condition.UnknownCondition(ovnv1.OVNDBClusterReadyCondition, 
condition.InitReason, ovnv1.OVNDBClusterReadyInitMessage), + condition.UnknownCondition(ovnv1.RestoreJobReadyCondition, condition.InitReason, ovnv1.RestoreJobReadyInitMessage), + condition.UnknownCondition(condition.ServiceAccountReadyCondition, condition.InitReason, condition.ServiceAccountReadyInitMessage), + condition.UnknownCondition(condition.RoleReadyCondition, condition.InitReason, condition.RoleReadyInitMessage), + condition.UnknownCondition(condition.RoleBindingReadyCondition, condition.InitReason, condition.RoleBindingReadyInitMessage), + ) + instance.Status.Conditions.Init(&cl) + instance.Status.ObservedGeneration = instance.Generation + + if instance.Status.Hash == nil { + instance.Status.Hash = map[string]string{} + } + + defer func() { + if r := recover(); r != nil { + Log.Info(fmt.Sprintf("panic during reconcile %v\n", r)) + panic(r) + } + if instance.Status.Conditions.AllSubConditionIsTrue() { + instance.Status.Conditions.MarkTrue( + condition.ReadyCondition, condition.ReadyMessage) + } else { + instance.Status.Conditions.MarkUnknown( + condition.ReadyCondition, condition.InitReason, condition.ReadyInitMessage) + instance.Status.Conditions.Set( + instance.Status.Conditions.Mirror(condition.ReadyCondition)) + } + condition.RestoreLastTransitionTimes(&instance.Status.Conditions, savedConditions) + err := helper.PatchInstance(ctx, instance) + if err != nil { + _err = err + return + } + }() + + if instance.DeletionTimestamp.IsZero() && controllerutil.AddFinalizer(instance, helper.GetFinalizer()) { + return ctrl.Result{}, nil + } + + if !instance.DeletionTimestamp.IsZero() { + return r.reconcileDelete(ctx, instance, helper) + } + + return r.reconcileNormal(ctx, instance, helper) +} + +func (r *OVNDBRestoreReconciler) reconcileDelete(ctx context.Context, instance *ovnv1.OVNDBRestore, helper *helper.Helper) (ctrl.Result, error) { + Log := r.GetLogger(ctx) + Log.Info("Reconciling OVNDBRestore delete") + + // Remove finalizer from backup if we added one + 
backup := &ovnv1.OVNDBBackup{} + err := r.Get(ctx, types.NamespacedName{ + Name: instance.Spec.BackupSource, + Namespace: instance.Namespace, + }, backup) + if err != nil { + if !k8s_errors.IsNotFound(err) { + return ctrl.Result{}, err + } + } else { + finalizerName := "ovn.openstack.org/restore-" + instance.Name + if controllerutil.RemoveFinalizer(backup, finalizerName) { + if err := r.Update(ctx, backup); err != nil { + return ctrl.Result{}, err + } + } + + // Remove restore annotation from OVNDBCluster if present + cluster := &ovnv1.OVNDBCluster{} + clusterErr := r.Get(ctx, types.NamespacedName{ + Name: backup.Spec.DatabaseInstance, + Namespace: instance.Namespace, + }, cluster) + if clusterErr == nil { + if _, ok := cluster.Annotations[ovnv1.RestoreInProgressAnnotation]; ok { + patch := client.MergeFrom(cluster.DeepCopy()) + delete(cluster.Annotations, ovnv1.RestoreInProgressAnnotation) + if patchErr := r.Patch(ctx, cluster, patch); patchErr != nil { + return ctrl.Result{}, patchErr + } + } + } else if !k8s_errors.IsNotFound(clusterErr) { + return ctrl.Result{}, clusterErr + } + } + + controllerutil.RemoveFinalizer(instance, helper.GetFinalizer()) + Log.Info("Reconciled OVNDBRestore delete successfully") + + return ctrl.Result{}, nil +} + +func (r *OVNDBRestoreReconciler) reconcileNormal(ctx context.Context, instance *ovnv1.OVNDBRestore, helper *helper.Helper) (ctrl.Result, error) { + Log := r.GetLogger(ctx) + Log.Info("Reconciling OVNDBRestore", "phase", instance.Status.Phase) + + // RBAC + rbacRules := []rbacv1.PolicyRule{ + { + APIGroups: []string{"security.openshift.io"}, + ResourceNames: []string{"restricted-v2"}, + Resources: []string{"securitycontextconstraints"}, + Verbs: []string{"use"}, + }, + } + rbacResult, err := common_rbac.ReconcileRbac(ctx, helper, instance, rbacRules) + if err != nil { + return rbacResult, err + } else if (rbacResult != ctrl.Result{}) { + return rbacResult, nil + } + + // If completed, nothing to do + if instance.Status.Phase 
== ovnv1.OVNDBRestorePhaseCompleted { + instance.Status.Conditions.MarkTrue(ovnv1.OVNDBBackupReadyCondition, ovnv1.OVNDBBackupReadyMessage) + instance.Status.Conditions.MarkTrue(ovnv1.OVNDBClusterReadyCondition, ovnv1.OVNDBClusterReadyMessage) + instance.Status.Conditions.MarkTrue(ovnv1.RestoreJobReadyCondition, ovnv1.RestoreJobReadyMessage) + return ctrl.Result{}, nil + } + + // Lookup OVNDBBackup + backup := &ovnv1.OVNDBBackup{} + err = r.Get(ctx, types.NamespacedName{ + Name: instance.Spec.BackupSource, + Namespace: instance.Namespace, + }, backup) + if err != nil { + if k8s_errors.IsNotFound(err) { + instance.Status.Conditions.Set(condition.FalseCondition( + ovnv1.OVNDBBackupReadyCondition, + condition.RequestedReason, + condition.SeverityWarning, + ovnv1.OVNDBBackupReadyErrorMessage, + fmt.Sprintf("OVNDBBackup %s not found", instance.Spec.BackupSource))) + return ctrl.Result{RequeueAfter: time.Second * 10}, nil + } + return ctrl.Result{}, err + } + + if !backup.IsReady() { + instance.Status.Conditions.Set(condition.FalseCondition( + ovnv1.OVNDBBackupReadyCondition, + condition.RequestedReason, + condition.SeverityInfo, + ovnv1.OVNDBBackupReadyInitMessage)) + return ctrl.Result{RequeueAfter: time.Second * 10}, nil + } + + // Add finalizer to backup to prevent deletion during restore + finalizerName := "ovn.openstack.org/restore-" + instance.Name + if controllerutil.AddFinalizer(backup, finalizerName) { + if err := r.Update(ctx, backup); err != nil { + return ctrl.Result{}, err + } + } + instance.Status.Conditions.MarkTrue(ovnv1.OVNDBBackupReadyCondition, ovnv1.OVNDBBackupReadyMessage) + + // Lookup OVNDBCluster + cluster := &ovnv1.OVNDBCluster{} + err = r.Get(ctx, types.NamespacedName{ + Name: backup.Spec.DatabaseInstance, + Namespace: instance.Namespace, + }, cluster) + if err != nil { + if k8s_errors.IsNotFound(err) { + instance.Status.Conditions.Set(condition.FalseCondition( + ovnv1.OVNDBClusterReadyCondition, + condition.RequestedReason, + 
condition.SeverityWarning, + ovnv1.OVNDBClusterReadyErrorMessage, + fmt.Sprintf("OVNDBCluster %s not found", backup.Spec.DatabaseInstance))) + return ctrl.Result{RequeueAfter: time.Second * 10}, nil + } + return ctrl.Result{}, err + } + instance.Status.Conditions.MarkTrue(ovnv1.OVNDBClusterReadyCondition, ovnv1.OVNDBClusterReadyMessage) + + // Run the phase-based state machine + switch instance.Status.Phase { + case "", ovnv1.OVNDBRestorePhaseValidating: + return r.phaseValidate(ctx, instance, cluster, Log) + case ovnv1.OVNDBRestorePhaseScalingDown: + return r.phaseScaleDown(ctx, instance, cluster, Log) + case ovnv1.OVNDBRestorePhaseRestoring: + return r.phaseRestore(ctx, instance, backup, cluster, helper, Log) + case ovnv1.OVNDBRestorePhaseScalingUp: + return r.phaseScaleUp(ctx, instance, cluster, Log) + case ovnv1.OVNDBRestorePhaseFailed: + return ctrl.Result{}, nil + } + + return ctrl.Result{}, nil +} + +func (r *OVNDBRestoreReconciler) phaseValidate( + _ context.Context, + instance *ovnv1.OVNDBRestore, + cluster *ovnv1.OVNDBCluster, + Log logr.Logger, +) (ctrl.Result, error) { + Log.Info("Phase: Validating") + + // Save original replica count + if instance.Status.OriginalReplicas == nil { + replicas := int32(1) + if cluster.Spec.Replicas != nil { + replicas = *cluster.Spec.Replicas + } + instance.Status.OriginalReplicas = &replicas + } + + instance.Status.Phase = ovnv1.OVNDBRestorePhaseScalingDown + return ctrl.Result{Requeue: true}, nil +} + +// setRestoreAnnotation sets the restore annotation on the OVNDBCluster with the +// desired replica count. The OVNDBCluster controller uses this value to override +// the StatefulSet replicas, preventing higher-level operators from interfering. 
+func (r *OVNDBRestoreReconciler) setRestoreAnnotation( + ctx context.Context, + cluster *ovnv1.OVNDBCluster, + desiredReplicas int32, +) error { + value := fmt.Sprintf("%d", desiredReplicas) + if cluster.Annotations != nil && cluster.Annotations[ovnv1.RestoreInProgressAnnotation] == value { + return nil + } + patch := client.MergeFrom(cluster.DeepCopy()) + if cluster.Annotations == nil { + cluster.Annotations = map[string]string{} + } + cluster.Annotations[ovnv1.RestoreInProgressAnnotation] = value + return r.Patch(ctx, cluster, patch) +} + +func (r *OVNDBRestoreReconciler) removeRestoreAnnotation( + ctx context.Context, + cluster *ovnv1.OVNDBCluster, +) error { + if _, ok := cluster.Annotations[ovnv1.RestoreInProgressAnnotation]; !ok { + return nil + } + patch := client.MergeFrom(cluster.DeepCopy()) + delete(cluster.Annotations, ovnv1.RestoreInProgressAnnotation) + return r.Patch(ctx, cluster, patch) +} + +func (r *OVNDBRestoreReconciler) phaseScaleDown( + ctx context.Context, + instance *ovnv1.OVNDBRestore, + cluster *ovnv1.OVNDBCluster, + Log logr.Logger, +) (ctrl.Result, error) { + Log.Info("Phase: ScalingDown") + + // Set restore annotation to 0 — this tells the OVNDBCluster controller to + // force the StatefulSet to 0 replicas regardless of what higher-level + // operators set in the spec. + if err := r.setRestoreAnnotation(ctx, cluster, 0); err != nil { + return ctrl.Result{}, err + } + + // Force-delete all pods for this StatefulSet. The preStop hooks try to + // leave the RAFT cluster gracefully, but that hangs when all pods are + // terminating simultaneously. Since we're restoring the DB from backup, + // RAFT membership state is irrelevant. 
+ podList := &corev1.PodList{} + if err := r.List(ctx, podList, + client.InNamespace(cluster.Namespace), + client.MatchingLabels{common.AppSelector: ovndbbackup.StatefulSetName(cluster)}, + ); err != nil { + return ctrl.Result{}, err + } + + gracePeriod := int64(0) + for i := range podList.Items { + Log.Info("Force-deleting pod", "pod", podList.Items[i].Name) + if err := r.Delete(ctx, &podList.Items[i], &client.DeleteOptions{ + GracePeriodSeconds: &gracePeriod, + }); err != nil && !k8s_errors.IsNotFound(err) { + return ctrl.Result{}, err + } + } + + // Wait for all pods to be gone + if len(podList.Items) > 0 { + Log.Info("Waiting for pods to terminate", "remaining", len(podList.Items)) + return ctrl.Result{RequeueAfter: time.Second * 3}, nil + } + + // Delete non-pod-0 PVCs so pods start fresh without stale RAFT state. + // Force-delete skipped the preStop hook that normally cleans up DB files. + // The StatefulSet will recreate these PVCs when the pods restart. + originalReplicas := int32(1) + if instance.Status.OriginalReplicas != nil { + originalReplicas = *instance.Status.OriginalReplicas + } + stsName := ovndbbackup.StatefulSetName(cluster) + for i := int32(1); i < originalReplicas; i++ { + pvcName := fmt.Sprintf("%s%s-%s-%d", + cluster.Name, ovndbcluster.PVCSuffixEtcOVN, stsName, i) + pvc := &corev1.PersistentVolumeClaim{} + err := r.Get(ctx, types.NamespacedName{ + Name: pvcName, + Namespace: cluster.Namespace, + }, pvc) + if err != nil { + if k8s_errors.IsNotFound(err) { + continue + } + return ctrl.Result{}, err + } + Log.Info("Deleting non-pod-0 PVC", "pvc", pvcName) + if err := r.Delete(ctx, pvc); err != nil && !k8s_errors.IsNotFound(err) { + return ctrl.Result{}, err + } + } + + Log.Info("All pods terminated and non-pod-0 PVCs cleaned up") + instance.Status.Phase = ovnv1.OVNDBRestorePhaseRestoring + return ctrl.Result{Requeue: true}, nil +} + +func (r *OVNDBRestoreReconciler) phaseRestore( + ctx context.Context, + instance *ovnv1.OVNDBRestore, + backup 
*ovnv1.OVNDBBackup, + cluster *ovnv1.OVNDBCluster, + h *helper.Helper, + Log logr.Logger, +) (ctrl.Result, error) { + Log.Info("Phase: Restoring") + + serviceLabels := map[string]string{ + "app": "ovndbrestore", + "ovndbrestore": instance.Name, + } + + // Create restore scripts ConfigMap + err := r.generateRestoreConfigMaps(ctx, h, instance, cluster, serviceLabels) + if err != nil { + return ctrl.Result{}, fmt.Errorf("failed to create restore ConfigMap: %w", err) + } + + // Create or check restore Job + restoreJob := ovndbbackup.RestoreJob(instance, backup, cluster, serviceLabels) + foundJob := &batchv1.Job{} + err = r.Get(ctx, types.NamespacedName{Name: restoreJob.Name, Namespace: restoreJob.Namespace}, foundJob) + if err != nil { + if k8s_errors.IsNotFound(err) { + if err := controllerutil.SetControllerReference(instance, restoreJob, r.Scheme); err != nil { + return ctrl.Result{}, err + } + Log.Info("Creating restore Job", "Job.Name", restoreJob.Name) + if err := r.Create(ctx, restoreJob); err != nil { + instance.Status.Conditions.Set(condition.FalseCondition( + ovnv1.RestoreJobReadyCondition, + condition.ErrorReason, + condition.SeverityWarning, + ovnv1.RestoreJobReadyErrorMessage, + err.Error())) + return ctrl.Result{}, err + } + return ctrl.Result{RequeueAfter: time.Second * 5}, nil + } + return ctrl.Result{}, err + } + + // Check Job status + if foundJob.Status.Succeeded > 0 { + Log.Info("Restore Job completed successfully") + instance.Status.Phase = ovnv1.OVNDBRestorePhaseScalingUp + return ctrl.Result{Requeue: true}, nil + } + + if foundJob.Status.Failed > 0 { + Log.Error(nil, "Restore Job failed") + instance.Status.Phase = ovnv1.OVNDBRestorePhaseFailed + instance.Status.Conditions.Set(condition.FalseCondition( + ovnv1.RestoreJobReadyCondition, + condition.ErrorReason, + condition.SeverityError, + ovnv1.RestoreJobReadyErrorMessage, + "restore Job failed")) + return ctrl.Result{}, nil + } + + Log.Info("Waiting for restore Job to complete") + return 
ctrl.Result{RequeueAfter: time.Second * 5}, nil +} + +func (r *OVNDBRestoreReconciler) phaseScaleUp( + ctx context.Context, + instance *ovnv1.OVNDBRestore, + cluster *ovnv1.OVNDBCluster, + Log logr.Logger, +) (ctrl.Result, error) { + Log.Info("Phase: ScalingUp") + + originalReplicas := int32(1) + if instance.Status.OriginalReplicas != nil { + originalReplicas = *instance.Status.OriginalReplicas + } + + _, annotationPresent := cluster.Annotations[ovnv1.RestoreInProgressAnnotation] + + if annotationPresent { + // Annotation still set — we need pod-0 to bootstrap first. + if err := r.setRestoreAnnotation(ctx, cluster, 1); err != nil { + return ctrl.Result{}, err + } + + sts := &appsv1.StatefulSet{} + if err := r.Get(ctx, types.NamespacedName{ + Name: ovndbbackup.StatefulSetName(cluster), + Namespace: cluster.Namespace, + }, sts); err != nil { + return ctrl.Result{}, err + } + + if sts.Status.ReadyReplicas < 1 { + Log.Info("Waiting for pod-0 to become ready") + return ctrl.Result{RequeueAfter: time.Second * 10}, nil + } + + r.verifyRestoredDB(ctx, instance, cluster, Log) + + if err := r.removeRestoreAnnotation(ctx, cluster); err != nil { + return ctrl.Result{}, err + } + Log.Info("Removed restore annotation, cluster will scale to original replicas") + return ctrl.Result{RequeueAfter: time.Second * 10}, nil + } + + // Annotation already removed — wait for all replicas without re-setting it. 
+ sts := &appsv1.StatefulSet{} + if err := r.Get(ctx, types.NamespacedName{ + Name: ovndbbackup.StatefulSetName(cluster), + Namespace: cluster.Namespace, + }, sts); err != nil { + return ctrl.Result{}, err + } + + if sts.Status.ReadyReplicas < originalReplicas { + Log.Info("Waiting for all replicas to become ready", + "ready", sts.Status.ReadyReplicas, "desired", originalReplicas) + return ctrl.Result{RequeueAfter: time.Second * 10}, nil + } + + Log.Info("All replicas are ready, restore completed") + instance.Status.Phase = ovnv1.OVNDBRestorePhaseCompleted + instance.Status.Conditions.MarkTrue(ovnv1.RestoreJobReadyCondition, ovnv1.RestoreJobReadyMessage) + return ctrl.Result{}, nil +} + +func (r *OVNDBRestoreReconciler) verifyRestoredDB( + ctx context.Context, + instance *ovnv1.OVNDBRestore, + cluster *ovnv1.OVNDBCluster, + Log logr.Logger, +) { + if r.RestConfig == nil { + Log.Info("RestConfig not available, skipping DB verification") + return + } + + pod0Name := ovndbbackup.Pod0Name(cluster) + dbType := strings.ToLower(cluster.Spec.DBType) + dbName := "OVN_Northbound" + if cluster.Spec.DBType == ovnv1.SBDBType { + dbName = "OVN_Southbound" + } + + containerName := ovndbbackup.ServiceName(cluster) + err := rsh.ExecInPod(ctx, r.Kclient, r.RestConfig, + types.NamespacedName{Namespace: instance.Namespace, Name: pod0Name}, + containerName, + []string{"ovsdb-client", "get-schema-version", + fmt.Sprintf("unix:/run/ovn/ovn%s_db.sock", dbType), dbName}, + func(stdout, _ *bytes.Buffer) error { + Log.Info("Restored DB schema version", "version", strings.TrimSpace(stdout.String())) + return nil + }, + ) + if err != nil { + Log.Info("DB verification failed (non-fatal)", "error", err) + } +} + +func (r *OVNDBRestoreReconciler) generateRestoreConfigMaps( + ctx context.Context, + h *helper.Helper, + instance *ovnv1.OVNDBRestore, + cluster *ovnv1.OVNDBCluster, + cmLabels map[string]string, +) error { + templateParameters := make(map[string]any) + templateParameters["DB_TYPE"] = 
strings.ToLower(cluster.Spec.DBType) + + cms := []util.Template{ + { + Name: fmt.Sprintf("%s-restore-scripts", instance.Name), + Namespace: instance.Namespace, + Type: util.TemplateTypeScripts, + InstanceType: "ovndbrestore", + Labels: cmLabels, + ConfigOptions: templateParameters, + }, + } + return configmap.EnsureConfigMaps(ctx, h, instance, cms, &map[string]env.Setter{}) +} + +// SetupWithManager sets up the controller with the Manager. +func (r *OVNDBRestoreReconciler) SetupWithManager(mgr ctrl.Manager) error { + return ctrl.NewControllerManagedBy(mgr). + For(&ovnv1.OVNDBRestore{}). + Owns(&batchv1.Job{}). + Owns(&corev1.ConfigMap{}). + Owns(&corev1.ServiceAccount{}). + Owns(&rbacv1.Role{}). + Owns(&rbacv1.RoleBinding{}). + Complete(r) +} diff --git a/internal/controller/ovnnorthd_controller.go b/internal/controller/ovnnorthd_controller.go index 8651514c..0d5682a1 100644 --- a/internal/controller/ovnnorthd_controller.go +++ b/internal/controller/ovnnorthd_controller.go @@ -86,7 +86,7 @@ func (r *OVNNorthdReconciler) GetLogger(ctx context.Context) logr.Logger { //+kubebuilder:rbac:groups=core,resources=configmaps,verbs=get;list;watch;create;update;patch;delete; //+kubebuilder:rbac:groups=core,resources=secrets,verbs=get;list;watch;create;update;patch;delete; //+kubebuilder:rbac:groups=core,resources=services,verbs=get;list;watch;create;update;patch;delete; -//+kubebuilder:rbac:groups=apps,resources=deployments,verbs=get;list;patch;update;delete; +//+kubebuilder:rbac:groups=apps,resources=deployments,verbs=get;list;watch;patch;update;delete; //+kubebuilder:rbac:groups=apps,resources=statefulsets,verbs=get;list;watch;create;patch;update;delete; //+kubebuilder:rbac:groups=core,resources=pods,verbs=get;list; diff --git a/internal/ovndbbackup/const.go b/internal/ovndbbackup/const.go new file mode 100644 index 00000000..896b6f20 --- /dev/null +++ b/internal/ovndbbackup/const.go @@ -0,0 +1,22 @@ +// Package ovndbbackup provides functionality for managing OVN database 
backups and restores +package ovndbbackup + +const ( + // BackupDataMountPath is the mount path for backup data in pods + BackupDataMountPath = "/backup/data" + + // BackupVolumeName is the name of the backup PVC volume + BackupVolumeName = "backup-data" + + // BackupScriptsVolumeName is the name of the backup scripts ConfigMap volume + BackupScriptsVolumeName = "backup-scripts" + + // BackupScriptsMountPath is the mount path for backup scripts + BackupScriptsMountPath = "/usr/local/bin/backup-scripts" + + // DBDataVolumeName is the name of the OVN DB data volume (for restore) + DBDataVolumeName = "ovn-db-data" + + // DBDataMountPath is the mount path for OVN DB data (for restore) + DBDataMountPath = "/etc/ovn" +) diff --git a/internal/ovndbbackup/cronjob.go b/internal/ovndbbackup/cronjob.go new file mode 100644 index 00000000..18cbeeda --- /dev/null +++ b/internal/ovndbbackup/cronjob.go @@ -0,0 +1,110 @@ +package ovndbbackup + +import ( + "fmt" + "strings" + + "github.com/openstack-k8s-operators/lib-common/modules/common/env" + ovnv1 "github.com/openstack-k8s-operators/ovn-operator/api/v1beta1" + batchv1 "k8s.io/api/batch/v1" + corev1 "k8s.io/api/core/v1" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" +) + +// BackupCronJob returns the CronJob definition for scheduled backups +func BackupCronJob( + backup *ovnv1.OVNDBBackup, + cluster *ovnv1.OVNDBCluster, + labels map[string]string, + configHash string, +) *batchv1.CronJob { + var scriptsVolumeDefaultMode int32 = 0755 + concurrencyPolicy := batchv1.ForbidConcurrent + + retentionMinutes := "" + if backup.Spec.Retention != nil { + retentionMinutes = fmt.Sprintf("%d", int(backup.Spec.Retention.Minutes())) + } + + envVars := map[string]env.Setter{ + "DB_TYPE": env.SetValue(strings.ToLower(cluster.Spec.DBType)), + "DB_PORT": env.SetValue(fmt.Sprintf("%d", DBPort(cluster))), + "SERVICE_NAME": env.SetValue(ServiceName(cluster)), + "NAMESPACE": env.SetValue(backup.Namespace), + "BACKUP_DIR": 
env.SetValue(BackupDataMountPath), + "RETENTION": env.SetValue(retentionMinutes), + "CONFIG_HASH": env.SetValue(configHash), + } + + volumes := []corev1.Volume{ + { + Name: BackupVolumeName, + VolumeSource: corev1.VolumeSource{ + PersistentVolumeClaim: &corev1.PersistentVolumeClaimVolumeSource{ + ClaimName: BackupPVCName(backup), + }, + }, + }, + { + Name: BackupScriptsVolumeName, + VolumeSource: corev1.VolumeSource{ + ConfigMap: &corev1.ConfigMapVolumeSource{ + DefaultMode: &scriptsVolumeDefaultMode, + LocalObjectReference: corev1.LocalObjectReference{ + Name: BackupScriptsConfigMapName(backup), + }, + }, + }, + }, + } + + volumeMounts := []corev1.VolumeMount{ + { + Name: BackupVolumeName, + MountPath: BackupDataMountPath, + }, + { + Name: BackupScriptsVolumeName, + MountPath: BackupScriptsMountPath, + ReadOnly: true, + }, + } + + if cluster.Spec.TLS.Enabled() { + tlsVolumes, tlsVolumeMounts := tlsVolumesAndMounts(cluster) + volumes = append(volumes, tlsVolumes...) + volumeMounts = append(volumeMounts, tlsVolumeMounts...) 
+ } + + return &batchv1.CronJob{ + ObjectMeta: metav1.ObjectMeta{ + Name: BackupCronJobName(backup), + Namespace: backup.Namespace, + Labels: labels, + }, + Spec: batchv1.CronJobSpec{ + Schedule: backup.Spec.Schedule, + ConcurrencyPolicy: concurrencyPolicy, + JobTemplate: batchv1.JobTemplateSpec{ + Spec: batchv1.JobSpec{ + Template: corev1.PodTemplateSpec{ + Spec: corev1.PodSpec{ + RestartPolicy: corev1.RestartPolicyOnFailure, + ServiceAccountName: backup.RbacResourceName(), + Containers: []corev1.Container{ + { + Name: "ovndb-backup", + Image: cluster.Spec.ContainerImage, + Command: []string{"/bin/bash", BackupScriptsMountPath + "/backup_ovndb"}, + Env: env.MergeEnvs([]corev1.EnvVar{}, envVars), + VolumeMounts: volumeMounts, + }, + }, + Volumes: volumes, + }, + }, + }, + }, + }, + } +} diff --git a/internal/ovndbbackup/pvc.go b/internal/ovndbbackup/pvc.go new file mode 100644 index 00000000..9e0524c4 --- /dev/null +++ b/internal/ovndbbackup/pvc.go @@ -0,0 +1,57 @@ +package ovndbbackup + +import ( + "github.com/openstack-k8s-operators/lib-common/modules/common/backup" + "github.com/openstack-k8s-operators/lib-common/modules/common/util" + ovnv1 "github.com/openstack-k8s-operators/ovn-operator/api/v1beta1" + corev1 "k8s.io/api/core/v1" + "k8s.io/apimachinery/pkg/api/resource" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" +) + +// BackupPVC returns the PVC definition for storing backups. +// The PVC is intentionally NOT owned by the OVNDBBackup CR so that +// backup data survives CR deletion. 
+func BackupPVC(instance *ovnv1.OVNDBBackup, cluster *ovnv1.OVNDBCluster) *corev1.PersistentVolumeClaim { + storageClass := instance.Spec.StorageClass + if storageClass == "" { + storageClass = cluster.Spec.StorageClass + } + + storageRequest := instance.Spec.StorageRequest + if storageRequest == "" { + storageRequest = cluster.Spec.StorageRequest + } + + pvc := &corev1.PersistentVolumeClaim{ + ObjectMeta: metav1.ObjectMeta{ + Name: BackupPVCName(instance), + Namespace: instance.Namespace, + Labels: util.MergeStringMaps( + map[string]string{ + "app": "ovndbbackup", + "ovndbbackup": instance.Name, + "ovn.openstack.org/dbcluster": instance.Spec.DatabaseInstance, + }, + backup.GetBackupLabels(backup.CategoryControlPlane), + backup.GetRestoreLabels(backup.RestoreOrder00, backup.CategoryControlPlane), + ), + }, + Spec: corev1.PersistentVolumeClaimSpec{ + AccessModes: []corev1.PersistentVolumeAccessMode{ + corev1.ReadWriteOnce, + }, + Resources: corev1.VolumeResourceRequirements{ + Requests: corev1.ResourceList{ + corev1.ResourceStorage: resource.MustParse(storageRequest), + }, + }, + }, + } + + if storageClass != "" { + pvc.Spec.StorageClassName = &storageClass + } + + return pvc +} diff --git a/internal/ovndbbackup/restorejob.go b/internal/ovndbbackup/restorejob.go new file mode 100644 index 00000000..6c071cca --- /dev/null +++ b/internal/ovndbbackup/restorejob.go @@ -0,0 +1,102 @@ +package ovndbbackup + +import ( + "fmt" + "strings" + + "github.com/openstack-k8s-operators/lib-common/modules/common/env" + ovnv1 "github.com/openstack-k8s-operators/ovn-operator/api/v1beta1" + batchv1 "k8s.io/api/batch/v1" + corev1 "k8s.io/api/core/v1" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" +) + +// RestoreJob returns a Job that restores an OVN DB from a backup. +// It mounts pod-0's PVC and the backup PVC, copies the standalone backup file, +// then lets ovn-ctl convert it to a RAFT cluster on pod startup. 
+func RestoreJob( + restore *ovnv1.OVNDBRestore, + backup *ovnv1.OVNDBBackup, + cluster *ovnv1.OVNDBCluster, + labels map[string]string, +) *batchv1.Job { + var scriptsVolumeDefaultMode int32 = 0755 + var backoffLimit int32 = 2 + + pod0PVCName := ClusterPod0PVCName(cluster) + + envVars := map[string]env.Setter{ + "DB_TYPE": env.SetValue(strings.ToLower(cluster.Spec.DBType)), + } + + return &batchv1.Job{ + ObjectMeta: metav1.ObjectMeta{ + Name: RestoreJobName(restore), + Namespace: restore.Namespace, + Labels: labels, + }, + Spec: batchv1.JobSpec{ + BackoffLimit: &backoffLimit, + Template: corev1.PodTemplateSpec{ + Spec: corev1.PodSpec{ + RestartPolicy: corev1.RestartPolicyOnFailure, + ServiceAccountName: restore.RbacResourceName(), + Containers: []corev1.Container{ + { + Name: "ovndb-restore", + Image: cluster.Spec.ContainerImage, + Command: []string{"/bin/bash", BackupScriptsMountPath + "/restore_ovndb"}, + Env: env.MergeEnvs([]corev1.EnvVar{}, envVars), + VolumeMounts: []corev1.VolumeMount{ + { + Name: DBDataVolumeName, + MountPath: DBDataMountPath, + }, + { + Name: BackupVolumeName, + MountPath: BackupDataMountPath, + ReadOnly: true, + }, + { + Name: BackupScriptsVolumeName, + MountPath: BackupScriptsMountPath, + ReadOnly: true, + }, + }, + }, + }, + Volumes: []corev1.Volume{ + { + Name: DBDataVolumeName, + VolumeSource: corev1.VolumeSource{ + PersistentVolumeClaim: &corev1.PersistentVolumeClaimVolumeSource{ + ClaimName: pod0PVCName, + }, + }, + }, + { + Name: BackupVolumeName, + VolumeSource: corev1.VolumeSource{ + PersistentVolumeClaim: &corev1.PersistentVolumeClaimVolumeSource{ + ClaimName: BackupPVCName(backup), + ReadOnly: true, + }, + }, + }, + { + Name: BackupScriptsVolumeName, + VolumeSource: corev1.VolumeSource{ + ConfigMap: &corev1.ConfigMapVolumeSource{ + DefaultMode: &scriptsVolumeDefaultMode, + LocalObjectReference: corev1.LocalObjectReference{ + Name: fmt.Sprintf("%s-restore-scripts", restore.Name), + }, + }, + }, + }, + }, + }, + }, + }, + } +} 
diff --git a/internal/ovndbbackup/utils.go b/internal/ovndbbackup/utils.go new file mode 100644 index 00000000..daec66d4 --- /dev/null +++ b/internal/ovndbbackup/utils.go @@ -0,0 +1,74 @@ +package ovndbbackup + +import ( + "crypto/sha256" + "fmt" + + ovnv1 "github.com/openstack-k8s-operators/ovn-operator/api/v1beta1" + "github.com/openstack-k8s-operators/ovn-operator/internal/ovndbcluster" +) + +func truncateName(name string, maxLen int) string { + if len(name) <= maxLen { + return name + } + hash := fmt.Sprintf("%x", sha256.Sum256([]byte(name))) + return name[:maxLen-9] + "-" + hash[:8] +} + +// BackupCronJobName returns the name for the backup CronJob (max 52 chars for CronJobs) +func BackupCronJobName(backup *ovnv1.OVNDBBackup) string { + return truncateName(backup.Name+"-cronjob", 52) +} + +// BackupPVCName returns the name for the backup PVC +func BackupPVCName(backup *ovnv1.OVNDBBackup) string { + return truncateName(backup.Name+"-backup", 63) +} + +// RestoreJobName returns the name for the restore Job +func RestoreJobName(restore *ovnv1.OVNDBRestore) string { + return truncateName(restore.Name+"-restore", 63) +} + +// BackupScriptsConfigMapName returns the name for the backup scripts ConfigMap +func BackupScriptsConfigMapName(backup *ovnv1.OVNDBBackup) string { + return backup.Name + "-backup-scripts" +} + +// ClusterPod0PVCName returns the PVC name for pod-0 of a given OVNDBCluster. +// StatefulSet PVCs follow the pattern: --. +// The volume claim template is named -etc-ovn and the StatefulSet +// uses the service name (ovsdbserver-nb / ovsdbserver-sb), not the cluster CR name. +func ClusterPod0PVCName(cluster *ovnv1.OVNDBCluster) string { + stsName := ServiceName(cluster) + return cluster.Name + ovndbcluster.PVCSuffixEtcOVN + "-" + stsName + "-0" +} + +// Pod0Name returns the pod-0 name for a given OVNDBCluster. +// The StatefulSet is named after the service (ovsdbserver-nb / ovsdbserver-sb). 
+func Pod0Name(cluster *ovnv1.OVNDBCluster) string { + return ServiceName(cluster) + "-0" +} + +// StatefulSetName returns the StatefulSet name for a given OVNDBCluster. +// The StatefulSet is named after the service (ovsdbserver-nb / ovsdbserver-sb). +func StatefulSetName(cluster *ovnv1.OVNDBCluster) string { + return ServiceName(cluster) +} + +// ServiceName returns the headless service name for a given OVNDBCluster +func ServiceName(cluster *ovnv1.OVNDBCluster) string { + if cluster.Spec.DBType == ovnv1.SBDBType { + return ovnv1.ServiceNameSB + } + return ovnv1.ServiceNameNB +} + +// DBPort returns the DB port for a given OVNDBCluster +func DBPort(cluster *ovnv1.OVNDBCluster) int32 { + if cluster.Spec.DBType == ovnv1.SBDBType { + return ovndbcluster.DbPortSB + } + return ovndbcluster.DbPortNB +} diff --git a/internal/ovndbbackup/volumes.go b/internal/ovndbbackup/volumes.go new file mode 100644 index 00000000..37ce6c50 --- /dev/null +++ b/internal/ovndbbackup/volumes.go @@ -0,0 +1,56 @@ +package ovndbbackup + +import ( + ovnv1 "github.com/openstack-k8s-operators/ovn-operator/api/v1beta1" + ovn_common "github.com/openstack-k8s-operators/ovn-operator/internal/common" + corev1 "k8s.io/api/core/v1" +) + +func tlsVolumesAndMounts(cluster *ovnv1.OVNDBCluster) ([]corev1.Volume, []corev1.VolumeMount) { + var volumes []corev1.Volume + var mounts []corev1.VolumeMount + + if cluster.Spec.TLS.SecretName != nil { + volumes = append(volumes, corev1.Volume{ + Name: "tls-certs", + VolumeSource: corev1.VolumeSource{ + Secret: &corev1.SecretVolumeSource{ + SecretName: *cluster.Spec.TLS.SecretName, + }, + }, + }) + mounts = append(mounts, + corev1.VolumeMount{ + Name: "tls-certs", + MountPath: ovn_common.OVNDbCertPath, + SubPath: "tls.crt", + ReadOnly: true, + }, + corev1.VolumeMount{ + Name: "tls-certs", + MountPath: ovn_common.OVNDbKeyPath, + SubPath: "tls.key", + ReadOnly: true, + }, + ) + } + + if cluster.Spec.TLS.CaBundleSecretName != "" { + volumes = append(volumes, 
corev1.Volume{ + Name: "tls-ca-bundle", + VolumeSource: corev1.VolumeSource{ + Secret: &corev1.SecretVolumeSource{ + SecretName: cluster.Spec.TLS.CaBundleSecretName, + }, + }, + }) + mounts = append(mounts, corev1.VolumeMount{ + Name: "tls-ca-bundle", + MountPath: ovn_common.OVNDbCaCertPath, + SubPath: "tls-ca-bundle.pem", + ReadOnly: true, + }) + } + + return volumes, mounts +} diff --git a/internal/webhook/v1beta1/ovndbbackup_webhook.go b/internal/webhook/v1beta1/ovndbbackup_webhook.go new file mode 100644 index 00000000..f31196d8 --- /dev/null +++ b/internal/webhook/v1beta1/ovndbbackup_webhook.go @@ -0,0 +1,108 @@ +/* + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package v1beta1 + +import ( + "context" + "fmt" + + "k8s.io/apimachinery/pkg/runtime" + ctrl "sigs.k8s.io/controller-runtime" + logf "sigs.k8s.io/controller-runtime/pkg/log" + "sigs.k8s.io/controller-runtime/pkg/webhook" + "sigs.k8s.io/controller-runtime/pkg/webhook/admission" + + ovnv1beta1 "github.com/openstack-k8s-operators/ovn-operator/api/v1beta1" +) + +// nolint:unused +var ovndbbackuplog = logf.Log.WithName("ovndbbackup-resource") + +// SetupOVNDBBackupWebhookWithManager registers the webhook for OVNDBBackup in the manager. +func SetupOVNDBBackupWebhookWithManager(mgr ctrl.Manager) error { + return ctrl.NewWebhookManagedBy(mgr).For(&ovnv1beta1.OVNDBBackup{}). + WithValidator(&OVNDBBackupCustomValidator{}). + WithDefaulter(&OVNDBBackupCustomDefaulter{}). 
+ Complete() +} + +// +kubebuilder:webhook:path=/mutate-ovn-openstack-org-v1beta1-ovndbbackup,mutating=true,failurePolicy=fail,sideEffects=None,groups=ovn.openstack.org,resources=ovndbbackups,verbs=create;update,versions=v1beta1,name=movndbbackup-v1beta1.kb.io,admissionReviewVersions=v1 + +// OVNDBBackupCustomDefaulter struct is responsible for setting default values on the custom resource of the +// Kind OVNDBBackup when those are created or updated. +// +// NOTE: The +kubebuilder:object:generate=false marker prevents controller-gen from generating DeepCopy methods, +// as it is used only for temporary operations and does not need to be deeply copied. +type OVNDBBackupCustomDefaulter struct{} + +var _ webhook.CustomDefaulter = &OVNDBBackupCustomDefaulter{} + +// Default implements webhook.CustomDefaulter so a webhook will be registered for the Kind OVNDBBackup. +func (d *OVNDBBackupCustomDefaulter) Default(_ context.Context, obj runtime.Object) error { + ovndbbackup, ok := obj.(*ovnv1beta1.OVNDBBackup) + if !ok { + return fmt.Errorf("expected an OVNDBBackup object but got %T: %w", obj, ErrInvalidObjectType) + } + ovndbbackuplog.Info("Defaulting for OVNDBBackup", "name", ovndbbackup.GetName()) + + ovndbbackup.Default() + + return nil +} + +// +kubebuilder:webhook:path=/validate-ovn-openstack-org-v1beta1-ovndbbackup,mutating=false,failurePolicy=fail,sideEffects=None,groups=ovn.openstack.org,resources=ovndbbackups,verbs=create;update,versions=v1beta1,name=vovndbbackup-v1beta1.kb.io,admissionReviewVersions=v1 + +// OVNDBBackupCustomValidator struct is responsible for validating the OVNDBBackup resource +// when it is created, updated, or deleted. +// +// NOTE: The +kubebuilder:object:generate=false marker prevents controller-gen from generating DeepCopy methods, +// as this struct is used only for temporary operations and does not need to be deeply copied. 
+type OVNDBBackupCustomValidator struct{} + +var _ webhook.CustomValidator = &OVNDBBackupCustomValidator{} + +// ValidateCreate implements webhook.CustomValidator so a webhook will be registered for the type OVNDBBackup. +func (v *OVNDBBackupCustomValidator) ValidateCreate(_ context.Context, obj runtime.Object) (admission.Warnings, error) { + ovndbbackup, ok := obj.(*ovnv1beta1.OVNDBBackup) + if !ok { + return nil, fmt.Errorf("expected a OVNDBBackup object but got %T: %w", obj, ErrInvalidObjectType) + } + ovndbbackuplog.Info("Validation for OVNDBBackup upon creation", "name", ovndbbackup.GetName()) + + return ovndbbackup.ValidateCreate() +} + +// ValidateUpdate implements webhook.CustomValidator so a webhook will be registered for the type OVNDBBackup. +func (v *OVNDBBackupCustomValidator) ValidateUpdate(_ context.Context, oldObj, newObj runtime.Object) (admission.Warnings, error) { + ovndbbackup, ok := newObj.(*ovnv1beta1.OVNDBBackup) + if !ok { + return nil, fmt.Errorf("expected a OVNDBBackup object for the newObj but got %T: %w", newObj, ErrInvalidObjectType) + } + ovndbbackuplog.Info("Validation for OVNDBBackup upon update", "name", ovndbbackup.GetName()) + + return ovndbbackup.ValidateUpdate(oldObj) +} + +// ValidateDelete implements webhook.CustomValidator so a webhook will be registered for the type OVNDBBackup. 
+func (v *OVNDBBackupCustomValidator) ValidateDelete(_ context.Context, obj runtime.Object) (admission.Warnings, error) {
+	ovndbbackup, ok := obj.(*ovnv1beta1.OVNDBBackup)
+	if !ok {
+		return nil, fmt.Errorf("expected a OVNDBBackup object but got %T: %w", obj, ErrInvalidObjectType)
+	}
+	ovndbbackuplog.Info("Validation for OVNDBBackup upon deletion", "name", ovndbbackup.GetName())
+
+	return ovndbbackup.ValidateDelete()
+}
diff --git a/internal/webhook/v1beta1/ovndbrestore_webhook.go b/internal/webhook/v1beta1/ovndbrestore_webhook.go
new file mode 100644
index 00000000..36565b00
--- /dev/null
+++ b/internal/webhook/v1beta1/ovndbrestore_webhook.go
@@ -0,0 +1,108 @@
+/*
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+
+package v1beta1
+
+import (
+	"context"
+	"fmt"
+
+	"k8s.io/apimachinery/pkg/runtime"
+	ctrl "sigs.k8s.io/controller-runtime"
+	logf "sigs.k8s.io/controller-runtime/pkg/log"
+	"sigs.k8s.io/controller-runtime/pkg/webhook"
+	"sigs.k8s.io/controller-runtime/pkg/webhook/admission"
+
+	ovnv1beta1 "github.com/openstack-k8s-operators/ovn-operator/api/v1beta1"
+)
+
+// nolint:unused
+var ovndbrestorelog = logf.Log.WithName("ovndbrestore-resource")
+
+// SetupOVNDBRestoreWebhookWithManager registers the OVNDBRestore defaulting and validating webhooks with mgr.
+func SetupOVNDBRestoreWebhookWithManager(mgr ctrl.Manager) error {
+	return ctrl.NewWebhookManagedBy(mgr).For(&ovnv1beta1.OVNDBRestore{}).
+		WithValidator(&OVNDBRestoreCustomValidator{}).
+		WithDefaulter(&OVNDBRestoreCustomDefaulter{}).
+		Complete()
+}
+
+// +kubebuilder:webhook:path=/mutate-ovn-openstack-org-v1beta1-ovndbrestore,mutating=true,failurePolicy=fail,sideEffects=None,groups=ovn.openstack.org,resources=ovndbrestores,verbs=create;update,versions=v1beta1,name=movndbrestore-v1beta1.kb.io,admissionReviewVersions=v1
+
+// OVNDBRestoreCustomDefaulter struct is responsible for setting default values on the custom resource of the
+// Kind OVNDBRestore when those are created or updated.
+//
+// NOTE(review): the scaffolded note here cited a +kubebuilder:object:generate=false marker, but no such marker
+// is attached to this type in this file; the type is an empty struct, so there is nothing to deep-copy anyway.
+type OVNDBRestoreCustomDefaulter struct{}
+
+var _ webhook.CustomDefaulter = &OVNDBRestoreCustomDefaulter{}
+
+// Default implements webhook.CustomDefaulter: it rejects non-OVNDBRestore objects and otherwise calls OVNDBRestore.Default.
+func (d *OVNDBRestoreCustomDefaulter) Default(_ context.Context, obj runtime.Object) error {
+	ovndbrestore, ok := obj.(*ovnv1beta1.OVNDBRestore)
+	if !ok {
+		return fmt.Errorf("expected an OVNDBRestore object but got %T: %w", obj, ErrInvalidObjectType)
+	}
+	ovndbrestorelog.Info("Defaulting for OVNDBRestore", "name", ovndbrestore.GetName())
+
+	ovndbrestore.Default()
+
+	return nil
+}
+
+// +kubebuilder:webhook:path=/validate-ovn-openstack-org-v1beta1-ovndbrestore,mutating=false,failurePolicy=fail,sideEffects=None,groups=ovn.openstack.org,resources=ovndbrestores,verbs=create;update,versions=v1beta1,name=vovndbrestore-v1beta1.kb.io,admissionReviewVersions=v1
+
+// OVNDBRestoreCustomValidator struct is responsible for validating the OVNDBRestore resource
+// when it is created, updated, or deleted.
+//
+// NOTE(review): the scaffolded note here cited a +kubebuilder:object:generate=false marker, but no such marker
+// is attached to this type in this file; the type is an empty struct, so there is nothing to deep-copy anyway.
+type OVNDBRestoreCustomValidator struct{}
+
+var _ webhook.CustomValidator = &OVNDBRestoreCustomValidator{}
+
+// ValidateCreate implements webhook.CustomValidator: it rejects non-OVNDBRestore objects and otherwise returns OVNDBRestore.ValidateCreate.
+func (v *OVNDBRestoreCustomValidator) ValidateCreate(_ context.Context, obj runtime.Object) (admission.Warnings, error) {
+	ovndbrestore, ok := obj.(*ovnv1beta1.OVNDBRestore)
+	if !ok {
+		return nil, fmt.Errorf("expected a OVNDBRestore object but got %T: %w", obj, ErrInvalidObjectType)
+	}
+	ovndbrestorelog.Info("Validation for OVNDBRestore upon creation", "name", ovndbrestore.GetName())
+
+	return ovndbrestore.ValidateCreate()
+}
+
+// ValidateUpdate implements webhook.CustomValidator: it type-checks newObj only and returns newObj.ValidateUpdate(oldObj), passing oldObj through as-is.
+func (v *OVNDBRestoreCustomValidator) ValidateUpdate(_ context.Context, oldObj, newObj runtime.Object) (admission.Warnings, error) {
+	ovndbrestore, ok := newObj.(*ovnv1beta1.OVNDBRestore)
+	if !ok {
+		return nil, fmt.Errorf("expected a OVNDBRestore object for the newObj but got %T: %w", newObj, ErrInvalidObjectType)
+	}
+	ovndbrestorelog.Info("Validation for OVNDBRestore upon update", "name", ovndbrestore.GetName())
+
+	return ovndbrestore.ValidateUpdate(oldObj)
+}
+
+// ValidateDelete implements webhook.CustomValidator by delegating to OVNDBRestore.ValidateDelete; note the validating marker above lists only verbs=create;update.
+func (v *OVNDBRestoreCustomValidator) ValidateDelete(_ context.Context, obj runtime.Object) (admission.Warnings, error) {
+	ovndbrestore, ok := obj.(*ovnv1beta1.OVNDBRestore)
+	if !ok {
+		return nil, fmt.Errorf("expected a OVNDBRestore object but got %T: %w", obj, ErrInvalidObjectType)
+	}
+	ovndbrestorelog.Info("Validation for OVNDBRestore upon deletion", "name", ovndbrestore.GetName())
+
+	return ovndbrestore.ValidateDelete()
+}
diff --git a/templates/ovndbbackup/bin/backup_ovndb b/templates/ovndbbackup/bin/backup_ovndb
new file mode 100755
index 00000000..e3df0097
--- /dev/null
+++ b/templates/ovndbbackup/bin/backup_ovndb
@@ -0,0 +1,57 @@
+#!/usr/bin/env bash
+# Dumps one OVN database (Northbound, or Southbound when DB_TYPE=sb) with
+# "ovsdb-client backup" into BACKUP_DIR and, when RETENTION is set and not 0,
+# deletes *.backup files in BACKUP_DIR older than RETENTION minutes.
+set -ex
+
+DB_TYPE="${DB_TYPE}"
+DB_PORT="${DB_PORT}"
+SERVICE_NAME="${SERVICE_NAME}"
+NAMESPACE="${NAMESPACE}"
+BACKUP_DIR="${BACKUP_DIR:-/backup/data}"
+RETENTION="${RETENTION}"
+
+DB_NAME="OVN_Northbound"
+if [[ "${DB_TYPE}" == "sb" ]]; then
+    DB_NAME="OVN_Southbound"
+fi
+
+TIMESTAMP=$(date +%Y%m%d-%H%M%S)
+BACKUP_FILE="${BACKUP_DIR}/${TIMESTAMP}-ovn${DB_TYPE}_db.backup"
+# Dump into a ".partial" file and rename it only on success, so a failed or
+# interrupted dump never leaves a truncated file under a *.backup name that
+# a later restore (which takes the newest *.backup) could pick up.
+BACKUP_TMP="${BACKUP_FILE}.partial"
+trap 'rm -f "${BACKUP_TMP}"' EXIT
+
+SCHEME="tcp"
+SSL_ARGS=""
+{{- if .TLS }}
+SCHEME="ssl"
+SSL_ARGS="--private-key={{ .OVNDB_KEY_PATH }} --certificate={{ .OVNDB_CERT_PATH }} --ca-cert={{ .OVNDB_CACERT_PATH }}"
+{{- end }}
+
+DB_ENDPOINT="${SCHEME}:${SERVICE_NAME}.${NAMESPACE}.svc.cluster.local:${DB_PORT}"
+
+echo "Starting backup of ${DB_NAME} from ${DB_ENDPOINT}"
+
+SCHEMA_VERSION=$(ovsdb-client get-schema-version ${SSL_ARGS} "${DB_ENDPOINT}" "${DB_NAME}" || echo "unknown")
+echo "Schema version: ${SCHEMA_VERSION}"
+
+mkdir -p "${BACKUP_DIR}"
+ovsdb-client backup ${SSL_ARGS} "${DB_ENDPOINT}" "${DB_NAME}" > "${BACKUP_TMP}"
+# An empty dump is never a usable database snapshot; fail instead of keeping it.
+if [[ ! -s "${BACKUP_TMP}" ]]; then
+    echo "ERROR: backup of ${DB_NAME} produced an empty file"
+    exit 1
+fi
+mv "${BACKUP_TMP}" "${BACKUP_FILE}"
+
+echo "Backup completed: $(ls -la "${BACKUP_FILE}")"
+
+if [[ -n "${RETENTION}" && "${RETENTION}" != "0" ]]; then
+    echo "Applying retention policy: ${RETENTION} minutes"
+    find "${BACKUP_DIR}" -name "*.backup" -mmin +"${RETENTION}" -delete -print
+fi
+
+echo "Backup finished successfully"
diff --git a/templates/ovndbrestore/bin/restore_ovndb
b/templates/ovndbrestore/bin/restore_ovndb
new file mode 100755
index 00000000..605030c2
--- /dev/null
+++ b/templates/ovndbrestore/bin/restore_ovndb
@@ -0,0 +1,37 @@
+#!/usr/bin/env bash
+# Installs the most recently modified *-ovn${DB_TYPE}_db.backup file from
+# /backup/data as /etc/ovn/ovn${DB_TYPE}_db.db.
+set -ex
+
+DB_TYPE="${DB_TYPE}"
+DB_FILE="/etc/ovn/ovn${DB_TYPE}_db.db"
+BACKUP_DIR="/backup/data"
+
+BACKUP_FILE=$(ls -t "${BACKUP_DIR}"/*-ovn${DB_TYPE}_db.backup 2>/dev/null | head -1)
+if [[ -z "${BACKUP_FILE}" ]]; then
+    echo "ERROR: No backup files found in ${BACKUP_DIR}"
+    exit 1
+fi
+
+# A zero-byte backup can never be a valid database; refuse it before
+# touching the existing DB file.
+if [[ ! -s "${BACKUP_FILE}" ]]; then
+    echo "ERROR: Backup file ${BACKUP_FILE} is empty"
+    exit 1
+fi
+
+echo "Restoring from: ${BACKUP_FILE}"
+echo "Target DB: ${DB_FILE}"
+
+# Stage the copy next to the target and rename it into place, so a failed
+# or interrupted copy never leaves a truncated file at ${DB_FILE}.
+DB_TMP="${DB_FILE}.restore-tmp"
+trap 'rm -f "${DB_TMP}"' EXIT
+
+# Copy the standalone backup as the DB file. When pod-0 starts, ovn-ctl
+# detects the standalone format and converts it to a RAFT cluster using
+# the correct local address from its own startup arguments.
+cp "${BACKUP_FILE}" "${DB_TMP}"
+mv -f "${DB_TMP}" "${DB_FILE}"
+
+echo "Restore completed successfully"
diff --git a/test/functional/base_test.go b/test/functional/base_test.go
index 8e7b56b5..f1a03ad4 100644
--- a/test/functional/base_test.go
+++ b/test/functional/base_test.go
@@ -248,6 +248,57 @@ func OVNControllerConditionGetter(name types.NamespacedName) condition.Conditions
 	return instance.Status.Conditions
 }
 
+// GetDefaultOVNDBBackupSpec returns a daily-scheduled backup spec for "ovsdbserver-nb" requesting 10G on "local-storage".
+func GetDefaultOVNDBBackupSpec() ovnv1.OVNDBBackupSpec {
+	return ovnv1.OVNDBBackupSpec{
+		DatabaseInstance: "ovsdbserver-nb",
+		Schedule:         "@daily",
+		StorageRequest:   "10G",
+		StorageClass:     "local-storage",
+	}
+}
+
+// CreateOVNDBBackup creates an OVNDBBackup with spec in namespace through the ovn test helper and returns the fetched object.
+func CreateOVNDBBackup(namespace string, spec ovnv1.OVNDBBackupSpec) client.Object {
+	name := ovn.CreateOVNDBBackup(nil, namespace, spec)
+	return ovn.GetOVNDBBackup(name)
+}
+
+// GetOVNDBBackup fetches the named OVNDBBackup through the ovn test helper.
+func GetOVNDBBackup(name types.NamespacedName) *ovnv1.OVNDBBackup {
+	return ovn.GetOVNDBBackup(name)
+}
+
+// OVNDBBackupConditionGetter returns the status conditions of the named OVNDBBackup.
+func OVNDBBackupConditionGetter(name types.NamespacedName) condition.Conditions {
+	instance := ovn.GetOVNDBBackup(name)
+	return instance.Status.Conditions
+}
+
+// GetDefaultOVNDBRestoreSpec returns a restore spec whose BackupSource is "ovndbbackup-nb-sample".
+func GetDefaultOVNDBRestoreSpec() ovnv1.OVNDBRestoreSpec {
+	return ovnv1.OVNDBRestoreSpec{
+		BackupSource: "ovndbbackup-nb-sample",
+	}
+}
+
+// CreateOVNDBRestore creates an OVNDBRestore with spec in namespace through the ovn test helper and returns the fetched object.
+func CreateOVNDBRestore(namespace string, spec ovnv1.OVNDBRestoreSpec) client.Object {
+	name := ovn.CreateOVNDBRestore(nil, namespace, spec)
+	return ovn.GetOVNDBRestore(name)
+}
+
+// GetOVNDBRestore fetches the named OVNDBRestore through the ovn test helper.
+func GetOVNDBRestore(name types.NamespacedName) *ovnv1.OVNDBRestore {
+	return ovn.GetOVNDBRestore(name)
+}
+
+// OVNDBRestoreConditionGetter returns the status conditions of the named OVNDBRestore.
+func OVNDBRestoreConditionGetter(name types.NamespacedName) condition.Conditions {
+	instance := ovn.GetOVNDBRestore(name)
+	return instance.Status.Conditions
+}
+
 func SimulateDaemonsetNumberReadyWithPods(name types.NamespacedName, networkIPs map[string][]string) {
 	ds := GetDaemonSet(name)
 
diff --git a/test/functional/ovndbbackup_controller_test.go b/test/functional/ovndbbackup_controller_test.go
new file mode 100644
index 00000000..d022639d
--- /dev/null
+++ b/test/functional/ovndbbackup_controller_test.go
@@ -0,0 +1,281 @@
+/*
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+
+package functional_test
+
+import (
+	"fmt"
+
+	. "github.com/onsi/ginkgo/v2" //revive:disable:dot-imports
+	. "github.com/onsi/gomega"    //revive:disable:dot-imports
+
+	//revive:disable-next-line:dot-imports
+	. 
"github.com/openstack-k8s-operators/lib-common/modules/common/test/helpers" + + condition "github.com/openstack-k8s-operators/lib-common/modules/common/condition" + ovnv1 "github.com/openstack-k8s-operators/ovn-operator/api/v1beta1" + "github.com/openstack-k8s-operators/ovn-operator/internal/ovndbbackup" + batchv1 "k8s.io/api/batch/v1" + corev1 "k8s.io/api/core/v1" + "k8s.io/apimachinery/pkg/types" +) + +var _ = Describe("OVNDBBackup controller", func() { + + When("OVNDBBackup CR is created with non-existent OVNDBCluster", func() { + var backupName types.NamespacedName + + BeforeEach(func() { + spec := GetDefaultOVNDBBackupSpec() + spec.DatabaseInstance = "nonexistent-cluster" + instance := CreateOVNDBBackup(namespace, spec) + backupName = types.NamespacedName{Name: instance.GetName(), Namespace: instance.GetNamespace()} + DeferCleanup(th.DeleteInstance, instance) + }) + + It("should set OVNDBClusterReady condition to False", func() { + th.ExpectConditionWithDetails( + backupName, + ConditionGetterFunc(OVNDBBackupConditionGetter), + ovnv1.OVNDBClusterReadyCondition, + corev1.ConditionFalse, + condition.RequestedReason, + fmt.Sprintf(ovnv1.OVNDBClusterReadyErrorMessage, "OVNDBCluster nonexistent-cluster not found"), + ) + }) + + It("should not be ready", func() { + th.ExpectCondition( + backupName, + ConditionGetterFunc(OVNDBBackupConditionGetter), + condition.ReadyCondition, + corev1.ConditionFalse, + ) + }) + }) + + When("OVNDBBackup CR is created with an existing OVNDBCluster", func() { + var backupName types.NamespacedName + + BeforeEach(func() { + dbSpec := GetDefaultOVNDBClusterSpec() + dbCluster := CreateOVNDBCluster(namespace, dbSpec) + DeferCleanup(th.DeleteInstance, dbCluster) + + spec := GetDefaultOVNDBBackupSpec() + spec.DatabaseInstance = dbCluster.GetName() + instance := CreateOVNDBBackup(namespace, spec) + backupName = types.NamespacedName{Name: instance.GetName(), Namespace: instance.GetNamespace()} + DeferCleanup(th.DeleteInstance, instance) + }) + 
+ It("should mark OVNDBClusterReady condition as True", func() { + th.ExpectCondition( + backupName, + ConditionGetterFunc(OVNDBBackupConditionGetter), + ovnv1.OVNDBClusterReadyCondition, + corev1.ConditionTrue, + ) + }) + + It("should create a backup PVC", func() { + backup := GetOVNDBBackup(backupName) + pvcName := types.NamespacedName{ + Namespace: namespace, + Name: ovndbbackup.BackupPVCName(backup), + } + Eventually(func(g Gomega) { + pvc := &corev1.PersistentVolumeClaim{} + g.Expect(k8sClient.Get(ctx, pvcName, pvc)).Should(Succeed()) + g.Expect(pvc.Spec.AccessModes).To(ContainElement(corev1.ReadWriteOnce)) + }, timeout, interval).Should(Succeed()) + }) + + It("should mark PersistentVolumeClaimReady condition as True", func() { + th.ExpectCondition( + backupName, + ConditionGetterFunc(OVNDBBackupConditionGetter), + ovnv1.PersistentVolumeClaimReadyCondition, + corev1.ConditionTrue, + ) + }) + + It("should create a CronJob", func() { + backup := GetOVNDBBackup(backupName) + cronJobName := types.NamespacedName{ + Namespace: namespace, + Name: ovndbbackup.BackupCronJobName(backup), + } + Eventually(func(g Gomega) { + cronJob := &batchv1.CronJob{} + g.Expect(k8sClient.Get(ctx, cronJobName, cronJob)).Should(Succeed()) + g.Expect(cronJob.Spec.Schedule).To(Equal("@daily")) + g.Expect(cronJob.Spec.ConcurrencyPolicy).To(Equal(batchv1.ForbidConcurrent)) + }, timeout, interval).Should(Succeed()) + }) + + It("should mark CronJobReady condition as True", func() { + th.ExpectCondition( + backupName, + ConditionGetterFunc(OVNDBBackupConditionGetter), + ovnv1.CronJobReadyCondition, + corev1.ConditionTrue, + ) + }) + + It("should become Ready when all conditions are met", func() { + th.ExpectCondition( + backupName, + ConditionGetterFunc(OVNDBBackupConditionGetter), + condition.ReadyCondition, + corev1.ConditionTrue, + ) + }) + + It("should create a CronJob owned by the CR", func() { + backup := GetOVNDBBackup(backupName) + cronJobName := types.NamespacedName{ + Namespace: 
namespace, + Name: ovndbbackup.BackupCronJobName(backup), + } + Eventually(func(g Gomega) { + cronJob := &batchv1.CronJob{} + g.Expect(k8sClient.Get(ctx, cronJobName, cronJob)).Should(Succeed()) + g.Expect(cronJob.OwnerReferences).To(HaveLen(1)) + g.Expect(cronJob.OwnerReferences[0].Name).To(Equal(backup.Name)) + }, timeout, interval).Should(Succeed()) + }) + + It("should create a PVC NOT owned by the CR", func() { + backup := GetOVNDBBackup(backupName) + pvcName := types.NamespacedName{ + Namespace: namespace, + Name: ovndbbackup.BackupPVCName(backup), + } + Eventually(func(g Gomega) { + pvc := &corev1.PersistentVolumeClaim{} + g.Expect(k8sClient.Get(ctx, pvcName, pvc)).Should(Succeed()) + g.Expect(pvc.OwnerReferences).To(BeEmpty()) + }, timeout, interval).Should(Succeed()) + }) + }) + + When("OVNDBBackup CR uses storage defaults from OVNDBCluster", func() { + var backupName types.NamespacedName + + BeforeEach(func() { + dbSpec := GetDefaultOVNDBClusterSpec() + dbCluster := CreateOVNDBCluster(namespace, dbSpec) + DeferCleanup(th.DeleteInstance, dbCluster) + + spec := ovnv1.OVNDBBackupSpec{ + DatabaseInstance: dbCluster.GetName(), + Schedule: "@daily", + } + instance := CreateOVNDBBackup(namespace, spec) + backupName = types.NamespacedName{Name: instance.GetName(), Namespace: instance.GetNamespace()} + DeferCleanup(th.DeleteInstance, instance) + }) + + It("should create a PVC using the cluster's storage settings", func() { + backup := GetOVNDBBackup(backupName) + pvcName := types.NamespacedName{ + Namespace: namespace, + Name: ovndbbackup.BackupPVCName(backup), + } + Eventually(func(g Gomega) { + pvc := &corev1.PersistentVolumeClaim{} + g.Expect(k8sClient.Get(ctx, pvcName, pvc)).Should(Succeed()) + storageReq := pvc.Spec.Resources.Requests[corev1.ResourceStorage] + g.Expect(storageReq.String()).To(Equal("1G")) + }, timeout, interval).Should(Succeed()) + }) + }) + + When("OVNDBBackup CR is deleted", func() { + var backupName types.NamespacedName + + 
BeforeEach(func() { + dbSpec := GetDefaultOVNDBClusterSpec() + dbCluster := CreateOVNDBCluster(namespace, dbSpec) + DeferCleanup(th.DeleteInstance, dbCluster) + + spec := GetDefaultOVNDBBackupSpec() + spec.DatabaseInstance = dbCluster.GetName() + instance := CreateOVNDBBackup(namespace, spec) + backupName = types.NamespacedName{Name: instance.GetName(), Namespace: instance.GetNamespace()} + + // Wait for it to be ready + th.ExpectCondition( + backupName, + ConditionGetterFunc(OVNDBBackupConditionGetter), + condition.ReadyCondition, + corev1.ConditionTrue, + ) + }) + + It("should preserve the PVC after CR deletion", func() { + backup := GetOVNDBBackup(backupName) + pvcName := types.NamespacedName{ + Namespace: namespace, + Name: ovndbbackup.BackupPVCName(backup), + } + + th.DeleteInstance(backup) + + // PVC should still exist (not owned by CR) + pvc := &corev1.PersistentVolumeClaim{} + Expect(k8sClient.Get(ctx, pvcName, pvc)).Should(Succeed()) + Expect(pvc.OwnerReferences).To(BeEmpty()) + }) + }) + + When("OVNDBBackup CR configmap is created", func() { + var backupName types.NamespacedName + + BeforeEach(func() { + dbSpec := GetDefaultOVNDBClusterSpec() + dbCluster := CreateOVNDBCluster(namespace, dbSpec) + DeferCleanup(th.DeleteInstance, dbCluster) + + spec := GetDefaultOVNDBBackupSpec() + spec.DatabaseInstance = dbCluster.GetName() + instance := CreateOVNDBBackup(namespace, spec) + backupName = types.NamespacedName{Name: instance.GetName(), Namespace: instance.GetNamespace()} + DeferCleanup(th.DeleteInstance, instance) + }) + + It("should create a scripts ConfigMap", func() { + backup := GetOVNDBBackup(backupName) + cmName := types.NamespacedName{ + Namespace: namespace, + Name: ovndbbackup.BackupScriptsConfigMapName(backup), + } + Eventually(func(g Gomega) { + cm := &corev1.ConfigMap{} + g.Expect(k8sClient.Get(ctx, cmName, cm)).Should(Succeed()) + }, timeout, interval).Should(Succeed()) + }) + + It("should mark ServiceConfigReady as True", func() { + 
th.ExpectCondition( + backupName, + ConditionGetterFunc(OVNDBBackupConditionGetter), + condition.ServiceConfigReadyCondition, + corev1.ConditionTrue, + ) + }) + }) +}) diff --git a/test/functional/ovndbrestore_controller_test.go b/test/functional/ovndbrestore_controller_test.go new file mode 100644 index 00000000..c5e17cc7 --- /dev/null +++ b/test/functional/ovndbrestore_controller_test.go @@ -0,0 +1,141 @@ +/* + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package functional_test + +import ( + "fmt" + + . "github.com/onsi/ginkgo/v2" //revive:disable:dot-imports + . "github.com/onsi/gomega" //revive:disable:dot-imports + + //revive:disable-next-line:dot-imports + . 
"github.com/openstack-k8s-operators/lib-common/modules/common/test/helpers" + + condition "github.com/openstack-k8s-operators/lib-common/modules/common/condition" + ovnv1 "github.com/openstack-k8s-operators/ovn-operator/api/v1beta1" + corev1 "k8s.io/api/core/v1" + "k8s.io/apimachinery/pkg/types" +) + +var _ = Describe("OVNDBRestore controller", func() { + + When("OVNDBRestore CR is created with non-existent OVNDBBackup", func() { + var restoreName types.NamespacedName + + BeforeEach(func() { + spec := GetDefaultOVNDBRestoreSpec() + spec.BackupSource = "nonexistent-backup" + instance := CreateOVNDBRestore(namespace, spec) + restoreName = types.NamespacedName{Name: instance.GetName(), Namespace: instance.GetNamespace()} + DeferCleanup(th.DeleteInstance, instance) + }) + + It("should set OVNDBBackupReady condition to False", func() { + th.ExpectConditionWithDetails( + restoreName, + ConditionGetterFunc(OVNDBRestoreConditionGetter), + ovnv1.OVNDBBackupReadyCondition, + corev1.ConditionFalse, + condition.RequestedReason, + fmt.Sprintf(ovnv1.OVNDBBackupReadyErrorMessage, "OVNDBBackup nonexistent-backup not found"), + ) + }) + + It("should not be ready", func() { + th.ExpectCondition( + restoreName, + ConditionGetterFunc(OVNDBRestoreConditionGetter), + condition.ReadyCondition, + corev1.ConditionFalse, + ) + }) + }) + + When("OVNDBRestore CR is created with a ready OVNDBBackup", func() { + var restoreName types.NamespacedName + + BeforeEach(func() { + // Create OVNDBCluster + dbSpec := GetDefaultOVNDBClusterSpec() + dbCluster := CreateOVNDBCluster(namespace, dbSpec) + DeferCleanup(th.DeleteInstance, dbCluster) + + // Create OVNDBBackup referencing the cluster + backupSpec := GetDefaultOVNDBBackupSpec() + backupSpec.DatabaseInstance = dbCluster.GetName() + backupInstance := CreateOVNDBBackup(namespace, backupSpec) + DeferCleanup(th.DeleteInstance, backupInstance) + + // Wait for backup to be ready + backupNN := types.NamespacedName{Name: backupInstance.GetName(), 
Namespace: backupInstance.GetNamespace()} + th.ExpectCondition( + backupNN, + ConditionGetterFunc(OVNDBBackupConditionGetter), + condition.ReadyCondition, + corev1.ConditionTrue, + ) + + spec := ovnv1.OVNDBRestoreSpec{ + BackupSource: backupInstance.GetName(), + } + instance := CreateOVNDBRestore(namespace, spec) + restoreName = types.NamespacedName{Name: instance.GetName(), Namespace: instance.GetNamespace()} + DeferCleanup(th.DeleteInstance, instance) + }) + + It("should mark OVNDBBackupReady as True", func() { + th.ExpectCondition( + restoreName, + ConditionGetterFunc(OVNDBRestoreConditionGetter), + ovnv1.OVNDBBackupReadyCondition, + corev1.ConditionTrue, + ) + }) + + It("should mark OVNDBClusterReady as True", func() { + th.ExpectCondition( + restoreName, + ConditionGetterFunc(OVNDBRestoreConditionGetter), + ovnv1.OVNDBClusterReadyCondition, + corev1.ConditionTrue, + ) + }) + + It("should save original replicas and move to ScalingDown phase", func() { + Eventually(func(g Gomega) { + restore := GetOVNDBRestore(restoreName) + g.Expect(restore.Status.OriginalReplicas).NotTo(BeNil()) + g.Expect(restore.Status.Phase).To(BeElementOf( + ovnv1.OVNDBRestorePhaseScalingDown, + ovnv1.OVNDBRestorePhaseRestoring, + )) + }, timeout, interval).Should(Succeed()) + }) + + It("should add a finalizer to the backup CR", func() { + restore := GetOVNDBRestore(restoreName) + backupName := types.NamespacedName{ + Name: restore.Spec.BackupSource, + Namespace: namespace, + } + Eventually(func(g Gomega) { + backup := GetOVNDBBackup(backupName) + finalizerName := "ovn.openstack.org/restore-" + restore.Name + g.Expect(backup.Finalizers).To(ContainElement(finalizerName)) + }, timeout, interval).Should(Succeed()) + }) + }) +}) diff --git a/test/functional/suite_test.go b/test/functional/suite_test.go index 31127265..bddf0b22 100644 --- a/test/functional/suite_test.go +++ b/test/functional/suite_test.go @@ -214,6 +214,21 @@ var _ = BeforeSuite(func() { }).SetupWithManager(k8sManager) 
Expect(err).ToNot(HaveOccurred()) + err = (&controllers.OVNDBBackupReconciler{ + Client: k8sManager.GetClient(), + Scheme: k8sManager.GetScheme(), + Kclient: kclient, + }).SetupWithManager(k8sManager) + Expect(err).ToNot(HaveOccurred()) + + err = (&controllers.OVNDBRestoreReconciler{ + Client: k8sManager.GetClient(), + Scheme: k8sManager.GetScheme(), + Kclient: kclient, + RestConfig: cfg, + }).SetupWithManager(k8sManager) + Expect(err).ToNot(HaveOccurred()) + // Acquire environmental defaults and initialize operator defaults with them ovnv1.SetupDefaults() @@ -226,6 +241,12 @@ var _ = BeforeSuite(func() { err = webhooksv1.SetupOVNControllerWebhookWithManager(k8sManager) Expect(err).NotTo(HaveOccurred()) + err = webhooksv1.SetupOVNDBBackupWebhookWithManager(k8sManager) + Expect(err).NotTo(HaveOccurred()) + + err = webhooksv1.SetupOVNDBRestoreWebhookWithManager(k8sManager) + Expect(err).NotTo(HaveOccurred()) + go func() { defer GinkgoRecover() err = k8sManager.Start(ctx) From fc0db6aee5fc9461fdaa3e324a79cc81abd76657 Mon Sep 17 00:00:00 2001 From: Luca Miccini Date: Tue, 21 Apr 2026 08:09:04 +0200 Subject: [PATCH 2/4] Add kuttl test for OVN DB backup and restore workflow Tests the full backup/restore lifecycle: deploy OVN with 3 replicas, seed test data, create backup, trigger manual backup job, restore from backup, and verify data survives the restore. Force-deletes pods during cleanup to avoid preStop hook hangs when the entire cluster is torn down. 
Co-Authored-By: Claude Opus 4.6 --- .../tests/ovn_backup_restore/01-assert.yaml | 45 +++++++++++++ .../ovn_backup_restore/01-deploy-ovn.yaml | 6 ++ .../tests/ovn_backup_restore/02-assert.yaml | 40 ++++++++++++ .../ovn_backup_restore/02-scale-and-seed.yaml | 7 ++ .../tests/ovn_backup_restore/03-assert.yaml | 11 ++++ .../ovn_backup_restore/03-seed-testdata.yaml | 6 ++ .../tests/ovn_backup_restore/04-assert.yaml | 16 +++++ .../ovn_backup_restore/04-create-backup.yaml | 9 +++ .../tests/ovn_backup_restore/05-assert.yaml | 18 +++++ .../ovn_backup_restore/05-trigger-backup.yaml | 5 ++ .../tests/ovn_backup_restore/06-assert.yaml | 24 +++++++ .../ovn_backup_restore/06-create-restore.yaml | 6 ++ .../tests/ovn_backup_restore/07-assert.yaml | 11 ++++ .../ovn_backup_restore/07-verify-data.yaml | 12 ++++ .../tests/ovn_backup_restore/08-cleanup.yaml | 14 ++++ .../ovn_backup_restore/09-cleanup-pvcs.yaml | 24 +++++++ .../tests/ovn_backup_restore/09-errors.yaml | 65 +++++++++++++++++++ 17 files changed, 319 insertions(+) create mode 100644 test/kuttl/tests/ovn_backup_restore/01-assert.yaml create mode 100644 test/kuttl/tests/ovn_backup_restore/01-deploy-ovn.yaml create mode 100644 test/kuttl/tests/ovn_backup_restore/02-assert.yaml create mode 100644 test/kuttl/tests/ovn_backup_restore/02-scale-and-seed.yaml create mode 100644 test/kuttl/tests/ovn_backup_restore/03-assert.yaml create mode 100644 test/kuttl/tests/ovn_backup_restore/03-seed-testdata.yaml create mode 100644 test/kuttl/tests/ovn_backup_restore/04-assert.yaml create mode 100644 test/kuttl/tests/ovn_backup_restore/04-create-backup.yaml create mode 100644 test/kuttl/tests/ovn_backup_restore/05-assert.yaml create mode 100644 test/kuttl/tests/ovn_backup_restore/05-trigger-backup.yaml create mode 100644 test/kuttl/tests/ovn_backup_restore/06-assert.yaml create mode 100644 test/kuttl/tests/ovn_backup_restore/06-create-restore.yaml create mode 100644 test/kuttl/tests/ovn_backup_restore/07-assert.yaml create mode 100644 
test/kuttl/tests/ovn_backup_restore/07-verify-data.yaml create mode 100644 test/kuttl/tests/ovn_backup_restore/08-cleanup.yaml create mode 100644 test/kuttl/tests/ovn_backup_restore/09-cleanup-pvcs.yaml create mode 100644 test/kuttl/tests/ovn_backup_restore/09-errors.yaml diff --git a/test/kuttl/tests/ovn_backup_restore/01-assert.yaml b/test/kuttl/tests/ovn_backup_restore/01-assert.yaml new file mode 100644 index 00000000..f6dd066d --- /dev/null +++ b/test/kuttl/tests/ovn_backup_restore/01-assert.yaml @@ -0,0 +1,45 @@ +# +# Check for: +# +# - OVNDBCluster NB ready +# - OVNDBCluster SB ready +# - ovsdbserver-nb-0 Pod running +# - ovsdbserver-sb-0 Pod running +# +apiVersion: ovn.openstack.org/v1beta1 +kind: OVNDBCluster +metadata: + name: ovndbcluster-nb-sample +spec: + dbType: NB + replicas: 1 +status: + readyCount: 1 +--- +apiVersion: ovn.openstack.org/v1beta1 +kind: OVNDBCluster +metadata: + name: ovndbcluster-sb-sample +spec: + dbType: SB + replicas: 1 +status: + readyCount: 1 +--- +apiVersion: v1 +kind: Pod +metadata: + labels: + service: ovsdbserver-nb + name: ovsdbserver-nb-0 +status: + phase: Running +--- +apiVersion: v1 +kind: Pod +metadata: + labels: + service: ovsdbserver-sb + name: ovsdbserver-sb-0 +status: + phase: Running diff --git a/test/kuttl/tests/ovn_backup_restore/01-deploy-ovn.yaml b/test/kuttl/tests/ovn_backup_restore/01-deploy-ovn.yaml new file mode 100644 index 00000000..eac7693f --- /dev/null +++ b/test/kuttl/tests/ovn_backup_restore/01-deploy-ovn.yaml @@ -0,0 +1,6 @@ +apiVersion: kuttl.dev/v1beta1 +kind: TestStep +apply: +- ../../../../config/samples/ovn_v1beta1_ovndbcluster.yaml +- ../../../../config/samples/ovn_v1beta1_ovnnorthd.yaml +- ../../../../config/samples/ovn_v1beta1_ovncontroller.yaml diff --git a/test/kuttl/tests/ovn_backup_restore/02-assert.yaml b/test/kuttl/tests/ovn_backup_restore/02-assert.yaml new file mode 100644 index 00000000..c5626706 --- /dev/null +++ b/test/kuttl/tests/ovn_backup_restore/02-assert.yaml @@ -0,0 +1,40 @@ 
+# +# Check for: +# +# - 3 replicas for NB and SB clusters +# +apiVersion: ovn.openstack.org/v1beta1 +kind: OVNDBCluster +metadata: + name: ovndbcluster-nb-sample +spec: + replicas: 3 +status: + readyCount: 3 +--- +apiVersion: apps/v1 +kind: StatefulSet +metadata: + name: ovsdbserver-nb +spec: + replicas: 3 +status: + readyReplicas: 3 +--- +apiVersion: ovn.openstack.org/v1beta1 +kind: OVNDBCluster +metadata: + name: ovndbcluster-sb-sample +spec: + replicas: 3 +status: + readyCount: 3 +--- +apiVersion: apps/v1 +kind: StatefulSet +metadata: + name: ovsdbserver-sb +spec: + replicas: 3 +status: + readyReplicas: 3 diff --git a/test/kuttl/tests/ovn_backup_restore/02-scale-and-seed.yaml b/test/kuttl/tests/ovn_backup_restore/02-scale-and-seed.yaml new file mode 100644 index 00000000..b6f5bfb6 --- /dev/null +++ b/test/kuttl/tests/ovn_backup_restore/02-scale-and-seed.yaml @@ -0,0 +1,7 @@ +apiVersion: kuttl.dev/v1beta1 +kind: TestStep +commands: + - script: | + oc patch OVNDBCluster -n $NAMESPACE ovndbcluster-nb-sample --type='json' -p='[{"op": "replace", "path": "/spec/replicas", "value":3}]' + - script: | + oc patch OVNDBCluster -n $NAMESPACE ovndbcluster-sb-sample --type='json' -p='[{"op": "replace", "path": "/spec/replicas", "value":3}]' diff --git a/test/kuttl/tests/ovn_backup_restore/03-assert.yaml b/test/kuttl/tests/ovn_backup_restore/03-assert.yaml new file mode 100644 index 00000000..0c681a8e --- /dev/null +++ b/test/kuttl/tests/ovn_backup_restore/03-assert.yaml @@ -0,0 +1,11 @@ +apiVersion: kuttl.dev/v1beta1 +kind: TestAssert +commands: + - script: | + oc exec -n $NAMESPACE ovsdbserver-nb-0 -- ovn-nbctl ls-list | grep -q kuttl-test-switch + if [ $? 
-ne 0 ]; then + echo "Test data 'kuttl-test-switch' not found in NB database" + exit 1 + fi + echo "Test data seeded successfully" + exit 0 diff --git a/test/kuttl/tests/ovn_backup_restore/03-seed-testdata.yaml b/test/kuttl/tests/ovn_backup_restore/03-seed-testdata.yaml new file mode 100644 index 00000000..62db3c03 --- /dev/null +++ b/test/kuttl/tests/ovn_backup_restore/03-seed-testdata.yaml @@ -0,0 +1,6 @@ +apiVersion: kuttl.dev/v1beta1 +kind: TestStep +commands: + - script: | + oc exec -n $NAMESPACE ovsdbserver-nb-0 -- ovn-nbctl ls-add kuttl-test-switch + oc exec -n $NAMESPACE ovsdbserver-nb-0 -- ovn-nbctl ls-list | grep -q kuttl-test-switch diff --git a/test/kuttl/tests/ovn_backup_restore/04-assert.yaml b/test/kuttl/tests/ovn_backup_restore/04-assert.yaml new file mode 100644 index 00000000..57961628 --- /dev/null +++ b/test/kuttl/tests/ovn_backup_restore/04-assert.yaml @@ -0,0 +1,16 @@ +# +# Check for: +# +# - OVNDBBackup CR is Ready +# - Backup PVC created +# - Backup CronJob created +# +apiVersion: kuttl.dev/v1beta1 +kind: TestAssert +commands: + - script: | + oc get ovndbbackup -n $NAMESPACE ovndbbackup-nb-sample -o jsonpath='{.status.conditions[?(@.type=="Ready")].status}' | grep -q True + - script: | + oc get pvc -n $NAMESPACE ovndbbackup-nb-sample-backup + - script: | + oc get cronjob -n $NAMESPACE ovndbbackup-nb-sample-cronjob diff --git a/test/kuttl/tests/ovn_backup_restore/04-create-backup.yaml b/test/kuttl/tests/ovn_backup_restore/04-create-backup.yaml new file mode 100644 index 00000000..fc221c25 --- /dev/null +++ b/test/kuttl/tests/ovn_backup_restore/04-create-backup.yaml @@ -0,0 +1,9 @@ +apiVersion: ovn.openstack.org/v1beta1 +kind: OVNDBBackup +metadata: + name: ovndbbackup-nb-sample +spec: + databaseInstance: ovndbcluster-nb-sample + schedule: "@daily" + storageRequest: "10G" + retention: "168h" diff --git a/test/kuttl/tests/ovn_backup_restore/05-assert.yaml b/test/kuttl/tests/ovn_backup_restore/05-assert.yaml new file mode 100644 index 
00000000..ba514336 --- /dev/null +++ b/test/kuttl/tests/ovn_backup_restore/05-assert.yaml @@ -0,0 +1,18 @@ +# +# Check for: +# +# - Manual backup job completed +# - Backup file exists on the PVC +# +apiVersion: kuttl.dev/v1beta1 +kind: TestAssert +timeout: 120 +commands: + - script: | + oc wait -n $NAMESPACE job/ovndbbackup-nb-manual --for=condition=complete --timeout=90s + - script: | + pod=$(oc get pods -n $NAMESPACE -l job-name=ovndbbackup-nb-manual -o name | head -1) + logs=$(oc logs -n $NAMESPACE $pod) + echo "$logs" + echo "$logs" | grep -q "Backup finished successfully" || exit 1 + exit 0 diff --git a/test/kuttl/tests/ovn_backup_restore/05-trigger-backup.yaml b/test/kuttl/tests/ovn_backup_restore/05-trigger-backup.yaml new file mode 100644 index 00000000..f4f6a629 --- /dev/null +++ b/test/kuttl/tests/ovn_backup_restore/05-trigger-backup.yaml @@ -0,0 +1,5 @@ +apiVersion: kuttl.dev/v1beta1 +kind: TestStep +commands: + - script: | + oc create job -n $NAMESPACE --from=cronjob/ovndbbackup-nb-sample-cronjob ovndbbackup-nb-manual diff --git a/test/kuttl/tests/ovn_backup_restore/06-assert.yaml b/test/kuttl/tests/ovn_backup_restore/06-assert.yaml new file mode 100644 index 00000000..9f337477 --- /dev/null +++ b/test/kuttl/tests/ovn_backup_restore/06-assert.yaml @@ -0,0 +1,24 @@ +# +# Check for: +# +# - OVNDBRestore CR reaches Completed phase +# - OVNDBCluster is back to 3 replicas and healthy +# +apiVersion: kuttl.dev/v1beta1 +kind: TestAssert +timeout: 300 +commands: + - script: | + phase=$(oc get ovndbrestore -n $NAMESPACE ovndbrestore-nb-sample -o jsonpath='{.status.phase}') + if [ "$phase" != "Completed" ]; then + echo "Restore phase is '$phase', expected 'Completed'" + exit 1 + fi + echo "Restore phase: Completed" + - script: | + ready=$(oc get ovndbcluster -n $NAMESPACE ovndbcluster-nb-sample -o jsonpath='{.status.readyCount}') + if [ "$ready" != "3" ]; then + echo "OVNDBCluster readyCount is '$ready', expected '3'" + exit 1 + fi + echo "OVNDBCluster NB 
readyCount: 3" diff --git a/test/kuttl/tests/ovn_backup_restore/06-create-restore.yaml b/test/kuttl/tests/ovn_backup_restore/06-create-restore.yaml new file mode 100644 index 00000000..e604a1c1 --- /dev/null +++ b/test/kuttl/tests/ovn_backup_restore/06-create-restore.yaml @@ -0,0 +1,6 @@ +apiVersion: ovn.openstack.org/v1beta1 +kind: OVNDBRestore +metadata: + name: ovndbrestore-nb-sample +spec: + backupSource: ovndbbackup-nb-sample diff --git a/test/kuttl/tests/ovn_backup_restore/07-assert.yaml b/test/kuttl/tests/ovn_backup_restore/07-assert.yaml new file mode 100644 index 00000000..9658ece1 --- /dev/null +++ b/test/kuttl/tests/ovn_backup_restore/07-assert.yaml @@ -0,0 +1,11 @@ +apiVersion: kuttl.dev/v1beta1 +kind: TestAssert +commands: + - script: | + oc exec -n $NAMESPACE ovsdbserver-nb-0 -- ovn-nbctl ls-list | grep -q kuttl-test-switch + if [ $? -ne 0 ]; then + echo "Test data 'kuttl-test-switch' not found after restore" + exit 1 + fi + echo "Test data verified successfully after restore" + exit 0 diff --git a/test/kuttl/tests/ovn_backup_restore/07-verify-data.yaml b/test/kuttl/tests/ovn_backup_restore/07-verify-data.yaml new file mode 100644 index 00000000..6d12aac3 --- /dev/null +++ b/test/kuttl/tests/ovn_backup_restore/07-verify-data.yaml @@ -0,0 +1,12 @@ +apiVersion: kuttl.dev/v1beta1 +kind: TestStep +commands: + - script: | + echo "Verifying test data survived the restore..." + oc exec -n $NAMESPACE ovsdbserver-nb-0 -- ovn-nbctl ls-list | grep -q kuttl-test-switch + if [ $? 
-ne 0 ]; then + echo "FAIL: Test data 'kuttl-test-switch' not found after restore" + exit 1 + fi + echo "PASS: Test data preserved after restore" + exit 0 diff --git a/test/kuttl/tests/ovn_backup_restore/08-cleanup.yaml b/test/kuttl/tests/ovn_backup_restore/08-cleanup.yaml new file mode 100644 index 00000000..204f7121 --- /dev/null +++ b/test/kuttl/tests/ovn_backup_restore/08-cleanup.yaml @@ -0,0 +1,14 @@ +apiVersion: kuttl.dev/v1beta1 +kind: TestStep +commands: + - script: | + oc exec -n $NAMESPACE ovsdbserver-nb-0 -- ovn-nbctl ls-del kuttl-test-switch || true + oc delete ovndbrestore -n $NAMESPACE ovndbrestore-nb-sample --ignore-not-found || true + oc delete ovndbbackup -n $NAMESPACE ovndbbackup-nb-sample --ignore-not-found || true + oc delete job -n $NAMESPACE ovndbbackup-nb-manual --ignore-not-found || true + oc delete ovndbcluster -n $NAMESPACE ovndbcluster-nb-sample --ignore-not-found || true + oc delete ovndbcluster -n $NAMESPACE ovndbcluster-sb-sample --ignore-not-found || true + oc delete ovnnorthd -n $NAMESPACE ovnnorthd-sample --ignore-not-found || true + oc delete ovncontroller -n $NAMESPACE ovncontroller-sample --ignore-not-found || true + sleep 10 + oc delete pods -n $NAMESPACE --all --grace-period=0 --force 2>/dev/null || true diff --git a/test/kuttl/tests/ovn_backup_restore/09-cleanup-pvcs.yaml b/test/kuttl/tests/ovn_backup_restore/09-cleanup-pvcs.yaml new file mode 100644 index 00000000..370721e7 --- /dev/null +++ b/test/kuttl/tests/ovn_backup_restore/09-cleanup-pvcs.yaml @@ -0,0 +1,24 @@ +apiVersion: kuttl.dev/v1beta1 +kind: TestStep +delete: +- apiVersion: v1 + kind: PersistentVolumeClaim + name: ovndbbackup-nb-sample-backup +- apiVersion: v1 + kind: PersistentVolumeClaim + name: ovndbcluster-nb-sample-etc-ovn-ovsdbserver-nb-0 +- apiVersion: v1 + kind: PersistentVolumeClaim + name: ovndbcluster-nb-sample-etc-ovn-ovsdbserver-nb-1 +- apiVersion: v1 + kind: PersistentVolumeClaim + name: ovndbcluster-nb-sample-etc-ovn-ovsdbserver-nb-2 +- apiVersion: 
v1 + kind: PersistentVolumeClaim + name: ovndbcluster-sb-sample-etc-ovn-ovsdbserver-sb-0 +- apiVersion: v1 + kind: PersistentVolumeClaim + name: ovndbcluster-sb-sample-etc-ovn-ovsdbserver-sb-1 +- apiVersion: v1 + kind: PersistentVolumeClaim + name: ovndbcluster-sb-sample-etc-ovn-ovsdbserver-sb-2 diff --git a/test/kuttl/tests/ovn_backup_restore/09-errors.yaml b/test/kuttl/tests/ovn_backup_restore/09-errors.yaml new file mode 100644 index 00000000..54af6bcd --- /dev/null +++ b/test/kuttl/tests/ovn_backup_restore/09-errors.yaml @@ -0,0 +1,65 @@ +# +# Check for: +# +# No OVNDBBackup CR +# No OVNDBRestore CR +# No backup PVC +# No OVNDBCluster CRs +# No OVN pods +# +apiVersion: ovn.openstack.org/v1beta1 +kind: OVNDBBackup +metadata: + name: ovndbbackup-nb-sample +--- +apiVersion: ovn.openstack.org/v1beta1 +kind: OVNDBRestore +metadata: + name: ovndbrestore-nb-sample +--- +apiVersion: ovn.openstack.org/v1beta1 +kind: OVNDBCluster +metadata: + name: ovndbcluster-nb-sample +--- +apiVersion: ovn.openstack.org/v1beta1 +kind: OVNDBCluster +metadata: + name: ovndbcluster-sb-sample +--- +apiVersion: v1 +kind: Pod +metadata: + labels: + service: ovsdbserver-nb +--- +apiVersion: v1 +kind: Pod +metadata: + labels: + service: ovsdbserver-sb +--- +apiVersion: v1 +kind: PersistentVolumeClaim +metadata: + name: ovndbbackup-nb-sample-backup +--- +apiVersion: v1 +kind: PersistentVolumeClaim +metadata: + name: ovndbcluster-nb-sample-etc-ovn-ovsdbserver-nb-0 +--- +apiVersion: v1 +kind: PersistentVolumeClaim +metadata: + name: ovndbcluster-sb-sample-etc-ovn-ovsdbserver-sb-0 +--- +apiVersion: v1 +kind: PersistentVolumeClaim +metadata: + name: ovndbcluster-sb-sample-etc-ovn-ovsdbserver-sb-1 +--- +apiVersion: v1 +kind: PersistentVolumeClaim +metadata: + name: ovndbcluster-sb-sample-etc-ovn-ovsdbserver-sb-2 From f06640f7fc4bb293c156b2e3e91374d3f16b85a3 Mon Sep 17 00:00:00 2001 From: Luca Miccini Date: Tue, 21 Apr 2026 11:59:36 +0200 Subject: [PATCH 3/4] Fix restore Job scheduling failure with 
local-storage PVs Delete pod-0's PVC in phaseScaleDown and recreate it in phaseRestore before creating the restore Job. With local-storage, pod-0's PVC may be bound to a PV on a different node than the backup PVC, causing a volume node affinity conflict that prevents the restore Job pod from scheduling. Recreating the PVC lets WaitForFirstConsumer bind it to a PV on the same node as the backup PVC. Co-Authored-By: Claude Opus 4.6 --- .../controller/ovndbrestore_controller.go | 60 +++++++++++++++++-- 1 file changed, 54 insertions(+), 6 deletions(-) diff --git a/internal/controller/ovndbrestore_controller.go b/internal/controller/ovndbrestore_controller.go index a9190129..f1f141a2 100644 --- a/internal/controller/ovndbrestore_controller.go +++ b/internal/controller/ovndbrestore_controller.go @@ -49,6 +49,8 @@ import ( corev1 "k8s.io/api/core/v1" rbacv1 "k8s.io/api/rbac/v1" k8s_errors "k8s.io/apimachinery/pkg/api/errors" + "k8s.io/apimachinery/pkg/api/resource" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" ) // OVNDBRestoreReconciler reconciles a OVNDBRestore object @@ -412,15 +414,19 @@ func (r *OVNDBRestoreReconciler) phaseScaleDown( return ctrl.Result{RequeueAfter: time.Second * 3}, nil } - // Delete non-pod-0 PVCs so pods start fresh without stale RAFT state. - // Force-delete skipped the preStop hook that normally cleans up DB files. - // The StatefulSet will recreate these PVCs when the pods restart. + // Delete ALL PVCs so pods start fresh without stale RAFT state. + // Pod-0's PVC is also deleted because: + // 1. Its data will be overwritten by the backup anyway + // 2. With local-storage, pod-0's PVC may be bound to a different node + // than the backup PVC, causing a volume node affinity conflict when + // the restore Job tries to mount both + // The PVC will be recreated in phaseRestore before the restore Job. 
originalReplicas := int32(1) if instance.Status.OriginalReplicas != nil { originalReplicas = *instance.Status.OriginalReplicas } stsName := ovndbbackup.StatefulSetName(cluster) - for i := int32(1); i < originalReplicas; i++ { + for i := int32(0); i < originalReplicas; i++ { pvcName := fmt.Sprintf("%s%s-%s-%d", cluster.Name, ovndbcluster.PVCSuffixEtcOVN, stsName, i) pvc := &corev1.PersistentVolumeClaim{} @@ -434,13 +440,13 @@ func (r *OVNDBRestoreReconciler) phaseScaleDown( } return ctrl.Result{}, err } - Log.Info("Deleting non-pod-0 PVC", "pvc", pvcName) + Log.Info("Deleting PVC", "pvc", pvcName) if err := r.Delete(ctx, pvc); err != nil && !k8s_errors.IsNotFound(err) { return ctrl.Result{}, err } } - Log.Info("All pods terminated and non-pod-0 PVCs cleaned up") + Log.Info("All pods terminated and PVCs cleaned up") instance.Status.Phase = ovnv1.OVNDBRestorePhaseRestoring return ctrl.Result{Requeue: true}, nil } @@ -466,6 +472,48 @@ func (r *OVNDBRestoreReconciler) phaseRestore( return ctrl.Result{}, fmt.Errorf("failed to create restore ConfigMap: %w", err) } + // Ensure pod-0's PVC exists. It was deleted in phaseScaleDown to avoid + // volume node affinity conflicts with local-storage: recreating it here + // lets WaitForFirstConsumer bind it to a PV on the same node as the + // backup PVC. 
+ pod0PVCName := ovndbbackup.ClusterPod0PVCName(cluster) + pod0PVC := &corev1.PersistentVolumeClaim{} + err = r.Get(ctx, types.NamespacedName{Name: pod0PVCName, Namespace: cluster.Namespace}, pod0PVC) + if err == nil { + if pod0PVC.DeletionTimestamp != nil { + Log.Info("Waiting for old pod-0 PVC to be fully deleted", "pvc", pod0PVCName) + return ctrl.Result{RequeueAfter: time.Second * 3}, nil + } + } else if k8s_errors.IsNotFound(err) { + storageRequest, parseErr := resource.ParseQuantity(cluster.Spec.StorageRequest) + if parseErr != nil { + return ctrl.Result{}, fmt.Errorf("failed to parse StorageRequest: %w", parseErr) + } + pod0PVC = &corev1.PersistentVolumeClaim{ + ObjectMeta: metav1.ObjectMeta{ + Name: pod0PVCName, + Namespace: cluster.Namespace, + }, + Spec: corev1.PersistentVolumeClaimSpec{ + AccessModes: []corev1.PersistentVolumeAccessMode{ + corev1.ReadWriteOnce, + }, + StorageClassName: &cluster.Spec.StorageClass, + Resources: corev1.VolumeResourceRequirements{ + Requests: corev1.ResourceList{ + corev1.ResourceStorage: storageRequest, + }, + }, + }, + } + Log.Info("Creating pod-0 PVC for restore", "pvc", pod0PVCName) + if err = r.Create(ctx, pod0PVC); err != nil { + return ctrl.Result{}, fmt.Errorf("failed to create pod-0 PVC: %w", err) + } + } else { + return ctrl.Result{}, err + } + // Create or check restore Job restoreJob := ovndbbackup.RestoreJob(instance, backup, cluster, serviceLabels) foundJob := &batchv1.Job{} From ba138123ec583273e902e8d1fe680c374beb252e Mon Sep 17 00:00:00 2001 From: Luca Miccini Date: Wed, 22 Apr 2026 12:01:06 +0200 Subject: [PATCH 4/4] Add backupTimestamp support for coordinated backup and restore Allow specifying a BACKUP_TIMESTAMP on backup jobs and a backupTimestamp field on OVNDBRestore so that OVN DB backups can participate in a coordinated full-environment backup/restore workflow alongside Galera and OADP using a single shared timestamp. 
Co-Authored-By: Claude Opus 4.6 --- .../ovn.openstack.org_ovndbrestores.yaml | 7 ++ api/v1beta1/ovndbrestore_types.go | 7 ++ .../ovn.openstack.org_ovndbrestores.yaml | 7 ++ docs/backup-restore.md | 115 +++++++++++++++++- internal/ovndbbackup/restorejob.go | 3 + templates/ovndbbackup/bin/backup_ovndb | 2 +- templates/ovndbrestore/bin/restore_ovndb | 17 ++- 7 files changed, 148 insertions(+), 10 deletions(-) diff --git a/api/bases/ovn.openstack.org_ovndbrestores.yaml b/api/bases/ovn.openstack.org_ovndbrestores.yaml index b9975130..1b104ed2 100644 --- a/api/bases/ovn.openstack.org_ovndbrestores.yaml +++ b/api/bases/ovn.openstack.org_ovndbrestores.yaml @@ -56,6 +56,13 @@ spec: description: BackupSource - Name of the OVNDBBackup CR to restore from type: string + backupTimestamp: + description: |- + BackupTimestamp - specific backup timestamp to restore (format: YYYYMMDD-HHMMSS). + Must match the timestamp prefix of a backup file on the backup PVC. + If empty, the most recent backup is used. + pattern: ^\d{8}-\d{6}$ + type: string required: - backupSource type: object diff --git a/api/v1beta1/ovndbrestore_types.go b/api/v1beta1/ovndbrestore_types.go index f8f33b76..19f8ffce 100644 --- a/api/v1beta1/ovndbrestore_types.go +++ b/api/v1beta1/ovndbrestore_types.go @@ -43,6 +43,13 @@ type OVNDBRestoreSpec struct { // +kubebuilder:validation:Required // BackupSource - Name of the OVNDBBackup CR to restore from BackupSource string `json:"backupSource"` + + // +kubebuilder:validation:Optional + // +kubebuilder:validation:Pattern=`^\d{8}-\d{6}$` + // BackupTimestamp - specific backup timestamp to restore (format: YYYYMMDD-HHMMSS). + // Must match the timestamp prefix of a backup file on the backup PVC. + // If empty, the most recent backup is used. 
+ BackupTimestamp string `json:"backupTimestamp,omitempty"` } // OVNDBRestoreStatus defines the observed state of OVNDBRestore diff --git a/config/crd/bases/ovn.openstack.org_ovndbrestores.yaml b/config/crd/bases/ovn.openstack.org_ovndbrestores.yaml index b9975130..1b104ed2 100644 --- a/config/crd/bases/ovn.openstack.org_ovndbrestores.yaml +++ b/config/crd/bases/ovn.openstack.org_ovndbrestores.yaml @@ -56,6 +56,13 @@ spec: description: BackupSource - Name of the OVNDBBackup CR to restore from type: string + backupTimestamp: + description: |- + BackupTimestamp - specific backup timestamp to restore (format: YYYYMMDD-HHMMSS). + Must match the timestamp prefix of a backup file on the backup PVC. + If empty, the most recent backup is used. + pattern: ^\d{8}-\d{6}$ + type: string required: - backupSource type: object diff --git a/docs/backup-restore.md b/docs/backup-restore.md index 9d581dd6..3da37b67 100644 --- a/docs/backup-restore.md +++ b/docs/backup-restore.md @@ -61,6 +61,35 @@ spec: retention: "168h" ``` +### Triggering an On-Demand Backup + +The CronJob runs on a schedule, but you can trigger an immediate backup at any time by creating a one-off Job from the CronJob: + +```bash +CRONJOB_NAME=$(oc get cronjob -l app=ovndbbackup,ovndbbackup=ovndbbackup-nb -o name) +oc create job ovndbbackup-nb-manual --from="${CRONJOB_NAME}" +``` + +#### Using a Coordinated Timestamp + +When performing a full environment backup (OVN + Galera + OADP), all components should use the same timestamp so that a restore can target a single consistent point in time. 
Set the `BACKUP_TIMESTAMP` environment variable to override the auto-generated timestamp: + +```bash +BACKUP_TIMESTAMP=$(date +%Y%m%d-%H%M%S) + +# Trigger OVN NB backup +NB_CRONJOB=$(oc get cronjob -l app=ovndbbackup,ovndbbackup=ovndbbackup-nb -o name) +oc create job ovndbbackup-nb-${BACKUP_TIMESTAMP} --from="${NB_CRONJOB}" +oc set env job/ovndbbackup-nb-${BACKUP_TIMESTAMP} BACKUP_TIMESTAMP=${BACKUP_TIMESTAMP} + +# Trigger OVN SB backup +SB_CRONJOB=$(oc get cronjob -l app=ovndbbackup,ovndbbackup=ovndbbackup-sb -o name) +oc create job ovndbbackup-sb-${BACKUP_TIMESTAMP} --from="${SB_CRONJOB}" +oc set env job/ovndbbackup-sb-${BACKUP_TIMESTAMP} BACKUP_TIMESTAMP=${BACKUP_TIMESTAMP} +``` + +The backup files will be named `${BACKUP_TIMESTAMP}-ovn${DB_TYPE}_db.backup`. If `BACKUP_TIMESTAMP` is not set, the job generates its own timestamp at runtime. + ### Monitoring Backup Status ```bash @@ -91,9 +120,22 @@ spec: backupSource: ovndbbackup-nb ``` -| Field | Required | Description | -|----------------|----------|-------------| -| `backupSource` | Yes | Name of the `OVNDBBackup` CR to restore from. The backup must be in `Ready` state. | +To restore from a specific point in time, set `backupTimestamp` to the timestamp prefix of the backup file (format: `YYYYMMDD-HHMMSS`): + +```yaml +apiVersion: ovn.openstack.org/v1beta1 +kind: OVNDBRestore +metadata: + name: ovndbrestore-nb +spec: + backupSource: ovndbbackup-nb + backupTimestamp: "20260422-120000" +``` + +| Field | Required | Description | +|-------------------|----------|-------------| +| `backupSource` | Yes | Name of the `OVNDBBackup` CR to restore from. The backup must be in `Ready` state. | +| `backupTimestamp` | No | Timestamp of the specific backup to restore (format: `YYYYMMDD-HHMMSS`). Must match the prefix of a backup file on the backup PVC. If omitted, the most recent backup is used. 
| ### Restore Phases @@ -103,7 +145,7 @@ The restore proceeds through a state machine: |--------------|-------------| | `Validating` | Validates the backup source and saves the current replica count. | | `ScalingDown`| Sets a restore annotation on the `OVNDBCluster` to override replicas to 0, force-deletes all pods (preStop hooks hang when all RAFT members terminate simultaneously), and deletes non-pod-0 PVCs to prevent stale RAFT state. | -| `Restoring` | Runs a Job that copies the latest standalone backup onto pod-0's PVC. When pod-0 starts, `ovn-ctl` automatically converts the standalone file to a RAFT cluster. | +| `Restoring` | Runs a Job that copies a standalone backup onto pod-0's PVC. If `backupTimestamp` is set, the exact matching backup file is used; otherwise the most recent backup is selected. When pod-0 starts, `ovn-ctl` automatically converts the standalone file to a RAFT cluster. | | `ScalingUp` | Scales to 1 replica first (pod-0 bootstraps the restored DB), verifies the DB, then removes the restore annotation so the cluster scales to the original replica count. Remaining pods join the cluster with fresh PVCs. | | `Completed` | Restore finished successfully. | | `Failed` | Restore job failed. Check the job logs for details. | @@ -127,7 +169,7 @@ oc logs job/ 1. A finalizer is added to the `OVNDBBackup` CR to prevent its deletion during the restore. 2. A restore-in-progress annotation is set on the `OVNDBCluster` to override the StatefulSet replica count to 0, preventing higher-level operators (e.g. OpenStackControlPlane) from interfering. 3. All pods are force-deleted (graceful RAFT shutdown hangs when all members terminate simultaneously). Non-pod-0 PVCs are deleted to prevent stale RAFT membership state on restart. -4. A Job mounts pod-0's PVC and the backup PVC, removes the old database file, and copies the standalone backup in its place. +4. 
A Job mounts pod-0's PVC and the backup PVC, removes the old database file, and copies the selected backup in its place. When `backupTimestamp` is set, the job looks for a file named `${BACKUP_TIMESTAMP}-ovn${DB_TYPE}_db.backup`; otherwise it picks the most recent backup by modification time. 5. The annotation is updated to allow 1 replica. When pod-0 starts, `ovn-ctl` detects the standalone database file and automatically converts it to a RAFT cluster. After pod-0 is ready, the DB schema version is verified via `ovsdb-client get-schema-version`. 6. The annotation is removed, allowing the cluster to scale to its original replica count. The remaining pods start with fresh PVCs and join the RAFT cluster. 7. The finalizer on the `OVNDBBackup` CR is removed when the `OVNDBRestore` is deleted. @@ -139,3 +181,66 @@ The `OVNDBRestore` CR can be deleted after the restore completes. Deleting it re ```bash oc delete ovndbrestore ovndbrestore-nb ``` + +## Coordinated Backup and Restore + +When backing up an entire OpenStack control plane, all components (Galera, OVN, OADP) should share a single timestamp so that any restore targets a consistent point in time. + +### Coordinated Backup + +```bash +# Generate a shared timestamp +BACKUP_TIMESTAMP=$(date +%Y%m%d-%H%M%S) + +# 1. Trigger Galera database dumps with the shared timestamp +# (see dev-docs/backup-restore/user-guide.md, Step 2) + +# 2. 
Trigger OVN database backups with the same timestamp +NB_CRONJOB=$(oc get cronjob -l app=ovndbbackup,ovndbbackup=ovndbbackup-nb -o name) +oc create job ovndbbackup-nb-${BACKUP_TIMESTAMP} --from="${NB_CRONJOB}" +oc set env job/ovndbbackup-nb-${BACKUP_TIMESTAMP} BACKUP_TIMESTAMP=${BACKUP_TIMESTAMP} + +SB_CRONJOB=$(oc get cronjob -l app=ovndbbackup,ovndbbackup=ovndbbackup-sb -o name) +oc create job ovndbbackup-sb-${BACKUP_TIMESTAMP} --from="${SB_CRONJOB}" +oc set env job/ovndbbackup-sb-${BACKUP_TIMESTAMP} BACKUP_TIMESTAMP=${BACKUP_TIMESTAMP} + +# Wait for OVN backup jobs to complete +oc wait --for=condition=complete job/ovndbbackup-nb-${BACKUP_TIMESTAMP} --timeout=120s +oc wait --for=condition=complete job/ovndbbackup-sb-${BACKUP_TIMESTAMP} --timeout=120s + +# 3. Trigger OADP backups +# (see dev-docs/backup-restore/user-guide.md, Steps 3-4) +``` + +### Coordinated Restore + +Use the same `BACKUP_TIMESTAMP` from the backup step to restore all components from the same point in time: + +```bash +BACKUP_TIMESTAMP= + +# Restore OVN NB +cat </dev/null | head -1) -if [[ -z "${BACKUP_FILE}" ]]; then - echo "ERROR: No backup files found in ${BACKUP_DIR}" - exit 1 +if [[ -n "${BACKUP_TIMESTAMP}" ]]; then + BACKUP_FILE="${BACKUP_DIR}/${BACKUP_TIMESTAMP}-ovn${DB_TYPE}_db.backup" + if [[ ! -f "${BACKUP_FILE}" ]]; then + echo "ERROR: Backup file not found: ${BACKUP_FILE}" + ls -la "${BACKUP_DIR}"/*-ovn${DB_TYPE}_db.backup 2>/dev/null || echo "No backup files available" + exit 1 + fi +else + BACKUP_FILE=$(ls -t "${BACKUP_DIR}"/*-ovn${DB_TYPE}_db.backup 2>/dev/null | head -1) + if [[ -z "${BACKUP_FILE}" ]]; then + echo "ERROR: No backup files found in ${BACKUP_DIR}" + exit 1 + fi fi echo "Restoring from: ${BACKUP_FILE}"