From bbe67e9c3f98d9d115fb902901fe476e8f71fb54 Mon Sep 17 00:00:00 2001 From: Divyam pateriya Date: Fri, 5 Jun 2026 20:17:16 +0530 Subject: [PATCH] feat(RFE-9399): Add DR CLI lifecycle commands, backup verification, and Agent namespace auto-detection Add get/destroy lifecycle commands, backup integrity verification, and Agent platform namespace auto-detection to the HyperShift DR CLI. Get commands (hypershift get oadp-backups/restores/schedules): - List Velero resources with table/json/yaml output - Filter by --hc-name and --hc-namespace - Sort by creation timestamp (newest first) Destroy commands (hypershift destroy oadp-backup/restore/schedule): - Delete specified Velero resource by name - Validates existence before deletion Backup verification (hypershift verify oadp-backup): - Checks phase, expiration, items count, BSL availability - Also available as --verify flag on oadp-restore to block restore creation when backup integrity checks fail Agent namespace auto-detection: - Reads spec.platform.agent.agentNamespace from HostedCluster - Automatically includes it in backup/restore/schedule namespaces - Deduplicates if already specified via --include-additional-namespaces Signed-off-by: Divyam pateriya Co-authored-by: Cursor --- cmd/destroy/destroy.go | 4 + cmd/get/get.go | 21 +++ cmd/oadp/backup.go | 5 +- cmd/oadp/common.go | 16 ++ cmd/oadp/destroy.go | 115 ++++++++++++++ cmd/oadp/get.go | 272 ++++++++++++++++++++++++++++++++ cmd/oadp/lifecycle_test.go | 308 +++++++++++++++++++++++++++++++++++++ cmd/oadp/restore.go | 23 +++ cmd/oadp/schedule.go | 5 +- cmd/oadp/types.go | 1 + cmd/oadp/verify_backup.go | 208 +++++++++++++++++++++++++ cmd/verify/verify.go | 19 +++ main.go | 4 + support/oadp/validate.go | 33 +++- 14 files changed, 1024 insertions(+), 10 deletions(-) create mode 100644 cmd/get/get.go create mode 100644 cmd/oadp/destroy.go create mode 100644 cmd/oadp/get.go create mode 100644 cmd/oadp/lifecycle_test.go create mode 100644 cmd/oadp/verify_backup.go create 
mode 100644 cmd/verify/verify.go diff --git a/cmd/destroy/destroy.go b/cmd/destroy/destroy.go index 657079c202b..53798b85003 100644 --- a/cmd/destroy/destroy.go +++ b/cmd/destroy/destroy.go @@ -4,6 +4,7 @@ import ( "github.com/openshift/hypershift/cmd/bastion" "github.com/openshift/hypershift/cmd/cluster" "github.com/openshift/hypershift/cmd/infra" + "github.com/openshift/hypershift/cmd/oadp" "github.com/spf13/cobra" ) @@ -19,6 +20,9 @@ func NewCommand() *cobra.Command { destroyCmd.AddCommand(infra.NewDestroyCommand()) destroyCmd.AddCommand(infra.NewDestroyIAMCommand()) destroyCmd.AddCommand(bastion.NewDestroyCommand()) + destroyCmd.AddCommand(oadp.NewDestroyBackupCommand()) + destroyCmd.AddCommand(oadp.NewDestroyRestoreCommand()) + destroyCmd.AddCommand(oadp.NewDestroyScheduleCommand()) return destroyCmd } diff --git a/cmd/get/get.go b/cmd/get/get.go new file mode 100644 index 00000000000..28ce4557589 --- /dev/null +++ b/cmd/get/get.go @@ -0,0 +1,21 @@ +package get + +import ( + "github.com/openshift/hypershift/cmd/oadp" + + "github.com/spf13/cobra" +) + +func NewCommand() *cobra.Command { + cmd := &cobra.Command{ + Use: "get", + Short: "Commands for listing HyperShift resources", + SilenceUsage: true, + } + + cmd.AddCommand(oadp.NewGetBackupsCommand()) + cmd.AddCommand(oadp.NewGetRestoresCommand()) + cmd.AddCommand(oadp.NewGetSchedulesCommand()) + + return cmd +} diff --git a/cmd/oadp/backup.go b/cmd/oadp/backup.go index 927cdd88128..a969f00f879 100644 --- a/cmd/oadp/backup.go +++ b/cmd/oadp/backup.go @@ -116,7 +116,7 @@ func (o *CreateOptions) RunBackup(ctx context.Context) error { if o.Client != nil { // Step 1: Validate HostedCluster exists and get platform o.Log.Info("Validating HostedCluster...") - detectedPlatform, err := oadp.ValidateAndGetHostedClusterPlatform(ctx, o.Client, o.HCName, o.HCNamespace) + platformInfo, err := oadp.ValidateAndGetHostedClusterPlatformInfo(ctx, o.Client, o.HCName, o.HCNamespace) if err != nil { if o.Render { o.Log.Info("Warning: 
HostedCluster validation failed, using default platform (AWS)", "error", err.Error()) @@ -125,7 +125,8 @@ func (o *CreateOptions) RunBackup(ctx context.Context) error { return fmt.Errorf("HostedCluster validation failed: %w", err) } } else { - platform = detectedPlatform + platform = platformInfo.Type + autoIncludeAgentNamespace(o, platformInfo) } if !o.Render { diff --git a/cmd/oadp/common.go b/cmd/oadp/common.go index 1ff8e33994c..719a15ea0b3 100644 --- a/cmd/oadp/common.go +++ b/cmd/oadp/common.go @@ -6,10 +6,12 @@ import ( "encoding/hex" "fmt" "os" + "slices" "strings" "github.com/openshift/hypershift/api/hypershift/v1beta1" "github.com/openshift/hypershift/support/netutil" + "github.com/openshift/hypershift/support/oadp" apierrors "k8s.io/apimachinery/pkg/api/errors" "k8s.io/apimachinery/pkg/apis/meta/v1/unstructured" @@ -265,3 +267,17 @@ func buildIncludedNamespaces(hcNamespace, hcName string, additionalNamespaces [] return namespaces } + +// autoIncludeAgentNamespace appends the Agent platform's agentNamespace to the +// IncludeNamespaces list when the platform is AGENT, deduplicating if the user +// already specified it explicitly. 
+func autoIncludeAgentNamespace(o *CreateOptions, info *oadp.PlatformInfo) { + if info.Type != "AGENT" || info.AgentNamespace == "" { + return + } + if slices.Contains(o.IncludeNamespaces, info.AgentNamespace) { + return + } + o.IncludeNamespaces = append(o.IncludeNamespaces, info.AgentNamespace) + o.Log.Info("Auto-detected agent namespace, including in operation", "namespace", info.AgentNamespace) +} diff --git a/cmd/oadp/destroy.go b/cmd/oadp/destroy.go new file mode 100644 index 00000000000..17bff6482a3 --- /dev/null +++ b/cmd/oadp/destroy.go @@ -0,0 +1,115 @@ +package oadp + +import ( + "context" + "fmt" + + "github.com/openshift/hypershift/cmd/log" + "github.com/openshift/hypershift/cmd/util" + + "k8s.io/apimachinery/pkg/apis/meta/v1/unstructured" + + "sigs.k8s.io/controller-runtime/pkg/client" + + "github.com/go-logr/logr" + "github.com/spf13/cobra" +) + +// DestroyOptions holds common configuration for destroy commands. +type DestroyOptions struct { + Name string + OADPNamespace string + Log logr.Logger + Client client.Client +} + +func NewDestroyBackupCommand() *cobra.Command { + opts := &DestroyOptions{Log: log.Log} + cmd := &cobra.Command{ + Use: "oadp-backup", + Short: "Delete an OADP backup", + Long: `Delete a Velero backup resource from the OADP namespace. + +Examples: + # Delete a specific backup + hypershift destroy oadp-backup --name example-clusters-lkbtzw`, + SilenceUsage: true, + RunE: func(cmd *cobra.Command, args []string) error { + return opts.runDestroy(cmd.Context(), "Backup") + }, + } + addDestroyFlags(cmd, opts) + _ = cmd.MarkFlagRequired("name") + return cmd +} + +func NewDestroyRestoreCommand() *cobra.Command { + opts := &DestroyOptions{Log: log.Log} + cmd := &cobra.Command{ + Use: "oadp-restore", + Short: "Delete an OADP restore", + Long: `Delete a Velero restore resource from the OADP namespace. 
+ +Examples: + # Delete a specific restore + hypershift destroy oadp-restore --name restore-example-clusters-abc123`, + SilenceUsage: true, + RunE: func(cmd *cobra.Command, args []string) error { + return opts.runDestroy(cmd.Context(), "Restore") + }, + } + addDestroyFlags(cmd, opts) + _ = cmd.MarkFlagRequired("name") + return cmd +} + +func NewDestroyScheduleCommand() *cobra.Command { + opts := &DestroyOptions{Log: log.Log} + cmd := &cobra.Command{ + Use: "oadp-schedule", + Short: "Delete an OADP schedule", + Long: `Delete a Velero schedule resource from the OADP namespace. + +Examples: + # Delete a specific schedule + hypershift destroy oadp-schedule --name example-clusters-daily`, + SilenceUsage: true, + RunE: func(cmd *cobra.Command, args []string) error { + return opts.runDestroy(cmd.Context(), "Schedule") + }, + } + addDestroyFlags(cmd, opts) + _ = cmd.MarkFlagRequired("name") + return cmd +} + +func addDestroyFlags(cmd *cobra.Command, opts *DestroyOptions) { + cmd.Flags().StringVar(&opts.Name, "name", "", "Name of the resource to delete (required)") + cmd.Flags().StringVar(&opts.OADPNamespace, "oadp-namespace", "openshift-adp", "Namespace where OADP operator is installed") +} + +// runDestroy looks up the named velero.io/v1 resource of the given kind in the OADP namespace and deletes it; a missing resource is reported as not found, any other lookup failure as a get failure. +func (o *DestroyOptions) runDestroy(ctx context.Context, kind string) error { + if o.Client == nil { + var err error + o.Client, err = util.GetClient() + if err != nil { + return fmt.Errorf("failed to create kubernetes client: %w", err) + } + } + obj := &unstructured.Unstructured{} + obj.SetAPIVersion("velero.io/v1") + obj.SetKind(kind) + key := client.ObjectKey{Name: o.Name, Namespace: o.OADPNamespace} + if err := o.Client.Get(ctx, key, obj); err != nil { + if client.IgnoreNotFound(err) == nil { // only a real NotFound may be reported as "not found" + return fmt.Errorf("%s '%s' not found in namespace '%s': %w", kind, o.Name, o.OADPNamespace, err) + } + return fmt.Errorf("failed to get %s '%s' in namespace '%s': %w", kind, o.Name, o.OADPNamespace, err) + } + if err := o.Client.Delete(ctx, obj); err != nil { + return fmt.Errorf("failed to delete %s '%s': %w", kind, o.Name, err) + } + o.Log.Info(fmt.Sprintf("%s deleted successfully", kind), "name", o.Name, "namespace", 
o.OADPNamespace) + return nil +} diff --git a/cmd/oadp/get.go b/cmd/oadp/get.go new file mode 100644 index 00000000000..bd93421d20c --- /dev/null +++ b/cmd/oadp/get.go @@ -0,0 +1,272 @@ +package oadp + +import ( + "context" + "encoding/json" + "fmt" + "io" + "os" + "sort" + "strings" + "text/tabwriter" + "time" + + "github.com/openshift/hypershift/cmd/log" + "github.com/openshift/hypershift/cmd/util" + + "k8s.io/apimachinery/pkg/apis/meta/v1/unstructured" + "k8s.io/apimachinery/pkg/runtime/schema" + + "sigs.k8s.io/controller-runtime/pkg/client" + "sigs.k8s.io/yaml" + + "github.com/go-logr/logr" + "github.com/spf13/cobra" +) + +// GetOptions holds common configuration for get commands. +type GetOptions struct { + OADPNamespace string + HCName string + HCNamespace string + Output string + Log logr.Logger + Client client.Client +} + +func NewGetBackupsCommand() *cobra.Command { + opts := &GetOptions{Log: log.Log} + cmd := &cobra.Command{ + Use: "oadp-backups", + Short: "List OADP backups for hosted clusters", + Long: `List Velero backup resources in the OADP namespace. + +Examples: + # List all backups + hypershift get oadp-backups + + # Filter by hosted cluster + hypershift get oadp-backups --hc-name example --hc-namespace clusters + + # Output as JSON + hypershift get oadp-backups -o json`, + SilenceUsage: true, + RunE: func(cmd *cobra.Command, args []string) error { + return opts.runGet(cmd.Context(), "Backup") + }, + } + addGetFlags(cmd, opts) + return cmd +} + +func NewGetRestoresCommand() *cobra.Command { + opts := &GetOptions{Log: log.Log} + cmd := &cobra.Command{ + Use: "oadp-restores", + Short: "List OADP restores for hosted clusters", + Long: `List Velero restore resources in the OADP namespace. 
+ +Examples: + # List all restores + hypershift get oadp-restores + + # Filter by hosted cluster + hypershift get oadp-restores --hc-name example --hc-namespace clusters`, + SilenceUsage: true, + RunE: func(cmd *cobra.Command, args []string) error { + return opts.runGet(cmd.Context(), "Restore") + }, + } + addGetFlags(cmd, opts) + return cmd +} + +func NewGetSchedulesCommand() *cobra.Command { + opts := &GetOptions{Log: log.Log} + cmd := &cobra.Command{ + Use: "oadp-schedules", + Short: "List OADP schedules for hosted clusters", + Long: `List Velero schedule resources in the OADP namespace. + +Examples: + # List all schedules + hypershift get oadp-schedules + + # Filter by hosted cluster + hypershift get oadp-schedules --hc-name example --hc-namespace clusters`, + SilenceUsage: true, + RunE: func(cmd *cobra.Command, args []string) error { + return opts.runGet(cmd.Context(), "Schedule") + }, + } + addGetFlags(cmd, opts) + return cmd +} + +func addGetFlags(cmd *cobra.Command, opts *GetOptions) { + cmd.Flags().StringVar(&opts.OADPNamespace, "oadp-namespace", "openshift-adp", "Namespace where OADP operator is installed") + cmd.Flags().StringVar(&opts.HCName, "hc-name", "", "Filter by hosted cluster name") + cmd.Flags().StringVar(&opts.HCNamespace, "hc-namespace", "", "Filter by hosted cluster namespace") + cmd.Flags().StringVarP(&opts.Output, "output", "o", "table", "Output format: table, json, yaml") +} + +func (o *GetOptions) runGet(ctx context.Context, kind string) error { + if o.Client == nil { + var err error + o.Client, err = util.GetClient() + if err != nil { + return fmt.Errorf("failed to create kubernetes client: %w", err) + } + } + + list := &unstructured.UnstructuredList{} + list.SetGroupVersionKind(schema.GroupVersionKind{ + Group: "velero.io", + Version: "v1", + Kind: kind + "List", + }) + + if err := o.Client.List(ctx, list, client.InNamespace(o.OADPNamespace)); err != nil { + return fmt.Errorf("failed to list %s resources: %w", kind, err) + } + + items 
:= o.filterItems(list.Items) + + sort.Slice(items, func(i, j int) bool { + ti := items[i].GetCreationTimestamp() + tj := items[j].GetCreationTimestamp() + return ti.After(tj.Time) + }) + + switch strings.ToLower(o.Output) { + case "json": + return outputJSON(os.Stdout, items) + case "yaml": + return outputYAML(os.Stdout, items) + default: + return outputTable(os.Stdout, items, kind) + } +} + +func (o *GetOptions) filterItems(items []unstructured.Unstructured) []unstructured.Unstructured { + if o.HCName == "" && o.HCNamespace == "" { + return items + } + + prefix := "" + if o.HCName != "" && o.HCNamespace != "" { + prefix = fmt.Sprintf("%s-%s-", o.HCName, o.HCNamespace) + } + + var filtered []unstructured.Unstructured + for _, item := range items { + name := item.GetName() + labels := item.GetLabels() + // name+"-" lets an exact "<hc>-<ns>" name match while "<hc>-<ns>2-..." is rejected. + if matchesByLabel(labels, o.HCName, o.HCNamespace) || (prefix != "" && strings.HasPrefix(name+"-", prefix)) { + filtered = append(filtered, item) + } + } + return filtered +} + +// matchesByLabel reports whether the hosted-cluster labels satisfy every +// non-empty criterion, so a name-only or namespace-only filter works on its own. +// It returns false when both criteria are empty. +func matchesByLabel(labels map[string]string, hcName, hcNamespace string) bool { + if hcName == "" && hcNamespace == "" { + return false + } + // Reading a nil map yields "", which never equals a non-empty criterion. + if hcName != "" && labels["hypershift.openshift.io/hosted-cluster"] != hcName { + return false + } + if hcNamespace != "" && labels["hypershift.openshift.io/hosted-cluster-namespace"] != hcNamespace { + return false + } + return true +} + +func outputTable(w io.Writer, items []unstructured.Unstructured, kind string) error { + tw := tabwriter.NewWriter(w, 0, 4, 2, ' ', 0) + + switch kind { + case "Schedule": + fmt.Fprintln(tw, "NAME\tSTATUS\tSCHEDULE\tLAST BACKUP\tAGE") + default: + fmt.Fprintln(tw, "NAME\tSTATUS\tAGE") + } + + for _, item := range items { + name := item.GetName() + phase, _, _ := unstructured.NestedString(item.Object, "status", "phase") + if phase == "" { + phase = "Unknown" + } + age := formatAge(item.GetCreationTimestamp().Time) + + switch kind { + case "Schedule": + schedule, _, _ := 
unstructured.NestedString(item.Object, "spec", "schedule") + lastBackup, _, _ := unstructured.NestedString(item.Object, "status", "lastBackup") + lastBackupAge := "" + if lastBackup != "" { + if t, err := time.Parse(time.RFC3339, lastBackup); err == nil { + lastBackupAge = formatAge(t) + } + } + fmt.Fprintf(tw, "%s\t%s\t%s\t%s\t%s\n", name, phase, schedule, lastBackupAge, age) + default: + fmt.Fprintf(tw, "%s\t%s\t%s\n", name, phase, age) + } + } + + return tw.Flush() +} + +func outputJSON(w io.Writer, items []unstructured.Unstructured) error { + objs := make([]map[string]interface{}, len(items)) + for i, item := range items { + objs[i] = item.Object + } + data, err := json.MarshalIndent(objs, "", " ") + if err != nil { + return err + } + _, err = w.Write(data) + if err != nil { + return err + } + _, err = fmt.Fprintln(w) + return err +} + +func outputYAML(w io.Writer, items []unstructured.Unstructured) error { + for i, item := range items { + data, err := yaml.Marshal(item.Object) + if err != nil { + return err + } + if i > 0 { + data = append([]byte("---\n"), data...) // separator shares the document's write, so its error is checked + } + if _, err := w.Write(data); err != nil { + return err + } + } + return nil +} + +func formatAge(t time.Time) string { + d := max(time.Since(t), 0) // a timestamp ahead of the local clock is shown as 0s, not a negative age + switch { + case d < time.Minute: + return fmt.Sprintf("%ds", int(d.Seconds())) + case d < time.Hour: + return fmt.Sprintf("%dm", int(d.Minutes())) + case d < 24*time.Hour: + return fmt.Sprintf("%dh", int(d.Hours())) + default: + return fmt.Sprintf("%dd", int(d.Hours()/24)) + } +} diff --git a/cmd/oadp/lifecycle_test.go b/cmd/oadp/lifecycle_test.go new file mode 100644 index 00000000000..1d92e3251cb --- /dev/null +++ b/cmd/oadp/lifecycle_test.go @@ -0,0 +1,308 @@ +package oadp + +import ( + "bytes" + "context" + "testing" + "time" + + "github.com/openshift/hypershift/cmd/log" + supportoadp "github.com/openshift/hypershift/support/oadp" + + "k8s.io/apimachinery/pkg/apis/meta/v1/unstructured" + "k8s.io/apimachinery/pkg/runtime" + + 
"sigs.k8s.io/controller-runtime/pkg/client/fake" +) + +func TestVerifyBackup(t *testing.T) { + tests := []struct { + name string + backup map[string]interface{} + expectError bool + failChecks []string + }{ + { + name: "When backup is completed with valid items it should pass all checks", + backup: map[string]interface{}{ + "apiVersion": "velero.io/v1", + "kind": "Backup", + "metadata": map[string]interface{}{ + "name": "test-backup", + "namespace": "openshift-adp", + "creationTimestamp": "2026-06-01T10:00:00Z", + }, + "spec": map[string]interface{}{ + "storageLocation": "default", + }, + "status": map[string]interface{}{ + "phase": "Completed", + "expiration": time.Now().Add(24 * time.Hour).Format(time.RFC3339), + "progress": map[string]interface{}{ + "itemsBackedUp": int64(142), + "totalItems": int64(142), + }, + }, + }, + expectError: false, + }, + { + name: "When backup has failed phase it should fail the phase check", + backup: map[string]interface{}{ + "apiVersion": "velero.io/v1", + "kind": "Backup", + "metadata": map[string]interface{}{ + "name": "failed-backup", + "namespace": "openshift-adp", + "creationTimestamp": "2026-06-01T10:00:00Z", + }, + "spec": map[string]interface{}{}, + "status": map[string]interface{}{ + "phase": "Failed", + "progress": map[string]interface{}{ + "itemsBackedUp": int64(0), + "totalItems": int64(100), + }, + }, + }, + expectError: false, + failChecks: []string{"phase", "items"}, + }, + { + name: "When backup has expired it should fail the expiration check", + backup: map[string]interface{}{ + "apiVersion": "velero.io/v1", + "kind": "Backup", + "metadata": map[string]interface{}{ + "name": "expired-backup", + "namespace": "openshift-adp", + "creationTimestamp": "2026-06-01T10:00:00Z", + }, + "spec": map[string]interface{}{}, + "status": map[string]interface{}{ + "phase": "Completed", + "expiration": time.Now().Add(-2 * time.Hour).Format(time.RFC3339), + "progress": map[string]interface{}{ + "itemsBackedUp": int64(142), + 
"totalItems": int64(142), + }, + }, + }, + expectError: false, + failChecks: []string{"expiration"}, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + ctx := context.Background() + scheme := runtime.NewScheme() + + backupObj := &unstructured.Unstructured{Object: tt.backup} + fakeClient := fake.NewClientBuilder(). + WithScheme(scheme). + WithObjects(backupObj). + Build() + + results, err := VerifyBackup(ctx, fakeClient, backupObj.GetName(), "openshift-adp", log.Log) + if tt.expectError { + if err == nil { + t.Errorf("expected error but got none") + } + return + } + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + + failedChecks := make(map[string]bool) + for _, r := range results { + if !r.Passed { + failedChecks[r.Check] = true + } + } + + for _, expected := range tt.failChecks { + if !failedChecks[expected] { + t.Errorf("expected check '%s' to fail, but it passed", expected) + } + } + }) + } +} + +func TestAutoIncludeAgentNamespace(t *testing.T) { + tests := []struct { + name string + platform string + agentNamespace string + existingNS []string + expectedNS []string + expectLogMessage bool + }{ + { + name: "When platform is AGENT with namespace it should auto-include", + platform: "AGENT", + agentNamespace: "my-agent-ns", + existingNS: nil, + expectedNS: []string{"my-agent-ns"}, + expectLogMessage: true, + }, + { + name: "When platform is AGENT and namespace already included it should not duplicate", + platform: "AGENT", + agentNamespace: "my-agent-ns", + existingNS: []string{"my-agent-ns"}, + expectedNS: []string{"my-agent-ns"}, + expectLogMessage: false, + }, + { + name: "When platform is AWS it should not include agent namespace", + platform: "AWS", + agentNamespace: "", + existingNS: nil, + expectedNS: nil, + expectLogMessage: false, + }, + { + name: "When platform is AGENT but namespace is empty it should skip", + platform: "AGENT", + agentNamespace: "", + existingNS: nil, + expectedNS: nil, + expectLogMessage: 
false, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + opts := &CreateOptions{ + Log: log.Log, + IncludeNamespaces: tt.existingNS, + } + info := &supportoadp.PlatformInfo{ + Type: tt.platform, + AgentNamespace: tt.agentNamespace, + } + + autoIncludeAgentNamespace(opts, info) + + if len(tt.expectedNS) == 0 && len(opts.IncludeNamespaces) == 0 { + return + } + if len(opts.IncludeNamespaces) != len(tt.expectedNS) { + t.Errorf("expected namespaces %v, got %v", tt.expectedNS, opts.IncludeNamespaces) + } + for i, ns := range tt.expectedNS { + if i >= len(opts.IncludeNamespaces) || opts.IncludeNamespaces[i] != ns { + t.Errorf("expected namespace[%d]=%q, got %v", i, ns, opts.IncludeNamespaces) + } + } + }) + } +} + +func TestGetOutputTable(t *testing.T) { + items := []unstructured.Unstructured{ + { + Object: map[string]interface{}{ + "apiVersion": "velero.io/v1", + "kind": "Backup", + "metadata": map[string]interface{}{ + "name": "test-backup-1", + "namespace": "openshift-adp", + "creationTimestamp": time.Now().Add(-2 * time.Hour).Format(time.RFC3339), + }, + "status": map[string]interface{}{ + "phase": "Completed", + }, + }, + }, + { + Object: map[string]interface{}{ + "apiVersion": "velero.io/v1", + "kind": "Backup", + "metadata": map[string]interface{}{ + "name": "test-backup-2", + "namespace": "openshift-adp", + "creationTimestamp": time.Now().Add(-48 * time.Hour).Format(time.RFC3339), + }, + "status": map[string]interface{}{ + "phase": "Failed", + }, + }, + }, + } + + var buf bytes.Buffer + err := outputTable(&buf, items, "Backup") + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + + output := buf.String() + if !bytes.Contains([]byte(output), []byte("NAME")) { + t.Error("table output should contain NAME header") + } + if !bytes.Contains([]byte(output), []byte("test-backup-1")) { + t.Error("table output should contain test-backup-1") + } + if !bytes.Contains([]byte(output), []byte("Completed")) { + t.Error("table output 
should contain Completed status") + } +} + +func TestDestroyOptions(t *testing.T) { + ctx := context.Background() + scheme := runtime.NewScheme() + + backup := &unstructured.Unstructured{ + Object: map[string]interface{}{ + "apiVersion": "velero.io/v1", + "kind": "Backup", + "metadata": map[string]interface{}{ + "name": "to-delete", + "namespace": "openshift-adp", + }, + "spec": map[string]interface{}{}, + }, + } + + fakeClient := fake.NewClientBuilder(). + WithScheme(scheme). + WithObjects(backup). + Build() + + opts := &DestroyOptions{ + Name: "to-delete", + OADPNamespace: "openshift-adp", + Log: log.Log, + Client: fakeClient, + } + + err := opts.runDestroy(ctx, "Backup") + if err != nil { + t.Fatalf("expected no error, got: %v", err) + } +} + +func TestDestroyOptionsNotFound(t *testing.T) { + ctx := context.Background() + scheme := runtime.NewScheme() + + fakeClient := fake.NewClientBuilder(). + WithScheme(scheme). + Build() + + opts := &DestroyOptions{ + Name: "nonexistent", + OADPNamespace: "openshift-adp", + Log: log.Log, + Client: fakeClient, + } + + err := opts.runDestroy(ctx, "Backup") + if err == nil { + t.Fatal("expected error for nonexistent backup, got none") + } +} diff --git a/cmd/oadp/restore.go b/cmd/oadp/restore.go index 5134db9761b..f0bc6497582 100644 --- a/cmd/oadp/restore.go +++ b/cmd/oadp/restore.go @@ -87,6 +87,7 @@ https://hypershift.pages.dev/how-to/disaster-recovery/dr-cli/`, cmd.Flags().BoolVar(&restorePVs, "restore-pvs", true, "Restore persistent volumes") cmd.Flags().BoolVar(&preserveNodePorts, "preserve-node-ports", true, "Preserve NodePort assignments during restore") cmd.Flags().BoolVar(&opts.UseEtcdSnapshot, "use-etcd-snapshot", false, "Use etcd snapshot mode: etcd is backed up via HCPEtcdBackup CRD snapshots instead of PV volume snapshots") + cmd.Flags().BoolVar(&opts.VerifyBackup, "verify", false, "Run backup integrity verification before creating the restore") // Mark required flags - note that we'll validate backup OR schedule 
in Run() _ = cmd.MarkFlagRequired("hc-name") @@ -198,6 +199,28 @@ func (o *CreateOptions) RunRestore(ctx context.Context) error { return fmt.Errorf("restore generation failed: %w", err) } + // Step 3.5: Run backup integrity verification if --verify is set + if o.VerifyBackup && o.BackupName != "" && o.Client != nil { + o.Log.Info("Running backup integrity verification...") + results, verifyErr := VerifyBackup(ctx, o.Client, o.BackupName, o.OADPNamespace, o.Log) + if verifyErr != nil { + return fmt.Errorf("backup verification failed: %w", verifyErr) + } + allPassed := true + for _, r := range results { + if r.Passed { + o.Log.Info("Backup verification", "check", r.Check, "result", "pass", "detail", r.Detail) + } else { + o.Log.Info("Backup verification", "check", r.Check, "result", "FAIL", "detail", r.Detail) + allPassed = false + } + } + if !allPassed { + return fmt.Errorf("backup verification failed for '%s' — restore aborted", o.BackupName) + } + o.Log.Info("Backup verification passed, proceeding with restore") + } + if o.Render { // Render mode: output YAML to STDOUT err := renderYAMLObject(restore) diff --git a/cmd/oadp/schedule.go b/cmd/oadp/schedule.go index decc65e5d95..ce6a0e4d766 100644 --- a/cmd/oadp/schedule.go +++ b/cmd/oadp/schedule.go @@ -147,7 +147,7 @@ func (o *CreateOptions) RunSchedule(ctx context.Context) error { if o.Client != nil { // Validate HostedCluster exists and get platform o.Log.Info("Detecting hosted cluster platform...") - detectedPlatform, err := oadp.ValidateAndGetHostedClusterPlatform(ctx, o.Client, o.HCName, o.HCNamespace) + platformInfo, err := oadp.ValidateAndGetHostedClusterPlatformInfo(ctx, o.Client, o.HCName, o.HCNamespace) if err != nil { if o.Render { o.Log.Info("Warning: HostedCluster validation failed, using default platform (AWS)", "error", err.Error()) @@ -156,7 +156,8 @@ func (o *CreateOptions) RunSchedule(ctx context.Context) error { return fmt.Errorf("platform detection failed: %w", err) } } else { - platform = 
detectedPlatform + platform = platformInfo.Type + autoIncludeAgentNamespace(o, platformInfo) o.Log.Info("Detected platform", "platform", platform) } diff --git a/cmd/oadp/types.go b/cmd/oadp/types.go index 95d907f47a4..79e4b2fa50e 100644 --- a/cmd/oadp/types.go +++ b/cmd/oadp/types.go @@ -42,6 +42,7 @@ type CreateOptions struct { IncludeNamespaces []string RestorePVs *bool PreserveNodePorts *bool + VerifyBackup bool // Schedule-specific optional flags Paused bool diff --git a/cmd/oadp/verify_backup.go b/cmd/oadp/verify_backup.go new file mode 100644 index 00000000000..fb379683dc3 --- /dev/null +++ b/cmd/oadp/verify_backup.go @@ -0,0 +1,208 @@ +package oadp + +import ( + "context" + "fmt" + "time" + + "github.com/openshift/hypershift/cmd/log" + "github.com/openshift/hypershift/cmd/util" + + "k8s.io/apimachinery/pkg/apis/meta/v1/unstructured" + "k8s.io/apimachinery/pkg/runtime/schema" + + "sigs.k8s.io/controller-runtime/pkg/client" + + "github.com/go-logr/logr" + "github.com/spf13/cobra" +) + +// VerifyOptions holds configuration for the verify oadp-backup command. +type VerifyOptions struct { + BackupName string + OADPNamespace string + Log logr.Logger + Client client.Client +} + +// VerifyResult captures the outcome of a single verification check. +type VerifyResult struct { + Check string + Passed bool + Detail string +} + +func NewVerifyBackupCommand() *cobra.Command { + opts := &VerifyOptions{ + Log: log.Log, + } + + cmd := &cobra.Command{ + Use: "oadp-backup", + Short: "Verify integrity of an OADP backup before restoring", + Long: `Verify the integrity of a Velero backup created for a hosted cluster. 
+ +Runs a series of pre-restore checks to ensure the backup is usable: + - Backup exists and phase is Completed + - Backup has not expired + - Backup contains backed-up items + - Backup storage location is available + - Reports any warnings or errors from the backup + +Examples: + # Verify a specific backup + hypershift verify oadp-backup --name example-clusters-lkbtzw + + # Verify with custom OADP namespace + hypershift verify oadp-backup --name example-clusters-lkbtzw --oadp-namespace custom-adp`, + SilenceUsage: true, + RunE: func(cmd *cobra.Command, args []string) error { + return opts.Run(cmd.Context()) + }, + } + + cmd.Flags().StringVar(&opts.BackupName, "name", "", "Name of the backup to verify (required)") + cmd.Flags().StringVar(&opts.OADPNamespace, "oadp-namespace", "openshift-adp", "Namespace where OADP operator is installed") + + _ = cmd.MarkFlagRequired("name") + + return cmd +} + +func (o *VerifyOptions) Run(ctx context.Context) error { + if o.Client == nil { + var err error + o.Client, err = util.GetClient() + if err != nil { + return fmt.Errorf("failed to create kubernetes client: %w", err) + } + } + + results, err := VerifyBackup(ctx, o.Client, o.BackupName, o.OADPNamespace, o.Log) + if err != nil { + return err + } + + allPassed := true + for _, r := range results { + if r.Passed { + o.Log.Info("Backup verification", "check", r.Check, "result", "pass", "detail", r.Detail) + } else { + o.Log.Info("Backup verification", "check", r.Check, "result", "FAIL", "detail", r.Detail) + allPassed = false + } + } + + if !allPassed { + return fmt.Errorf("backup verification failed for '%s'", o.BackupName) + } + + o.Log.Info("Backup verification passed", "backup", o.BackupName) + return nil +} + +// VerifyBackup runs all integrity checks on a backup and returns the results. +// This function is shared between the standalone verify command and the --verify flag on restore. 
+func VerifyBackup(ctx context.Context, c client.Client, backupName, oadpNamespace string, _ logr.Logger) ([]VerifyResult, error) {
+	var results []VerifyResult
+
+	backup := &unstructured.Unstructured{}
+	backup.SetAPIVersion("velero.io/v1")
+	backup.SetKind("Backup")
+
+	// Failing to fetch the Backup is the only hard error; every later finding is a result entry.
+	if err := c.Get(ctx, client.ObjectKey{Name: backupName, Namespace: oadpNamespace}, backup); err != nil {
+		return nil, fmt.Errorf("backup '%s' not found in namespace '%s': %w", backupName, oadpNamespace, err)
+	}
+	results = append(results, VerifyResult{Check: "exists", Passed: true, Detail: "backup found"})
+
+	// Check phase: Completed is the only phase that passes.
+	phase, _, _ := unstructured.NestedString(backup.Object, "status", "phase")
+	switch phase {
+	case "Completed":
+		results = append(results, VerifyResult{Check: "phase", Passed: true, Detail: phase})
+	case "PartiallyFailed":
+		results = append(results, VerifyResult{Check: "phase", Passed: false, Detail: fmt.Sprintf("phase is %s — some items may not have been backed up", phase)})
+	case "":
+		results = append(results, VerifyResult{Check: "phase", Passed: false, Detail: "phase not set — backup may still be in progress"})
+	default:
+		results = append(results, VerifyResult{Check: "phase", Passed: false, Detail: fmt.Sprintf("phase is %s", phase)})
+	}
+
+	// Check expiration. A missing status.expiration records nothing, but a value that
+	// is present and not RFC 3339 fails the check rather than being skipped silently.
+	expirationStr, expFound, _ := unstructured.NestedString(backup.Object, "status", "expiration")
+	if expFound && expirationStr != "" {
+		expTime, err := time.Parse(time.RFC3339, expirationStr)
+		if err != nil {
+			results = append(results, VerifyResult{Check: "expiration", Passed: false, Detail: fmt.Sprintf("cannot parse expiration %q: %v", expirationStr, err)})
+		} else if remaining := time.Until(expTime); remaining <= 0 {
+			results = append(results, VerifyResult{Check: "expiration", Passed: false, Detail: fmt.Sprintf("backup expired %s ago", (-remaining).Truncate(time.Minute))})
+		} else if remaining < time.Hour {
+			results = append(results, VerifyResult{Check: "expiration", Passed: true, Detail: fmt.Sprintf("expires in %s (less than 1h remaining)", remaining.Truncate(time.Minute))})
+		} else {
+			results = append(results, VerifyResult{Check: "expiration", Passed: true, Detail: fmt.Sprintf("expires in %s", remaining.Truncate(time.Minute))})
+		}
+	}
+
+	// Check items backed up: a missing counter and a zero counter both fail.
+	itemsBackedUp, itemsFound, _ := unstructured.NestedFieldNoCopy(backup.Object, "status", "progress", "itemsBackedUp")
+	if !itemsFound {
+		results = append(results, VerifyResult{Check: "items", Passed: false, Detail: "no progress information available"})
+	} else if count := toInt64(itemsBackedUp); count > 0 {
+		results = append(results, VerifyResult{Check: "items", Passed: true, Detail: fmt.Sprintf("%d items backed up", count)})
+	} else {
+		results = append(results, VerifyResult{Check: "items", Passed: false, Detail: "zero items backed up"})
+	}
+
+	// Check backup storage location. No result is recorded when spec.storageLocation
+	// is unset, so that case neither passes nor fails this check.
+	storageLocation, slFound, _ := unstructured.NestedString(backup.Object, "spec", "storageLocation")
+	if slFound && storageLocation != "" {
+		bsl := &unstructured.Unstructured{}
+		bsl.SetGroupVersionKind(schema.GroupVersionKind{
+			Group:   "velero.io",
+			Version: "v1",
+			Kind:    "BackupStorageLocation",
+		})
+		bslKey := client.ObjectKey{Name: storageLocation, Namespace: oadpNamespace}
+		if err := c.Get(ctx, bslKey, bsl); err != nil {
+			results = append(results, VerifyResult{Check: "storage_location", Passed: false, Detail: fmt.Sprintf("BSL '%s' not found: %v", storageLocation, err)})
+		} else {
+			bslPhase, _, _ := unstructured.NestedString(bsl.Object, "status", "phase")
+			if bslPhase == "Available" {
+				results = append(results, VerifyResult{Check: "storage_location", Passed: true, Detail: fmt.Sprintf("BSL '%s' is Available", storageLocation)})
+			} else {
+				results = append(results, VerifyResult{Check: "storage_location", Passed: false, Detail: fmt.Sprintf("BSL '%s' phase is '%s' (expected Available)", storageLocation, bslPhase)})
+			}
+		}
+	}
+
+	// Check warnings and errors: any error fails, warnings alone pass, neither records nothing.
+	warnings, _, _ := unstructured.NestedFieldNoCopy(backup.Object, "status", "warnings")
+	backupErrors, _, _ := unstructured.NestedFieldNoCopy(backup.Object, "status", "errors")
+	warnCount := toInt64(warnings)
+	errCount := toInt64(backupErrors)
+	if errCount > 0 {
+		results = append(results, VerifyResult{Check: "errors", Passed: false, Detail: fmt.Sprintf("%d errors, %d warnings", errCount, warnCount)})
+	} else if warnCount > 0 {
+		results = append(results, VerifyResult{Check: "warnings", Passed: true, Detail: fmt.Sprintf("%d warnings (no errors)", warnCount)})
+	}
+
+	return results, nil
+}
+
+// toInt64 converts an int64, float64 (truncated toward zero) or int value to int64.
+// Any other dynamic type, including a nil interface, yields 0.
+func toInt64(val interface{}) int64 {
+	switch v := val.(type) {
+	case int64:
+		return v
+	case float64:
+		return int64(v)
+	case int:
+		return int64(v)
+	default:
+		return 0
+	}
+}
diff --git a/cmd/verify/verify.go b/cmd/verify/verify.go
new file mode 100644
index 00000000000..4d024f2f789
--- /dev/null
+++ b/cmd/verify/verify.go
@@ -0,0 +1,19 @@
+package verify
+
+import (
+	"github.com/openshift/hypershift/cmd/oadp"
+
+	"github.com/spf13/cobra"
+)
+
+func NewCommand() *cobra.Command {
+	cmd := &cobra.Command{
+		Use:          "verify",
+		Short:        "Commands for verifying HyperShift resources",
+		SilenceUsage: true,
+	}
+
+	cmd.AddCommand(oadp.NewVerifyBackupCommand())
+
+	return cmd
+}
diff --git a/main.go b/main.go
index 3287a92bd90..f42855c6f19 100644
--- a/main.go
+++ b/main.go
@@ -28,7 +28,9 @@ import (
 	destroycmd "github.com/openshift/hypershift/cmd/destroy"
 	dumpcmd "github.com/openshift/hypershift/cmd/dump"
 	fixcmd "github.com/openshift/hypershift/cmd/fix"
+	getcmd "github.com/openshift/hypershift/cmd/get"
 	installcmd "github.com/openshift/hypershift/cmd/install"
+	verifycmd "github.com/openshift/hypershift/cmd/verify"
 	cliversion "github.com/openshift/hypershift/cmd/version"
 	"github.com/openshift/hypershift/support/supportedversion"
 
@@ -64,6 +66,8 @@ func main() {
 	cmd.AddCommand(installcmd.NewCommand())
 	cmd.AddCommand(createcmd.NewCommand())
 	cmd.AddCommand(destroycmd.NewCommand())
+	cmd.AddCommand(getcmd.NewCommand())
+	cmd.AddCommand(verifycmd.NewCommand())
 	cmd.AddCommand(dumpcmd.NewCommand())
 	cmd.AddCommand(fixcmd.NewCommand())
 	cmd.AddCommand(consolelogs.NewCommand())
diff --git a/support/oadp/validate.go b/support/oadp/validate.go
index 98a0e05284f..992bc41e8f6 100644
--- a/support/oadp/validate.go
+++ b/support/oadp/validate.go
@@ -106,9 +106,24 @@ func VerifyDPAStatus(ctx context.Context, c client.Client, namespace string) err
 	return fmt.Errorf("no ready DataProtectionApplication found in namespace %s", namespace)
 }
 
+// PlatformInfo contains platform metadata extracted from a HostedCluster.
+type PlatformInfo struct {
+	Type           string // spec.platform.type, upper-cased
+	AgentNamespace string // spec.platform.agent.agentNamespace; left empty unless the type is Agent and spec.platform.agent is set
+}
+
 // ValidateAndGetHostedClusterPlatform validates that the HostedCluster exists and returns its platform
 func ValidateAndGetHostedClusterPlatform(ctx context.Context, c client.Client, hcName, hcNamespace string) (string, error) {
-	// Get the HostedCluster resource using typed API
+	info, err := ValidateAndGetHostedClusterPlatformInfo(ctx, c, hcName, hcNamespace)
+	if err != nil {
+		return "", err
+	}
+	return info.Type, nil
+}
+
+// ValidateAndGetHostedClusterPlatformInfo validates that the HostedCluster exists and returns
+// platform metadata including the agent namespace for Agent platform clusters.
+func ValidateAndGetHostedClusterPlatformInfo(ctx context.Context, c client.Client, hcName, hcNamespace string) (*PlatformInfo, error) {
 	hostedCluster := &hypershiftv1beta1.HostedCluster{}
 
 	err := c.Get(ctx, types.NamespacedName{
@@ -116,17 +131,23 @@ func ValidateAndGetHostedClusterPlatform(ctx context.Context, c client.Client, h
 		Namespace: hcNamespace,
 	}, hostedCluster)
 	if err != nil {
-		return "", fmt.Errorf("HostedCluster '%s' not found in namespace '%s': %w", hcName, hcNamespace, err)
+		return nil, fmt.Errorf("HostedCluster '%s' not found in namespace '%s': %w", hcName, hcNamespace, err)
 	}
 
-	// Extract the platform from the spec
 	platformSpec := hostedCluster.Spec.Platform
 	if platformSpec.Type == "" {
-		return "", fmt.Errorf("platform type not found in HostedCluster '%s' spec", hcName)
+		return nil, fmt.Errorf("platform type not found in HostedCluster '%s' spec", hcName)
+	}
+
+	info := &PlatformInfo{
+		Type: strings.ToUpper(string(platformSpec.Type)),
+	}
+
+	if platformSpec.Type == hypershiftv1beta1.AgentPlatform && platformSpec.Agent != nil {
+		info.AgentNamespace = platformSpec.Agent.AgentNamespace
 	}
 
-	// Normalize platform name to uppercase for consistency
-	return strings.ToUpper(string(platformSpec.Type)), nil
+	return info, nil
 }
 
 // CheckDPAHypershiftPlugin checks if the hypershift plugin is configured in DataProtectionApplication resources