diff --git a/cmd/main.go b/cmd/main.go index a7fcbef9..af80522d 100644 --- a/cmd/main.go +++ b/cmd/main.go @@ -151,10 +151,11 @@ func main() { os.Exit(1) } if err = (&controller.StoreExecReconciler{ - Client: nsClient, - Logger: logger, - Scheme: mgr.GetScheme(), - Recorder: mgr.GetEventRecorderFor(fmt.Sprintf("shopware-controller-%s", cfg.Namespace)), + Client: nsClient, + Logger: logger, + Scheme: mgr.GetScheme(), + Recorder: mgr.GetEventRecorderFor(fmt.Sprintf("shopware-controller-%s", cfg.Namespace)), + CleanupGracePeriod: cfg.SuccessfulCRCleanupGracePeriod, }).SetupWithManager(mgr); err != nil { setupLog.Error(err, "unable to create exec controller", "controller", "StoreExec") os.Exit(1) @@ -184,10 +185,11 @@ func main() { os.Exit(1) } if err = (&controller.StoreDebugInstanceReconciler{ - Client: nsClient, - Logger: logger, - Scheme: mgr.GetScheme(), - Recorder: mgr.GetEventRecorderFor(fmt.Sprintf("shopware-controller-%s", cfg.Namespace)), + Client: nsClient, + Logger: logger, + Scheme: mgr.GetScheme(), + Recorder: mgr.GetEventRecorderFor(fmt.Sprintf("shopware-controller-%s", cfg.Namespace)), + CleanupGracePeriod: cfg.SuccessfulCRCleanupGracePeriod, }).SetupWithManager(mgr); err != nil { setupLog.Error(err, "unable to create instance controller", "controller", "StoreDebugInstance") os.Exit(1) diff --git a/helm/templates/deployment.yaml b/helm/templates/deployment.yaml index 28a1e05b..6d28c623 100644 --- a/helm/templates/deployment.yaml +++ b/helm/templates/deployment.yaml @@ -79,6 +79,8 @@ spec: value: "{{ .Values.logFormat | default "json" }}" - name: DISABLE_CHECKS value: "{{ .Values.disableChecks | default "false" }}" + - name: SUCCESSFUL_CR_CLEANUP_GRACE_PERIOD + value: "{{ .Values.successfulCRCleanupGracePeriod | default "1h" }}" {{- if and (hasKey .Values "events") (hasKey .Values.events "nats") (.Values.events.nats.enable) }} - name: NATS_ENABLE value: "true" diff --git a/helm/values.yaml b/helm/values.yaml index c552c801..f2367bb2 100644 --- a/helm/values.yaml +++ b/helm/values.yaml @@ -94,3 +94,5 @@ logFormat: json # Disable check for s3/database/fastly and Opensearch checks. Useful if network access is not given for one of the services. # This is a global level. You can also control this per store. disableChecks: false +# Grace period before successful StoreExec and StoreDebugInstance CRs are deleted. Set to "0" to disable cleanup. +successfulCRCleanupGracePeriod: 1h diff --git a/internal/config/config.go b/internal/config/config.go index 48861daa..7ae9e96a 100644 --- a/internal/config/config.go +++ b/internal/config/config.go @@ -5,6 +5,7 @@ import ( "encoding/json" "fmt" "strings" + "time" "github.com/sethvargo/go-envconfig" ) @@ -65,6 +66,8 @@ type StoreConfig struct { EnableLeaderElection bool `env:"LEADER_ELECT, default=true"` DisableChecks bool `env:"DISABLE_CHECKS, default=false"` Namespace string `env:"NAMESPACE, default=default"` + + SuccessfulCRCleanupGracePeriod time.Duration `env:"SUCCESSFUL_CR_CLEANUP_GRACE_PERIOD, default=1h"` } type Config struct { diff --git a/internal/controller/storedebuginstance_controller.go b/internal/controller/storedebuginstance_controller.go index f22d7e0d..b4cab36e 100644 --- a/internal/controller/storedebuginstance_controller.go +++ b/internal/controller/storedebuginstance_controller.go @@ -40,9 +40,10 @@ import ( // StoreDebugInstanceReconciler reconciles a StoreDebugInstance object type StoreDebugInstanceReconciler struct { client.Client - Scheme *runtime.Scheme - Recorder record.EventRecorder - Logger *zap.SugaredLogger + Scheme *runtime.Scheme + Recorder record.EventRecorder + Logger *zap.SugaredLogger + CleanupGracePeriod time.Duration } // +kubebuilder:rbac:groups=shop.shopware.com,namespace=default,resources=storedebuginstances,verbs=get;list;watch;create;update;patch;delete @@ -59,8 +60,12 @@ func (r *StoreDebugInstanceReconciler) Reconcile(ctx context.Context, req ctrl.R var store *shopv1.Store var storeDebugInstance *shopv1.StoreDebugInstance + var skipStatusUpdate bool rr = ctrl.Result{RequeueAfter: 10 * time.Second} defer func() { + if skipStatusUpdate { + return + } if err := r.reconcileCRStatus(ctx, store, storeDebugInstance, err); err != nil { log.Errorw("failed to update status", zap.Error(err)) } @@ -80,6 +85,16 @@ func (r *StoreDebugInstanceReconciler) Reconcile(ctx context.Context, req ctrl.R return rr, fmt.Errorf("invalid duration: %w", err) } + if result, deleted, cleanupErr := r.deleteSuccessfulStoreDebugInstanceIfCleanupDue(ctx, storeDebugInstance); deleted || cleanupErr != nil { + if cleanupErr != nil { + log.Errorw("failed to cleanup successful store debug instance", zap.Error(cleanupErr)) + skipStatusUpdate = true + return rr, nil + } + skipStatusUpdate = true + return result, nil + } + store, err = k8s.GetStore(ctx, r.Client, types.NamespacedName{ Namespace: req.Namespace, Name: storeDebugInstance.Spec.StoreRef, @@ -106,6 +121,15 @@ func (r *StoreDebugInstanceReconciler) Reconcile(ctx context.Context, req ctrl.R return rr, nil } + if result, handled, cleanupErr := r.reconcileSuccessfulStoreDebugInstanceCleanup(ctx, storeDebugInstance); handled || cleanupErr != nil { + if cleanupErr != nil { + log.Errorw("failed to cleanup successful store debug instance", zap.Error(cleanupErr)) + return rr, nil + } + skipStatusUpdate = true + return result, nil + } + rr.Requeue = false return rr, nil } @@ -181,6 +205,63 @@ func (r *StoreDebugInstanceReconciler) reconcilePod(ctx context.Context, store * return nil } +func (r *StoreDebugInstanceReconciler) deleteSuccessfulStoreDebugInstanceIfCleanupDue( + ctx context.Context, + storeDebugInstance *shopv1.StoreDebugInstance, +) (ctrl.Result, bool, error) { + if !r.isStoreDebugInstanceCleanupEligible(storeDebugInstance) { + return ctrl.Result{}, false, nil + } + + if remaining := r.storeDebugInstanceCleanupRemaining(storeDebugInstance); remaining > 0 { + return ctrl.Result{}, false, nil + } + + return r.deleteSuccessfulStoreDebugInstance(ctx, storeDebugInstance) +} + +func (r *StoreDebugInstanceReconciler) reconcileSuccessfulStoreDebugInstanceCleanup( + ctx context.Context, + storeDebugInstance *shopv1.StoreDebugInstance, +) (ctrl.Result, bool, error) { + if !r.isStoreDebugInstanceCleanupEligible(storeDebugInstance) { + return ctrl.Result{}, false, nil + } + + if remaining := r.storeDebugInstanceCleanupRemaining(storeDebugInstance); remaining > 0 { + return ctrl.Result{RequeueAfter: remaining}, true, nil + } + + return r.deleteSuccessfulStoreDebugInstance(ctx, storeDebugInstance) +} + +func (r *StoreDebugInstanceReconciler) isStoreDebugInstanceCleanupEligible( + storeDebugInstance *shopv1.StoreDebugInstance, +) bool { + return r.CleanupGracePeriod > 0 && + storeDebugInstance.DeletionTimestamp == nil && + storeDebugInstance.IsState(shopv1.StoreDebugInstanceStateDone) +} + +func (r *StoreDebugInstanceReconciler) storeDebugInstanceCleanupRemaining( + storeDebugInstance *shopv1.StoreDebugInstance, +) time.Duration { + duration, _ := time.ParseDuration(storeDebugInstance.Spec.Duration) + deleteAfter := storeDebugInstance.CreationTimestamp.Add(duration).Add(r.CleanupGracePeriod) + return time.Until(deleteAfter) +} + +func (r *StoreDebugInstanceReconciler) deleteSuccessfulStoreDebugInstance( + ctx context.Context, + storeDebugInstance *shopv1.StoreDebugInstance, +) (ctrl.Result, bool, error) { + if err := r.Delete(ctx, storeDebugInstance); err != nil && !k8serrors.IsNotFound(err) { + return ctrl.Result{}, false, fmt.Errorf("delete successful StoreDebugInstance: %w", err) + } + + return ctrl.Result{Requeue: false}, true, nil +} + func (r *StoreDebugInstanceReconciler) reconcileService(ctx context.Context, store *shopv1.Store, storeDebugInstance *shopv1.StoreDebugInstance) error { svc := service.DebugService(*store, *storeDebugInstance) diff --git a/internal/controller/storeexec_controller.go b/internal/controller/storeexec_controller.go index c702e2a4..3b55baaa 100644 --- a/internal/controller/storeexec_controller.go +++ b/internal/controller/storeexec_controller.go @@ -20,9 +20,10 @@ import ( type StoreExecReconciler struct { client.Client - Scheme *runtime.Scheme - Recorder record.EventRecorder - Logger *zap.SugaredLogger + Scheme *runtime.Scheme + Recorder record.EventRecorder + Logger *zap.SugaredLogger + CleanupGracePeriod time.Duration } // +kubebuilder:rbac:groups=shop.shopware.com,namespace=default,resources=storeexecs,verbs=get;list;watch;create;update;patch;delete @@ -42,7 +43,11 @@ func (r *StoreExecReconciler) Reconcile(ctx context.Context, req ctrl.Request) ( var ex *v1.StoreExec var store *v1.Store + var skipStatusUpdate bool defer func() { + if skipStatusUpdate { + return + } if err := r.reconcileCRStatus(ctx, store, ex, err); err != nil { log.Errorw("failed to update status", zap.Error(err)) } @@ -57,6 +62,16 @@ func (r *StoreExecReconciler) Reconcile(ctx context.Context, req ctrl.Request) ( return rr, nil } + if result, handled, cleanupErr := r.reconcileSuccessfulStoreExecCleanup(ctx, ex); handled || cleanupErr != nil { + if cleanupErr != nil { + log.Errorw("failed to cleanup successful store-exec", zap.Error(cleanupErr)) + skipStatusUpdate = true + return rr, nil + } + skipStatusUpdate = true + return result, nil + } + store, err = k8s.GetStore(ctx, r.Client, types.NamespacedName{ Namespace: req.Namespace, Name: ex.Spec.StoreRef, @@ -131,6 +146,49 @@ func (r *StoreExecReconciler) reconcileJob(ctx context.Context, store *v1.Store, return nil } +func (r *StoreExecReconciler) reconcileSuccessfulStoreExecCleanup( + ctx context.Context, + ex *v1.StoreExec, +) (ctrl.Result, bool, error) { + if r.CleanupGracePeriod <= 0 || + ex.DeletionTimestamp != nil || + ex.Spec.CronSchedule != "" || + !ex.IsState(v1.ExecStateDone) { + return ctrl.Result{}, false, nil + } + + deleteAfter := storeExecFinishedAt(ex).Add(r.CleanupGracePeriod) + if remaining := time.Until(deleteAfter); remaining > 0 { + return ctrl.Result{RequeueAfter: remaining}, true, nil + } + + if err := r.Delete(ctx, ex); err != nil && !k8serrors.IsNotFound(err) { + return ctrl.Result{}, false, fmt.Errorf("delete successful StoreExec: %w", err) + } + + return ctrl.Result{Requeue: false}, true, nil +} + +func storeExecFinishedAt(ex *v1.StoreExec) time.Time { + for i := len(ex.Status.Conditions) - 1; i >= 0; i-- { + if !ex.Status.Conditions[i].LastTransitionTime.IsZero() { + return ex.Status.Conditions[i].LastTransitionTime.Time + } + } + + for i := len(ex.Status.Conditions) - 1; i >= 0; i-- { + if !ex.Status.Conditions[i].LastUpdateTime.IsZero() { + return ex.Status.Conditions[i].LastUpdateTime.Time + } + } + + if !ex.CreationTimestamp.IsZero() { + return ex.CreationTimestamp.Time + } + + return time.Now() +} + func (r *StoreExecReconciler) reconcileCronJob(ctx context.Context, store *v1.Store, exec *v1.StoreExec) (err error) { var changed bool obj := job.CommandCronJob(*store, *exec) diff --git a/internal/controller/successful_cleanup_test.go b/internal/controller/successful_cleanup_test.go new file mode 100644 index 00000000..dcbd4467 --- /dev/null +++ b/internal/controller/successful_cleanup_test.go @@ -0,0 +1,249 @@ +package controller + +import ( + "context" + "testing" + "time" + + shopv1 "github.com/shopware/shopware-operator/api/v1" + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" + k8serrors "k8s.io/apimachinery/pkg/api/errors" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/runtime" + "k8s.io/apimachinery/pkg/types" + "sigs.k8s.io/controller-runtime/pkg/client/fake" +) + +const cleanupTestNamespace = "default" + +func TestStoreExecSuccessfulCleanupDeletesDoneOneShotAfterGracePeriod(t *testing.T) { + ctx := context.Background() + ex := storeExecForCleanup("done-old", shopv1.ExecStateDone, time.Now().Add(-2*time.Hour)) + cl := fake.NewClientBuilder(). + WithScheme(cleanupTestScheme(t)). + WithObjects(ex). + Build() + + reconciler := StoreExecReconciler{ + Client: cl, + CleanupGracePeriod: time.Hour, + } + + result, handled, err := reconciler.reconcileSuccessfulStoreExecCleanup(ctx, ex) + + require.NoError(t, err) + assert.True(t, handled) + assert.False(t, result.Requeue) + + got := &shopv1.StoreExec{} + err = cl.Get(ctx, types.NamespacedName{Namespace: cleanupTestNamespace, Name: ex.Name}, got) + assert.True(t, k8serrors.IsNotFound(err)) +} + +func TestStoreExecSuccessfulCleanupRetainsDoneOneShotBeforeGracePeriod(t *testing.T) { + ctx := context.Background() + ex := storeExecForCleanup("done-new", shopv1.ExecStateDone, time.Now().Add(-30*time.Minute)) + cl := fake.NewClientBuilder(). + WithScheme(cleanupTestScheme(t)). + WithObjects(ex). + Build() + + reconciler := StoreExecReconciler{ + Client: cl, + CleanupGracePeriod: time.Hour, + } + + result, handled, err := reconciler.reconcileSuccessfulStoreExecCleanup(ctx, ex) + + require.NoError(t, err) + assert.True(t, handled) + assert.Greater(t, result.RequeueAfter, time.Duration(0)) + + got := &shopv1.StoreExec{} + require.NoError(t, cl.Get(ctx, types.NamespacedName{Namespace: cleanupTestNamespace, Name: ex.Name}, got)) +} + +func TestStoreExecSuccessfulCleanupRetainsCronAndFailedResources(t *testing.T) { + ctx := context.Background() + scheme := cleanupTestScheme(t) + + tests := []struct { + name string + ex *shopv1.StoreExec + }{ + { + name: "cron done", + ex: func() *shopv1.StoreExec { + ex := storeExecForCleanup("cron-done", shopv1.ExecStateDone, time.Now().Add(-2*time.Hour)) + ex.Spec.CronSchedule = "*/5 * * * *" + return ex + }(), + }, + { + name: "error", + ex: storeExecForCleanup("error", shopv1.ExecStateError, time.Now().Add(-2*time.Hour)), + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + cl := fake.NewClientBuilder(). + WithScheme(scheme). + WithObjects(tt.ex). + Build() + + reconciler := StoreExecReconciler{ + Client: cl, + CleanupGracePeriod: time.Hour, + } + + result, handled, err := reconciler.reconcileSuccessfulStoreExecCleanup(ctx, tt.ex) + + require.NoError(t, err) + assert.False(t, handled) + assert.Equal(t, time.Duration(0), result.RequeueAfter) + + got := &shopv1.StoreExec{} + require.NoError(t, cl.Get(ctx, types.NamespacedName{Namespace: cleanupTestNamespace, Name: tt.ex.Name}, got)) + }) + } +} + +func TestStoreDebugInstanceSuccessfulCleanupUsesDurationAndGracePeriod(t *testing.T) { + ctx := context.Background() + scheme := cleanupTestScheme(t) + + tests := []struct { + name string + objectName string + creationTime time.Time + expectDeleted bool + }{ + { + name: "after duration and grace", + objectName: "debug-done-old", + creationTime: time.Now().Add(-3 * time.Hour), + expectDeleted: true, + }, + { + name: "before duration and grace", + objectName: "debug-done-new", + creationTime: time.Now().Add(-90 * time.Minute), + expectDeleted: false, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + debugInstance := storeDebugInstanceForCleanup(tt.objectName, tt.creationTime) + cl := fake.NewClientBuilder(). + WithScheme(scheme). + WithObjects(debugInstance). + Build() + + reconciler := StoreDebugInstanceReconciler{ + Client: cl, + CleanupGracePeriod: time.Hour, + } + + result, handled, err := reconciler.reconcileSuccessfulStoreDebugInstanceCleanup(ctx, debugInstance) + + require.NoError(t, err) + assert.True(t, handled) + + got := &shopv1.StoreDebugInstance{} + err = cl.Get(ctx, types.NamespacedName{Namespace: cleanupTestNamespace, Name: debugInstance.Name}, got) + if tt.expectDeleted { + assert.True(t, k8serrors.IsNotFound(err)) + assert.False(t, result.Requeue) + return + } + + require.NoError(t, err) + assert.Greater(t, result.RequeueAfter, time.Duration(0)) + }) + } +} + +func TestSuccessfulCleanupDisabledWithZeroGracePeriod(t *testing.T) { + ctx := context.Background() + scheme := cleanupTestScheme(t) + + ex := storeExecForCleanup("disabled-exec", shopv1.ExecStateDone, time.Now().Add(-2*time.Hour)) + debugInstance := storeDebugInstanceForCleanup("disabled-debug", time.Now().Add(-3*time.Hour)) + cl := fake.NewClientBuilder(). + WithScheme(scheme). + WithObjects(ex, debugInstance). + Build() + + execReconciler := StoreExecReconciler{Client: cl} + execResult, execHandled, err := execReconciler.reconcileSuccessfulStoreExecCleanup(ctx, ex) + require.NoError(t, err) + assert.False(t, execHandled) + assert.Equal(t, time.Duration(0), execResult.RequeueAfter) + + debugReconciler := StoreDebugInstanceReconciler{Client: cl} + debugResult, debugHandled, err := debugReconciler.reconcileSuccessfulStoreDebugInstanceCleanup(ctx, debugInstance) + require.NoError(t, err) + assert.False(t, debugHandled) + assert.Equal(t, time.Duration(0), debugResult.RequeueAfter) + + gotExec := &shopv1.StoreExec{} + require.NoError(t, cl.Get(ctx, types.NamespacedName{Namespace: cleanupTestNamespace, Name: ex.Name}, gotExec)) + + gotDebugInstance := &shopv1.StoreDebugInstance{} + require.NoError(t, cl.Get(ctx, types.NamespacedName{Namespace: cleanupTestNamespace, Name: debugInstance.Name}, gotDebugInstance)) +} + +func cleanupTestScheme(t *testing.T) *runtime.Scheme { + t.Helper() + + scheme := runtime.NewScheme() + require.NoError(t, shopv1.AddToScheme(scheme)) + + return scheme +} + +func storeExecForCleanup(name string, state shopv1.StatefulState, finishedAt time.Time) *shopv1.StoreExec { + return &shopv1.StoreExec{ + ObjectMeta: metav1.ObjectMeta{ + Name: name, + Namespace: cleanupTestNamespace, + CreationTimestamp: metav1.NewTime(finishedAt.Add(-time.Hour)), + }, + Spec: shopv1.StoreExecSpec{ + StoreRef: "test", + Script: "echo test", + }, + Status: shopv1.StoreExecStatus{ + State: state, + Conditions: []shopv1.ExecCondition{ + { + Type: shopv1.ExecStateRunning, + LastTransitionTime: metav1.NewTime(finishedAt), + LastUpdateTime: metav1.NewTime(finishedAt), + Message: "Command finished", + Status: "True", + }, + }, + }, + } +} + +func storeDebugInstanceForCleanup(name string, creationTime time.Time) *shopv1.StoreDebugInstance { + return &shopv1.StoreDebugInstance{ + ObjectMeta: metav1.ObjectMeta{ + Name: name, + Namespace: cleanupTestNamespace, + CreationTimestamp: metav1.NewTime(creationTime), + }, + Spec: shopv1.StoreDebugInstanceSpec{ + StoreRef: "test", + Duration: "1h", + }, + Status: shopv1.StoreDebugInstanceStatus{ + State: shopv1.StoreDebugInstanceStateDone, + }, + } +}