From f8b200e5a14a0b47fa4a1b6df923ee0d5314b86d Mon Sep 17 00:00:00 2001 From: Michael Shen Date: Wed, 17 Jun 2026 15:03:18 -0700 Subject: [PATCH] fix(helm): enable graceful termination and overrides for celery worker Currently, the worker startup command runs celery without `exec`. When the shell script is the container's entrypoint, the shell stays PID 1 and celery runs as its child. Since the shell installs no handler for SIGTERM, the signal delivered on Kubernetes pod termination never reaches celery, causing it to get SIGKILLed when the grace period expires. This drops in-flight tasks and leaves the worker status as online. Adding `exec` before the celery command ensures that celery replaces the shell as PID 1, allowing it to receive and handle termination signals directly. Additionally, this adds support for overriding `lifecycle` hooks and `terminationGracePeriodSeconds` on both the superset worker and server (supersetNode) deployments, allowing custom shutdown commands (such as celery control shutdown) and extended drain periods. Signed-off-by: Michael Shen --- helm/superset/Chart.yaml | 2 +- helm/superset/README.md | 6 +++++- helm/superset/templates/deployment-worker.yaml | 6 ++++++ helm/superset/templates/deployment.yaml | 6 ++++++ helm/superset/values.yaml | 12 +++++++++++- 5 files changed, 29 insertions(+), 3 deletions(-) diff --git a/helm/superset/Chart.yaml b/helm/superset/Chart.yaml index 0018ba4a1562..a464380d2460 100644 --- a/helm/superset/Chart.yaml +++ b/helm/superset/Chart.yaml @@ -29,7 +29,7 @@ maintainers: - name: craig-rueda email: craig@craigrueda.com url: https://github.com/craig-rueda -version: 0.16.2 # See [README](https://github.com/apache/superset/blob/master/helm/superset/README.md#versioning) for version details. +version: 0.16.3 # See [README](https://github.com/apache/superset/blob/master/helm/superset/README.md#versioning) for version details. 
dependencies: - name: postgresql version: 16.7.27 diff --git a/helm/superset/README.md b/helm/superset/README.md index d638e08c5a19..1caf36460a82 100644 --- a/helm/superset/README.md +++ b/helm/superset/README.md @@ -23,7 +23,7 @@ NOTE: This file is generated by helm-docs: https://github.com/norwoodj/helm-docs # superset -![Version: 0.16.2](https://img.shields.io/badge/Version-0.16.2-informational?style=flat-square) +![Version: 0.16.3](https://img.shields.io/badge/Version-0.16.3-informational?style=flat-square) Apache Superset is a modern, enterprise-ready business intelligence web application @@ -219,6 +219,7 @@ On helm this can be set on `extraSecretEnv.SUPERSET_SECRET_KEY` or `configOverri | supersetNode.extraContainers | list | `[]` | Launch additional containers into supersetNode pod | | supersetNode.forceReload | bool | `false` | If true, forces deployment to reload on each upgrade | | supersetNode.initContainers | list | a container waiting for postgres | Init containers | +| supersetNode.lifecycle | object | `{}` | Container lifecycle hooks, e.g. a preStop sleep so the Service/Ingress stops routing to the pod before gunicorn receives SIGTERM | | supersetNode.livenessProbe.failureThreshold | int | `3` | | | supersetNode.livenessProbe.httpGet.path | string | `"/health"` | | | supersetNode.livenessProbe.httpGet.port | string | `"http"` | | @@ -251,6 +252,7 @@ On helm this can be set on `extraSecretEnv.SUPERSET_SECRET_KEY` or `configOverri | supersetNode.startupProbe.successThreshold | int | `1` | | | supersetNode.startupProbe.timeoutSeconds | int | `1` | | | supersetNode.strategy | object | `{}` | | +| supersetNode.terminationGracePeriodSeconds | string | `nil` | Pod termination grace period (seconds). 
Set greater than GUNICORN_TIMEOUT so in-flight requests can drain before SIGKILL | | supersetNode.topologySpreadConstraints | list | `[]` | TopologySpreadConstrains to be added to supersetNode deployments | | supersetWebsockets.affinity | object | `{}` | Affinity to be added to supersetWebsockets deployment | | supersetWebsockets.command | list | `[]` | | @@ -314,6 +316,7 @@ On helm this can be set on `extraSecretEnv.SUPERSET_SECRET_KEY` or `configOverri | supersetWorker.extraContainers | list | `[]` | Launch additional containers into supersetWorker pod | | supersetWorker.forceReload | bool | `false` | If true, forces deployment to reload on each upgrade | | supersetWorker.initContainers | list | a container waiting for postgres and redis | Init container | +| supersetWorker.lifecycle | object | `{}` | Container lifecycle hooks for the worker pod | | supersetWorker.livenessProbe.exec.command | list | a `celery inspect ping` command | Liveness probe command | | supersetWorker.livenessProbe.failureThreshold | int | `3` | | | supersetWorker.livenessProbe.initialDelaySeconds | int | `120` | | @@ -334,6 +337,7 @@ On helm this can be set on `extraSecretEnv.SUPERSET_SECRET_KEY` or `configOverri | supersetWorker.resources | object | `{}` | Resource settings for the supersetWorker pods - these settings overwrite might existing values from the global resources object defined above. 
| | supersetWorker.startupProbe | object | `{}` | No startup/readiness probes by default since we don't really care about its startup time (it doesn't serve traffic) | | supersetWorker.strategy | object | `{}` | | +| supersetWorker.terminationGracePeriodSeconds | string | `nil` | Pod termination grace period (seconds) for the worker pod so in-flight tasks can drain before SIGKILL | | supersetWorker.topologySpreadConstraints | list | `[]` | TopologySpreadConstrains to be added to supersetWorker deployments | | tolerations | list | `[]` | | | topologySpreadConstraints | list | `[]` | TopologySpreadConstrains to be added to all deployments | diff --git a/helm/superset/templates/deployment-worker.yaml b/helm/superset/templates/deployment-worker.yaml index d559b7f9e902..5c0bf65e3efc 100644 --- a/helm/superset/templates/deployment-worker.yaml +++ b/helm/superset/templates/deployment-worker.yaml @@ -134,6 +134,9 @@ spec: {{- if .Values.supersetWorker.livenessProbe }} livenessProbe: {{- .Values.supersetWorker.livenessProbe | toYaml | nindent 12 }} {{- end }} + {{- if .Values.supersetWorker.lifecycle }} + lifecycle: {{- .Values.supersetWorker.lifecycle | toYaml | nindent 12 }} + {{- end }} resources: {{- if .Values.supersetWorker.resources }} {{- toYaml .Values.supersetWorker.resources | nindent 12 }} @@ -170,6 +173,9 @@ spec: {{- with .Values.tolerations }} tolerations: {{- toYaml . 
| nindent 8 }} {{- end }} + {{- if .Values.supersetWorker.terminationGracePeriodSeconds }} + terminationGracePeriodSeconds: {{ .Values.supersetWorker.terminationGracePeriodSeconds }} + {{- end }} {{- if .Values.imagePullSecrets }} imagePullSecrets: {{- toYaml .Values.imagePullSecrets | nindent 8 }} {{- end }} diff --git a/helm/superset/templates/deployment.yaml b/helm/superset/templates/deployment.yaml index c9c4e19db5f5..08ceee07f2b5 100644 --- a/helm/superset/templates/deployment.yaml +++ b/helm/superset/templates/deployment.yaml @@ -144,6 +144,9 @@ spec: {{- if .Values.supersetNode.livenessProbe }} livenessProbe: {{- .Values.supersetNode.livenessProbe | toYaml | nindent 12 }} {{- end }} + {{- if .Values.supersetNode.lifecycle }} + lifecycle: {{- .Values.supersetNode.lifecycle | toYaml | nindent 12 }} + {{- end }} resources: {{- if .Values.supersetNode.resources }} {{- toYaml .Values.supersetNode.resources | nindent 12 }} @@ -180,6 +183,9 @@ spec: {{- with .Values.tolerations }} tolerations: {{- toYaml . | nindent 8 }} {{- end }} + {{- if .Values.supersetNode.terminationGracePeriodSeconds }} + terminationGracePeriodSeconds: {{ .Values.supersetNode.terminationGracePeriodSeconds }} + {{- end }} {{- if .Values.imagePullSecrets }} imagePullSecrets: {{- toYaml .Values.imagePullSecrets | nindent 8 }} {{- end }} diff --git a/helm/superset/values.yaml b/helm/superset/values.yaml index fc5124004f76..7a8fa970e04c 100644 --- a/helm/superset/values.yaml +++ b/helm/superset/values.yaml @@ -360,6 +360,12 @@ supersetNode: failureThreshold: 3 periodSeconds: 15 successThreshold: 1 + # -- Container lifecycle hooks, e.g. a preStop sleep so the Service/Ingress + # stops routing to the pod before gunicorn receives SIGTERM + lifecycle: {} + # -- Pod termination grace period (seconds). 
Set greater than GUNICORN_TIMEOUT so + # in-flight requests can drain before SIGKILL + terminationGracePeriodSeconds: ~ # -- Resource settings for the supersetNode pods - these settings overwrite might existing values from the global resources object defined above. resources: {} # limits: @@ -400,7 +406,7 @@ supersetWorker: command: - "/bin/sh" - "-c" - - ". {{ .Values.configMountPath }}/superset_bootstrap.sh; celery --app=superset.tasks.celery_app:app worker" + - ". {{ .Values.configMountPath }}/superset_bootstrap.sh; exec celery --app=superset.tasks.celery_app:app worker" # -- If true, forces deployment to reload on each upgrade forceReload: false # -- Init container @@ -464,6 +470,10 @@ supersetWorker: failureThreshold: 3 periodSeconds: 60 successThreshold: 1 + # -- Container lifecycle hooks for the worker pod + lifecycle: {} + # -- Pod termination grace period (seconds) for the worker pod so in-flight tasks can drain before SIGKILL + terminationGracePeriodSeconds: ~ # -- No startup/readiness probes by default since we don't really care about its startup time (it doesn't serve traffic) startupProbe: {} # -- No startup/readiness probes by default since we don't really care about its startup time (it doesn't serve traffic)