diff --git a/NEXT_CHANGELOG.md b/NEXT_CHANGELOG.md index eb422ac238a..cc0cd0c73bf 100644 --- a/NEXT_CHANGELOG.md +++ b/NEXT_CHANGELOG.md @@ -7,6 +7,7 @@ ### CLI ### Bundles +* `bundle validate` now warns when a job defines a `continuous` block without `continuous.task_retry_mode`, since it defaults to `NEVER` and the continuous job's tasks are not retried on failure ([#5691](https://github.com/databricks/cli/pull/5691)). * Add documentation for the common bundle resource fields `permissions`, `lifecycle`, and `grants` in the JSON schema, so they surface in editor completions and the docs. * `bundle run` now prints the modern job run URL (`/jobs//runs/`) so that non-admin users permitted to view the run are taken to the run instead of the workspace homepage. * References to a registered model's `registered_model_id` now resolve under the direct engine, matching Terraform behavior ([#5621](https://github.com/databricks/cli/pull/5621)). diff --git a/acceptance/bundle/validate/continuous_task_retry_mode/databricks.yml b/acceptance/bundle/validate/continuous_task_retry_mode/databricks.yml new file mode 100644 index 00000000000..edb0de3f531 --- /dev/null +++ b/acceptance/bundle/validate/continuous_task_retry_mode/databricks.yml @@ -0,0 +1,25 @@ +bundle: + name: test-bundle + +resources: + jobs: + # Continuous job without task_retry_mode: should warn (defaults to NEVER). + continuous_no_retry: + name: continuous_no_retry + continuous: + pause_status: UNPAUSED + tasks: + - task_key: run_task + run_job_task: + job_id: 1234 + + # Continuous job with task_retry_mode set: should not warn. + continuous_with_retry: + name: continuous_with_retry + continuous: + pause_status: UNPAUSED + task_retry_mode: ON_FAILURE + tasks: + - task_key: run_task + run_job_task: + job_id: 1234 diff --git a/acceptance/bundle/validate/continuous_task_retry_mode/script b/acceptance/bundle/validate/continuous_task_retry_mode/script new file mode 100644 index 00000000000..5350876150f --- /dev/null +++ b/acceptance/bundle/validate/continuous_task_retry_mode/script @@ -0,0 +1 @@ +trace $CLI bundle validate diff --git a/bundle/config/validate/continuous_task_retry_mode.go b/bundle/config/validate/continuous_task_retry_mode.go new file mode 100644 index 00000000000..e0297608ecc --- /dev/null +++ b/bundle/config/validate/continuous_task_retry_mode.go @@ -0,0 +1,56 @@ +package validate + +import ( + "context" + + "github.com/databricks/cli/bundle" + "github.com/databricks/cli/libs/diag" + "github.com/databricks/cli/libs/dyn" + "github.com/databricks/cli/libs/log" +) + +// ContinuousTaskRetryMode warns when a continuous job does not set task_retry_mode. +// task_retry_mode defaults to NEVER, so the tasks of a continuous job are not +// retried on failure unless it is explicitly set to ON_FAILURE. +func ContinuousTaskRetryMode() bundle.ReadOnlyMutator { + return &continuousTaskRetryMode{} +} + +type continuousTaskRetryMode struct{ bundle.RO } + +func (v *continuousTaskRetryMode) Name() string { + return "validate:continuous_task_retry_mode" +} + +const continuousTaskRetryWarningSummary = "Continuous job does not set task_retry_mode" + +const continuousTaskRetryWarningDetail = `task_retry_mode is not set on this continuous job, so it defaults to NEVER and the job's tasks are not retried on failure. +Set continuous.task_retry_mode to ON_FAILURE to enable task-level retries, or to NEVER to silence this warning.` + +func (v *continuousTaskRetryMode) Apply(ctx context.Context, b *bundle.Bundle) diag.Diagnostics { + diags := diag.Diagnostics{} + + pattern := dyn.NewPattern(dyn.Key("resources"), dyn.Key("jobs"), dyn.AnyKey(), dyn.Key("continuous")) + + _, err := dyn.MapByPattern(b.Config.Value(), pattern, func(p dyn.Path, continuous dyn.Value) (dyn.Value, error) { + // An explicit task_retry_mode (NEVER or ON_FAILURE) is a deliberate + // choice, so we don't warn. + if continuous.Get("task_retry_mode").IsValid() { + return continuous, nil + } + + diags = append(diags, diag.Diagnostic{ + Severity: diag.Warning, + Summary: continuousTaskRetryWarningSummary, + Detail: continuousTaskRetryWarningDetail, + Locations: continuous.Locations(), + Paths: []dyn.Path{p}, + }) + return continuous, nil + }) + if err != nil { + log.Debugf(ctx, "Error while applying continuous task retry mode validation: %s", err) + } + + return diags +} diff --git a/bundle/config/validate/continuous_task_retry_mode_test.go b/bundle/config/validate/continuous_task_retry_mode_test.go new file mode 100644 index 00000000000..9eac010733b --- /dev/null +++ b/bundle/config/validate/continuous_task_retry_mode_test.go @@ -0,0 +1,98 @@ +package validate + +import ( + "testing" + + "github.com/databricks/cli/bundle" + "github.com/databricks/cli/bundle/config" + "github.com/databricks/cli/bundle/config/resources" + "github.com/databricks/cli/bundle/internal/bundletest" + "github.com/databricks/cli/libs/diag" + "github.com/databricks/cli/libs/dyn" + "github.com/databricks/databricks-sdk-go/service/jobs" + "github.com/stretchr/testify/assert" +) + +func TestContinuousTaskRetryModeWarnsWhenUnset(t *testing.T) { + ctx := t.Context() + + b := &bundle.Bundle{ + Config: config.Root{ + Resources: config.Resources{ + Jobs: map[string]*resources.Job{ + "foo": { + JobSettings: jobs.JobSettings{ + Continuous: &jobs.Continuous{}, + }, + }, + }, + }, + }, + } + + bundletest.SetLocation(b, "resources.jobs.foo.continuous", []dyn.Location{{File: "a.yml", Line: 1, Column: 1}}) + + diags := ContinuousTaskRetryMode().Apply(ctx, b) + assert.Equal(t, diag.Diagnostics{ + { + Severity: diag.Warning, + Summary: continuousTaskRetryWarningSummary, + Detail: continuousTaskRetryWarningDetail, + Locations: []dyn.Location{{File: "a.yml", Line: 1, Column: 1}}, + Paths: []dyn.Path{dyn.MustPathFromString("resources.jobs.foo.continuous")}, + }, + }, diags) +} + +func TestContinuousTaskRetryModeNoWarningWhenSet(t *testing.T) { + ctx := t.Context() + + b := &bundle.Bundle{ + Config: config.Root{ + Resources: config.Resources{ + Jobs: map[string]*resources.Job{ + "foo": { + JobSettings: jobs.JobSettings{ + Continuous: &jobs.Continuous{ + TaskRetryMode: jobs.TaskRetryModeOnFailure, + }, + }, + }, + }, + }, + }, + } + + bundletest.SetLocation(b, "resources.jobs.foo.continuous", []dyn.Location{{File: "a.yml", Line: 1, Column: 1}}) + + diags := ContinuousTaskRetryMode().Apply(ctx, b) + assert.Empty(t, diags) +} + +func TestContinuousTaskRetryModeNoWarningWithoutContinuous(t *testing.T) { + ctx := t.Context() + + b := &bundle.Bundle{ + Config: config.Root{ + Resources: config.Resources{ + Jobs: map[string]*resources.Job{ + "foo": { + JobSettings: jobs.JobSettings{ + Tasks: []jobs.Task{ + { + TaskKey: "my_task", + NotebookTask: &jobs.NotebookTask{}, + }, + }, + }, + }, + }, + }, + }, + } + + bundletest.SetLocation(b, "resources.jobs.foo", []dyn.Location{{File: "a.yml", Line: 1, Column: 1}}) + + diags := ContinuousTaskRetryMode().Apply(ctx, b) + assert.Empty(t, diags) +} diff --git a/bundle/config/validate/fast_validate.go b/bundle/config/validate/fast_validate.go index d01eb8c1491..2ca7dbd6426 100644 --- a/bundle/config/validate/fast_validate.go +++ b/bundle/config/validate/fast_validate.go @@ -29,6 +29,7 @@ func (f *fastValidate) Apply(ctx context.Context, rb *bundle.Bundle) diag.Diagno // Fast mutators with only in-memory checks JobClusterKeyDefined(), JobTaskClusterSpec(), + ContinuousTaskRetryMode(), // Blocking mutators. Deployments will fail if these checks fail. ValidateArtifactPath(),