Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
bundle:
name: test-bundle-$UNIQUE_NAME

targets:
default:
mode: development

resources:
jobs:
job1:
max_concurrent_runs: 1
job_clusters:
- job_cluster_key: shared
new_cluster:
spark_version: $DEFAULT_SPARK_VERSION
node_type_id: $NODE_TYPE_ID
num_workers: 1
tasks:
- task_key: shared_cluster_task
job_cluster_key: shared
notebook_task:
notebook_path: /Users/{{workspace_user_name}}/notebook
- task_key: own_cluster_task
notebook_task:
notebook_path: /Users/{{workspace_user_name}}/notebook
new_cluster:
spark_version: $DEFAULT_SPARK_VERSION
node_type_id: $NODE_TYPE_ID
num_workers: 1

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
Uploading bundle files to /Workspace/Users/[USERNAME]/.bundle/test-bundle-[UNIQUE_NAME]/default/files...
Deploying resources...
Updating deployment state...
Deployment complete!

=== Simulate a cluster policy injecting custom_tags and cluster_log_conf
These fields exist only remotely (the user never set them in config).
A legitimate edit (max_concurrent_runs) is made too, to show real edits still sync.

=== Detect and save all changes
Detected changes in 1 resource(s):

Resource: resources.jobs.job1
max_concurrent_runs: replace



=== Configuration changes

>>> diff.py databricks.yml.backup databricks.yml
--- databricks.yml.backup
+++ databricks.yml
@@ -9,5 +9,5 @@
jobs:
job1:
- max_concurrent_runs: 1
+ max_concurrent_runs: 5
job_clusters:
- job_cluster_key: shared

>>> [CLI] bundle destroy --auto-approve
The following resources will be deleted:
delete resources.jobs.job1

All files and directories at the following location will be deleted: /Workspace/Users/[USERNAME]/.bundle/test-bundle-[UNIQUE_NAME]/default

Deleting files...
Destroy complete!
Original file line number Diff line number Diff line change
@@ -0,0 +1,44 @@
#!/bin/bash
#
# Acceptance test: fields that exist only remotely on job clusters
# (custom_tags, cluster_log_conf) must not be written back into databricks.yml
# by `bundle config-remote-sync --save`, while a genuine remote edit
# (max_concurrent_runs) must still be synced.
#
# `title`, `trace`, `read_id.py`, `edit_resource.py` and `diff.py` are not
# defined in this script; they are expected to be provided by the test harness.

# Render the bundle config from the template, substituting environment variables.
envsubst < databricks.yml.tmpl > databricks.yml

# Always destroy the deployed bundle on exit, including when a step fails.
cleanup() {
    trace $CLI bundle destroy --auto-approve
}
trap cleanup EXIT

$CLI bundle deploy
job1_id="$(read_id.py job1)"

title "Simulate a cluster policy injecting custom_tags and cluster_log_conf"
echo
echo "These fields exist only remotely (the user never set them in config)."
echo "A legitimate edit (max_concurrent_runs) is made too, to show real edits still sync."
# The heredoc delimiter is quoted so the shell passes the Python snippet through
# verbatim (no variable or command expansion). The job id is quoted so it is
# always passed as exactly one argument.
edit_resource.py jobs "$job1_id" <<'EOF'
r["max_concurrent_runs"] = 5

# A DBFS destination is used instead of S3: the real backend rejects an S3
# cluster log destination without a per-cluster instance-profile ARN, and the
# field's value is irrelevant here — only that it exists remotely and not in config.
policy_tags = {"CostCenter": "dev-1234", "Team": "finops"}
policy_log_conf = {"dbfs": {"destination": "dbfs:/cluster-logs/dev"}}

# Task-level clusters: only tasks that define their own new_cluster are touched;
# tasks that reference a shared job cluster have no new_cluster key.
for task in r.get("tasks", []):
    if "new_cluster" in task:
        task["new_cluster"]["custom_tags"] = dict(policy_tags)
        task["new_cluster"]["cluster_log_conf"] = dict(policy_log_conf)

# Shared job clusters: every entry gets the injected fields.
for jc in r.get("job_clusters", []):
    jc["new_cluster"]["custom_tags"] = dict(policy_tags)
    jc["new_cluster"]["cluster_log_conf"] = dict(policy_log_conf)
EOF

title "Detect and save all changes"
echo
# Keep a copy of the pre-sync config so the resulting change can be diffed below.
cp databricks.yml databricks.yml.backup
$CLI bundle config-remote-sync --save

title "Configuration changes"
echo
trace diff.py databricks.yml.backup databricks.yml
rm databricks.yml.backup
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
# Test settings for the config-remote-sync "remote-only cluster fields" scenario.

# NOTE(review): presumably marks this test as runnable against a real cloud
# workspace — confirm against the test harness documentation.
Cloud = true

# NOTE(review): presumably disables recording of API requests into the
# expected output — confirm against the test harness documentation.
RecordRequests = false
# Files produced while the script runs: databricks.yml is rendered from the
# template and databricks.yml.backup is a temporary copy made before syncing.
Ignore = [".databricks", "databricks.yml", "databricks.yml.backup"]

# Environment variables set for the script.
[Env]
DATABRICKS_BUNDLE_ENABLE_EXPERIMENTAL_YAML_SYNC = "true"

# NOTE(review): presumably the test is executed once per listed value — verify.
[EnvMatrix]
DATABRICKS_BUNDLE_ENGINE = ["direct", "terraform"]
20 changes: 20 additions & 0 deletions bundle/configsync/defaults.go
Original file line number Diff line number Diff line change
Expand Up @@ -11,12 +11,18 @@ type (
skipIfEmptyOrDefault struct {
defaults map[string]any
}
// skipBackendDefault skips a field, regardless of its remote value, when it is
// absent from config. Used for fields the backend or a cluster policy may fill
// in: their remote-only value must not be synced into config. Fields the user
// does manage (present in config) still sync normally.
skipBackendDefault struct{}
)

// Sentinel values stored in serverSideDefaults in place of a literal default.
// shouldSkipField inspects the sentinel's type to decide whether a remote-only
// field is left out of the sync; a field present in config is never skipped.
var (
// alwaysSkip marks fields such as deprecated or computed cluster attributes.
// NOTE(review): its handling is outside this view; presumably the field is
// skipped for any remote value — confirm in shouldSkipField.
alwaysSkip = skipAlways{}
// zeroOrNil skips the field when its remote value is nil or int64(0).
zeroOrNil = skipIfZeroOrNil{}
// emptyEmailNotifications carries the default no_alert_for_skipped_runs=false.
// NOTE(review): presumably skips email notifications that are empty or only
// contain this default — confirm where skipIfEmptyOrDefault is handled.
emptyEmailNotifications = skipIfEmptyOrDefault{defaults: map[string]any{"no_alert_for_skipped_runs": false}}
// backendDefault skips the field for any remote value when it is absent from
// config; used for values the backend or a cluster policy may fill in.
backendDefault = skipBackendDefault{}
)

// serverSideDefaults contains all hardcoded server-side defaults.
Expand Down Expand Up @@ -54,6 +60,13 @@ var serverSideDefaults = map[string]any{
"resources.jobs.*.tasks[*].new_cluster.data_security_mode": "SINGLE_USER", // TODO this field is computed on some workspaces in integration tests, check why and if we can skip it
"resources.jobs.*.tasks[*].new_cluster.enable_elastic_disk": alwaysSkip, // deprecated field
"resources.jobs.*.tasks[*].new_cluster.single_user_name": alwaysSkip,
// custom_tags and cluster_log_conf are commonly injected by cluster policies
// when the user omits them, so they exist only remotely. Syncing them back leaks
// one environment's policy values into (often shared) config and breaks deploys in
// other environments. TODO: move to backend_defaults in resources.yml once
// configsync filtering is migrated to the direct engine lifecycle metadata.
"resources.jobs.*.tasks[*].new_cluster.custom_tags": backendDefault,
"resources.jobs.*.tasks[*].new_cluster.cluster_log_conf": backendDefault,

// Cluster fields (job_clusters)
"resources.jobs.*.job_clusters[*].new_cluster.aws_attributes": alwaysSkip,
Expand All @@ -62,6 +75,8 @@ var serverSideDefaults = map[string]any{
"resources.jobs.*.job_clusters[*].new_cluster.data_security_mode": "SINGLE_USER", // TODO this field is computed on some workspaces in integration tests, check why and if we can skip it
"resources.jobs.*.job_clusters[*].new_cluster.enable_elastic_disk": alwaysSkip, // deprecated field
"resources.jobs.*.job_clusters[*].new_cluster.single_user_name": alwaysSkip,
"resources.jobs.*.job_clusters[*].new_cluster.custom_tags": backendDefault, // see tasks[*].new_cluster.custom_tags
"resources.jobs.*.job_clusters[*].new_cluster.cluster_log_conf": backendDefault, // see tasks[*].new_cluster.cluster_log_conf

// Standalone cluster fields
"resources.clusters.*.aws_attributes": alwaysSkip,
Expand All @@ -71,6 +86,8 @@ var serverSideDefaults = map[string]any{
"resources.clusters.*.driver_node_type_id": alwaysSkip,
"resources.clusters.*.enable_elastic_disk": alwaysSkip,
"resources.clusters.*.single_user_name": alwaysSkip,
"resources.clusters.*.custom_tags": backendDefault, // see jobs.*.tasks[*].new_cluster.custom_tags
"resources.clusters.*.cluster_log_conf": backendDefault, // see jobs.*.tasks[*].new_cluster.cluster_log_conf

// Experiment fields
"resources.experiments.*.artifact_location": alwaysSkip,
Expand Down Expand Up @@ -118,6 +135,9 @@ func shouldSkipField(path string, value any, hasConfigValue bool) bool {
if hasConfigValue {
return false
}
if _, ok := expected.(skipBackendDefault); ok {
return true
}
if _, ok := expected.(skipIfZeroOrNil); ok {
return value == nil || value == int64(0)
}
Expand Down
Loading