From 472321d71ac9df5dddd82a3d4a9d250acc3e9d66 Mon Sep 17 00:00:00 2001 From: Ilya Kuznetsov Date: Thu, 11 Jun 2026 13:36:43 +0000 Subject: [PATCH 1/7] Add telemetry for bundle config-remote-sync Co-authored-by: Isaac --- .../config-remote-sync-error/databricks.yml | 11 ++ .../config-remote-sync-error/out.test.toml | 3 + .../config-remote-sync-error/output.txt | 11 ++ .../telemetry/config-remote-sync-error/script | 7 + .../config-remote-sync-error/test.toml | 4 + .../config-remote-sync/databricks.yml | 11 ++ .../config-remote-sync/out.test.toml | 3 + .../telemetry/config-remote-sync/output.txt | 29 ++++ .../telemetry/config-remote-sync/script | 12 ++ .../telemetry/config-remote-sync/test.toml | 4 + bundle/configsync/diff.go | 4 +- bundle/configsync/telemetry.go | 149 ++++++++++++++++++ bundle/configsync/telemetry_test.go | 96 +++++++++++ bundle/configsync/variables.go | 31 ++-- bundle/configsync/variables_test.go | 8 +- cmd/bundle/config_remote_sync.go | 32 +++- .../protos/bundle_config_remote_sync.go | 73 +++++++++ libs/telemetry/protos/frontend_log.go | 9 +- 18 files changed, 476 insertions(+), 21 deletions(-) create mode 100644 acceptance/bundle/telemetry/config-remote-sync-error/databricks.yml create mode 100644 acceptance/bundle/telemetry/config-remote-sync-error/out.test.toml create mode 100644 acceptance/bundle/telemetry/config-remote-sync-error/output.txt create mode 100644 acceptance/bundle/telemetry/config-remote-sync-error/script create mode 100644 acceptance/bundle/telemetry/config-remote-sync-error/test.toml create mode 100644 acceptance/bundle/telemetry/config-remote-sync/databricks.yml create mode 100644 acceptance/bundle/telemetry/config-remote-sync/out.test.toml create mode 100644 acceptance/bundle/telemetry/config-remote-sync/output.txt create mode 100644 acceptance/bundle/telemetry/config-remote-sync/script create mode 100644 acceptance/bundle/telemetry/config-remote-sync/test.toml create mode 100644 bundle/configsync/telemetry.go create mode 100644 bundle/configsync/telemetry_test.go create mode 100644 libs/telemetry/protos/bundle_config_remote_sync.go diff --git a/acceptance/bundle/telemetry/config-remote-sync-error/databricks.yml b/acceptance/bundle/telemetry/config-remote-sync-error/databricks.yml new file mode 100644 index 00000000000..c0c9f880163 --- /dev/null +++ b/acceptance/bundle/telemetry/config-remote-sync-error/databricks.yml @@ -0,0 +1,11 @@ +bundle: + name: config-remote-sync-telemetry-error + +resources: + jobs: + foo: + name: test job + tasks: + - task_key: main + notebook_task: + notebook_path: /Workspace/Users/tester@databricks.com/notebook diff --git a/acceptance/bundle/telemetry/config-remote-sync-error/out.test.toml b/acceptance/bundle/telemetry/config-remote-sync-error/out.test.toml new file mode 100644 index 00000000000..65156e0457c --- /dev/null +++ b/acceptance/bundle/telemetry/config-remote-sync-error/out.test.toml @@ -0,0 +1,3 @@ +Local = true +Cloud = false +EnvMatrix.DATABRICKS_BUNDLE_ENGINE = ["terraform"] diff --git a/acceptance/bundle/telemetry/config-remote-sync-error/output.txt b/acceptance/bundle/telemetry/config-remote-sync-error/output.txt new file mode 100644 index 00000000000..03e421aaa45 --- /dev/null +++ b/acceptance/bundle/telemetry/config-remote-sync-error/output.txt @@ -0,0 +1,11 @@ + +>>> errcode [CLI] bundle config-remote-sync +Error: failed to detect changes: state snapshot not available: resources state snapshot not found remotely at resources-config-sync-snapshot.json: state snapshot not found + +Exit code: 1 + +>>> cat out.requests.txt +{ + "engine": "terraform", + "error_category": "STATE_NOT_FOUND" +} diff --git a/acceptance/bundle/telemetry/config-remote-sync-error/script b/acceptance/bundle/telemetry/config-remote-sync-error/script new file mode 100644 index 00000000000..e48fb6ff123 --- /dev/null +++ b/acceptance/bundle/telemetry/config-remote-sync-error/script @@ -0,0 +1,7 @@ +# Running config-remote-sync without a prior deploy: the state snapshot does +# not exist, so the command fails and telemetry reports STATE_NOT_FOUND. +trace errcode $CLI bundle config-remote-sync + +trace cat out.requests.txt | jq 'select(has("path") and .path == "/telemetry-ext") | .body.protoLogs[] | fromjson | .entry.databricks_cli_log.bundle_config_remote_sync_event | select(. != null)' + +rm out.requests.txt diff --git a/acceptance/bundle/telemetry/config-remote-sync-error/test.toml b/acceptance/bundle/telemetry/config-remote-sync-error/test.toml new file mode 100644 index 00000000000..a7617afecd5 --- /dev/null +++ b/acceptance/bundle/telemetry/config-remote-sync-error/test.toml @@ -0,0 +1,4 @@ +# Running without a prior deploy only fails with STATE_NOT_FOUND on the +# terraform engine (the snapshot pull path); pin the matrix accordingly. +[EnvMatrix] + DATABRICKS_BUNDLE_ENGINE = ["terraform"] diff --git a/acceptance/bundle/telemetry/config-remote-sync/databricks.yml b/acceptance/bundle/telemetry/config-remote-sync/databricks.yml new file mode 100644 index 00000000000..543dc4129bd --- /dev/null +++ b/acceptance/bundle/telemetry/config-remote-sync/databricks.yml @@ -0,0 +1,11 @@ +bundle: + name: config-remote-sync-telemetry + +resources: + jobs: + foo: + name: test job + tasks: + - task_key: main + notebook_task: + notebook_path: /Workspace/Users/tester@databricks.com/notebook diff --git a/acceptance/bundle/telemetry/config-remote-sync/out.test.toml b/acceptance/bundle/telemetry/config-remote-sync/out.test.toml new file mode 100644 index 00000000000..e90b6d5d1ba --- /dev/null +++ b/acceptance/bundle/telemetry/config-remote-sync/out.test.toml @@ -0,0 +1,3 @@ +Local = true +Cloud = false +EnvMatrix.DATABRICKS_BUNDLE_ENGINE = ["direct"] diff --git a/acceptance/bundle/telemetry/config-remote-sync/output.txt b/acceptance/bundle/telemetry/config-remote-sync/output.txt new file mode 100644 index 00000000000..8886bae291a --- /dev/null +++ b/acceptance/bundle/telemetry/config-remote-sync/output.txt @@ -0,0 +1,29 @@ + +>>> [CLI] bundle deploy +Uploading bundle files to /Workspace/Users/[USERNAME]/.bundle/config-remote-sync-telemetry/default/files... +Deploying resources... +Updating deployment state... +Deployment complete! + +>>> [CLI] bundle config-remote-sync +Detected changes in 1 resource(s): + +Resource: resources.jobs.foo + name: replace + + + +>>> cat out.requests.txt +{ + "engine": "direct", + "changes_total": 1, + "replace_count": 1, + "resource_changes": [ + { + "resource_type": "jobs", + "changes_count": 1, + "replace_count": 1 + } + ], + "files_changed_count": 1 +} diff --git a/acceptance/bundle/telemetry/config-remote-sync/script b/acceptance/bundle/telemetry/config-remote-sync/script new file mode 100644 index 00000000000..18247c96ce6 --- /dev/null +++ b/acceptance/bundle/telemetry/config-remote-sync/script @@ -0,0 +1,12 @@ +trace $CLI bundle deploy + +job_id="$(read_id.py foo)" +edit_resource.py jobs $job_id < Date: Mon, 15 Jun 2026 13:29:42 +0000 Subject: [PATCH 2/7] config-remote-sync telemetry: add scrubbed error_message, trim ref counters Co-authored-by: Isaac --- .../config-remote-sync-error/output.txt | 1 + bundle/configsync/telemetry.go | 10 +++++----- bundle/configsync/telemetry_test.go | 4 ++-- bundle/configsync/variables.go | 2 -- bundle/phases/telemetry.go | 9 +-------- cmd/bundle/config_remote_sync.go | 8 ++++++-- .../protos/bundle_config_remote_sync.go | 12 ++++++++---- .../telemetry/scrub.go | 16 +++++++++++++++- .../telemetry/scrub_test.go | 2 +- 9 files changed, 39 insertions(+), 25 deletions(-) rename bundle/phases/telemetry_scrub.go => libs/telemetry/scrub.go (91%) rename bundle/phases/telemetry_scrub_test.go => libs/telemetry/scrub_test.go (99%) diff --git a/acceptance/bundle/telemetry/config-remote-sync-error/output.txt b/acceptance/bundle/telemetry/config-remote-sync-error/output.txt index 03e421aaa45..42525087c62 100644 --- a/acceptance/bundle/telemetry/config-remote-sync-error/output.txt +++ b/acceptance/bundle/telemetry/config-remote-sync-error/output.txt @@ -7,5 +7,6 @@ Exit code: 1 >>> cat out.requests.txt { "engine": "terraform", + "error_message": "failed to detect changes: state snapshot not available: resources state snapshot not found remotely at resources-config-sync-snapshot.json: state snapshot not found", "error_category": "STATE_NOT_FOUND" } diff --git a/bundle/configsync/telemetry.go b/bundle/configsync/telemetry.go index a756d7fb9c9..2792f861b63 100644 --- a/bundle/configsync/telemetry.go +++ b/bundle/configsync/telemetry.go @@ -38,13 +38,14 @@ type Stats struct { RawValuesWithVarSyntax int64 + ErrorMessage string ErrorCategory protos.BundleConfigRemoteSyncErrorCategory } -// RestoreStats counts variable-reference restorations by mechanism. +// RestoreStats counts the variable-reference restorations that can leak a +// current-target-scoped reference into a shared file. The safe paths (kept / +// compound-realigned) are not counted. type RestoreStats struct { - Kept int64 - Compound int64 Retargeted int64 FromSiblings int64 } @@ -138,11 +139,10 @@ func (s *Stats) LogTelemetry(ctx context.Context) { ResourceChanges: resourceChanges, FilesChangedCount: s.FilesChangedCount, FilesWrittenCount: s.FilesWrittenCount, - RefsKept: s.Restore.Kept, - RefsCompound: s.Restore.Compound, RefsRetargeted: s.Restore.Retargeted, RefsFromSiblings: s.Restore.FromSiblings, RawValuesWithVarSyntax: s.RawValuesWithVarSyntax, + ErrorMessage: s.ErrorMessage, ErrorCategory: s.ErrorCategory, }, }) diff --git a/bundle/configsync/telemetry_test.go b/bundle/configsync/telemetry_test.go index 550102a3afd..cb48f6108a2 100644 --- a/bundle/configsync/telemetry_test.go +++ b/bundle/configsync/telemetry_test.go @@ -67,11 +67,11 @@ func TestRestoreStatsCounters(t *testing.T) { }), }) - // Original pure ref still matching: counted as kept. + // Original pure ref still matching: restored but not counted (safe path). var kept RestoreStats result := restoreOriginalRefs("us-east-1", dyn.V("${var.region}"), resolved, &kept) assert.Equal(t, "${var.region}", result) - assert.Equal(t, RestoreStats{Kept: 1}, kept) + assert.Equal(t, RestoreStats{}, kept) // Pure ref whose value changed to another variable's value: re-targeted. var retargeted RestoreStats diff --git a/bundle/configsync/variables.go b/bundle/configsync/variables.go index d28eee3c7b6..67305c92444 100644 --- a/bundle/configsync/variables.go +++ b/bundle/configsync/variables.go @@ -240,12 +240,10 @@ func restoreOriginalRefs(value any, preResolved, resolved dyn.Value, stats *Rest switch v := value.(type) { case string, bool, int64: if ref, ok := matchOriginalRef(value, preResolved, resolved); ok { - stats.Kept++ return ref } if s, ok := value.(string); ok { if restored, ok := restoreCompoundInterpolation(s, preResolved, resolved); ok { - stats.Compound++ return restored } } diff --git a/bundle/phases/telemetry.go b/bundle/phases/telemetry.go index cac25eceee1..912c51a7d6b 100644 --- a/bundle/phases/telemetry.go +++ b/bundle/phases/telemetry.go @@ -34,16 +34,9 @@ func getExecutionTimes(b *bundle.Bundle) []protos.IntMapEntry { return executionTimes } -// Maximum length of the error message included in telemetry. -const maxErrorMessageLength = 500 - // LogDeployTelemetry logs a telemetry event for a bundle deploy command. func LogDeployTelemetry(ctx context.Context, b *bundle.Bundle, errMsg string) { - errMsg = scrubForTelemetry(errMsg) - - if len(errMsg) > maxErrorMessageLength { - errMsg = errMsg[:maxErrorMessageLength] - } + errMsg = telemetry.ScrubErrorMessage(errMsg) resourcesCount := int64(0) _, err := dyn.MapByPattern(b.Config.Value(), dyn.NewPattern(dyn.Key("resources"), dyn.AnyKey(), dyn.AnyKey()), func(p dyn.Path, v dyn.Value) (dyn.Value, error) { diff --git a/cmd/bundle/config_remote_sync.go b/cmd/bundle/config_remote_sync.go index 9825b59764b..b5289984972 100644 --- a/cmd/bundle/config_remote_sync.go +++ b/cmd/bundle/config_remote_sync.go @@ -15,6 +15,7 @@ import ( "github.com/databricks/cli/libs/cmdctx" "github.com/databricks/cli/libs/flags" "github.com/databricks/cli/libs/log" + "github.com/databricks/cli/libs/telemetry" "github.com/databricks/cli/libs/telemetry/protos" "github.com/spf13/cobra" ) @@ -126,8 +127,11 @@ Examples: return nil }, }) - if err != nil && stats.ErrorCategory == "" { - stats.ErrorCategory = protos.BundleConfigRemoteSyncErrorCategoryBundleLoadFailed + if err != nil { + if stats.ErrorCategory == "" { + stats.ErrorCategory = protos.BundleConfigRemoteSyncErrorCategoryBundleLoadFailed + } + stats.ErrorMessage = telemetry.ScrubErrorMessage(err.Error()) } return err } diff --git a/libs/telemetry/protos/bundle_config_remote_sync.go b/libs/telemetry/protos/bundle_config_remote_sync.go index 0db83b601d8..9e8c1d95561 100644 --- a/libs/telemetry/protos/bundle_config_remote_sync.go +++ b/libs/telemetry/protos/bundle_config_remote_sync.go @@ -4,7 +4,6 @@ type BundleConfigRemoteSyncErrorCategory string const ( BundleConfigRemoteSyncErrorCategoryUnspecified BundleConfigRemoteSyncErrorCategory = "TYPE_UNSPECIFIED" - BundleConfigRemoteSyncErrorCategoryUnsupportedOS BundleConfigRemoteSyncErrorCategory = "UNSUPPORTED_OS" BundleConfigRemoteSyncErrorCategoryBundleLoadFailed BundleConfigRemoteSyncErrorCategory = "BUNDLE_LOAD_FAILED" BundleConfigRemoteSyncErrorCategoryStateNotFound BundleConfigRemoteSyncErrorCategory = "STATE_NOT_FOUND" BundleConfigRemoteSyncErrorCategoryDetectChangesFailed BundleConfigRemoteSyncErrorCategory = "DETECT_CHANGES_FAILED" @@ -45,9 +44,9 @@ type BundleConfigRemoteSyncEvent struct { FilesChangedCount int64 `json:"files_changed_count,omitempty"` FilesWrittenCount int64 `json:"files_written_count,omitempty"` - // Variable-reference restoration statistics. - RefsKept int64 `json:"refs_kept,omitempty"` - RefsCompound int64 `json:"refs_compound,omitempty"` + // Variable-reference restoration counts for the two mechanisms that can + // write a current-target-scoped reference into a shared file (the source of + // the cross-target "reference does not exist" failures). RefsRetargeted int64 `json:"refs_retargeted,omitempty"` RefsFromSiblings int64 `json:"refs_from_siblings,omitempty"` @@ -55,6 +54,11 @@ type BundleConfigRemoteSyncEvent struct { // character sequence "${". The values themselves are not logged. RawValuesWithVarSyntax int64 `json:"raw_values_with_var_syntax,omitempty"` + // Scrubbed, truncated summary of the failure when the command exits with an + // error. Privileged free-text (DATA_LABEL_USER_COMMANDS_RESPONSE, LPP-5543); + // stays in-region and is stripped from centralized logfood. Unset on success. + ErrorMessage string `json:"error_message,omitempty"` + // Category of the failure when the command exits with an error. // Unset on success. ErrorCategory BundleConfigRemoteSyncErrorCategory `json:"error_category,omitempty"` diff --git a/bundle/phases/telemetry_scrub.go b/libs/telemetry/scrub.go similarity index 91% rename from bundle/phases/telemetry_scrub.go rename to libs/telemetry/scrub.go index 051c81bd75b..582526b93a7 100644 --- a/bundle/phases/telemetry_scrub.go +++ b/libs/telemetry/scrub.go @@ -1,4 +1,4 @@ -package phases +package telemetry import ( "path" @@ -6,6 +6,20 @@ import ( "strings" ) +// Maximum length of an error message included in telemetry. +const maxErrorMessageLength = 500 + +// ScrubErrorMessage scrubs sensitive paths and PII from an error message and +// truncates it to maxErrorMessageLength, producing a value safe to attach to a +// telemetry event. The result is still treated as privileged data in-region. +func ScrubErrorMessage(msg string) string { + msg = scrubForTelemetry(msg) + if len(msg) > maxErrorMessageLength { + msg = msg[:maxErrorMessageLength] + } + return msg +} + // Scrub sensitive information from error messages before sending to telemetry. // Inspired by VS Code's telemetry path scrubbing and Sentry's @userpath pattern. // diff --git a/bundle/phases/telemetry_scrub_test.go b/libs/telemetry/scrub_test.go similarity index 99% rename from bundle/phases/telemetry_scrub_test.go rename to libs/telemetry/scrub_test.go index 556b3783ed3..9ae21cad3ed 100644 --- a/bundle/phases/telemetry_scrub_test.go +++ b/libs/telemetry/scrub_test.go @@ -1,4 +1,4 @@ -package phases +package telemetry import ( "testing" From e500bf02572df7129d20fe2e7d8f7bb8c597cbc7 Mon Sep 17 00:00:00 2001 From: Ilya Kuznetsov Date: Mon, 15 Jun 2026 13:55:14 +0000 Subject: [PATCH 3/7] config-remote-sync telemetry: add recreate_forcing_changes + overwritten_local_edits, drop raw_values_with_var_syntax Co-authored-by: Isaac --- bundle/configsync/diff.go | 15 ++++ bundle/configsync/telemetry.go | 71 +++++++++++-------- bundle/configsync/telemetry_test.go | 31 ++++---- .../protos/bundle_config_remote_sync.go | 14 ++-- 4 files changed, 83 insertions(+), 48 deletions(-) diff --git a/bundle/configsync/diff.go b/bundle/configsync/diff.go index 95ff76e57a9..a7d65774c1e 100644 --- a/bundle/configsync/diff.go +++ b/bundle/configsync/diff.go @@ -18,6 +18,7 @@ import ( "github.com/databricks/cli/libs/dyn" "github.com/databricks/cli/libs/dyn/convert" "github.com/databricks/cli/libs/log" + "github.com/databricks/cli/libs/structs/structdiff" ) type OperationType string @@ -33,6 +34,12 @@ const ( type ConfigChangeDesc struct { Operation OperationType `json:"operation"` Value any `json:"value,omitempty"` // Normalized remote value (nil for remove operations) + + // LocalEdit reports that the local config value for this field differs from + // the last-deployed state, so applying this change overwrites a not-yet- + // deployed local edit. Telemetry-only; direct engine only (the terraform + // sync snapshot has no per-field base). Not part of the command output. + LocalEdit bool `json:"-"` } type ResourceChanges map[string]*ConfigChangeDesc @@ -167,6 +174,14 @@ func DetectChanges(ctx context.Context, b *bundle.Bundle, engine engine.EngineTy if change.Operation == OperationSkip { continue } + // On the direct engine the state snapshot holds real per-field + // values, so New != Old means the local config diverged from the + // last deploy and this change overwrites that pending edit. The + // terraform sync snapshot stores empty per-resource state, so this + // comparison is meaningless there and is skipped. + if engine.IsDirect() && !structdiff.IsEqual(changeDesc.Old, changeDesc.New) { + change.LocalEdit = true + } change.Value = stripNamePrefix(fullPath, change.Value, b.Config.Presets.NamePrefix) resourceChanges[path] = change } diff --git a/bundle/configsync/telemetry.go b/bundle/configsync/telemetry.go index 2792f861b63..cbc8ce70d81 100644 --- a/bundle/configsync/telemetry.go +++ b/bundle/configsync/telemetry.go @@ -7,6 +7,8 @@ import ( "strings" "github.com/databricks/cli/bundle/config/engine" + "github.com/databricks/cli/bundle/direct/dresources" + "github.com/databricks/cli/libs/structs/structpath" "github.com/databricks/cli/libs/telemetry" "github.com/databricks/cli/libs/telemetry/protos" ) @@ -31,13 +33,14 @@ type Stats struct { // "resources.jobs.foo". PerResourceType map[string]*protos.BundleConfigRemoteSyncResourceChanges + RecreateForcingChanges int64 + OverwrittenLocalEdits int64 + FilesChangedCount int64 FilesWrittenCount int64 Restore RestoreStats - RawValuesWithVarSyntax int64 - ErrorMessage string ErrorCategory protos.BundleConfigRemoteSyncErrorCategory } @@ -57,15 +60,16 @@ func (s *Stats) CollectChangeStats(changes Changes) { s.PerResourceType = make(map[string]*protos.BundleConfigRemoteSyncResourceChanges) } for resourceKey, resourceChanges := range changes { - perType := s.PerResourceType[resourceTypeFromKey(resourceKey)] + resourceType := resourceTypeFromKey(resourceKey) + perType := s.PerResourceType[resourceType] if perType == nil { - perType = &protos.BundleConfigRemoteSyncResourceChanges{ResourceType: resourceTypeFromKey(resourceKey)} - s.PerResourceType[perType.ResourceType] = perType + perType = &protos.BundleConfigRemoteSyncResourceChanges{ResourceType: resourceType} + s.PerResourceType[resourceType] = perType } // Only Add/Replace/Remove reach this function: DetectChanges filters // out Skip operations and convertChangeDesc never produces Unknown, // so the totals always equal the per-operation breakdown. - for _, change := range resourceChanges { + for fieldPath, change := range resourceChanges { s.ChangesTotal++ perType.ChangesCount++ switch change.Operation { @@ -80,11 +84,39 @@ func (s *Stats) CollectChangeStats(changes Changes) { perType.RemoveCount++ case OperationUnknown, OperationSkip: } - s.RawValuesWithVarSyntax += countVarSyntax(change.Value) + if isRecreateForcing(resourceType, fieldPath) { + s.RecreateForcingChanges++ + } + if change.LocalEdit { + s.OverwrittenLocalEdits++ + } } } } +// isRecreateForcing reports whether a change to fieldPath on resourceType lands +// on an immutable field (recreate_on_changes), meaning the next deploy will +// delete+recreate the resource. It consults both the hand-written and the +// spec-generated lifecycle configs, matching bundle plan behavior. +func isRecreateForcing(resourceType, fieldPath string) bool { + node, err := structpath.ParsePath(fieldPath) + if err != nil { + return false + } + for _, cfg := range []*dresources.Config{dresources.MustLoadConfig(), dresources.MustLoadGeneratedConfig()} { + rc, ok := cfg.Resources[resourceType] + if !ok { + continue + } + for _, rule := range rc.RecreateOnChanges { + if node.HasPatternPrefix(rule.Field) { + return true + } + } + } + return false +} + // resourceTypeFromKey extracts the resource type from a change key like // "resources.jobs.foo". Only the type segment is recorded; resource keys are // never logged. @@ -96,28 +128,6 @@ func resourceTypeFromKey(resourceKey string) string { return parts[1] } -// countVarSyntax counts string leaves containing the literal "${" sequence. -// Such values are written to YAML verbatim and are subject to interpolation -// on the next deploy, so they measure exposure to escaping issues. -func countVarSyntax(value any) int64 { - var n int64 - switch v := value.(type) { - case string: - if strings.Contains(v, "${") { - n++ - } - case map[string]any: - for _, val := range v { - n += countVarSyntax(val) - } - case []any: - for _, val := range v { - n += countVarSyntax(val) - } - } - return n -} - // LogTelemetry emits the BundleConfigRemoteSyncEvent for this run. func (s *Stats) LogTelemetry(ctx context.Context) { resourceChanges := make([]protos.BundleConfigRemoteSyncResourceChanges, 0, len(s.PerResourceType)) @@ -136,12 +146,13 @@ func (s *Stats) LogTelemetry(ctx context.Context) { AddCount: s.AddCount, ReplaceCount: s.ReplaceCount, RemoveCount: s.RemoveCount, + RecreateForcingChanges: s.RecreateForcingChanges, + OverwrittenLocalEdits: s.OverwrittenLocalEdits, ResourceChanges: resourceChanges, FilesChangedCount: s.FilesChangedCount, FilesWrittenCount: s.FilesWrittenCount, RefsRetargeted: s.Restore.Retargeted, RefsFromSiblings: s.Restore.FromSiblings, - RawValuesWithVarSyntax: s.RawValuesWithVarSyntax, ErrorMessage: s.ErrorMessage, ErrorCategory: s.ErrorCategory, }, diff --git a/bundle/configsync/telemetry_test.go b/bundle/configsync/telemetry_test.go index cb48f6108a2..78bc0f7677e 100644 --- a/bundle/configsync/telemetry_test.go +++ b/bundle/configsync/telemetry_test.go @@ -11,7 +11,7 @@ func TestCollectChangeStats(t *testing.T) { changes := Changes{ "resources.jobs.foo": { "name": {Operation: OperationReplace, Value: "new name"}, - "tasks[0].notebook_task": {Operation: OperationAdd, Value: map[string]any{"base_parameters": map[string]any{"p": "${workspace.file_path}/x"}}}, + "tasks[0].notebook_task": {Operation: OperationAdd, Value: map[string]any{"base_parameters": map[string]any{"p": "x"}}}, "timeout_seconds": {Operation: OperationRemove}, }, "resources.jobs.bar": { @@ -20,16 +20,22 @@ func TestCollectChangeStats(t *testing.T) { "resources.dashboards.dash": { "etag": {Operation: OperationAdd, Value: "123456"}, }, + // pipelines.storage is recreate_on_changes (immutable); also mark it as + // overwriting a local edit. + "resources.pipelines.pipe": { + "storage": {Operation: OperationReplace, Value: "s3://new", LocalEdit: true}, + }, } var stats Stats stats.CollectChangeStats(changes) - assert.Equal(t, int64(5), stats.ChangesTotal) + assert.Equal(t, int64(6), stats.ChangesTotal) assert.Equal(t, int64(2), stats.AddCount) - assert.Equal(t, int64(2), stats.ReplaceCount) + assert.Equal(t, int64(3), stats.ReplaceCount) assert.Equal(t, int64(1), stats.RemoveCount) - assert.Equal(t, int64(1), stats.RawValuesWithVarSyntax) + assert.Equal(t, int64(1), stats.RecreateForcingChanges) + assert.Equal(t, int64(1), stats.OverwrittenLocalEdits) jobs := stats.PerResourceType["jobs"] assert.Equal(t, int64(4), jobs.ChangesCount) @@ -42,6 +48,13 @@ func TestCollectChangeStats(t *testing.T) { assert.Equal(t, int64(1), dashboards.AddCount) } +func TestIsRecreateForcing(t *testing.T) { + assert.True(t, isRecreateForcing("pipelines", "storage")) + assert.False(t, isRecreateForcing("pipelines", "configuration")) + assert.False(t, isRecreateForcing("jobs", "name")) + assert.False(t, isRecreateForcing("unknown", "storage")) +} + func TestResourceTypeFromKey(t *testing.T) { assert.Equal(t, "jobs", resourceTypeFromKey("resources.jobs.foo")) assert.Equal(t, "dashboards", resourceTypeFromKey("resources.dashboards.a.b")) @@ -49,16 +62,6 @@ func TestResourceTypeFromKey(t *testing.T) { assert.Equal(t, "unknown", resourceTypeFromKey("resources")) } -func TestCountVarSyntax(t *testing.T) { - assert.Equal(t, int64(0), countVarSyntax("plain")) - assert.Equal(t, int64(1), countVarSyntax("${var.x}")) - assert.Equal(t, int64(1), countVarSyntax("prefix ${ suffix")) - assert.Equal(t, int64(2), countVarSyntax(map[string]any{ - "a": "${var.x}", - "b": []any{"${incomplete", "ok", int64(5)}, - })) -} - func TestRestoreStatsCounters(t *testing.T) { resolved := dyn.V(map[string]dyn.Value{ "variables": dyn.V(map[string]dyn.Value{ diff --git a/libs/telemetry/protos/bundle_config_remote_sync.go b/libs/telemetry/protos/bundle_config_remote_sync.go index 9e8c1d95561..466ef3af0f9 100644 --- a/libs/telemetry/protos/bundle_config_remote_sync.go +++ b/libs/telemetry/protos/bundle_config_remote_sync.go @@ -36,6 +36,16 @@ type BundleConfigRemoteSyncEvent struct { ReplaceCount int64 `json:"replace_count,omitempty"` RemoveCount int64 `json:"remove_count,omitempty"` + // Number of detected changes on fields the resource lifecycle metadata + // marks recreate_on_changes (immutable): syncing these into config means the + // next deploy will delete+recreate the resource (a data-loss risk). + RecreateForcingChanges int64 `json:"recreate_forcing_changes,omitempty"` + + // Number of detected changes that overwrite a not-yet-deployed local config + // edit (the local value differs from the last-deployed state). Direct engine + // only; the terraform sync snapshot has no per-field base to compare against. + OverwrittenLocalEdits int64 `json:"overwritten_local_edits,omitempty"` + // One entry per resource type that has at least one detected change. ResourceChanges []BundleConfigRemoteSyncResourceChanges `json:"resource_changes,omitempty"` @@ -50,10 +60,6 @@ type BundleConfigRemoteSyncEvent struct { RefsRetargeted int64 `json:"refs_retargeted,omitempty"` RefsFromSiblings int64 `json:"refs_from_siblings,omitempty"` - // Number of detected remote string values that contain the literal - // character sequence "${". The values themselves are not logged. - RawValuesWithVarSyntax int64 `json:"raw_values_with_var_syntax,omitempty"` - // Scrubbed, truncated summary of the failure when the command exits with an // error. Privileged free-text (DATA_LABEL_USER_COMMANDS_RESPONSE, LPP-5543); // stays in-region and is stripped from centralized logfood. Unset on success. From d9557985012db0fa6f573a71fd7b171b60e93c1d Mon Sep 17 00:00:00 2001 From: Ilya Kuznetsov Date: Mon, 15 Jun 2026 15:39:12 +0000 Subject: [PATCH 4/7] config-remote-sync telemetry: skip acceptance tests on Windows (command unsupported) Co-authored-by: Isaac --- .../telemetry/config-remote-sync-error/out.test.toml | 1 + .../telemetry/config-remote-sync-error/test.toml | 9 +++++++-- .../bundle/telemetry/config-remote-sync/out.test.toml | 1 + .../bundle/telemetry/config-remote-sync/test.toml | 11 +++++++++-- 4 files changed, 18 insertions(+), 4 deletions(-) diff --git a/acceptance/bundle/telemetry/config-remote-sync-error/out.test.toml b/acceptance/bundle/telemetry/config-remote-sync-error/out.test.toml index 65156e0457c..4e136c6838f 100644 --- a/acceptance/bundle/telemetry/config-remote-sync-error/out.test.toml +++ b/acceptance/bundle/telemetry/config-remote-sync-error/out.test.toml @@ -1,3 +1,4 @@ Local = true Cloud = false +GOOS.windows = false EnvMatrix.DATABRICKS_BUNDLE_ENGINE = ["terraform"] diff --git a/acceptance/bundle/telemetry/config-remote-sync-error/test.toml b/acceptance/bundle/telemetry/config-remote-sync-error/test.toml index a7617afecd5..ecfc085a640 100644 --- a/acceptance/bundle/telemetry/config-remote-sync-error/test.toml +++ b/acceptance/bundle/telemetry/config-remote-sync-error/test.toml @@ -1,4 +1,9 @@ -# Running without a prior deploy only fails with STATE_NOT_FOUND on the -# terraform engine (the snapshot pull path); pin the matrix accordingly. +# config-remote-sync is not supported on Windows. +[GOOS] +windows = false + +# STATE_NOT_FOUND is a terraform-only path: the terraform engine pulls a config +# snapshot that doesn't exist without a prior deploy, while the direct engine +# does not. Pin the engine so the test exercises that specific failure. [EnvMatrix] DATABRICKS_BUNDLE_ENGINE = ["terraform"] diff --git a/acceptance/bundle/telemetry/config-remote-sync/out.test.toml b/acceptance/bundle/telemetry/config-remote-sync/out.test.toml index e90b6d5d1ba..f7c4cf648a9 100644 --- a/acceptance/bundle/telemetry/config-remote-sync/out.test.toml +++ b/acceptance/bundle/telemetry/config-remote-sync/out.test.toml @@ -1,3 +1,4 @@ Local = true Cloud = false +GOOS.windows = false EnvMatrix.DATABRICKS_BUNDLE_ENGINE = ["direct"] diff --git a/acceptance/bundle/telemetry/config-remote-sync/test.toml b/acceptance/bundle/telemetry/config-remote-sync/test.toml index 32d160d3358..79939fcb273 100644 --- a/acceptance/bundle/telemetry/config-remote-sync/test.toml +++ b/acceptance/bundle/telemetry/config-remote-sync/test.toml @@ -1,4 +1,11 @@ -# The telemetry event includes the engine name, so outputs diverge between -# engines; pin to direct (the default for new bundles). +# config-remote-sync is not supported on Windows. +[GOOS] +windows = false + +# Pinned to the direct engine. The terraform path of config-remote-sync reads +# the experimental yaml-sync state snapshot, which only the cloud test harness +# supports (all terraform config-remote-sync coverage lives in the Cloud=true +# acceptance/bundle/config-remote-sync tests). Direct is the default engine for +# new bundles, and the telemetry is engine-agnostic above the state layer. [EnvMatrix] DATABRICKS_BUNDLE_ENGINE = ["direct"] From bff5c53fe30d6cfb37751ca72fccd2d6783bdc52 Mon Sep 17 00:00:00 2001 From: Ilya Kuznetsov Date: Tue, 16 Jun 2026 16:29:16 +0000 Subject: [PATCH 5/7] config-remote-sync telemetry: acceptance test for save + overwritten_local_edits Co-authored-by: Isaac --- .../config-remote-sync-save/databricks.yml | 11 +++++++ .../config-remote-sync-save/out.test.toml | 4 +++ .../config-remote-sync-save/output.txt | 32 +++++++++++++++++++ .../telemetry/config-remote-sync-save/script | 12 +++++++ .../config-remote-sync-save/test.toml | 11 +++++++ 5 files changed, 70 insertions(+) create mode 100644 acceptance/bundle/telemetry/config-remote-sync-save/databricks.yml create mode 100644 acceptance/bundle/telemetry/config-remote-sync-save/out.test.toml create mode 100644 acceptance/bundle/telemetry/config-remote-sync-save/output.txt create mode 100644 acceptance/bundle/telemetry/config-remote-sync-save/script create mode 100644 acceptance/bundle/telemetry/config-remote-sync-save/test.toml diff --git a/acceptance/bundle/telemetry/config-remote-sync-save/databricks.yml b/acceptance/bundle/telemetry/config-remote-sync-save/databricks.yml new file mode 100644 index 00000000000..80d05fe88aa --- /dev/null +++ b/acceptance/bundle/telemetry/config-remote-sync-save/databricks.yml @@ -0,0 +1,11 @@ +bundle: + name: config-remote-sync-save + +resources: + jobs: + foo: + name: test job + tasks: + - task_key: main + notebook_task: + notebook_path: /Workspace/Users/tester@databricks.com/notebook diff --git a/acceptance/bundle/telemetry/config-remote-sync-save/out.test.toml b/acceptance/bundle/telemetry/config-remote-sync-save/out.test.toml new file mode 100644 index 00000000000..f7c4cf648a9 --- /dev/null +++ b/acceptance/bundle/telemetry/config-remote-sync-save/out.test.toml @@ -0,0 +1,4 @@ +Local = true +Cloud = false +GOOS.windows = false +EnvMatrix.DATABRICKS_BUNDLE_ENGINE = ["direct"] diff --git a/acceptance/bundle/telemetry/config-remote-sync-save/output.txt b/acceptance/bundle/telemetry/config-remote-sync-save/output.txt new file mode 100644 index 00000000000..ccefb19ce1b --- /dev/null +++ b/acceptance/bundle/telemetry/config-remote-sync-save/output.txt @@ -0,0 +1,32 @@ + +>>> [CLI] bundle deploy +Uploading bundle files to /Workspace/Users/[USERNAME]/.bundle/config-remote-sync-save/default/files... +Deploying resources... +Updating deployment state... +Deployment complete! + +>>> [CLI] bundle config-remote-sync --save +Detected changes in 1 resource(s): + +Resource: resources.jobs.foo + name: replace + + + +>>> cat out.requests.txt +{ + "save": true, + "engine": "direct", + "changes_total": 1, + "replace_count": 1, + "overwritten_local_edits": 1, + "resource_changes": [ + { + "resource_type": "jobs", + "changes_count": 1, + "replace_count": 1 + } + ], + "files_changed_count": 1, + "files_written_count": 1 +} diff --git a/acceptance/bundle/telemetry/config-remote-sync-save/script b/acceptance/bundle/telemetry/config-remote-sync-save/script new file mode 100644 index 00000000000..0434f3dc882 --- /dev/null +++ b/acceptance/bundle/telemetry/config-remote-sync-save/script @@ -0,0 +1,12 @@ +trace $CLI bundle deploy + +# Edit the job name locally (not deployed) so the local config diverges from the +# deployed state. config-remote-sync --save then overwrites that pending edit, +# exercising save, files_written_count, and overwritten_local_edits. +update_file.py databricks.yml "test job" "locally edited job name" + +trace $CLI bundle config-remote-sync --save + +trace cat out.requests.txt | jq 'select(has("path") and .path == "/telemetry-ext") | .body.protoLogs[] | fromjson | .entry.databricks_cli_log.bundle_config_remote_sync_event | select(. != null)' + +rm out.requests.txt diff --git a/acceptance/bundle/telemetry/config-remote-sync-save/test.toml b/acceptance/bundle/telemetry/config-remote-sync-save/test.toml new file mode 100644 index 00000000000..5f5f32dc258 --- /dev/null +++ b/acceptance/bundle/telemetry/config-remote-sync-save/test.toml @@ -0,0 +1,11 @@ +# --save rewrites databricks.yml; it's a mutated input, not asserted output. +Ignore = ["databricks.yml", "databricks.yml.backup"] + +# config-remote-sync is not supported on Windows. +[GOOS] +windows = false + +# overwritten_local_edits is computed from the per-field deployed base, which +# only the direct engine materializes (the terraform sync snapshot is empty). +[EnvMatrix] + DATABRICKS_BUNDLE_ENGINE = ["direct"] From fb393a7e11541f4ee01499dabdc6364263e9b2d9 Mon Sep 17 00:00:00 2001 From: Ilya Kuznetsov Date: Tue, 16 Jun 2026 19:10:32 +0200 Subject: [PATCH 6/7] config-remote-sync telemetry: end-to-end coverage for the remaining counters The unit tests cover add_count/remove_count, recreate_forcing_changes, and the variable-restoration counts, but they were missing acceptance coverage. These scenarios all round-trip against the local mock testserver, so they don't need a cloud test. - job_fields: assert the telemetry event (add_count/remove_count + per-resource breakdown). The remote edits already add and remove keyed fields. - resolve_variables: assert refs_retargeted (env param re-targeted to a different variable) and refs_from_siblings (added params/tasks restored from siblings). - New telemetry/config-remote-sync-recreate: a remote change to the immutable ingestion_definition.connection_name makes recreate_forcing_changes non-zero. storage (the other immutable pipeline field) can't be used because configsync always skips it as a backend-generated default. The two enriched tests run on both engines, so the asserted event drops the engine name (every other counter is engine-agnostic); RecordRequests is enabled and out.requests.txt is removed in cleanup so it isn't compared. --- .../config-remote-sync/job_fields/output.txt | 22 ++++++++++++++ .../config-remote-sync/job_fields/script | 9 ++++++ .../config-remote-sync/job_fields/test.toml | 5 +++- .../resolve_variables/output.txt | 29 ++++++++++++++++++ .../resolve_variables/script | 10 +++++++ .../resolve_variables/test.toml | 6 +++- .../databricks.yml | 16 ++++++++++ .../config-remote-sync-recreate/out.test.toml | 4 +++ .../config-remote-sync-recreate/output.txt | 30 +++++++++++++++++++ .../config-remote-sync-recreate/script | 15 ++++++++++ .../config-remote-sync-recreate/test.toml | 9 ++++++ 11 files changed, 153 insertions(+), 2 deletions(-) create mode 100644 acceptance/bundle/telemetry/config-remote-sync-recreate/databricks.yml create mode 100644 acceptance/bundle/telemetry/config-remote-sync-recreate/out.test.toml create mode 100644 acceptance/bundle/telemetry/config-remote-sync-recreate/output.txt create mode 100644 acceptance/bundle/telemetry/config-remote-sync-recreate/script create mode 100644 acceptance/bundle/telemetry/config-remote-sync-recreate/test.toml diff --git a/acceptance/bundle/config-remote-sync/job_fields/output.txt b/acceptance/bundle/config-remote-sync/job_fields/output.txt index 56c26de52a1..7e0b7fe07d4 100644 --- a/acceptance/bundle/config-remote-sync/job_fields/output.txt +++ b/acceptance/bundle/config-remote-sync/job_fields/output.txt @@ -86,6 +86,28 @@ Resource: resources.jobs.my_job targets: +=== Telemetry + +>>> cat out.requests.txt +{ + "save": true, + "changes_total": 12, + "add_count": 6, + "replace_count": 4, + "remove_count": 2, + "resource_changes": [ + { + "resource_type": "jobs", + "changes_count": 12, + "add_count": 6, + "replace_count": 4, + "remove_count": 2 + } + ], + "files_changed_count": 1, + "files_written_count": 1 +} + >>> [CLI] bundle destroy --auto-approve The following resources will be deleted: delete resources.jobs.my_job diff --git a/acceptance/bundle/config-remote-sync/job_fields/script b/acceptance/bundle/config-remote-sync/job_fields/script index 264304d7345..ed2d6dd93ea 100755 --- a/acceptance/bundle/config-remote-sync/job_fields/script +++ b/acceptance/bundle/config-remote-sync/job_fields/script @@ -6,6 +6,8 @@ add_repl.py "$NODE_TYPE_ID" "[NODE_TYPE_ID]" cleanup() { trace $CLI bundle destroy --auto-approve + # destroy records requests too; drop the recorded file so it is not compared. + rm -f out.requests.txt } trap cleanup EXIT @@ -63,3 +65,10 @@ title "Configuration changes" echo trace diff.py databricks.yml.backup databricks.yml rm databricks.yml.backup + +title "Telemetry" +echo +# The engine name is dropped because both EnvMatrix variants share this output; +# every other counter is engine-agnostic. This run exercises add_count and +# remove_count (the remote edits add and remove keyed fields). +trace cat out.requests.txt | jq 'select(has("path") and .path == "/telemetry-ext") | .body.protoLogs[] | fromjson | .entry.databricks_cli_log.bundle_config_remote_sync_event | select(. != null) | del(.engine)' diff --git a/acceptance/bundle/config-remote-sync/job_fields/test.toml b/acceptance/bundle/config-remote-sync/job_fields/test.toml index 625c660c617..2e303f0382a 100644 --- a/acceptance/bundle/config-remote-sync/job_fields/test.toml +++ b/acceptance/bundle/config-remote-sync/job_fields/test.toml @@ -1,7 +1,10 @@ Cloud = true RequiresUnityCatalog = true -RecordRequests = false +# RecordRequests captures the config-remote-sync telemetry event so the script +# can assert the per-operation change counts (add_count/remove_count/...). +# out.requests.txt is removed in the script's cleanup so it is not compared. +RecordRequests = true Ignore = [".databricks", "dummy.whl", "databricks.yml", "databricks.yml.backup"] [Env] diff --git a/acceptance/bundle/config-remote-sync/resolve_variables/output.txt b/acceptance/bundle/config-remote-sync/resolve_variables/output.txt index 520745de4ed..388f7ab3bbf 100644 --- a/acceptance/bundle/config-remote-sync/resolve_variables/output.txt +++ b/acceptance/bundle/config-remote-sync/resolve_variables/output.txt @@ -108,6 +108,35 @@ Resource: resources.pipelines.my_pipeline targets: +=== Telemetry + +>>> cat out.requests.txt +{ + "save": true, + "changes_total": 17, + "add_count": 10, + "replace_count": 6, + "remove_count": 1, + "resource_changes": [ + { + "resource_type": "jobs", + "changes_count": 16, + "add_count": 10, + "replace_count": 5, + "remove_count": 1 + }, + { + "resource_type": "pipelines", + "changes_count": 1, + "replace_count": 1 + } + ], + "files_changed_count": 1, + "files_written_count": 1, + "refs_retargeted": 1, + "refs_from_siblings": 7 +} + >>> [CLI] bundle destroy --auto-approve The following resources will be deleted: delete resources.jobs.my_job diff --git a/acceptance/bundle/config-remote-sync/resolve_variables/script b/acceptance/bundle/config-remote-sync/resolve_variables/script index 449fa9407f2..9a18bfe2d50 100755 --- a/acceptance/bundle/config-remote-sync/resolve_variables/script +++ b/acceptance/bundle/config-remote-sync/resolve_variables/script @@ -4,6 +4,8 @@ envsubst < databricks.yml.tmpl > databricks.yml cleanup() { trace $CLI bundle destroy --auto-approve + # destroy records requests too; drop the recorded file so it is not compared. + rm -f out.requests.txt } trap cleanup EXIT @@ -127,3 +129,11 @@ title "Configuration changes" echo trace diff.py databricks.yml.backup databricks.yml rm databricks.yml.backup + +title "Telemetry" +echo +# The engine name is dropped because both EnvMatrix variants share this output; +# every other counter is engine-agnostic. This run exercises refs_retargeted +# (the env param re-targeted to a different variable) and refs_from_siblings +# (added params/tasks restored from sibling references). +trace cat out.requests.txt | jq 'select(has("path") and .path == "/telemetry-ext") | .body.protoLogs[] | fromjson | .entry.databricks_cli_log.bundle_config_remote_sync_event | select(. != null) | del(.engine)' diff --git a/acceptance/bundle/config-remote-sync/resolve_variables/test.toml b/acceptance/bundle/config-remote-sync/resolve_variables/test.toml index 3174477f7d2..fff2325bf22 100644 --- a/acceptance/bundle/config-remote-sync/resolve_variables/test.toml +++ b/acceptance/bundle/config-remote-sync/resolve_variables/test.toml @@ -1,7 +1,11 @@ Cloud = true RequiresUnityCatalog = true -RecordRequests = false +# RecordRequests captures the config-remote-sync telemetry event so the script +# can assert the variable-reference restoration counts (refs_retargeted / +# refs_from_siblings). out.requests.txt is removed in the script's cleanup so it +# is not compared. +RecordRequests = true Ignore = [".databricks", "databricks.yml", "databricks.yml.backup"] [Env] diff --git a/acceptance/bundle/telemetry/config-remote-sync-recreate/databricks.yml b/acceptance/bundle/telemetry/config-remote-sync-recreate/databricks.yml new file mode 100644 index 00000000000..ba8a696ceb8 --- /dev/null +++ b/acceptance/bundle/telemetry/config-remote-sync-recreate/databricks.yml @@ -0,0 +1,16 @@ +bundle: + name: config-remote-sync-recreate + +resources: + pipelines: + foo: + name: test pipeline + # ingestion_definition.connection_name is immutable (recreate_on_changes): + # a remote change to it means the next deploy would delete + recreate the + # pipeline. (storage, the other immutable pipeline field, is skipped by + # configsync because the backend auto-generates it, so it can't be used + # here.) + ingestion_definition: + connection_name: my_connection + objects: + - {} diff --git a/acceptance/bundle/telemetry/config-remote-sync-recreate/out.test.toml b/acceptance/bundle/telemetry/config-remote-sync-recreate/out.test.toml new file mode 100644 index 00000000000..f7c4cf648a9 --- /dev/null +++ b/acceptance/bundle/telemetry/config-remote-sync-recreate/out.test.toml @@ -0,0 +1,4 @@ +Local = true +Cloud = false +GOOS.windows = false +EnvMatrix.DATABRICKS_BUNDLE_ENGINE = ["direct"] diff --git a/acceptance/bundle/telemetry/config-remote-sync-recreate/output.txt b/acceptance/bundle/telemetry/config-remote-sync-recreate/output.txt new file mode 100644 index 00000000000..c002f652ed3 --- /dev/null +++ b/acceptance/bundle/telemetry/config-remote-sync-recreate/output.txt @@ -0,0 +1,30 @@ + +>>> [CLI] bundle deploy +Uploading bundle files to /Workspace/Users/[USERNAME]/.bundle/config-remote-sync-recreate/default/files... +Deploying resources... +Updating deployment state... +Deployment complete! + +>>> [CLI] bundle config-remote-sync +Detected changes in 1 resource(s): + +Resource: resources.pipelines.foo + ingestion_definition.connection_name: replace + + + +>>> cat out.requests.txt +{ + "engine": "direct", + "changes_total": 1, + "replace_count": 1, + "recreate_forcing_changes": 1, + "resource_changes": [ + { + "resource_type": "pipelines", + "changes_count": 1, + "replace_count": 1 + } + ], + "files_changed_count": 1 +} diff --git a/acceptance/bundle/telemetry/config-remote-sync-recreate/script b/acceptance/bundle/telemetry/config-remote-sync-recreate/script new file mode 100644 index 00000000000..eb6c1773888 --- /dev/null +++ b/acceptance/bundle/telemetry/config-remote-sync-recreate/script @@ -0,0 +1,15 @@ +trace $CLI bundle deploy + +pipeline_id="$(read_id.py foo)" + +# Change the immutable ingestion_definition.connection_name remotely. +# config-remote-sync would sync this into config, so recreate_forcing_changes is 1. +edit_resource.py pipelines $pipeline_id < Date: Wed, 17 Jun 2026 10:07:53 +0200 Subject: [PATCH 7/7] config-remote-sync telemetry: never fail the command on telemetry errors Telemetry is best-effort and must not break config-remote-sync: - CollectChangeStats and LogTelemetry now defer a recover() that logs at debug and swallows any panic (e.g. the dresources.MustLoad* lifecycle-config loaders called from isRecreateForcing, or anything during proto build/upload). - RestoreStats counters are incremented through nil-safe incRetargeted / incFromSiblings methods, so threading the pointer through the deep restoration recursion can never nil-panic; the now-redundant nil guard in RestoreVariableReferences is removed. --- bundle/configsync/telemetry.go | 29 ++++++++++++++++++++++++++++- bundle/configsync/telemetry_test.go | 11 ++++++++++- bundle/configsync/variables.go | 10 ++++------ cmd/bundle/config_remote_sync.go | 2 +- 4 files changed, 43 insertions(+), 9 deletions(-) diff --git a/bundle/configsync/telemetry.go b/bundle/configsync/telemetry.go index cbc8ce70d81..fe776fd9fc9 100644 --- a/bundle/configsync/telemetry.go +++ b/bundle/configsync/telemetry.go @@ -8,11 +8,21 @@ import ( "github.com/databricks/cli/bundle/config/engine" "github.com/databricks/cli/bundle/direct/dresources" + "github.com/databricks/cli/libs/log" "github.com/databricks/cli/libs/structs/structpath" "github.com/databricks/cli/libs/telemetry" "github.com/databricks/cli/libs/telemetry/protos" ) +// recoverTelemetry swallows any panic raised while collecting or emitting +// telemetry. Telemetry is best-effort and must never fail the command, so every +// telemetry entry point that runs in the command path defers this. +func recoverTelemetry(ctx context.Context) { + if r := recover(); r != nil { + log.Debugf(ctx, "config-remote-sync telemetry panicked and was skipped: %v", r) + } +} + // ErrStateSnapshotNotFound indicates the deployed state snapshot required for // change detection does not exist (the bundle was likely never deployed). var ErrStateSnapshotNotFound = errors.New("state snapshot not found") @@ -53,9 +63,25 @@ type RestoreStats struct { FromSiblings int64 } +// incRetargeted and incFromSiblings are nil-safe so that threading the pointer +// through the deep restoration recursion (or passing nil when counters are not +// needed) can never panic the command. +func (s *RestoreStats) incRetargeted() { + if s != nil { + s.Retargeted++ + } +} + +func (s *RestoreStats) incFromSiblings() { + if s != nil { + s.FromSiblings++ + } +} + // CollectChangeStats fills change counters from the raw (pre-restoration) // detected changes. -func (s *Stats) CollectChangeStats(changes Changes) { +func (s *Stats) CollectChangeStats(ctx context.Context, changes Changes) { + defer recoverTelemetry(ctx) if s.PerResourceType == nil { s.PerResourceType = make(map[string]*protos.BundleConfigRemoteSyncResourceChanges) } @@ -130,6 +156,7 @@ func resourceTypeFromKey(resourceKey string) string { // LogTelemetry emits the BundleConfigRemoteSyncEvent for this run. func (s *Stats) LogTelemetry(ctx context.Context) { + defer recoverTelemetry(ctx) resourceChanges := make([]protos.BundleConfigRemoteSyncResourceChanges, 0, len(s.PerResourceType)) for _, perType := range s.PerResourceType { resourceChanges = append(resourceChanges, *perType) diff --git a/bundle/configsync/telemetry_test.go b/bundle/configsync/telemetry_test.go index 78bc0f7677e..d2a3da537f8 100644 --- a/bundle/configsync/telemetry_test.go +++ b/bundle/configsync/telemetry_test.go @@ -28,7 +28,7 @@ func TestCollectChangeStats(t *testing.T) { } var stats Stats - stats.CollectChangeStats(changes) + stats.CollectChangeStats(t.Context(), changes) assert.Equal(t, int64(6), stats.ChangesTotal) assert.Equal(t, int64(2), stats.AddCount) @@ -97,3 +97,12 @@ func TestRestoreStatsCounters(t *testing.T) { assert.Equal(t, "hardcoded", result) assert.Equal(t, RestoreStats{}, none) } + +func TestRestoreStatsCountersNilSafe(t *testing.T) { + // Telemetry must never panic the command: counting on a nil pointer is a no-op. + var s *RestoreStats + assert.NotPanics(t, func() { + s.incRetargeted() + s.incFromSiblings() + }) +} diff --git a/bundle/configsync/variables.go b/bundle/configsync/variables.go index 67305c92444..4f02dac6537 100644 --- a/bundle/configsync/variables.go +++ b/bundle/configsync/variables.go @@ -52,11 +52,9 @@ var varPrefix = dyn.NewPath(dyn.Key("var")) // strings — enabling correct sibling lookup even for sequences split across // files via target overrides. // Restoration counts by mechanism are accumulated into stats (used for -// telemetry); pass nil when counters are not needed. +// telemetry); pass nil when counters are not needed (the counter methods are +// nil-safe). func RestoreVariableReferences(ctx context.Context, b *bundle.Bundle, fieldChanges []FieldChange, stats *RestoreStats) error { - if stats == nil { - stats = &RestoreStats{} - } preResolved := loadPreResolvedConfig(ctx, b) if !preResolved.IsValid() { return errors.New("pre-resolved config unavailable; variable-backed fields will be hardcoded") @@ -249,7 +247,7 @@ func restoreOriginalRefs(value any, preResolved, resolved dyn.Value, stats *Rest } if isPureVarRef(preResolved) { if ref, ok := matchAnyVariable(value, resolved); ok { - stats.Retargeted++ + stats.incRetargeted() return ref } } @@ -331,7 +329,7 @@ func restoreFromSiblingsAt(value any, siblings []dyn.Value, resolved dyn.Value, } if len(refs) == 1 { for ref := range refs { - stats.FromSiblings++ + stats.incFromSiblings() return ref } } diff --git a/cmd/bundle/config_remote_sync.go b/cmd/bundle/config_remote_sync.go index b5289984972..2192852497c 100644 --- a/cmd/bundle/config_remote_sync.go +++ b/cmd/bundle/config_remote_sync.go @@ -79,7 +79,7 @@ Examples: } return fmt.Errorf("failed to detect changes: %w", err) } - stats.CollectChangeStats(changes) + stats.CollectChangeStats(ctx, changes) fieldChanges, err := configsync.ResolveChanges(ctx, b, changes) if err != nil {