From 4066dd65f0bec721d206a934347c134a5a55d4dd Mon Sep 17 00:00:00 2001 From: Shreyas Goenka Date: Mon, 15 Jun 2026 14:24:24 +0000 Subject: [PATCH 01/13] direct: store serialized_dashboard/serialized_space in state as content hashes The direct deploy engine persists the full planned config per resource in resources.json. For dashboards and genie spaces, that includes the inlined serialized_dashboard / serialized_space contents, which routinely run into the hundreds of KB to several MB. These fields are never read back from state: drift is detected via etag (they are ignore_remote_changes), and a deploy always sends the contents from the plan's new_state, never from saved state. Add an optional CompactState adapter method that replaces such equality-only fields with a "sha256:" content hash. The framework applies it both before persisting state and to every value entering the diff, so stored and compared values share one form and unchanged content still produces an equal-hash skip. The plan's new_state (sent to the API) and the raw remote_state snapshot keep full content. No state version bump: legacy full-content state is hashed on read for the comparison and rewritten compactly on the next save. Co-authored-by: Isaac --- .../out.state_after_bind.direct.json | 2 +- .../migrate/dashboards/out.new_state.json | 2 +- .../dashboards/out.plan_after_migrate.json | 6 +-- .../detect-change/out.plan.direct.json | 6 +-- .../dashboards/simple/out.plan.direct.json | 6 +-- .../out.plan.direct.json | 6 +-- .../genie_spaces/inline/out.plan.json | 6 +-- .../genie_spaces/version_migration/output.txt | 3 +- .../genie_spaces/version_migration/script | 9 ++-- bundle/direct/apply.go | 18 +++++-- bundle/direct/bind.go | 11 +++- bundle/direct/bundle_apply.go | 2 +- bundle/direct/bundle_plan.go | 27 +++++++++- bundle/direct/dresources/README.md | 11 ++++ bundle/direct/dresources/adapter.go | 37 +++++++++++++ bundle/direct/dresources/dashboard.go | 16 ++++++ bundle/direct/dresources/dashboard_test.go | 47 ++++++++++++++++ bundle/direct/dresources/genie_space.go | 15 ++++++ bundle/direct/dresources/genie_space_test.go | 38 +++++++++++++ bundle/direct/dresources/state_compaction.go | 40 ++++++++++++++ .../dresources/state_compaction_test.go | 53 +++++++++++++++++++ 21 files changed, 331 insertions(+), 30 deletions(-) create mode 100644 bundle/direct/dresources/state_compaction.go create mode 100644 bundle/direct/dresources/state_compaction_test.go diff --git a/acceptance/bundle/deployment/bind/dashboard/recreation/out.state_after_bind.direct.json b/acceptance/bundle/deployment/bind/dashboard/recreation/out.state_after_bind.direct.json index 771dd3f908b..948a7e838a9 100644 --- a/acceptance/bundle/deployment/bind/dashboard/recreation/out.state_after_bind.direct.json +++ b/acceptance/bundle/deployment/bind/dashboard/recreation/out.state_after_bind.direct.json @@ -12,7 +12,7 @@ "etag": [ETAG], "parent_path": "/Workspace/Users/[USERNAME]/.bundle/test-bundle-[UNIQUE_NAME]/default/resources", "published": true, - "serialized_dashboard": "{\"pages\":[{\"displayName\":\"Page One\",\"name\":\"02724bf2\"}]}", + "serialized_dashboard": "sha256:[DASHBOARD_ID][DASHBOARD_ID]", "warehouse_id": "[TEST_DEFAULT_WAREHOUSE_ID]" } } diff --git a/acceptance/bundle/migrate/dashboards/out.new_state.json b/acceptance/bundle/migrate/dashboards/out.new_state.json index 695d14602a7..0cb70b8172c 100644 --- a/acceptance/bundle/migrate/dashboards/out.new_state.json +++ b/acceptance/bundle/migrate/dashboards/out.new_state.json @@ -12,7 +12,7 @@ "etag": "[NUMID]", "parent_path": "/Workspace/Users/[USERNAME]", "published": true, - "serialized_dashboard": "{\"pages\":[{\"name\":\"02724bf2\",\"displayName\":\"Dashboard test bundle-deploy-dashboard\"}]}\n", + "serialized_dashboard": "sha256:9dc171d249749a592c717bb13f2d948d2f1fa5530b82ac308a3df0c48d347ef3", "warehouse_id": "123456" } } diff --git a/acceptance/bundle/migrate/dashboards/out.plan_after_migrate.json b/acceptance/bundle/migrate/dashboards/out.plan_after_migrate.json index 6b55f64bd8d..3cdd9cc82f5 100644 --- a/acceptance/bundle/migrate/dashboards/out.plan_after_migrate.json +++ b/acceptance/bundle/migrate/dashboards/out.plan_after_migrate.json @@ -30,9 +30,9 @@ "serialized_dashboard": { "action": "skip", "reason": "etag_based", - "old": "{\"pages\":[{\"name\":\"02724bf2\",\"displayName\":\"Dashboard test bundle-deploy-dashboard\"}]}\n", - "new": "{\"pages\":[{\"name\":\"02724bf2\",\"displayName\":\"Dashboard test bundle-deploy-dashboard\"}]}\n", - "remote": "{\"pages\":[{\"displayName\":\"Dashboard test bundle-deploy-dashboard\",\"name\":\"02724bf2\",\"pageType\":\"PAGE_TYPE_CANVAS\"}]}\n" + "old": "sha256:9dc171d249749a592c717bb13f2d948d2f1fa5530b82ac308a3df0c48d347ef3", + "new": "sha256:9dc171d249749a592c717bb13f2d948d2f1fa5530b82ac308a3df0c48d347ef3", + "remote": "sha256:aae99d4284b3390b1b35974f7bd4c03603cacd6a5a4a838ca563f652257ab6a7" } } } diff --git a/acceptance/bundle/resources/dashboards/detect-change/out.plan.direct.json b/acceptance/bundle/resources/dashboards/detect-change/out.plan.direct.json index 7af15357c25..eaaafcb6c33 100644 --- a/acceptance/bundle/resources/dashboards/detect-change/out.plan.direct.json +++ b/acceptance/bundle/resources/dashboards/detect-change/out.plan.direct.json @@ -39,9 +39,9 @@ "serialized_dashboard": { "action": "skip", "reason": "etag_based", - "old": "{\n \"pages\": [\n {\n \"displayName\": \"New Page\",\n \"layout\": [\n {\n \"position\": {\n \"height\": 2,\n \"width\": 6,\n \"x\": 0,\n \"y\": 0\n },\n \"widget\": {\n \"name\": \"82eb9107\",\n \"textbox_spec\": \"# I'm a title\"\n }\n },\n {\n \"position\": {\n \"height\": 2,\n \"width\": 6,\n \"x\": 0,\n \"y\": 2\n },\n \"widget\": {\n \"name\": \"ffa6de4f\",\n \"textbox_spec\": \"Text\"\n }\n }\n ],\n \"name\": \"fdd21a3c\"\n }\n ]\n}\n", - "new": "{\n \"pages\": [\n {\n \"displayName\": \"New Page\",\n \"layout\": [\n {\n \"position\": {\n \"height\": 2,\n \"width\": 6,\n \"x\": 0,\n \"y\": 0\n },\n \"widget\": {\n \"name\": \"82eb9107\",\n \"textbox_spec\": \"# I'm a title\"\n }\n },\n {\n \"position\": {\n \"height\": 2,\n \"width\": 6,\n \"x\": 0,\n \"y\": 2\n },\n \"widget\": {\n \"name\": \"ffa6de4f\",\n \"textbox_spec\": \"Text\"\n }\n }\n ],\n \"name\": \"fdd21a3c\"\n }\n ]\n}\n", - "remote": "{}\n" + "old": "sha256:[ALPHANUMID]", + "new": "sha256:[ALPHANUMID]", + "remote": "sha256:[ALPHANUMID]" } } } diff --git a/acceptance/bundle/resources/dashboards/simple/out.plan.direct.json b/acceptance/bundle/resources/dashboards/simple/out.plan.direct.json index 59ab070e00e..aaade035814 100644 --- a/acceptance/bundle/resources/dashboards/simple/out.plan.direct.json +++ b/acceptance/bundle/resources/dashboards/simple/out.plan.direct.json @@ -30,9 +30,9 @@ "serialized_dashboard": { "action": "skip", "reason": "etag_based", - "old": "{ }\n", - "new": "{ }\n", - "remote": "{}\n" + "old": "sha256:cf207804e[NUMID]becbb5703765c4978cd7a8466dbdd[NUMID]e571ad09", + "new": "sha256:cf207804e[NUMID]becbb5703765c4978cd7a8466dbdd[NUMID]e571ad09", + "remote": "sha256:bb511d1e2723214ac1c3710d[NUMID]d57e31b0145d33d427d9628b209be38" } } } diff --git a/acceptance/bundle/resources/dashboards/unpublish-out-of-band/out.plan.direct.json b/acceptance/bundle/resources/dashboards/unpublish-out-of-band/out.plan.direct.json index 558a4ddcfd8..77174a8a0e9 100644 --- a/acceptance/bundle/resources/dashboards/unpublish-out-of-band/out.plan.direct.json +++ b/acceptance/bundle/resources/dashboards/unpublish-out-of-band/out.plan.direct.json @@ -46,9 +46,9 @@ "serialized_dashboard": { "action": "skip", "reason": "etag_based", - "old": "{\"pages\":[{\"displayName\":\"Test Dashboard\",\"name\":\"test-page\"}]}", - "new": "{\"pages\":[{\"displayName\":\"Test Dashboard\",\"name\":\"test-page\"}]}", - "remote": "{\"pages\":[{\"displayName\":\"Test Dashboard\",\"name\":\"test-page\",\"pageType\":\"PAGE_TYPE_CANVAS\"}]}" + "old": "sha256:a196fded8b5e0ebe5af2c6bed934d90fe055644de7773bbd84c851b577ff4f19", + "new": "sha256:a196fded8b5e0ebe5af2c6bed934d90fe055644de7773bbd84c851b577ff4f19", + "remote": "sha256:df70601c54be8ac2a66ac4a01dbd5224f4fa20ca9e8d15a615beab794cfdca08" } } } diff --git a/acceptance/bundle/resources/genie_spaces/inline/out.plan.json b/acceptance/bundle/resources/genie_spaces/inline/out.plan.json index 975688268b2..3569622b6d0 100644 --- a/acceptance/bundle/resources/genie_spaces/inline/out.plan.json +++ b/acceptance/bundle/resources/genie_spaces/inline/out.plan.json @@ -24,9 +24,9 @@ "serialized_space": { "action": "skip", "reason": "etag_based", - "old": "{\"config\":{\"sample_questions\":[{\"id\":\"sq-001\",\"question\":[\"What is the total revenue?\"]}]},\"data_sources\":{\"tables\":[{\"column_configs\":[{\"column_name\":\"amount\",\"get_example_values\":true}],\"identifier\":\"main.sales.orders\"}]},\"version\":1}", - "new": "{\"config\":{\"sample_questions\":[{\"id\":\"sq-001\",\"question\":[\"What is the total revenue?\"]}]},\"data_sources\":{\"tables\":[{\"column_configs\":[{\"column_name\":\"amount\",\"get_example_values\":true}],\"identifier\":\"main.sales.orders\"}]},\"version\":1}", - "remote": "{\"config\":{\"sample_questions\":[{\"id\":\"sq-001\",\"question\":[\"What is the total revenue?\"]}]},\"data_sources\":{\"tables\":[{\"column_configs\":[{\"column_name\":\"amount\",\"get_example_values\":true}],\"identifier\":\"main.sales.orders\"}]},\"version\":2}" + "old": "sha256:ea4b505f061a2b65db49891ea5cab6ac875350aa7a8afed4778860a50d8f4db5", + "new": "sha256:ea4b505f061a2b65db49891ea5cab6ac875350aa7a8afed4778860a50d8f4db5", + "remote": "sha256:8df899b73697f19d25ed51f3a97ddce65f97d61cd7fc3b402aef349eb2fe93e8" } } } diff --git a/acceptance/bundle/resources/genie_spaces/version_migration/output.txt b/acceptance/bundle/resources/genie_spaces/version_migration/output.txt index e554b79825c..74443ccd812 100644 --- a/acceptance/bundle/resources/genie_spaces/version_migration/output.txt +++ b/acceptance/bundle/resources/genie_spaces/version_migration/output.txt @@ -12,8 +12,7 @@ Plan: 0 to add, 0 to change, 0 to delete, 1 unchanged === serialized_space: local version 1, remote version 2, skipped (etag_based){ "action": "skip", "reason": "etag_based", - "local_version": 1, - "remote_version": 2 + "local_remote_differ": true } >>> [CLI] bundle destroy --auto-approve diff --git a/acceptance/bundle/resources/genie_spaces/version_migration/script b/acceptance/bundle/resources/genie_spaces/version_migration/script index 141efd5bef6..116d81a72c4 100644 --- a/acceptance/bundle/resources/genie_spaces/version_migration/script +++ b/acceptance/bundle/resources/genie_spaces/version_migration/script @@ -18,9 +18,12 @@ trace $CLI bundle plan # The mismatch the etag masks: the local config stays version 1 while the remote # (and the etag guarding it) is version 2. The serialized_space change is # recorded but skipped via the etag_based rule. +# +# serialized_space is stored in state as a content hash (it is never compared +# against the remote value), so the plan reports old/new/remote as hashes, not +# the full content. We assert that local and remote differ and that the change is +# skipped, rather than parsing version numbers out of the content. title "serialized_space: local version 1, remote version 2, skipped (etag_based)" $CLI bundle plan -o json \ | jq '.plan["resources.genie_spaces.foo"].changes.serialized_space - | {action, reason, - local_version: (.new | fromjson | .version), - remote_version: (.remote | fromjson | .version)}' + | {action, reason, local_remote_differ: (.new != .remote)}' diff --git a/bundle/direct/apply.go b/bundle/direct/apply.go index a4a61f727f2..14d85ed6d2d 100644 --- a/bundle/direct/apply.go +++ b/bundle/direct/apply.go @@ -50,6 +50,16 @@ func (d *DeploymentUnit) Deploy(ctx context.Context, db *dstate.DeploymentState, } } +// saveState compacts the state (replacing large equality-only fields with content +// hashes, see Adapter.CompactState) before persisting it. +func (d *DeploymentUnit) saveState(db *dstate.DeploymentState, id string, newState any) error { + compacted, err := d.Adapter.CompactState(newState) + if err != nil { + return fmt.Errorf("compacting state: %w", err) + } + return db.SaveState(d.ResourceKey, id, compacted, d.DependsOn) +} + func (d *DeploymentUnit) Create(ctx context.Context, db *dstate.DeploymentState, newState any) error { var newID string var remoteState any @@ -75,7 +85,7 @@ func (d *DeploymentUnit) Create(ctx context.Context, db *dstate.DeploymentState, return err } - err = db.SaveState(d.ResourceKey, newID, newState, d.DependsOn) + err = d.saveState(db, newID, newState) if err != nil { return fmt.Errorf("saving state after creating id=%s: %w", newID, err) } @@ -146,7 +156,7 @@ func (d *DeploymentUnit) Update(ctx context.Context, db *dstate.DeploymentState, return err } - err = db.SaveState(d.ResourceKey, id, newState, d.DependsOn) + err = d.saveState(db, id, newState) if err != nil { return fmt.Errorf("saving state id=%s: %w", id, err) } @@ -190,7 +200,7 @@ func (d *DeploymentUnit) UpdateWithID(ctx context.Context, db *dstate.Deployment return err } - err = db.SaveState(d.ResourceKey, newID, newState, d.DependsOn) + err = d.saveState(db, newID, newState) if err != nil { return fmt.Errorf("saving state id=%s: %w", oldID, err) } @@ -252,7 +262,7 @@ func (d *DeploymentUnit) Resize(ctx context.Context, db *dstate.DeploymentState, return fmt.Errorf("resizing id=%s: %w", id, err) } - err = db.SaveState(d.ResourceKey, id, newState, d.DependsOn) + err = d.saveState(db, id, newState) if err != nil { return fmt.Errorf("saving state id=%s: %w", id, err) } diff --git a/bundle/direct/bind.go b/bundle/direct/bind.go index 9760ce95666..1b8e84591da 100644 --- a/bundle/direct/bind.go +++ b/bundle/direct/bind.go @@ -145,13 +145,22 @@ func (b *DeploymentBundle) Bind(ctx context.Context, client *databricks.Workspac } } + adapter, err := b.getAdapterForKey(resourceKey) + if err != nil { + return nil, err + } + compacted, err := adapter.CompactState(sv.Value) + if err != nil { + return nil, fmt.Errorf("compacting state: %w", err) + } + err = b.StateDB.Open(ctx, tmpStatePath, dstate.WithRecovery(true), dstate.WithWrite(true)) if err != nil { os.Remove(tmpStatePath) return nil, err } - err = b.StateDB.SaveState(resourceKey, resourceID, sv.Value, dependsOn) + err = b.StateDB.SaveState(resourceKey, resourceID, compacted, dependsOn) if err != nil { os.Remove(tmpStatePath) return nil, err diff --git a/bundle/direct/bundle_apply.go b/bundle/direct/bundle_apply.go index a9981ee63d8..8cf4565f170 100644 --- a/bundle/direct/bundle_apply.go +++ b/bundle/direct/bundle_apply.go @@ -130,7 +130,7 @@ func (b *DeploymentBundle) Apply(ctx context.Context, client *databricks.Workspa logdiag.LogError(ctx, fmt.Errorf("state entry not found for %q", resourceKey)) return false } - err = b.StateDB.SaveState(resourceKey, id, sv.Value, entry.DependsOn) + err = d.saveState(&b.StateDB, id, sv.Value) } else { // TODO: redo calcDiff to downgrade planned action if possible (?) err = d.Deploy(ctx, &b.StateDB, sv.Value, action, entry) diff --git a/bundle/direct/bundle_plan.go b/bundle/direct/bundle_plan.go index 552d2067cc3..187c26cbf19 100644 --- a/bundle/direct/bundle_plan.go +++ b/bundle/direct/bundle_plan.go @@ -197,6 +197,15 @@ func (b *DeploymentBundle) CalculatePlan(ctx context.Context, client *databricks return false } + // Compact every value that enters the diff so they share one comparison form. + // For saved state this also hashes legacy full-content entries on the fly; the + // on-disk entry is rewritten compactly on the next save (lazy migration). + savedState, err = adapter.CompactState(savedState) + if err != nil { + logdiag.LogError(ctx, fmt.Errorf("%s: compacting saved state: %w", errorPrefix, err)) + return false + } + // Note, currently we're diffing static structs, not dynamic value. // This means for fields that contain references like ${resources.group.foo.id} we do one of the following: // for strings: comparing unresolved string like "${resoures.group.foo.id}" with actual object id. As long as IDs do not have ${...} format we're good. @@ -208,7 +217,15 @@ func (b *DeploymentBundle) CalculatePlan(ctx context.Context, client *databricks logdiag.LogError(ctx, fmt.Errorf("%s: internal error: no state cache entry found for %q", errorPrefix, resourceKey)) return false } - localDiff, err := structdiff.GetStructDiff(savedState, sv.Value, adapter.KeyedSlices()) + + // Compact a copy for comparison only; sv.Value keeps the full contents, which + // the deploy sends to the API. + localState, err := adapter.CompactState(sv.Value) + if err != nil { + logdiag.LogError(ctx, fmt.Errorf("%s: compacting local state: %w", errorPrefix, err)) + return false + } + localDiff, err := structdiff.GetStructDiff(savedState, localState, adapter.KeyedSlices()) if err != nil { logdiag.LogError(ctx, fmt.Errorf("%s: diffing local state: %w", errorPrefix, err)) return false @@ -241,7 +258,13 @@ func (b *DeploymentBundle) CalculatePlan(ctx context.Context, client *databricks return false } - remoteDiff, err = structdiff.GetStructDiff(remoteStateComparable, sv.Value, adapter.KeyedSlices()) + remoteStateComparable, err = adapter.CompactState(remoteStateComparable) + if err != nil { + logdiag.LogError(ctx, fmt.Errorf("%s: compacting remote state id=%q: %w", errorPrefix, dbentry.ID, err)) + return false + } + + remoteDiff, err = structdiff.GetStructDiff(remoteStateComparable, localState, adapter.KeyedSlices()) if err != nil { logdiag.LogError(ctx, fmt.Errorf("%s: diffing remote state: %w", errorPrefix, err)) return false diff --git a/bundle/direct/dresources/README.md b/bundle/direct/dresources/README.md index a20b68c4ebd..d904deac2d5 100644 --- a/bundle/direct/dresources/README.md +++ b/bundle/direct/dresources/README.md @@ -47,6 +47,17 @@ If the API may return a slice's elements in a different order between calls (e.g The state struct is serialized to JSON and persisted between deploys. Backward incompatible changes will result in a drift, which depending on field behaviour might result in recreate. See dstate/migrate.go on how to handle state migration. +## CompactState: storing large fields as content hashes + +Implement the optional `CompactState(state *T) (*T, error)` method when a field holds large content that is only ever compared for equality and never read back from state. It returns a copy of the state with such fields replaced by a content hash (use `hashStateValue`), and the framework applies it both before persisting state and to every value entering the diff, so stored and compared values share one form. + +A field qualifies only if **all** of the following hold: + - it is declared `ignore_remote_changes` (so it is never meaningfully compared against the remote value — typically `etag_based` drift detection), + - it is not read back from state by any code path (e.g. not consumed raw by `OverrideChangeDesc` or by state export), and + - it can be large (a small field gains nothing — the hash placeholder is ~70 bytes). + +`dashboards.serialized_dashboard` and `genie_spaces.serialized_space` use this: both inline a file's contents into state, both detect drift via `etag`, and both always send the full contents to the API from the plan's `new_state`. As a result the plan reports such fields as hashes (`sha256:...`) rather than full content. No state version bump is needed: legacy full-content state is hashed on read for comparison and rewritten compactly on the next save. Add a test asserting the field is `ignore_remote_changes` to guard the invariant. + ## OverrideChangeDesc Use `OverrideChangeDesc` only as a last resort when `resources.yml` settings cannot express the needed logic. Skipping an action with `change.Action = deployplan.Skip` in `OverrideChangeDesc` creates a silent no-op: the plan shows no change even if the user's config differs from remote. Document the skip reason clearly in both the comment and `change.Reason`. diff --git a/bundle/direct/dresources/adapter.go b/bundle/direct/dresources/adapter.go index 6891632b626..7e00cd03034 100644 --- a/bundle/direct/dresources/adapter.go +++ b/bundle/direct/dresources/adapter.go @@ -51,6 +51,13 @@ type IResource interface { // [Optional] OverrideChangeDesc can implement custom logic to update a given ChangeDesc; it is run last after built-in classifiers and field triggers. OverrideChangeDesc(ctx context.Context, path *structpath.PathNode, changedesc *ChangeDesc, remoteState any) error + // [Optional] CompactState returns a copy of state with large, equality-only fields + // (e.g. dashboards' serialized_dashboard) replaced by content hashes before the state + // is persisted and diffed. Must be idempotent. Only valid for fields that are + // ignore_remote_changes and never read back from state. + // Example: func (r *ResourceDashboard) CompactState(state *DashboardState) (*DashboardState, error) + CompactState(state any) (any, error) + // DoCreate creates a new resource from the newState. Returns id of the resource and optionally remote state. // If remote state is available as part of the operation, return it; otherwise return nil. // Example: func (r *ResourceVolume) DoCreate(ctx context.Context, newState *catalog.CreateVolumeRequestContent) (string, *catalog.VolumeInfo, error) @@ -102,6 +109,7 @@ type Adapter struct { waitAfterUpdate *calladapt.BoundCaller waitAfterDelete *calladapt.BoundCaller overrideChangeDesc *calladapt.BoundCaller + compactState *calladapt.BoundCaller doResize *calladapt.BoundCaller resourceConfig *ResourceLifecycleConfig @@ -135,6 +143,7 @@ func NewAdapter(typedNil any, resourceType string, client *databricks.WorkspaceC waitAfterUpdate: nil, waitAfterDelete: nil, overrideChangeDesc: nil, + compactState: nil, resourceConfig: GetResourceConfig(resourceType), generatedResourceConfig: GetGeneratedResourceConfig(resourceType), keyedSlices: nil, @@ -226,6 +235,11 @@ func (a *Adapter) initMethods(resource any) error { return err } + a.compactState, err = calladapt.PrepareCall(resource, reflect.TypeFor[IResource](), "CompactState") + if err != nil { + return err + } + a.doResize, err = calladapt.PrepareCall(resource, reflect.TypeFor[IResource](), "DoResize") if err != nil { return err @@ -339,6 +353,13 @@ func (a *Adapter) validate() error { validations = append(validations, "WaitAfterUpdate remoteState return", a.waitAfterUpdate.OutTypes[0], remoteType) } + if a.compactState != nil { + validations = append(validations, + "CompactState input", a.compactState.InTypes[0], stateType, + "CompactState return", a.compactState.OutTypes[0], stateType, + ) + } + err = validateTypes(validations...) if err != nil { return err @@ -556,6 +577,22 @@ func (a *Adapter) KeyedSlices() map[string]any { return a.keyedSlices } +// CompactState returns a copy of state with large, equality-only fields replaced by +// content hashes for compact storage, or state unchanged if the resource does not +// implement CompactState. It is applied both before persisting state and to every +// value entering the state diff, so stored and compared values share one form. +func (a *Adapter) CompactState(state any) (any, error) { + if a.compactState == nil { + return state, nil + } + + outs, err := a.compactState.Call(state) + if err != nil { + return nil, err + } + return outs[0], nil +} + // prepareCallRequired prepares a call and ensures the method is found. func prepareCallRequired(resource any, methodName string) (*calladapt.BoundCaller, error) { caller, err := calladapt.PrepareCall(resource, reflect.TypeFor[IResource](), methodName) diff --git a/bundle/direct/dresources/dashboard.go b/bundle/direct/dresources/dashboard.go index dbf492a0bee..67fd8364a07 100644 --- a/bundle/direct/dresources/dashboard.go +++ b/bundle/direct/dresources/dashboard.go @@ -106,6 +106,22 @@ func (r *ResourceDashboard) RemapState(state *DashboardState) *DashboardState { } } +// CompactState replaces the inlined serialized_dashboard contents with a content +// hash before the state is persisted. The full contents are never needed back from +// state: remote drift is detected via etag (serialized_dashboard is ignore_remote_changes, +// see resources.yml), and a deploy always sends the contents from the plan's new_state, +// never from saved state. Hashing keeps resources.json small for dashboards whose +// serialized form can be several megabytes. +func (r *ResourceDashboard) CompactState(state *DashboardState) (*DashboardState, error) { + hashed, err := hashStateValue(state.SerializedDashboard) + if err != nil { + return nil, err + } + compacted := *state + compacted.SerializedDashboard = hashed + return &compacted, nil +} + func (r *ResourceDashboard) DoRead(ctx context.Context, id string) (*DashboardState, error) { var dashboard *dashboards.Dashboard var publishedDashboard *dashboards.PublishedDashboard diff --git a/bundle/direct/dresources/dashboard_test.go b/bundle/direct/dresources/dashboard_test.go index 177e1236221..2b1ece93566 100644 --- a/bundle/direct/dresources/dashboard_test.go +++ b/bundle/direct/dresources/dashboard_test.go @@ -2,9 +2,11 @@ package dresources import ( "encoding/json" + "strings" "testing" "github.com/databricks/cli/bundle/config/resources" + "github.com/databricks/cli/libs/structs/structpath" "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" ) @@ -23,3 +25,48 @@ func TestDashboardState_JSONSerialization_PublishedField(t *testing.T) { assert.Contains(t, string(data), `"published":true`) } + +func TestDashboardCompactState(t *testing.T) { + r := &ResourceDashboard{} + state := &DashboardState{ + DashboardConfig: resources.DashboardConfig{ + DisplayName: "test-dashboard", + Etag: "etag-123", + SerializedDashboard: `{"pages":[{"name":"p1"}]}`, + }, + } + + compacted, err := r.CompactState(state) + require.NoError(t, err) + + // serialized_dashboard is replaced by a content hash; other fields are preserved. + require.IsType(t, "", compacted.SerializedDashboard) + assert.True(t, strings.HasPrefix(compacted.SerializedDashboard.(string), stateHashPrefix)) + assert.Equal(t, "test-dashboard", compacted.DisplayName) + assert.Equal(t, "etag-123", compacted.Etag) + + // The original state is not mutated. + assert.Equal(t, `{"pages":[{"name":"p1"}]}`, state.SerializedDashboard) + + // Compacting is idempotent. + again, err := r.CompactState(compacted) + require.NoError(t, err) + assert.Equal(t, compacted.SerializedDashboard, again.SerializedDashboard) +} + +// TestDashboardSerializedDashboardIsIgnoreRemoteChanges guards the SHA-only invariant: +// because serialized_dashboard is stored as a content hash, it must never be compared +// against the (full-content) remote value, i.e. it must be declared ignore_remote_changes. +func TestDashboardSerializedDashboardIsIgnoreRemoteChanges(t *testing.T) { + cfg := GetResourceConfig("dashboards") + path := structpath.NewStringKey(nil, "serialized_dashboard") + + found := false + for _, rule := range cfg.IgnoreRemoteChanges { + if path.HasPatternPrefix(rule.Field) { + found = true + break + } + } + assert.True(t, found, "serialized_dashboard must be ignore_remote_changes for SHA-only state to be correct") +} diff --git a/bundle/direct/dresources/genie_space.go b/bundle/direct/dresources/genie_space.go index 16aa0a57e8a..fcb4f8b5905 100644 --- a/bundle/direct/dresources/genie_space.go +++ b/bundle/direct/dresources/genie_space.go @@ -58,6 +58,21 @@ func (r *ResourceGenieSpace) RemapState(remote *resources.GenieSpaceConfig) *res } } +// CompactState replaces the inlined serialized_space contents with a content hash +// before the state is persisted. As with dashboards, the full contents are never +// needed back from state: drift is detected via etag (serialized_space is +// ignore_remote_changes, see resources.yml), and a deploy always sends the contents +// from the plan's new_state, never from saved state. +func (r *ResourceGenieSpace) CompactState(state *resources.GenieSpaceConfig) (*resources.GenieSpaceConfig, error) { + hashed, err := hashStateValue(state.SerializedSpace) + if err != nil { + return nil, err + } + compacted := *state + compacted.SerializedSpace = hashed + return &compacted, nil +} + func (r *ResourceGenieSpace) DoRead(ctx context.Context, id string) (*resources.GenieSpaceConfig, error) { space, err := r.client.Genie.GetSpace(ctx, dashboards.GenieGetSpaceRequest{ SpaceId: id, diff --git a/bundle/direct/dresources/genie_space_test.go b/bundle/direct/dresources/genie_space_test.go index f77289083d3..c298d6f7ebd 100644 --- a/bundle/direct/dresources/genie_space_test.go +++ b/bundle/direct/dresources/genie_space_test.go @@ -1,6 +1,7 @@ package dresources import ( + "strings" "testing" "github.com/databricks/cli/bundle/config/resources" @@ -199,3 +200,40 @@ func TestGenieSpaceOverrideChangeDescEtag(t *testing.T) { assert.Equal(t, deployplan.Update, change.Action) }) } + +func TestGenieSpaceCompactState(t *testing.T) { + r := &ResourceGenieSpace{} + state := &resources.GenieSpaceConfig{ + Title: "test-space", + Etag: "etag-7", + SerializedSpace: `{"datasets":[{"name":"d1"}]}`, + } + + compacted, err := r.CompactState(state) + require.NoError(t, err) + + require.IsType(t, "", compacted.SerializedSpace) + assert.True(t, strings.HasPrefix(compacted.SerializedSpace.(string), stateHashPrefix)) + assert.Equal(t, "test-space", compacted.Title) + assert.Equal(t, "etag-7", compacted.Etag) + + // The original state is not mutated. + assert.Equal(t, `{"datasets":[{"name":"d1"}]}`, state.SerializedSpace) +} + +// TestGenieSpaceSerializedSpaceIsIgnoreRemoteChanges guards the SHA-only invariant: +// serialized_space is stored as a content hash, so it must never be compared against +// the remote value, i.e. it must be declared ignore_remote_changes. +func TestGenieSpaceSerializedSpaceIsIgnoreRemoteChanges(t *testing.T) { + cfg := GetResourceConfig("genie_spaces") + path := structpath.NewStringKey(nil, "serialized_space") + + found := false + for _, rule := range cfg.IgnoreRemoteChanges { + if path.HasPatternPrefix(rule.Field) { + found = true + break + } + } + assert.True(t, found, "serialized_space must be ignore_remote_changes for SHA-only state to be correct") +} diff --git a/bundle/direct/dresources/state_compaction.go b/bundle/direct/dresources/state_compaction.go new file mode 100644 index 00000000000..691255137c1 --- /dev/null +++ b/bundle/direct/dresources/state_compaction.go @@ -0,0 +1,40 @@ +package dresources + +import ( + "crypto/sha256" + "encoding/hex" + "encoding/json" + "strings" +) + +// stateHashPrefix marks a state value that holds a content hash instead of the +// full content. It is part of the on-disk state format, so changing it is a +// backward-incompatible change. +const stateHashPrefix = "sha256:" + +// hashStateValue returns a content-hash placeholder ("sha256:") over the JSON +// encoding of v. It is used to store large, equality-only fields (e.g. a dashboard's +// serialized_dashboard) compactly in state instead of their full contents. +// +// It is idempotent and stable: nil, an empty string, and a value that is already a +// placeholder are returned unchanged, so re-compacting an already-compact state and +// comparing a fresh config against stored state both behave predictably. +func hashStateValue(v any) (any, error) { + if s, ok := v.(string); ok { + if s == "" || strings.HasPrefix(s, stateHashPrefix) { + return v, nil + } + } + if v == nil { + return v, nil + } + + // json.Marshal is deterministic (map keys are sorted), so equal content always + // produces an equal hash across runs and platforms. + data, err := json.Marshal(v) + if err != nil { + return nil, err + } + sum := sha256.Sum256(data) + return stateHashPrefix + hex.EncodeToString(sum[:]), nil +} diff --git a/bundle/direct/dresources/state_compaction_test.go b/bundle/direct/dresources/state_compaction_test.go new file mode 100644 index 00000000000..d2c5d8b6846 --- /dev/null +++ b/bundle/direct/dresources/state_compaction_test.go @@ -0,0 +1,53 @@ +package dresources + +import ( + "strings" + "testing" + + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" +) + +func TestHashStateValue(t *testing.T) { + stringHash, err := hashStateValue("hello") + require.NoError(t, err) + require.IsType(t, "", stringHash) + assert.True(t, strings.HasPrefix(stringHash.(string), stateHashPrefix)) + + // Same content always hashes to the same value. + again, err := hashStateValue("hello") + require.NoError(t, err) + assert.Equal(t, stringHash, again) + + // Different content hashes differently. + other, err := hashStateValue("world") + require.NoError(t, err) + assert.NotEqual(t, stringHash, other) + + // A map hashes deterministically regardless of literal key order. + mapHash, err := hashStateValue(map[string]any{"a": 1, "b": 2}) + require.NoError(t, err) + mapHash2, err := hashStateValue(map[string]any{"b": 2, "a": 1}) + require.NoError(t, err) + assert.Equal(t, mapHash, mapHash2) +} + +func TestHashStateValueIdempotent(t *testing.T) { + hashed, err := hashStateValue("some content") + require.NoError(t, err) + + // Re-hashing a placeholder returns it unchanged. + again, err := hashStateValue(hashed) + require.NoError(t, err) + assert.Equal(t, hashed, again) +} + +func TestHashStateValueEmptyAndNil(t *testing.T) { + empty, err := hashStateValue("") + require.NoError(t, err) + assert.Empty(t, empty) + + null, err := hashStateValue(nil) + require.NoError(t, err) + assert.Nil(t, null) +} From c3ea69cdd2a0eb33cd47e9cfed4796be4a3c22c8 Mon Sep 17 00:00:00 2001 From: Shreyas Goenka Date: Fri, 19 Jun 2026 15:22:39 +0000 Subject: [PATCH 02/13] acceptance: assert state-stores-hash and API-gets-content invariants together Extend the genie_spaces/serialized_space test so a single deploy asserts both sides of the SHA-only-state contract on the same state: - the create requests carry the full serialized_space (existing capture, plus an explicit "no sha256 in the request" guard), and - the persisted state stores only the sha256 content hash, never the content. Co-authored-by: Isaac --- .../resources/genie_spaces/serialized_space/output.txt | 3 +++ .../resources/genie_spaces/serialized_space/script | 9 +++++++++ 2 files changed, 12 insertions(+) diff --git a/acceptance/bundle/resources/genie_spaces/serialized_space/output.txt b/acceptance/bundle/resources/genie_spaces/serialized_space/output.txt index ed1a5d8c214..18a13be3574 100644 --- a/acceptance/bundle/resources/genie_spaces/serialized_space/output.txt +++ b/acceptance/bundle/resources/genie_spaces/serialized_space/output.txt @@ -5,6 +5,9 @@ Deploying resources... Updating deployment state... Deployment complete! +=== State stores the serialized_space content hash, not the contentjson.state.resources.genie_spaces.from_file.state.serialized_space = "sha256:[NUMID]fd6bf4666354bd7e47234b226b1a6a9c72e2d56019fdd1bf8d3aa852"; +json.state.resources.genie_spaces.from_inline.state.serialized_space = "sha256:a98de2fbe715467b789896e79e[NUMID]c07e2cc764b3cd80bf39f517eceffb"; + >>> [CLI] bundle destroy --auto-approve The following resources will be deleted: delete resources.genie_spaces.from_file diff --git a/acceptance/bundle/resources/genie_spaces/serialized_space/script b/acceptance/bundle/resources/genie_spaces/serialized_space/script index bf5aea4c21d..17c79686741 100644 --- a/acceptance/bundle/resources/genie_spaces/serialized_space/script +++ b/acceptance/bundle/resources/genie_spaces/serialized_space/script @@ -19,3 +19,12 @@ jq -s '[.[] | select(.method == "POST" and .path == "/api/2.0/genie/spaces") parsed: (.body.serialized_space | fromjson)}] | sort_by(.title)' out.requests.txt > out.create_requests.json rm -f out.requests.txt + +# Invariant: the create requests above carry the full serialized_space, while the +# persisted state stores only its sha256 content hash (serialized_space is never +# compared against the remote value, so the full content is not kept in state). +# Assert both halves against the same deploy: state has the hash and not the inline +# content; the create requests have the content and no hash. +title "State stores the serialized_space content hash, not the content" +print_state.py | gron.py | grep serialized_space | contains.py 'sha256:' '!main.sales.orders' +contains.py '!sha256:' < out.create_requests.json > /dev/null From 6c396a3228c22d6857fc092352b1f2fabd3d5973 Mon Sep 17 00:00:00 2001 From: Shreyas Goenka Date: Fri, 19 Jun 2026 15:35:34 +0000 Subject: [PATCH 03/13] bundle/direct: clean up temp state file on bind compaction error paths The two error paths added for state compaction in Bind (adapter lookup and CompactState) returned without removing the temp state file, unlike every other error path in the function. Add os.Remove(tmpStatePath) to match. Co-authored-by: Isaac --- bundle/direct/bind.go | 2 ++ 1 file changed, 2 insertions(+) diff --git a/bundle/direct/bind.go b/bundle/direct/bind.go index 1b8e84591da..a447ab6aed3 100644 --- a/bundle/direct/bind.go +++ b/bundle/direct/bind.go @@ -147,10 +147,12 @@ func (b *DeploymentBundle) Bind(ctx context.Context, client *databricks.Workspac adapter, err := b.getAdapterForKey(resourceKey) if err != nil { + os.Remove(tmpStatePath) return nil, err } compacted, err := adapter.CompactState(sv.Value) if err != nil { + os.Remove(tmpStatePath) return nil, fmt.Errorf("compacting state: %w", err) } From fd65c7f39cc886ebb5669328a54090e7db180884 Mon Sep 17 00:00:00 2001 From: Shreyas Goenka Date: Mon, 22 Jun 2026 08:18:44 +0000 Subject: [PATCH 04/13] acceptance: render serialized_dashboard/serialized_space state hash as one token The generic test replacements split the persisted sha256 digest into pieces: [0-9a-f]{32} turned it into "[DASHBOARD_ID][DASHBOARD_ID]" in the bind test, and [0-9]{3,} fragmented it as "sha256:...[NUMID]..." in several plan outputs. Add a single low-Order replacement in acceptance/bundle/test.toml that collapses the 64-char digest to "sha256:[ALPHANUMID]" before those rules run, so every state and plan output renders the hash consistently. Also add the missing newline after the state-assertion title in the genie serialized_space test. Co-authored-by: Isaac --- .../recreation/out.state_after_bind.direct.json | 2 +- acceptance/bundle/migrate/dashboards/out.new_state.json | 2 +- .../migrate/dashboards/out.plan_after_migrate.json | 6 +++--- .../resources/dashboards/simple/out.plan.direct.json | 6 +++--- .../unpublish-out-of-band/out.plan.direct.json | 6 +++--- .../bundle/resources/genie_spaces/inline/out.plan.json | 6 +++--- .../resources/genie_spaces/serialized_space/output.txt | 5 +++-- .../resources/genie_spaces/serialized_space/script | 2 +- acceptance/bundle/test.toml | 9 +++++++++ 9 files changed, 27 insertions(+), 17 deletions(-) diff --git a/acceptance/bundle/deployment/bind/dashboard/recreation/out.state_after_bind.direct.json b/acceptance/bundle/deployment/bind/dashboard/recreation/out.state_after_bind.direct.json index 948a7e838a9..77cc6eb24ff 100644 --- a/acceptance/bundle/deployment/bind/dashboard/recreation/out.state_after_bind.direct.json +++ b/acceptance/bundle/deployment/bind/dashboard/recreation/out.state_after_bind.direct.json @@ -12,7 +12,7 @@ "etag": [ETAG], "parent_path": "/Workspace/Users/[USERNAME]/.bundle/test-bundle-[UNIQUE_NAME]/default/resources", "published": true, - "serialized_dashboard": "sha256:[DASHBOARD_ID][DASHBOARD_ID]", + "serialized_dashboard": "sha256:[ALPHANUMID]", "warehouse_id": "[TEST_DEFAULT_WAREHOUSE_ID]" } } diff --git a/acceptance/bundle/migrate/dashboards/out.new_state.json b/acceptance/bundle/migrate/dashboards/out.new_state.json index 0cb70b8172c..3708743b439 100644 --- a/acceptance/bundle/migrate/dashboards/out.new_state.json +++ b/acceptance/bundle/migrate/dashboards/out.new_state.json @@ -12,7 +12,7 @@ "etag": "[NUMID]", "parent_path": "/Workspace/Users/[USERNAME]", "published": true, - "serialized_dashboard": "sha256:9dc171d249749a592c717bb13f2d948d2f1fa5530b82ac308a3df0c48d347ef3", + "serialized_dashboard": "sha256:[ALPHANUMID]", "warehouse_id": "123456" } } diff --git a/acceptance/bundle/migrate/dashboards/out.plan_after_migrate.json b/acceptance/bundle/migrate/dashboards/out.plan_after_migrate.json index 3cdd9cc82f5..31013aa14b0 100644 --- a/acceptance/bundle/migrate/dashboards/out.plan_after_migrate.json +++ b/acceptance/bundle/migrate/dashboards/out.plan_after_migrate.json @@ -30,9 +30,9 @@ "serialized_dashboard": { "action": "skip", "reason": "etag_based", - "old": "sha256:9dc171d249749a592c717bb13f2d948d2f1fa5530b82ac308a3df0c48d347ef3", - "new": "sha256:9dc171d249749a592c717bb13f2d948d2f1fa5530b82ac308a3df0c48d347ef3", - "remote": "sha256:aae99d4284b3390b1b35974f7bd4c03603cacd6a5a4a838ca563f652257ab6a7" + "old": "sha256:[ALPHANUMID]", + "new": "sha256:[ALPHANUMID]", + "remote": "sha256:[ALPHANUMID]" } } } diff --git a/acceptance/bundle/resources/dashboards/simple/out.plan.direct.json b/acceptance/bundle/resources/dashboards/simple/out.plan.direct.json index aaade035814..1c738b9242e 100644 --- a/acceptance/bundle/resources/dashboards/simple/out.plan.direct.json +++ b/acceptance/bundle/resources/dashboards/simple/out.plan.direct.json @@ -30,9 +30,9 @@ "serialized_dashboard": { "action": "skip", "reason": "etag_based", - "old": "sha256:cf207804e[NUMID]becbb5703765c4978cd7a8466dbdd[NUMID]e571ad09", - "new": "sha256:cf207804e[NUMID]becbb5703765c4978cd7a8466dbdd[NUMID]e571ad09", - "remote": "sha256:bb511d1e2723214ac1c3710d[NUMID]d57e31b0145d33d427d9628b209be38" + "old": "sha256:[ALPHANUMID]", + "new": "sha256:[ALPHANUMID]", + "remote": "sha256:[ALPHANUMID]" } } } diff --git a/acceptance/bundle/resources/dashboards/unpublish-out-of-band/out.plan.direct.json b/acceptance/bundle/resources/dashboards/unpublish-out-of-band/out.plan.direct.json index 77174a8a0e9..2ab6c41cbb1 100644 --- a/acceptance/bundle/resources/dashboards/unpublish-out-of-band/out.plan.direct.json +++ b/acceptance/bundle/resources/dashboards/unpublish-out-of-band/out.plan.direct.json @@ -46,9 +46,9 @@ "serialized_dashboard": { "action": "skip", "reason": "etag_based", - "old": "sha256:a196fded8b5e0ebe5af2c6bed934d90fe055644de7773bbd84c851b577ff4f19", - "new": "sha256:a196fded8b5e0ebe5af2c6bed934d90fe055644de7773bbd84c851b577ff4f19", - "remote": "sha256:df70601c54be8ac2a66ac4a01dbd5224f4fa20ca9e8d15a615beab794cfdca08" + "old": "sha256:[ALPHANUMID]", + "new": "sha256:[ALPHANUMID]", + "remote": "sha256:[ALPHANUMID]" } } } diff --git a/acceptance/bundle/resources/genie_spaces/inline/out.plan.json b/acceptance/bundle/resources/genie_spaces/inline/out.plan.json index 3569622b6d0..911e5409b6e 100644 --- a/acceptance/bundle/resources/genie_spaces/inline/out.plan.json +++ b/acceptance/bundle/resources/genie_spaces/inline/out.plan.json @@ -24,9 +24,9 @@ "serialized_space": { "action": "skip", "reason": "etag_based", - "old": "sha256:ea4b505f061a2b65db49891ea5cab6ac875350aa7a8afed4778860a50d8f4db5", - "new": "sha256:ea4b505f061a2b65db49891ea5cab6ac875350aa7a8afed4778860a50d8f4db5", - "remote": "sha256:8df899b73697f19d25ed51f3a97ddce65f97d61cd7fc3b402aef349eb2fe93e8" + "old": "sha256:[ALPHANUMID]", + "new": "sha256:[ALPHANUMID]", + "remote": "sha256:[ALPHANUMID]" } } } diff --git a/acceptance/bundle/resources/genie_spaces/serialized_space/output.txt b/acceptance/bundle/resources/genie_spaces/serialized_space/output.txt index 18a13be3574..b13e80729cc 100644 --- a/acceptance/bundle/resources/genie_spaces/serialized_space/output.txt +++ b/acceptance/bundle/resources/genie_spaces/serialized_space/output.txt @@ -5,8 +5,9 @@ Deploying resources... Updating deployment state... Deployment complete! -=== State stores the serialized_space content hash, not the contentjson.state.resources.genie_spaces.from_file.state.serialized_space = "sha256:[NUMID]fd6bf4666354bd7e47234b226b1a6a9c72e2d56019fdd1bf8d3aa852"; -json.state.resources.genie_spaces.from_inline.state.serialized_space = "sha256:a98de2fbe715467b789896e79e[NUMID]c07e2cc764b3cd80bf39f517eceffb"; +=== State stores the serialized_space content hash, not the content: +json.state.resources.genie_spaces.from_file.state.serialized_space = "sha256:[ALPHANUMID]"; +json.state.resources.genie_spaces.from_inline.state.serialized_space = "sha256:[ALPHANUMID]"; >>> [CLI] bundle destroy --auto-approve The following resources will be deleted: diff --git a/acceptance/bundle/resources/genie_spaces/serialized_space/script b/acceptance/bundle/resources/genie_spaces/serialized_space/script index 17c79686741..89c716f0619 100644 --- a/acceptance/bundle/resources/genie_spaces/serialized_space/script +++ b/acceptance/bundle/resources/genie_spaces/serialized_space/script @@ -25,6 +25,6 @@ rm -f out.requests.txt # compared against the remote value, so the full content is not kept in state). # Assert both halves against the same deploy: state has the hash and not the inline # content; the create requests have the content and no hash. -title "State stores the serialized_space content hash, not the content" +title "State stores the serialized_space content hash, not the content:\n" print_state.py | gron.py | grep serialized_space | contains.py 'sha256:' '!main.sales.orders' contains.py '!sha256:' < out.create_requests.json > /dev/null diff --git a/acceptance/bundle/test.toml b/acceptance/bundle/test.toml index fca8259a3b5..3439585bd1a 100644 --- a/acceptance/bundle/test.toml +++ b/acceptance/bundle/test.toml @@ -22,3 +22,12 @@ New = 'os/[OS]' [[Repls]] Old = ' cicd/github' New = '' + +# serialized_dashboard / serialized_space are persisted in direct-engine state as a +# content hash. Collapse the 64-char digest to a single token, at a low Order so it +# runs before the generic [NUMID] and [DASHBOARD_ID] rules that would otherwise split +# the hex into pieces. +[[Repls]] +Old = 'sha256:[0-9a-f]{64}' +New = 'sha256:[ALPHANUMID]' +Order = -1 From f747ba4d7a34eeba893b185f3825fc8183d9b790 Mon Sep 17 00:00:00 2001 From: Shreyas Goenka Date: Mon, 22 Jun 2026 10:52:09 +0000 Subject: [PATCH 05/13] Revert genie_spaces changes to scope this PR to dashboards serialized_space gets the same treatment in a follow-up; keeping this PR to dashboards only. Co-authored-by: Isaac --- .../genie_spaces/inline/out.plan.json | 6 +-- .../genie_spaces/serialized_space/output.txt | 4 -- .../genie_spaces/serialized_space/script | 9 ----- .../genie_spaces/version_migration/output.txt | 3 +- .../genie_spaces/version_migration/script | 9 ++--- bundle/direct/dresources/genie_space.go | 15 -------- bundle/direct/dresources/genie_space_test.go | 38 ------------------- 7 files changed, 8 insertions(+), 76 deletions(-) diff --git a/acceptance/bundle/resources/genie_spaces/inline/out.plan.json b/acceptance/bundle/resources/genie_spaces/inline/out.plan.json index 911e5409b6e..975688268b2 100644 --- a/acceptance/bundle/resources/genie_spaces/inline/out.plan.json +++ b/acceptance/bundle/resources/genie_spaces/inline/out.plan.json @@ -24,9 +24,9 @@ "serialized_space": { "action": "skip", "reason": "etag_based", - "old": "sha256:[ALPHANUMID]", - "new": "sha256:[ALPHANUMID]", - "remote": "sha256:[ALPHANUMID]" + "old": "{\"config\":{\"sample_questions\":[{\"id\":\"sq-001\",\"question\":[\"What is the total revenue?\"]}]},\"data_sources\":{\"tables\":[{\"column_configs\":[{\"column_name\":\"amount\",\"get_example_values\":true}],\"identifier\":\"main.sales.orders\"}]},\"version\":1}", + "new": "{\"config\":{\"sample_questions\":[{\"id\":\"sq-001\",\"question\":[\"What is the total revenue?\"]}]},\"data_sources\":{\"tables\":[{\"column_configs\":[{\"column_name\":\"amount\",\"get_example_values\":true}],\"identifier\":\"main.sales.orders\"}]},\"version\":1}", + "remote": "{\"config\":{\"sample_questions\":[{\"id\":\"sq-001\",\"question\":[\"What is the total revenue?\"]}]},\"data_sources\":{\"tables\":[{\"column_configs\":[{\"column_name\":\"amount\",\"get_example_values\":true}],\"identifier\":\"main.sales.orders\"}]},\"version\":2}" } } } diff --git a/acceptance/bundle/resources/genie_spaces/serialized_space/output.txt b/acceptance/bundle/resources/genie_spaces/serialized_space/output.txt index b13e80729cc..ed1a5d8c214 100644 --- a/acceptance/bundle/resources/genie_spaces/serialized_space/output.txt +++ b/acceptance/bundle/resources/genie_spaces/serialized_space/output.txt @@ -5,10 +5,6 @@ Deploying resources... Updating deployment state... Deployment complete! -=== State stores the serialized_space content hash, not the content: -json.state.resources.genie_spaces.from_file.state.serialized_space = "sha256:[ALPHANUMID]"; -json.state.resources.genie_spaces.from_inline.state.serialized_space = "sha256:[ALPHANUMID]"; - >>> [CLI] bundle destroy --auto-approve The following resources will be deleted: delete resources.genie_spaces.from_file diff --git a/acceptance/bundle/resources/genie_spaces/serialized_space/script b/acceptance/bundle/resources/genie_spaces/serialized_space/script index 89c716f0619..bf5aea4c21d 100644 --- a/acceptance/bundle/resources/genie_spaces/serialized_space/script +++ b/acceptance/bundle/resources/genie_spaces/serialized_space/script @@ -19,12 +19,3 @@ jq -s '[.[] | select(.method == "POST" and .path == "/api/2.0/genie/spaces") parsed: (.body.serialized_space | fromjson)}] | sort_by(.title)' out.requests.txt > out.create_requests.json rm -f out.requests.txt - -# Invariant: the create requests above carry the full serialized_space, while the -# persisted state stores only its sha256 content hash (serialized_space is never -# compared against the remote value, so the full content is not kept in state). -# Assert both halves against the same deploy: state has the hash and not the inline -# content; the create requests have the content and no hash. -title "State stores the serialized_space content hash, not the content:\n" -print_state.py | gron.py | grep serialized_space | contains.py 'sha256:' '!main.sales.orders' -contains.py '!sha256:' < out.create_requests.json > /dev/null diff --git a/acceptance/bundle/resources/genie_spaces/version_migration/output.txt b/acceptance/bundle/resources/genie_spaces/version_migration/output.txt index 74443ccd812..e554b79825c 100644 --- a/acceptance/bundle/resources/genie_spaces/version_migration/output.txt +++ b/acceptance/bundle/resources/genie_spaces/version_migration/output.txt @@ -12,7 +12,8 @@ Plan: 0 to add, 0 to change, 0 to delete, 1 unchanged === serialized_space: local version 1, remote version 2, skipped (etag_based){ "action": "skip", "reason": "etag_based", - "local_remote_differ": true + "local_version": 1, + "remote_version": 2 } >>> [CLI] bundle destroy --auto-approve diff --git a/acceptance/bundle/resources/genie_spaces/version_migration/script b/acceptance/bundle/resources/genie_spaces/version_migration/script index 116d81a72c4..141efd5bef6 100644 --- a/acceptance/bundle/resources/genie_spaces/version_migration/script +++ b/acceptance/bundle/resources/genie_spaces/version_migration/script @@ -18,12 +18,9 @@ trace $CLI bundle plan # The mismatch the etag masks: the local config stays version 1 while the remote # (and the etag guarding it) is version 2. The serialized_space change is # recorded but skipped via the etag_based rule. -# -# serialized_space is stored in state as a content hash (it is never compared -# against the remote value), so the plan reports old/new/remote as hashes, not -# the full content. We assert that local and remote differ and that the change is -# skipped, rather than parsing version numbers out of the content. title "serialized_space: local version 1, remote version 2, skipped (etag_based)" $CLI bundle plan -o json \ | jq '.plan["resources.genie_spaces.foo"].changes.serialized_space - | {action, reason, local_remote_differ: (.new != .remote)}' + | {action, reason, + local_version: (.new | fromjson | .version), + remote_version: (.remote | fromjson | .version)}' diff --git a/bundle/direct/dresources/genie_space.go b/bundle/direct/dresources/genie_space.go index fcb4f8b5905..16aa0a57e8a 100644 --- a/bundle/direct/dresources/genie_space.go +++ b/bundle/direct/dresources/genie_space.go @@ -58,21 +58,6 @@ func (r *ResourceGenieSpace) RemapState(remote *resources.GenieSpaceConfig) *res } } -// CompactState replaces the inlined serialized_space contents with a content hash -// before the state is persisted. As with dashboards, the full contents are never -// needed back from state: drift is detected via etag (serialized_space is -// ignore_remote_changes, see resources.yml), and a deploy always sends the contents -// from the plan's new_state, never from saved state. -func (r *ResourceGenieSpace) CompactState(state *resources.GenieSpaceConfig) (*resources.GenieSpaceConfig, error) { - hashed, err := hashStateValue(state.SerializedSpace) - if err != nil { - return nil, err - } - compacted := *state - compacted.SerializedSpace = hashed - return &compacted, nil -} - func (r *ResourceGenieSpace) DoRead(ctx context.Context, id string) (*resources.GenieSpaceConfig, error) { space, err := r.client.Genie.GetSpace(ctx, dashboards.GenieGetSpaceRequest{ SpaceId: id, diff --git a/bundle/direct/dresources/genie_space_test.go b/bundle/direct/dresources/genie_space_test.go index c298d6f7ebd..f77289083d3 100644 --- a/bundle/direct/dresources/genie_space_test.go +++ b/bundle/direct/dresources/genie_space_test.go @@ -1,7 +1,6 @@ package dresources import ( - "strings" "testing" "github.com/databricks/cli/bundle/config/resources" @@ -200,40 +199,3 @@ func TestGenieSpaceOverrideChangeDescEtag(t *testing.T) { assert.Equal(t, deployplan.Update, change.Action) }) } - -func TestGenieSpaceCompactState(t *testing.T) { - r := &ResourceGenieSpace{} - state := &resources.GenieSpaceConfig{ - Title: "test-space", - Etag: "etag-7", - SerializedSpace: `{"datasets":[{"name":"d1"}]}`, - } - - compacted, err := r.CompactState(state) - require.NoError(t, err) - - require.IsType(t, "", compacted.SerializedSpace) - assert.True(t, strings.HasPrefix(compacted.SerializedSpace.(string), stateHashPrefix)) - assert.Equal(t, "test-space", compacted.Title) - assert.Equal(t, "etag-7", compacted.Etag) - - // The original state is not mutated. - assert.Equal(t, `{"datasets":[{"name":"d1"}]}`, state.SerializedSpace) -} - -// TestGenieSpaceSerializedSpaceIsIgnoreRemoteChanges guards the SHA-only invariant: -// serialized_space is stored as a content hash, so it must never be compared against -// the remote value, i.e. it must be declared ignore_remote_changes. -func TestGenieSpaceSerializedSpaceIsIgnoreRemoteChanges(t *testing.T) { - cfg := GetResourceConfig("genie_spaces") - path := structpath.NewStringKey(nil, "serialized_space") - - found := false - for _, rule := range cfg.IgnoreRemoteChanges { - if path.HasPatternPrefix(rule.Field) { - found = true - break - } - } - assert.True(t, found, "serialized_space must be ignore_remote_changes for SHA-only state to be correct") -} From b6a6afacdeb36cf14c6f968e14746d3de43418cc Mon Sep 17 00:00:00 2001 From: Shreyas Goenka Date: Mon, 22 Jun 2026 11:03:02 +0000 Subject: [PATCH 06/13] direct: replace CompactState adapter method with a resources.yml declaration The per-resource CompactState(state any) (any, error) adapter method (plus its IResource entry and calladapt plumbing) is removed. Instead, fields persisted as a content hash are declared under hashed_in_state in resources.yml, and a generic dresources.CompactState(cfg, state) hashes them via structaccess. Behaviour is unchanged: new_state still carries the full content (so deploy and deploy --plan send the real bytes), and only resources.json + the diff use the hash. dashboards.serialized_dashboard is the sole declared field; acceptance outputs are identical. Co-authored-by: Isaac --- bundle/direct/apply.go | 6 +-- bundle/direct/bind.go | 3 +- bundle/direct/bundle_plan.go | 6 +-- bundle/direct/dresources/README.md | 6 +-- bundle/direct/dresources/adapter.go | 37 -------------- bundle/direct/dresources/config.go | 7 +++ bundle/direct/dresources/dashboard.go | 16 ------- bundle/direct/dresources/dashboard_test.go | 31 ++++++------ bundle/direct/dresources/resources.yml | 7 +++ bundle/direct/dresources/state_compaction.go | 48 +++++++++++++++++++ .../dresources/state_compaction_test.go | 15 ++++++ 11 files changed, 105 insertions(+), 77 deletions(-) diff --git a/bundle/direct/apply.go b/bundle/direct/apply.go index 14d85ed6d2d..036c70d0a41 100644 --- a/bundle/direct/apply.go +++ b/bundle/direct/apply.go @@ -50,10 +50,10 @@ func (d *DeploymentUnit) Deploy(ctx context.Context, db *dstate.DeploymentState, } } -// saveState compacts the state (replacing large equality-only fields with content -// hashes, see Adapter.CompactState) before persisting it. +// saveState compacts the state (replacing fields declared hashed_in_state with content +// hashes, see dresources.CompactState) before persisting it. func (d *DeploymentUnit) saveState(db *dstate.DeploymentState, id string, newState any) error { - compacted, err := d.Adapter.CompactState(newState) + compacted, err := dresources.CompactState(d.Adapter.ResourceConfig(), newState) if err != nil { return fmt.Errorf("compacting state: %w", err) } diff --git a/bundle/direct/bind.go b/bundle/direct/bind.go index a447ab6aed3..3d074faf64b 100644 --- a/bundle/direct/bind.go +++ b/bundle/direct/bind.go @@ -8,6 +8,7 @@ import ( "github.com/databricks/cli/bundle/config" "github.com/databricks/cli/bundle/deployplan" + "github.com/databricks/cli/bundle/direct/dresources" "github.com/databricks/cli/bundle/direct/dstate" "github.com/databricks/cli/libs/log" "github.com/databricks/cli/libs/structs/structaccess" @@ -150,7 +151,7 @@ func (b *DeploymentBundle) Bind(ctx context.Context, client *databricks.Workspac os.Remove(tmpStatePath) return nil, err } - compacted, err := adapter.CompactState(sv.Value) + compacted, err := dresources.CompactState(adapter.ResourceConfig(), sv.Value) if err != nil { os.Remove(tmpStatePath) return nil, fmt.Errorf("compacting state: %w", err) diff --git a/bundle/direct/bundle_plan.go b/bundle/direct/bundle_plan.go index 187c26cbf19..e1a8daa067f 100644 --- a/bundle/direct/bundle_plan.go +++ b/bundle/direct/bundle_plan.go @@ -200,7 +200,7 @@ func (b *DeploymentBundle) CalculatePlan(ctx context.Context, client *databricks // Compact every value that enters the diff so they share one comparison form. // For saved state this also hashes legacy full-content entries on the fly; the // on-disk entry is rewritten compactly on the next save (lazy migration). - savedState, err = adapter.CompactState(savedState) + savedState, err = dresources.CompactState(adapter.ResourceConfig(), savedState) if err != nil { logdiag.LogError(ctx, fmt.Errorf("%s: compacting saved state: %w", errorPrefix, err)) return false @@ -220,7 +220,7 @@ func (b *DeploymentBundle) CalculatePlan(ctx context.Context, client *databricks // Compact a copy for comparison only; sv.Value keeps the full contents, which // the deploy sends to the API. - localState, err := adapter.CompactState(sv.Value) + localState, err := dresources.CompactState(adapter.ResourceConfig(), sv.Value) if err != nil { logdiag.LogError(ctx, fmt.Errorf("%s: compacting local state: %w", errorPrefix, err)) return false @@ -258,7 +258,7 @@ func (b *DeploymentBundle) CalculatePlan(ctx context.Context, client *databricks return false } - remoteStateComparable, err = adapter.CompactState(remoteStateComparable) + remoteStateComparable, err = dresources.CompactState(adapter.ResourceConfig(), remoteStateComparable) if err != nil { logdiag.LogError(ctx, fmt.Errorf("%s: compacting remote state id=%q: %w", errorPrefix, dbentry.ID, err)) return false diff --git a/bundle/direct/dresources/README.md b/bundle/direct/dresources/README.md index d904deac2d5..63b7fbc79a4 100644 --- a/bundle/direct/dresources/README.md +++ b/bundle/direct/dresources/README.md @@ -47,16 +47,16 @@ If the API may return a slice's elements in a different order between calls (e.g The state struct is serialized to JSON and persisted between deploys. Backward incompatible changes will result in a drift, which depending on field behaviour might result in recreate. See dstate/migrate.go on how to handle state migration. -## CompactState: storing large fields as content hashes +## hashed_in_state: storing large fields as content hashes -Implement the optional `CompactState(state *T) (*T, error)` method when a field holds large content that is only ever compared for equality and never read back from state. It returns a copy of the state with such fields replaced by a content hash (use `hashStateValue`), and the framework applies it both before persisting state and to every value entering the diff, so stored and compared values share one form. +Declare a field under `hashed_in_state` in `resources.yml` when it holds large content that is only ever compared for equality and never read back from state. The engine then persists only a `sha256:` content hash for that field (via `CompactState`, applied both before saving state and to every value entering the diff), so stored and compared values share one form. The full contents stay in the plan's `new_state` and are sent to the API on every deploy, so the deploy is unaffected. A field qualifies only if **all** of the following hold: - it is declared `ignore_remote_changes` (so it is never meaningfully compared against the remote value — typically `etag_based` drift detection), - it is not read back from state by any code path (e.g. not consumed raw by `OverrideChangeDesc` or by state export), and - it can be large (a small field gains nothing — the hash placeholder is ~70 bytes). -`dashboards.serialized_dashboard` and `genie_spaces.serialized_space` use this: both inline a file's contents into state, both detect drift via `etag`, and both always send the full contents to the API from the plan's `new_state`. As a result the plan reports such fields as hashes (`sha256:...`) rather than full content. No state version bump is needed: legacy full-content state is hashed on read for comparison and rewritten compactly on the next save. Add a test asserting the field is `ignore_remote_changes` to guard the invariant. +`dashboards.serialized_dashboard` uses this: it inlines a file's contents into config, detects drift via `etag`, and always sends the full contents to the API from the plan's `new_state`. As a result the plan reports the field as a hash (`sha256:...`) rather than full content. No state version bump is needed: legacy full-content state is hashed on read for comparison and rewritten compactly on the next save. Add a test asserting the field is declared both `hashed_in_state` and `ignore_remote_changes` to guard the invariant. ## OverrideChangeDesc diff --git a/bundle/direct/dresources/adapter.go b/bundle/direct/dresources/adapter.go index 7e00cd03034..6891632b626 100644 --- a/bundle/direct/dresources/adapter.go +++ b/bundle/direct/dresources/adapter.go @@ -51,13 +51,6 @@ type IResource interface { // [Optional] OverrideChangeDesc can implement custom logic to update a given ChangeDesc; it is run last after built-in classifiers and field triggers. OverrideChangeDesc(ctx context.Context, path *structpath.PathNode, changedesc *ChangeDesc, remoteState any) error - // [Optional] CompactState returns a copy of state with large, equality-only fields - // (e.g. dashboards' serialized_dashboard) replaced by content hashes before the state - // is persisted and diffed. Must be idempotent. Only valid for fields that are - // ignore_remote_changes and never read back from state. - // Example: func (r *ResourceDashboard) CompactState(state *DashboardState) (*DashboardState, error) - CompactState(state any) (any, error) - // DoCreate creates a new resource from the newState. Returns id of the resource and optionally remote state. // If remote state is available as part of the operation, return it; otherwise return nil. // Example: func (r *ResourceVolume) DoCreate(ctx context.Context, newState *catalog.CreateVolumeRequestContent) (string, *catalog.VolumeInfo, error) @@ -109,7 +102,6 @@ type Adapter struct { waitAfterUpdate *calladapt.BoundCaller waitAfterDelete *calladapt.BoundCaller overrideChangeDesc *calladapt.BoundCaller - compactState *calladapt.BoundCaller doResize *calladapt.BoundCaller resourceConfig *ResourceLifecycleConfig @@ -143,7 +135,6 @@ func NewAdapter(typedNil any, resourceType string, client *databricks.WorkspaceC waitAfterUpdate: nil, waitAfterDelete: nil, overrideChangeDesc: nil, - compactState: nil, resourceConfig: GetResourceConfig(resourceType), generatedResourceConfig: GetGeneratedResourceConfig(resourceType), keyedSlices: nil, @@ -235,11 +226,6 @@ func (a *Adapter) initMethods(resource any) error { return err } - a.compactState, err = calladapt.PrepareCall(resource, reflect.TypeFor[IResource](), "CompactState") - if err != nil { - return err - } - a.doResize, err = calladapt.PrepareCall(resource, reflect.TypeFor[IResource](), "DoResize") if err != nil { return err @@ -353,13 +339,6 @@ func (a *Adapter) validate() error { validations = append(validations, "WaitAfterUpdate remoteState return", a.waitAfterUpdate.OutTypes[0], remoteType) } - if a.compactState != nil { - validations = append(validations, - "CompactState input", a.compactState.InTypes[0], stateType, - "CompactState return", a.compactState.OutTypes[0], stateType, - ) - } - err = validateTypes(validations...) if err != nil { return err @@ -577,22 +556,6 @@ func (a *Adapter) KeyedSlices() map[string]any { return a.keyedSlices } -// CompactState returns a copy of state with large, equality-only fields replaced by -// content hashes for compact storage, or state unchanged if the resource does not -// implement CompactState. It is applied both before persisting state and to every -// value entering the state diff, so stored and compared values share one form. -func (a *Adapter) CompactState(state any) (any, error) { - if a.compactState == nil { - return state, nil - } - - outs, err := a.compactState.Call(state) - if err != nil { - return nil, err - } - return outs[0], nil -} - // prepareCallRequired prepares a call and ensures the method is found. func prepareCallRequired(resource any, methodName string) (*calladapt.BoundCaller, error) { caller, err := calladapt.PrepareCall(resource, reflect.TypeFor[IResource](), methodName) diff --git a/bundle/direct/dresources/config.go b/bundle/direct/dresources/config.go index a425d7d7208..6976c0a1696 100644 --- a/bundle/direct/dresources/config.go +++ b/bundle/direct/dresources/config.go @@ -70,6 +70,12 @@ type ResourceLifecycleConfig struct { // BackendDefaults: fields where the backend may set defaults. // When old and new are nil but remote is set, and the remote value matches allowed values (if specified), the change is skipped. BackendDefaults []BackendDefaultRule `yaml:"backend_defaults,omitempty"` + + // HashedInState: fields persisted to state as a content hash ("sha256:") + // instead of their full contents. Only valid for large, equality-only fields + // that are never read back from state (e.g. dashboards' serialized_dashboard, + // which is ignore_remote_changes and re-sent from config on every deploy). + HashedInState []FieldRule `yaml:"hashed_in_state,omitempty"` } // Config is the root configuration structure for resource lifecycle behavior. @@ -91,6 +97,7 @@ var empty = ResourceLifecycleConfig{ NormalizeCase: nil, NormalizeSlash: nil, BackendDefaults: nil, + HashedInState: nil, } func mustParseConfig(data []byte) func() *Config { diff --git a/bundle/direct/dresources/dashboard.go b/bundle/direct/dresources/dashboard.go index 67fd8364a07..dbf492a0bee 100644 --- a/bundle/direct/dresources/dashboard.go +++ b/bundle/direct/dresources/dashboard.go @@ -106,22 +106,6 @@ func (r *ResourceDashboard) RemapState(state *DashboardState) *DashboardState { } } -// CompactState replaces the inlined serialized_dashboard contents with a content -// hash before the state is persisted. The full contents are never needed back from -// state: remote drift is detected via etag (serialized_dashboard is ignore_remote_changes, -// see resources.yml), and a deploy always sends the contents from the plan's new_state, -// never from saved state. Hashing keeps resources.json small for dashboards whose -// serialized form can be several megabytes. -func (r *ResourceDashboard) CompactState(state *DashboardState) (*DashboardState, error) { - hashed, err := hashStateValue(state.SerializedDashboard) - if err != nil { - return nil, err - } - compacted := *state - compacted.SerializedDashboard = hashed - return &compacted, nil -} - func (r *ResourceDashboard) DoRead(ctx context.Context, id string) (*DashboardState, error) { var dashboard *dashboards.Dashboard var publishedDashboard *dashboards.PublishedDashboard diff --git a/bundle/direct/dresources/dashboard_test.go b/bundle/direct/dresources/dashboard_test.go index 2b1ece93566..0a75096c800 100644 --- a/bundle/direct/dresources/dashboard_test.go +++ b/bundle/direct/dresources/dashboard_test.go @@ -27,7 +27,6 @@ func TestDashboardState_JSONSerialization_PublishedField(t *testing.T) { } func TestDashboardCompactState(t *testing.T) { - r := &ResourceDashboard{} state := &DashboardState{ DashboardConfig: resources.DashboardConfig{ DisplayName: "test-dashboard", @@ -36,8 +35,9 @@ func TestDashboardCompactState(t *testing.T) { }, } - compacted, err := r.CompactState(state) + out, err := CompactState(GetResourceConfig("dashboards"), state) require.NoError(t, err) + compacted := out.(*DashboardState) // serialized_dashboard is replaced by a content hash; other fields are preserved. require.IsType(t, "", compacted.SerializedDashboard) @@ -49,24 +49,27 @@ func TestDashboardCompactState(t *testing.T) { assert.Equal(t, `{"pages":[{"name":"p1"}]}`, state.SerializedDashboard) // Compacting is idempotent. - again, err := r.CompactState(compacted) + out2, err := CompactState(GetResourceConfig("dashboards"), compacted) require.NoError(t, err) - assert.Equal(t, compacted.SerializedDashboard, again.SerializedDashboard) + assert.Equal(t, compacted.SerializedDashboard, out2.(*DashboardState).SerializedDashboard) } -// TestDashboardSerializedDashboardIsIgnoreRemoteChanges guards the SHA-only invariant: -// because serialized_dashboard is stored as a content hash, it must never be compared -// against the (full-content) remote value, i.e. it must be declared ignore_remote_changes. -func TestDashboardSerializedDashboardIsIgnoreRemoteChanges(t *testing.T) { +// TestDashboardSerializedDashboardStateRules guards the SHA-only invariant. The field +// must be declared hashed_in_state (so it is persisted as a hash) and, because the hash +// can never equal the full-content remote value, it must also be ignore_remote_changes. +func TestDashboardSerializedDashboardStateRules(t *testing.T) { cfg := GetResourceConfig("dashboards") path := structpath.NewStringKey(nil, "serialized_dashboard") - found := false - for _, rule := range cfg.IgnoreRemoteChanges { - if path.HasPatternPrefix(rule.Field) { - found = true - break + hasRule := func(rules []FieldRule) bool { + for _, rule := range rules { + if path.HasPatternPrefix(rule.Field) { + return true + } } + return false } - assert.True(t, found, "serialized_dashboard must be ignore_remote_changes for SHA-only state to be correct") + + assert.True(t, hasRule(cfg.HashedInState), "serialized_dashboard must be declared hashed_in_state") + assert.True(t, hasRule(cfg.IgnoreRemoteChanges), "serialized_dashboard must be ignore_remote_changes for SHA-only state to be correct") } diff --git a/bundle/direct/dresources/resources.yml b/bundle/direct/dresources/resources.yml index 9c12da9f39b..65965472662 100644 --- a/bundle/direct/dresources/resources.yml +++ b/bundle/direct/dresources/resources.yml @@ -382,6 +382,13 @@ resources: - field: dataset_schema reason: input_only + hashed_in_state: + # serialized_dashboard holds the inlined dashboard JSON (often megabytes). It is + # ignore_remote_changes (etag_based) and re-sent from config on every deploy, so it + # is never read back from state; persist only its content hash to keep state small. + - field: serialized_dashboard + reason: large_content + genie_spaces: ignore_remote_changes: # serialized_space locally (structured YAML) and remotely (JSON string) will differ diff --git a/bundle/direct/dresources/state_compaction.go b/bundle/direct/dresources/state_compaction.go index 691255137c1..419e6ee1f47 100644 --- a/bundle/direct/dresources/state_compaction.go +++ b/bundle/direct/dresources/state_compaction.go @@ -4,7 +4,12 @@ import ( "crypto/sha256" "encoding/hex" "encoding/json" + "errors" + "fmt" + "reflect" "strings" + + "github.com/databricks/cli/libs/structs/structaccess" ) // stateHashPrefix marks a state value that holds a content hash instead of the @@ -38,3 +43,46 @@ func hashStateValue(v any) (any, error) { sum := sha256.Sum256(data) return stateHashPrefix + hex.EncodeToString(sum[:]), nil } + +// CompactState returns a copy of state with every field declared in cfg.HashedInState +// replaced by a content hash, so the state persists only the hash and not the full +// contents. It is applied both before persisting state and to every value entering the +// state diff, so stored and compared values share one form. The caller's value is never +// mutated (it is reused for the deploy API call, which needs the full contents). +// +// Returns state unchanged when no fields are declared or state is not a non-nil pointer. +func CompactState(cfg *ResourceLifecycleConfig, state any) (any, error) { + if cfg == nil || len(cfg.HashedInState) == 0 { + return state, nil + } + + rv := reflect.ValueOf(state) + if rv.Kind() != reflect.Pointer || rv.IsNil() { + return state, nil + } + + // Shallow copy so the caller's value (reused for the deploy) is untouched. + out := reflect.New(rv.Type().Elem()) + out.Elem().Set(rv.Elem()) + compacted := out.Interface() + + for _, rule := range cfg.HashedInState { + field := rule.Field.String() + current, err := structaccess.GetByString(compacted, field) + if err != nil { + if _, ok := errors.AsType[*structaccess.NotFoundError](err); ok { + continue + } + return nil, fmt.Errorf("compacting state field %q: %w", field, err) + } + hashed, err := hashStateValue(current) + if err != nil { + return nil, fmt.Errorf("compacting state field %q: %w", field, err) + } + if err := structaccess.SetByString(compacted, field, hashed); err != nil { + return nil, fmt.Errorf("compacting state field %q: %w", field, err) + } + } + + return compacted, nil +} diff --git a/bundle/direct/dresources/state_compaction_test.go b/bundle/direct/dresources/state_compaction_test.go index d2c5d8b6846..9fd6611cc8b 100644 --- a/bundle/direct/dresources/state_compaction_test.go +++ b/bundle/direct/dresources/state_compaction_test.go @@ -4,10 +4,25 @@ import ( "strings" "testing" + "github.com/databricks/cli/bundle/config/resources" "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" ) +func TestCompactStateNoDeclaredFields(t *testing.T) { + state := &DashboardState{DashboardConfig: resources.DashboardConfig{SerializedDashboard: `{"a":1}`}} + + // A resource type with no hashed_in_state declaration returns the value untouched. + out, err := CompactState(GetResourceConfig("jobs"), state) + require.NoError(t, err) + assert.Same(t, state, out.(*DashboardState)) + + // A nil config is also a no-op. + out, err = CompactState(nil, state) + require.NoError(t, err) + assert.Same(t, state, out.(*DashboardState)) +} + func TestHashStateValue(t *testing.T) { stringHash, err := hashStateValue("hello") require.NoError(t, err) From 3fc40adaa2d765687310ecfc470873fe2f3a2c7f Mon Sep 17 00:00:00 2001 From: Shreyas Goenka Date: Mon, 22 Jun 2026 11:52:38 +0000 Subject: [PATCH 07/13] direct: clarify the compact-saved-state comment in CalculatePlan Spell out that hashing the saved state on read is what lets state written before this change (or before a field was added to hashed_in_state) still diff correctly against the now-hashed local config, and that it is read-only normalization (no state_version bump). Link the PR. Co-authored-by: Isaac --- bundle/direct/bundle_plan.go | 19 ++++++++++++++++--- 1 file changed, 16 insertions(+), 3 deletions(-) diff --git a/bundle/direct/bundle_plan.go b/bundle/direct/bundle_plan.go index e1a8daa067f..36a019ffa81 100644 --- a/bundle/direct/bundle_plan.go +++ b/bundle/direct/bundle_plan.go @@ -197,9 +197,22 @@ func (b *DeploymentBundle) CalculatePlan(ctx context.Context, client *databricks return false } - // Compact every value that enters the diff so they share one comparison form. - // For saved state this also hashes legacy full-content entries on the fly; the - // on-disk entry is rewritten compactly on the next save (lazy migration). + // Replace the hashed_in_state fields with their "sha256:..." hash in the state we + // just read off disk, so the diff below compares like-for-like. + // + // We only ever keep a content hash for big fields like serialized_dashboard, never + // the full contents. The new config we diff against (localState, below) is hashed + // too, so the saved side has to be hashed as well or the two could never match. The + // state we read might still hold the full, un-hashed contents though: either it was + // written by an older CLI from before this change, or the field was only just added + // to hashed_in_state. Hashing it here, on read, lines the two sides up so an + // unchanged resource correctly shows "no change". + // + // This only changes the in-memory copy used for the diff. The on-disk entry keeps + // its full contents until the resource is next saved (which rewrites it as a hash), + // so no state_version bump or explicit migration is needed. + // + // See https://github.com/databricks/cli/pull/5609 savedState, err = dresources.CompactState(adapter.ResourceConfig(), savedState) if err != nil { logdiag.LogError(ctx, fmt.Errorf("%s: compacting saved state: %w", errorPrefix, err)) From 6361754f1c940f13b537771f2a3976e0d825c4d6 Mon Sep 17 00:00:00 2001 From: Shreyas Goenka Date: Mon, 22 Jun 2026 12:01:54 +0000 Subject: [PATCH 08/13] direct: don't compact remote state in the diff; clearer hash token; invariant test - Drop the CompactState call on the remapped remote state in CalculatePlan. Hashing only ever helps the local diff (saved vs config); a hashed_in_state field is always ignore_remote_changes, so its remote diff is discarded regardless. The plan now shows the real remote value instead of a meaningless remote hash. - Mask the state hash in acceptance output as "sha256:[HASH]" instead of the generic "[ALPHANUMID]", so it clearly reads as a content hash. - Add TestHashedInStateImpliesIgnoreRemoteChanges enforcing, for every resource, that any hashed_in_state field is also ignore_remote_changes (a hashed field can only detect local changes; its remote diff must be ignored or it would drift forever). Co-authored-by: Isaac --- .../out.state_after_bind.direct.json | 2 +- .../migrate/dashboards/out.new_state.json | 2 +- .../dashboards/out.plan_after_migrate.json | 6 ++-- .../detect-change/out.plan.direct.json | 6 ++-- .../dashboards/simple/out.plan.direct.json | 6 ++-- .../out.plan.direct.json | 6 ++-- acceptance/bundle/test.toml | 10 +++---- bundle/direct/bundle_plan.go | 10 +++---- .../dresources/state_compaction_test.go | 29 +++++++++++++++++++ 9 files changed, 52 insertions(+), 25 deletions(-) diff --git a/acceptance/bundle/deployment/bind/dashboard/recreation/out.state_after_bind.direct.json b/acceptance/bundle/deployment/bind/dashboard/recreation/out.state_after_bind.direct.json index 77cc6eb24ff..586e33472fb 100644 --- a/acceptance/bundle/deployment/bind/dashboard/recreation/out.state_after_bind.direct.json +++ b/acceptance/bundle/deployment/bind/dashboard/recreation/out.state_after_bind.direct.json @@ -12,7 +12,7 @@ "etag": [ETAG], "parent_path": "/Workspace/Users/[USERNAME]/.bundle/test-bundle-[UNIQUE_NAME]/default/resources", "published": true, - "serialized_dashboard": "sha256:[ALPHANUMID]", + "serialized_dashboard": "sha256:[HASH]", "warehouse_id": "[TEST_DEFAULT_WAREHOUSE_ID]" } } diff --git a/acceptance/bundle/migrate/dashboards/out.new_state.json b/acceptance/bundle/migrate/dashboards/out.new_state.json index 3708743b439..b2e70881a8e 100644 --- a/acceptance/bundle/migrate/dashboards/out.new_state.json +++ b/acceptance/bundle/migrate/dashboards/out.new_state.json @@ -12,7 +12,7 @@ "etag": "[NUMID]", "parent_path": "/Workspace/Users/[USERNAME]", "published": true, - "serialized_dashboard": "sha256:[ALPHANUMID]", + "serialized_dashboard": "sha256:[HASH]", "warehouse_id": "123456" } } diff --git a/acceptance/bundle/migrate/dashboards/out.plan_after_migrate.json b/acceptance/bundle/migrate/dashboards/out.plan_after_migrate.json index 31013aa14b0..130e63ac632 100644 --- a/acceptance/bundle/migrate/dashboards/out.plan_after_migrate.json +++ b/acceptance/bundle/migrate/dashboards/out.plan_after_migrate.json @@ -30,9 +30,9 @@ "serialized_dashboard": { "action": "skip", "reason": "etag_based", - "old": "sha256:[ALPHANUMID]", - "new": "sha256:[ALPHANUMID]", - "remote": "sha256:[ALPHANUMID]" + "old": "sha256:[HASH]", + "new": "sha256:[HASH]", + "remote": "{\"pages\":[{\"displayName\":\"Dashboard test bundle-deploy-dashboard\",\"name\":\"02724bf2\",\"pageType\":\"PAGE_TYPE_CANVAS\"}]}\n" } } } diff --git a/acceptance/bundle/resources/dashboards/detect-change/out.plan.direct.json b/acceptance/bundle/resources/dashboards/detect-change/out.plan.direct.json index eaaafcb6c33..e23e7e58268 100644 --- a/acceptance/bundle/resources/dashboards/detect-change/out.plan.direct.json +++ b/acceptance/bundle/resources/dashboards/detect-change/out.plan.direct.json @@ -39,9 +39,9 @@ "serialized_dashboard": { "action": "skip", "reason": "etag_based", - "old": "sha256:[ALPHANUMID]", - "new": "sha256:[ALPHANUMID]", - "remote": "sha256:[ALPHANUMID]" + "old": "sha256:[HASH]", + "new": "sha256:[HASH]", + "remote": "{}\n" } } } diff --git a/acceptance/bundle/resources/dashboards/simple/out.plan.direct.json b/acceptance/bundle/resources/dashboards/simple/out.plan.direct.json index 1c738b9242e..63a22eb7d08 100644 --- a/acceptance/bundle/resources/dashboards/simple/out.plan.direct.json +++ b/acceptance/bundle/resources/dashboards/simple/out.plan.direct.json @@ -30,9 +30,9 @@ "serialized_dashboard": { "action": "skip", "reason": "etag_based", - "old": "sha256:[ALPHANUMID]", - "new": "sha256:[ALPHANUMID]", - "remote": "sha256:[ALPHANUMID]" + "old": "sha256:[HASH]", + "new": "sha256:[HASH]", + "remote": "{}\n" } } } diff --git a/acceptance/bundle/resources/dashboards/unpublish-out-of-band/out.plan.direct.json b/acceptance/bundle/resources/dashboards/unpublish-out-of-band/out.plan.direct.json index 2ab6c41cbb1..ee588c887d1 100644 --- a/acceptance/bundle/resources/dashboards/unpublish-out-of-band/out.plan.direct.json +++ b/acceptance/bundle/resources/dashboards/unpublish-out-of-band/out.plan.direct.json @@ -46,9 +46,9 @@ "serialized_dashboard": { "action": "skip", "reason": "etag_based", - "old": "sha256:[ALPHANUMID]", - "new": "sha256:[ALPHANUMID]", - "remote": "sha256:[ALPHANUMID]" + "old": "sha256:[HASH]", + "new": "sha256:[HASH]", + "remote": "{\"pages\":[{\"displayName\":\"Test Dashboard\",\"name\":\"test-page\",\"pageType\":\"PAGE_TYPE_CANVAS\"}]}" } } } diff --git a/acceptance/bundle/test.toml b/acceptance/bundle/test.toml index 3439585bd1a..fb81209776e 100644 --- a/acceptance/bundle/test.toml +++ b/acceptance/bundle/test.toml @@ -23,11 +23,11 @@ New = 'os/[OS]' Old = ' cicd/github' New = '' -# serialized_dashboard / serialized_space are persisted in direct-engine state as a -# content hash. Collapse the 64-char digest to a single token, at a low Order so it -# runs before the generic [NUMID] and [DASHBOARD_ID] rules that would otherwise split -# the hex into pieces. +# serialized_dashboard is persisted in direct-engine state as a content hash. Mask the +# 64-char digest with a hash-specific token (not the generic [ALPHANUMID]) so the output +# reads clearly as a sha256 hash, at a low Order so it runs before the generic [NUMID] / +# [DASHBOARD_ID] rules that would otherwise split the hex into pieces. [[Repls]] Old = 'sha256:[0-9a-f]{64}' -New = 'sha256:[ALPHANUMID]' +New = 'sha256:[HASH]' Order = -1 diff --git a/bundle/direct/bundle_plan.go b/bundle/direct/bundle_plan.go index 36a019ffa81..a2381bf07aa 100644 --- a/bundle/direct/bundle_plan.go +++ b/bundle/direct/bundle_plan.go @@ -271,12 +271,10 @@ func (b *DeploymentBundle) CalculatePlan(ctx context.Context, client *databricks return false } - remoteStateComparable, err = dresources.CompactState(adapter.ResourceConfig(), remoteStateComparable) - if err != nil { - logdiag.LogError(ctx, fmt.Errorf("%s: compacting remote state id=%q: %w", errorPrefix, dbentry.ID, err)) - return false - } - + // remoteStateComparable is intentionally NOT compacted. Hashing only ever + // helps the local diff (saved vs config); a hashed_in_state field is always + // ignore_remote_changes, so its remote diff is discarded regardless. Leaving + // the remote value uncompacted keeps the real server value visible in the plan. remoteDiff, err = structdiff.GetStructDiff(remoteStateComparable, localState, adapter.KeyedSlices()) if err != nil { logdiag.LogError(ctx, fmt.Errorf("%s: diffing remote state: %w", errorPrefix, err)) diff --git a/bundle/direct/dresources/state_compaction_test.go b/bundle/direct/dresources/state_compaction_test.go index 9fd6611cc8b..d06529548a4 100644 --- a/bundle/direct/dresources/state_compaction_test.go +++ b/bundle/direct/dresources/state_compaction_test.go @@ -5,10 +5,39 @@ import ( "testing" "github.com/databricks/cli/bundle/config/resources" + "github.com/databricks/cli/libs/structs/structpath" "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" ) +// TestHashedInStateImpliesIgnoreRemoteChanges enforces the core invariant of the +// hashed_in_state mechanism: a field stored as a content hash can only ever detect a +// LOCAL change (saved-hash vs config-hash). Its hash never equals the full-content +// remote value, so its remote diff is meaningless and must be discarded — i.e. the +// field must also be ignore_remote_changes, otherwise every plan would report a +// permanent spurious update for it. This holds for every resource, not just dashboards. +func TestHashedInStateImpliesIgnoreRemoteChanges(t *testing.T) { + for _, cfg := range []*Config{MustLoadConfig(), MustLoadGeneratedConfig()} { + for resourceType, rc := range cfg.Resources { + for _, hashed := range rc.HashedInState { + path, err := structpath.ParsePath(hashed.Field.String()) + require.NoError(t, err, "%s: invalid hashed_in_state field %q", resourceType, hashed.Field) + + covered := false + for _, ignore := range rc.IgnoreRemoteChanges { + if path.HasPatternPrefix(ignore.Field) { + covered = true + break + } + } + assert.True(t, covered, + "%s: field %q is hashed_in_state but not ignore_remote_changes (a hashed field only detects local changes; its remote diff must be ignored)", + resourceType, hashed.Field) + } + } + } +} + func TestCompactStateNoDeclaredFields(t *testing.T) { state := &DashboardState{DashboardConfig: resources.DashboardConfig{SerializedDashboard: `{"a":1}`}} From 8854393a5fca2c4796db4160c3f68b31632f87c6 Mon Sep 17 00:00:00 2001 From: Shreyas Goenka Date: Mon, 22 Jun 2026 12:34:12 +0000 Subject: [PATCH 09/13] direct: make hashed_in_state orthogonal to ignore_remote_changes MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Hash the remapped remote on hashed_in_state fields too, so a hashed field is a hash on all three sides of the diff (saved, config, remote). Once the saved value is a hash, every comparison must be hash-vs-hash — including remote drift (Remote vs New) — or it is hash-vs-content nonsense. With the remote hashed, remote drift is detected as hash != hash, so a field can be hashed_in_state without being ignore_remote_changes. The two are now declared independently. serialized_dashboard needs both, but for unrelated reasons: hashed because the blob is large, ignore_remote_changes because the server normalizes it (so its remote hash never matches the config hash). Dropped the test that wrongly required hashed_in_state to imply ignore_remote_changes. Co-authored-by: Isaac --- .../dashboards/out.plan_after_migrate.json | 2 +- .../detect-change/out.plan.direct.json | 2 +- .../dashboards/simple/out.plan.direct.json | 2 +- .../out.plan.direct.json | 2 +- bundle/direct/bundle_plan.go | 18 +++++++++--- bundle/direct/dresources/README.md | 11 ++++--- bundle/direct/dresources/dashboard_test.go | 10 ++++--- .../dresources/state_compaction_test.go | 29 ------------------- 8 files changed, 29 insertions(+), 47 deletions(-) diff --git a/acceptance/bundle/migrate/dashboards/out.plan_after_migrate.json b/acceptance/bundle/migrate/dashboards/out.plan_after_migrate.json index 130e63ac632..d28383d0bfb 100644 --- a/acceptance/bundle/migrate/dashboards/out.plan_after_migrate.json +++ b/acceptance/bundle/migrate/dashboards/out.plan_after_migrate.json @@ -32,7 +32,7 @@ "reason": "etag_based", "old": "sha256:[HASH]", "new": "sha256:[HASH]", - "remote": "{\"pages\":[{\"displayName\":\"Dashboard test bundle-deploy-dashboard\",\"name\":\"02724bf2\",\"pageType\":\"PAGE_TYPE_CANVAS\"}]}\n" + "remote": "sha256:[HASH]" } } } diff --git a/acceptance/bundle/resources/dashboards/detect-change/out.plan.direct.json b/acceptance/bundle/resources/dashboards/detect-change/out.plan.direct.json index e23e7e58268..525f981d100 100644 --- a/acceptance/bundle/resources/dashboards/detect-change/out.plan.direct.json +++ b/acceptance/bundle/resources/dashboards/detect-change/out.plan.direct.json @@ -41,7 +41,7 @@ "reason": "etag_based", "old": "sha256:[HASH]", "new": "sha256:[HASH]", - "remote": "{}\n" + "remote": "sha256:[HASH]" } } } diff --git a/acceptance/bundle/resources/dashboards/simple/out.plan.direct.json b/acceptance/bundle/resources/dashboards/simple/out.plan.direct.json index 63a22eb7d08..5770db0e58f 100644 --- a/acceptance/bundle/resources/dashboards/simple/out.plan.direct.json +++ b/acceptance/bundle/resources/dashboards/simple/out.plan.direct.json @@ -32,7 +32,7 @@ "reason": "etag_based", "old": "sha256:[HASH]", "new": "sha256:[HASH]", - "remote": "{}\n" + "remote": "sha256:[HASH]" } } } diff --git a/acceptance/bundle/resources/dashboards/unpublish-out-of-band/out.plan.direct.json b/acceptance/bundle/resources/dashboards/unpublish-out-of-band/out.plan.direct.json index ee588c887d1..3aeff915c1c 100644 --- a/acceptance/bundle/resources/dashboards/unpublish-out-of-band/out.plan.direct.json +++ b/acceptance/bundle/resources/dashboards/unpublish-out-of-band/out.plan.direct.json @@ -48,7 +48,7 @@ "reason": "etag_based", "old": "sha256:[HASH]", "new": "sha256:[HASH]", - "remote": "{\"pages\":[{\"displayName\":\"Test Dashboard\",\"name\":\"test-page\",\"pageType\":\"PAGE_TYPE_CANVAS\"}]}" + "remote": "sha256:[HASH]" } } } diff --git a/bundle/direct/bundle_plan.go b/bundle/direct/bundle_plan.go index a2381bf07aa..a22429d90db 100644 --- a/bundle/direct/bundle_plan.go +++ b/bundle/direct/bundle_plan.go @@ -271,10 +271,20 @@ func (b *DeploymentBundle) CalculatePlan(ctx context.Context, client *databricks return false } - // remoteStateComparable is intentionally NOT compacted. Hashing only ever - // helps the local diff (saved vs config); a hashed_in_state field is always - // ignore_remote_changes, so its remote diff is discarded regardless. Leaving - // the remote value uncompacted keeps the real server value visible in the plan. + // Compact the remapped remote on the same fields, so a hashed_in_state field + // is a hash on all three sides of the diff (saved, config, remote). Once the + // saved value is a hash, every comparison must be hash-vs-hash to be meaningful + // — including remote drift (Remote vs New). This is what keeps hashed_in_state + // orthogonal to ignore_remote_changes: remote drift is detected as hash != hash, + // so a field can be hashed without being ignore_remote_changes. serialized_dashboard + // is also ignore_remote_changes, but for an independent reason: the server + // normalizes it, so its remote hash never matches the config hash (see resources.yml). + remoteStateComparable, err = dresources.CompactState(adapter.ResourceConfig(), remoteStateComparable) + if err != nil { + logdiag.LogError(ctx, fmt.Errorf("%s: compacting remote state id=%q: %w", errorPrefix, dbentry.ID, err)) + return false + } + remoteDiff, err = structdiff.GetStructDiff(remoteStateComparable, localState, adapter.KeyedSlices()) if err != nil { logdiag.LogError(ctx, fmt.Errorf("%s: diffing remote state: %w", errorPrefix, err)) diff --git a/bundle/direct/dresources/README.md b/bundle/direct/dresources/README.md index 63b7fbc79a4..f99652673de 100644 --- a/bundle/direct/dresources/README.md +++ b/bundle/direct/dresources/README.md @@ -49,14 +49,13 @@ on field behaviour might result in recreate. See dstate/migrate.go on how to han ## hashed_in_state: storing large fields as content hashes -Declare a field under `hashed_in_state` in `resources.yml` when it holds large content that is only ever compared for equality and never read back from state. The engine then persists only a `sha256:` content hash for that field (via `CompactState`, applied both before saving state and to every value entering the diff), so stored and compared values share one form. The full contents stay in the plan's `new_state` and are sent to the API on every deploy, so the deploy is unaffected. +Declare a field under `hashed_in_state` in `resources.yml` when it holds large content that is only ever compared for equality and never read back from state. The engine then persists only a `sha256:` content hash for that field (via `CompactState`), and — crucially — hashes it on **every** value entering the diff: saved state, the local config, and the remapped remote. Once the saved value is a hash, all three sides must be hashes or the comparisons (`Old==New` for a local change, `Remote==New` for remote drift) would be hash-vs-content nonsense. The full contents stay only in the plan's `new_state` and are sent to the API on every deploy, so the deploy is unaffected. -A field qualifies only if **all** of the following hold: - - it is declared `ignore_remote_changes` (so it is never meaningfully compared against the remote value — typically `etag_based` drift detection), - - it is not read back from state by any code path (e.g. not consumed raw by `OverrideChangeDesc` or by state export), and - - it can be large (a small field gains nothing — the hash placeholder is ~70 bytes). +A field qualifies if it is large (a small field gains nothing — the hash placeholder is ~70 bytes) and is never read back from state by any code path (e.g. not consumed raw by `OverrideChangeDesc` or by state export). -`dashboards.serialized_dashboard` uses this: it inlines a file's contents into config, detects drift via `etag`, and always sends the full contents to the API from the plan's `new_state`. As a result the plan reports the field as a hash (`sha256:...`) rather than full content. No state version bump is needed: legacy full-content state is hashed on read for comparison and rewritten compactly on the next save. Add a test asserting the field is declared both `hashed_in_state` and `ignore_remote_changes` to guard the invariant. +**`hashed_in_state` is orthogonal to `ignore_remote_changes`.** Because the remote side is hashed too, remote drift on a hashed field is detected as `hash != hash` — so a field can be `hashed_in_state` *without* being `ignore_remote_changes` (as long as the server echoes the value back unchanged). The two are declared independently. + +`dashboards.serialized_dashboard` happens to need both, for unrelated reasons: it is `hashed_in_state` because the inlined dashboard JSON is large, and it is *separately* `ignore_remote_changes` because the **server normalizes** it (adds `pageType`, reorders keys) so its remote hash never equals the config hash — drift is detected via `etag` instead. The plan therefore reports the field as a hash on all three sides. No state version bump is needed: legacy full-content state is hashed on read for comparison and rewritten compactly on the next save. ## OverrideChangeDesc diff --git a/bundle/direct/dresources/dashboard_test.go b/bundle/direct/dresources/dashboard_test.go index 0a75096c800..17f93475d28 100644 --- a/bundle/direct/dresources/dashboard_test.go +++ b/bundle/direct/dresources/dashboard_test.go @@ -54,9 +54,11 @@ func TestDashboardCompactState(t *testing.T) { assert.Equal(t, compacted.SerializedDashboard, out2.(*DashboardState).SerializedDashboard) } -// TestDashboardSerializedDashboardStateRules guards the SHA-only invariant. The field -// must be declared hashed_in_state (so it is persisted as a hash) and, because the hash -// can never equal the full-content remote value, it must also be ignore_remote_changes. +// TestDashboardSerializedDashboardStateRules documents that serialized_dashboard carries +// two independent declarations: hashed_in_state (persist only its hash, since the blob is +// large) and ignore_remote_changes (the server normalizes the content, so its remote hash +// never matches the config hash — drift is detected via etag instead). The two are +// orthogonal in general; serialized_dashboard just happens to need both. func TestDashboardSerializedDashboardStateRules(t *testing.T) { cfg := GetResourceConfig("dashboards") path := structpath.NewStringKey(nil, "serialized_dashboard") @@ -71,5 +73,5 @@ func TestDashboardSerializedDashboardStateRules(t *testing.T) { } assert.True(t, hasRule(cfg.HashedInState), "serialized_dashboard must be declared hashed_in_state") - assert.True(t, hasRule(cfg.IgnoreRemoteChanges), "serialized_dashboard must be ignore_remote_changes for SHA-only state to be correct") + assert.True(t, hasRule(cfg.IgnoreRemoteChanges), "serialized_dashboard must be ignore_remote_changes (server normalizes the content)") } diff --git a/bundle/direct/dresources/state_compaction_test.go b/bundle/direct/dresources/state_compaction_test.go index d06529548a4..9fd6611cc8b 100644 --- a/bundle/direct/dresources/state_compaction_test.go +++ b/bundle/direct/dresources/state_compaction_test.go @@ -5,39 +5,10 @@ import ( "testing" "github.com/databricks/cli/bundle/config/resources" - "github.com/databricks/cli/libs/structs/structpath" "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" ) -// TestHashedInStateImpliesIgnoreRemoteChanges enforces the core invariant of the -// hashed_in_state mechanism: a field stored as a content hash can only ever detect a -// LOCAL change (saved-hash vs config-hash). Its hash never equals the full-content -// remote value, so its remote diff is meaningless and must be discarded — i.e. the -// field must also be ignore_remote_changes, otherwise every plan would report a -// permanent spurious update for it. This holds for every resource, not just dashboards. -func TestHashedInStateImpliesIgnoreRemoteChanges(t *testing.T) { - for _, cfg := range []*Config{MustLoadConfig(), MustLoadGeneratedConfig()} { - for resourceType, rc := range cfg.Resources { - for _, hashed := range rc.HashedInState { - path, err := structpath.ParsePath(hashed.Field.String()) - require.NoError(t, err, "%s: invalid hashed_in_state field %q", resourceType, hashed.Field) - - covered := false - for _, ignore := range rc.IgnoreRemoteChanges { - if path.HasPatternPrefix(ignore.Field) { - covered = true - break - } - } - assert.True(t, covered, - "%s: field %q is hashed_in_state but not ignore_remote_changes (a hashed field only detects local changes; its remote diff must be ignored)", - resourceType, hashed.Field) - } - } - } -} - func TestCompactStateNoDeclaredFields(t *testing.T) { state := &DashboardState{DashboardConfig: resources.DashboardConfig{SerializedDashboard: `{"a":1}`}} From 85af285ff8ab3a32c11ba1dc630f8b83099cdb11 Mon Sep 17 00:00:00 2001 From: Shreyas Goenka Date: Mon, 22 Jun 2026 17:05:07 +0000 Subject: [PATCH 10/13] acceptance: cover dashboard hashed-state correctness (plan->apply roundtrip) Add dashboard-state-sha: a direct-only test (terraform does not hash state) that runs both in-memory (READPLAN="") and saved-plan (READPLAN=1, deploy --plan) deploys and asserts: - the create request carries the FULL serialized_dashboard (out.create.serialized.json, identical across READPLAN, so the saved plan applies the real content, not the hash); - the persisted state holds only the content hash (out.state.direct.txt); - the plan keeps full content in new_state but reports the diff as a hash; - a re-plan with no local change is a no-op (remote normalization ignored, etag_based). Local diff detection (edit -> update) is covered by change-serialized-dashboard. Co-authored-by: Isaac --- .../dashboard-state-sha/dashboard.lvdash.json | 1 + .../dashboard-state-sha/databricks.yml.tmpl | 11 +++++ .../out.create.serialized.json | 3 ++ .../out.plan_create.direct.json | 19 +++++++++ .../out.plan_skip.direct.json | 40 +++++++++++++++++++ .../dashboard-state-sha/out.state.direct.txt | 1 + .../dashboard-state-sha/out.test.toml | 5 +++ .../resources/dashboard-state-sha/output.txt | 17 ++++++++ .../resources/dashboard-state-sha/script | 38 ++++++++++++++++++ .../resources/dashboard-state-sha/test.toml | 22 ++++++++++ 10 files changed, 157 insertions(+) create mode 100644 acceptance/bundle/resources/dashboard-state-sha/dashboard.lvdash.json create mode 100644 acceptance/bundle/resources/dashboard-state-sha/databricks.yml.tmpl create mode 100644 acceptance/bundle/resources/dashboard-state-sha/out.create.serialized.json create mode 100644 acceptance/bundle/resources/dashboard-state-sha/out.plan_create.direct.json create mode 100644 acceptance/bundle/resources/dashboard-state-sha/out.plan_skip.direct.json create mode 100644 acceptance/bundle/resources/dashboard-state-sha/out.state.direct.txt create mode 100644 acceptance/bundle/resources/dashboard-state-sha/out.test.toml create mode 100644 acceptance/bundle/resources/dashboard-state-sha/output.txt create mode 100644 acceptance/bundle/resources/dashboard-state-sha/script create mode 100644 acceptance/bundle/resources/dashboard-state-sha/test.toml diff --git a/acceptance/bundle/resources/dashboard-state-sha/dashboard.lvdash.json b/acceptance/bundle/resources/dashboard-state-sha/dashboard.lvdash.json new file mode 100644 index 00000000000..e92b987863c --- /dev/null +++ b/acceptance/bundle/resources/dashboard-state-sha/dashboard.lvdash.json @@ -0,0 +1 @@ +{"pages":[{"name":"main","displayName":"Sales Overview"}]} \ No newline at end of file diff --git a/acceptance/bundle/resources/dashboard-state-sha/databricks.yml.tmpl b/acceptance/bundle/resources/dashboard-state-sha/databricks.yml.tmpl new file mode 100644 index 00000000000..a41576273c0 --- /dev/null +++ b/acceptance/bundle/resources/dashboard-state-sha/databricks.yml.tmpl @@ -0,0 +1,11 @@ +bundle: + name: dashboard-state-sha-$UNIQUE_NAME + +resources: + dashboards: + dashboard1: + display_name: $DASHBOARD_DISPLAY_NAME + warehouse_id: $TEST_DEFAULT_WAREHOUSE_ID + embed_credentials: true + file_path: "dashboard.lvdash.json" + parent_path: /Users/$CURRENT_USER_NAME diff --git a/acceptance/bundle/resources/dashboard-state-sha/out.create.serialized.json b/acceptance/bundle/resources/dashboard-state-sha/out.create.serialized.json new file mode 100644 index 00000000000..e3ad638d7d3 --- /dev/null +++ b/acceptance/bundle/resources/dashboard-state-sha/out.create.serialized.json @@ -0,0 +1,3 @@ +[ + "{\"pages\":[{\"name\":\"main\",\"displayName\":\"Sales Overview\"}]}" +] diff --git a/acceptance/bundle/resources/dashboard-state-sha/out.plan_create.direct.json b/acceptance/bundle/resources/dashboard-state-sha/out.plan_create.direct.json new file mode 100644 index 00000000000..cd992ae4c25 --- /dev/null +++ b/acceptance/bundle/resources/dashboard-state-sha/out.plan_create.direct.json @@ -0,0 +1,19 @@ +{ + "plan_version": 2, + "cli_version": "[DEV_VERSION]", + "plan": { + "resources.dashboards.dashboard1": { + "action": "create", + "new_state": { + "value": { + "display_name": "dashboard-state-sha [UUID]", + "embed_credentials": true, + "parent_path": "/Workspace/Users/[USERNAME]", + "published": true, + "serialized_dashboard": "{\"pages\":[{\"name\":\"main\",\"displayName\":\"Sales Overview\"}]}", + "warehouse_id": "[TEST_DEFAULT_WAREHOUSE_ID]" + } + } + } + } +} diff --git a/acceptance/bundle/resources/dashboard-state-sha/out.plan_skip.direct.json b/acceptance/bundle/resources/dashboard-state-sha/out.plan_skip.direct.json new file mode 100644 index 00000000000..87b3dd06841 --- /dev/null +++ b/acceptance/bundle/resources/dashboard-state-sha/out.plan_skip.direct.json @@ -0,0 +1,40 @@ +{ + "plan_version": 2, + "cli_version": "[DEV_VERSION]", + "lineage": "[UUID]", + "serial": 1, + "plan": { + "resources.dashboards.dashboard1": { + "action": "skip", + "remote_state": { + "create_time": "[TIMESTAMP]", + "dashboard_id": "[DASHBOARD_ID]", + "display_name": "dashboard-state-sha [UUID]", + "embed_credentials": true, + "etag": [ETAG], + "lifecycle_state": "ACTIVE", + "parent_path": "/Workspace/Users/[USERNAME]", + "path": "/Users/[USERNAME]/dashboard-state-sha [UUID].lvdash.json", + "published": true, + "serialized_dashboard": "{\"pages\":[{\"displayName\":\"Sales Overview\",\"name\":\"main\",\"pageType\":\"PAGE_TYPE_CANVAS\"}]}\n", + "update_time": "[TIMESTAMP]", + "warehouse_id": "[TEST_DEFAULT_WAREHOUSE_ID]" + }, + "changes": { + "etag": { + "action": "skip", + "reason": "custom", + "old": [ETAG], + "remote": [ETAG] + }, + "serialized_dashboard": { + "action": "skip", + "reason": "etag_based", + "old": "sha256:[HASH]", + "new": "sha256:[HASH]", + "remote": "sha256:[HASH]" + } + } + } + } +} diff --git a/acceptance/bundle/resources/dashboard-state-sha/out.state.direct.txt b/acceptance/bundle/resources/dashboard-state-sha/out.state.direct.txt new file mode 100644 index 00000000000..c04e637a0b6 --- /dev/null +++ b/acceptance/bundle/resources/dashboard-state-sha/out.state.direct.txt @@ -0,0 +1 @@ +json.state.resources.dashboards.dashboard1.state.serialized_dashboard = "sha256:[HASH]"; diff --git a/acceptance/bundle/resources/dashboard-state-sha/out.test.toml b/acceptance/bundle/resources/dashboard-state-sha/out.test.toml new file mode 100644 index 00000000000..2a4f200653a --- /dev/null +++ b/acceptance/bundle/resources/dashboard-state-sha/out.test.toml @@ -0,0 +1,5 @@ +Local = true +Cloud = true +RequiresWarehouse = true +EnvMatrix.DATABRICKS_BUNDLE_ENGINE = ["direct"] +EnvMatrix.READPLAN = ["", "1"] diff --git a/acceptance/bundle/resources/dashboard-state-sha/output.txt b/acceptance/bundle/resources/dashboard-state-sha/output.txt new file mode 100644 index 00000000000..925d48c5e99 --- /dev/null +++ b/acceptance/bundle/resources/dashboard-state-sha/output.txt @@ -0,0 +1,17 @@ + +>>> [CLI] bundle plan -o json +Uploading bundle files to /Workspace/Users/[USERNAME]/.bundle/dashboard-state-sha-[UNIQUE_NAME]/default/files... +Deploying resources... +Updating deployment state... +Deployment complete! + +>>> [CLI] bundle plan -o json + +>>> [CLI] bundle destroy --auto-approve +The following resources will be deleted: + delete resources.dashboards.dashboard1 + +All files and directories at the following location will be deleted: /Workspace/Users/[USERNAME]/.bundle/dashboard-state-sha-[UNIQUE_NAME]/default + +Deleting files... +Destroy complete! diff --git a/acceptance/bundle/resources/dashboard-state-sha/script b/acceptance/bundle/resources/dashboard-state-sha/script new file mode 100644 index 00000000000..1a43a4f592b --- /dev/null +++ b/acceptance/bundle/resources/dashboard-state-sha/script @@ -0,0 +1,38 @@ +DASHBOARD_DISPLAY_NAME="dashboard-state-sha $(uuid)" +if [ -z "$CLOUD_ENV" ]; then + export TEST_DEFAULT_WAREHOUSE_ID="warehouse-1234" + echo "warehouse-1234:TEST_DEFAULT_WAREHOUSE_ID" >> ACC_REPLS +fi +export DASHBOARD_DISPLAY_NAME +envsubst < databricks.yml.tmpl > databricks.yml + +cleanup() { + trace $CLI bundle destroy --auto-approve + rm -f out.requests.txt +} +trap cleanup EXIT +rm -f out.requests.txt + +# The direct-engine plan keeps the FULL serialized_dashboard in new_state (so it can be +# deployed), and reports the diff against state as a content hash. +trace $CLI bundle plan -o json > out.plan_create.$DATABRICKS_BUNDLE_ENGINE.json + +# Deploy. With READPLAN=1 this applies the SAVED plan file instead of re-planning. The plan +# and the persisted state keep only the hash, but the create API call must still send the +# FULL serialized_dashboard. out.create.serialized.json is identical for READPLAN="" and +# READPLAN=1, which proves the saved plan applies the real content rather than the hash. +# Not traced: the deploy command line differs by READPLAN (--plan vs none). +$CLI bundle deploy $(readplanarg out.plan_create.direct.json) + +DASHBOARD_ID=$($CLI bundle summary --output json | jq -r '.resources.dashboards.dashboard1.id') +echo "$DASHBOARD_ID:DASHBOARD_ID" >> ACC_REPLS + +jq -s '[.[] | select(.method == "POST" and (.path | endswith("/api/2.0/lakeview/dashboards")) and .body.serialized_dashboard != null) | .body.serialized_dashboard]' out.requests.txt > out.create.serialized.json +rm -f out.requests.txt + +# Persisted state holds ONLY the content hash, never the dashboard JSON. +print_state.py | gron.py | grep serialized_dashboard > out.state.$DATABRICKS_BUNDLE_ENGINE.txt + +# Re-plan with no local change: the server normalizes serialized_dashboard, but it is +# ignore_remote_changes (etag_based), so the resource is unchanged (no spurious update). +trace $CLI bundle plan -o json > out.plan_skip.$DATABRICKS_BUNDLE_ENGINE.json diff --git a/acceptance/bundle/resources/dashboard-state-sha/test.toml b/acceptance/bundle/resources/dashboard-state-sha/test.toml new file mode 100644 index 00000000000..3f7a4f7a0a6 --- /dev/null +++ b/acceptance/bundle/resources/dashboard-state-sha/test.toml @@ -0,0 +1,22 @@ +# Direct-engine hashed-state behaviour for dashboards. Kept outside dashboards/ so it can run +# direct-only: terraform does not hash state, and the saved-plan path (deploy --plan, i.e. +# READPLAN=1) is direct-only. RecordRequests is inherited from resources/test.toml. +Local = true +Cloud = true +RequiresWarehouse = true + +EnvMatrix.DATABRICKS_BUNDLE_ENGINE = ["direct"] +EnvMatrix.READPLAN = ["", "1"] + +[Env] +# Prevent MSYS2 from rewriting /Users/... paths on Windows. +MSYS_NO_PATHCONV = "1" + +# Etag can be both negative and positive. +[[Repls]] +Old = "\"[-0-9]{8,}\"" +New = "[ETAG]" + +[[Repls]] +Old = "\"[0-9]{8,}\"" +New = "[ETAG]" From e2cc859ec48ff1c60cacbbb2c5b5fede0472ea5b Mon Sep 17 00:00:00 2001 From: Shreyas Goenka Date: Wed, 24 Jun 2026 11:14:36 +0000 Subject: [PATCH 11/13] direct: add libs/hash and simplify hashed_in_state to field names Extract the "json.Marshal + sha256 -> hex" primitive into a new libs/hash package and route both consumers through it: the dashboard state-compaction hash and libs/cache's fingerprintToHash, which were each doing the same thing independently. hashed_in_state in resources.yml is now a plain list of field names ([]string) rather than {field, reason} rules: it only ever needs the path, so the reason field was dead weight. Co-authored-by: Isaac --- bundle/direct/dresources/config.go | 9 +++-- bundle/direct/dresources/dashboard_test.go | 16 ++++----- bundle/direct/dresources/resources.yml | 8 ++--- bundle/direct/dresources/state_compaction.go | 14 +++----- libs/cache/fingerprint.go | 18 ++-------- libs/hash/hash.go | 21 +++++++++++ libs/hash/hash_test.go | 38 ++++++++++++++++++++ 7 files changed, 80 insertions(+), 44 deletions(-) create mode 100644 libs/hash/hash.go create mode 100644 libs/hash/hash_test.go diff --git a/bundle/direct/dresources/config.go b/bundle/direct/dresources/config.go index 6976c0a1696..48ea2a4d388 100644 --- a/bundle/direct/dresources/config.go +++ b/bundle/direct/dresources/config.go @@ -71,11 +71,10 @@ type ResourceLifecycleConfig struct { // When old and new are nil but remote is set, and the remote value matches allowed values (if specified), the change is skipped. BackendDefaults []BackendDefaultRule `yaml:"backend_defaults,omitempty"` - // HashedInState: fields persisted to state as a content hash ("sha256:") - // instead of their full contents. Only valid for large, equality-only fields - // that are never read back from state (e.g. dashboards' serialized_dashboard, - // which is ignore_remote_changes and re-sent from config on every deploy). - HashedInState []FieldRule `yaml:"hashed_in_state,omitempty"` + // HashedInState: field paths persisted to state as a content hash ("sha256:") + // instead of their full contents. Only valid for large, equality-only fields that + // are never read back from state (e.g. dashboards' serialized_dashboard). + HashedInState []string `yaml:"hashed_in_state,omitempty"` } // Config is the root configuration structure for resource lifecycle behavior. diff --git a/bundle/direct/dresources/dashboard_test.go b/bundle/direct/dresources/dashboard_test.go index 17f93475d28..49895cea916 100644 --- a/bundle/direct/dresources/dashboard_test.go +++ b/bundle/direct/dresources/dashboard_test.go @@ -2,6 +2,7 @@ package dresources import ( "encoding/json" + "slices" "strings" "testing" @@ -63,15 +64,14 @@ func TestDashboardSerializedDashboardStateRules(t *testing.T) { cfg := GetResourceConfig("dashboards") path := structpath.NewStringKey(nil, "serialized_dashboard") - hasRule := func(rules []FieldRule) bool { - for _, rule := range rules { - if path.HasPatternPrefix(rule.Field) { - return true - } + ignoresRemote := false + for _, rule := range cfg.IgnoreRemoteChanges { + if path.HasPatternPrefix(rule.Field) { + ignoresRemote = true + break } - return false } - assert.True(t, hasRule(cfg.HashedInState), "serialized_dashboard must be declared hashed_in_state") - assert.True(t, hasRule(cfg.IgnoreRemoteChanges), "serialized_dashboard must be ignore_remote_changes (server normalizes the content)") + assert.True(t, slices.Contains(cfg.HashedInState, "serialized_dashboard"), "serialized_dashboard must be declared hashed_in_state") + assert.True(t, ignoresRemote, "serialized_dashboard must be ignore_remote_changes (server normalizes the content)") } diff --git a/bundle/direct/dresources/resources.yml b/bundle/direct/dresources/resources.yml index 65965472662..f2f3c8d7b55 100644 --- a/bundle/direct/dresources/resources.yml +++ b/bundle/direct/dresources/resources.yml @@ -382,12 +382,10 @@ resources: - field: dataset_schema reason: input_only + # serialized_dashboard holds the inlined dashboard JSON (often megabytes); persist only + # its content hash. See "hashed_in_state" in README.md. hashed_in_state: - # serialized_dashboard holds the inlined dashboard JSON (often megabytes). It is - # ignore_remote_changes (etag_based) and re-sent from config on every deploy, so it - # is never read back from state; persist only its content hash to keep state small. - - field: serialized_dashboard - reason: large_content + - serialized_dashboard genie_spaces: ignore_remote_changes: diff --git a/bundle/direct/dresources/state_compaction.go b/bundle/direct/dresources/state_compaction.go index 419e6ee1f47..7903d433ec6 100644 --- a/bundle/direct/dresources/state_compaction.go +++ b/bundle/direct/dresources/state_compaction.go @@ -1,14 +1,12 @@ package dresources import ( - "crypto/sha256" - "encoding/hex" - "encoding/json" "errors" "fmt" "reflect" "strings" + "github.com/databricks/cli/libs/hash" "github.com/databricks/cli/libs/structs/structaccess" ) @@ -34,14 +32,11 @@ func hashStateValue(v any) (any, error) { return v, nil } - // json.Marshal is deterministic (map keys are sorted), so equal content always - // produces an equal hash across runs and platforms. - data, err := json.Marshal(v) + h, err := hash.JSON(v) if err != nil { return nil, err } - sum := sha256.Sum256(data) - return stateHashPrefix + hex.EncodeToString(sum[:]), nil + return stateHashPrefix + h, nil } // CompactState returns a copy of state with every field declared in cfg.HashedInState @@ -66,8 +61,7 @@ func CompactState(cfg *ResourceLifecycleConfig, state any) (any, error) { out.Elem().Set(rv.Elem()) compacted := out.Interface() - for _, rule := range cfg.HashedInState { - field := rule.Field.String() + for _, field := range cfg.HashedInState { current, err := structaccess.GetByString(compacted, field) if err != nil { if _, ok := errors.AsType[*structaccess.NotFoundError](err); ok { diff --git a/libs/cache/fingerprint.go b/libs/cache/fingerprint.go index 8f866405122..90a1cbd71fb 100644 --- a/libs/cache/fingerprint.go +++ b/libs/cache/fingerprint.go @@ -1,22 +1,8 @@ package cache -import ( - "crypto/sha256" - "encoding/hex" - "encoding/json" - "fmt" -) +import "github.com/databricks/cli/libs/hash" // fingerprintToHash converts any struct to a deterministic string representation for use as a cache key. func fingerprintToHash(fingerprint any) (string, error) { - // Marshal map - json.Marshal sorts map keys alphabetically - data, err := json.Marshal(fingerprint) - if err != nil { - return "", fmt.Errorf("failed to marshal normalized fingerprint: %w", err) - } - - // Hash for consistent, reasonably-sized key. - // hash[:] converts the [32]byte array returned by Sum256 to a []byte slice. - hash := sha256.Sum256(data) - return hex.EncodeToString(hash[:]), nil + return hash.JSON(fingerprint) } diff --git a/libs/hash/hash.go b/libs/hash/hash.go new file mode 100644 index 00000000000..4b6b0d6a48e --- /dev/null +++ b/libs/hash/hash.go @@ -0,0 +1,21 @@ +// Package hash provides deterministic content-hashing helpers shared across the CLI. +package hash + +import ( + "crypto/sha256" + "encoding/hex" + "encoding/json" + "fmt" +) + +// JSON returns the hex-encoded SHA-256 of v's JSON encoding. json.Marshal sorts +// map keys, so equal content yields an equal hash across runs and platforms, +// which makes the result safe to use as a cache key or a state fingerprint. +func JSON(v any) (string, error) { + data, err := json.Marshal(v) + if err != nil { + return "", fmt.Errorf("hashing value: %w", err) + } + sum := sha256.Sum256(data) + return hex.EncodeToString(sum[:]), nil +} diff --git a/libs/hash/hash_test.go b/libs/hash/hash_test.go new file mode 100644 index 00000000000..1d6da96c7f3 --- /dev/null +++ b/libs/hash/hash_test.go @@ -0,0 +1,38 @@ +package hash + +import ( + "testing" + + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" +) + +func TestJSONStable(t *testing.T) { + // Equal content (including maps built in a different key order) must hash equal. + a, err := JSON(map[string]int{"a": 1, "b": 2}) + require.NoError(t, err) + b, err := JSON(map[string]int{"b": 2, "a": 1}) + require.NoError(t, err) + assert.Equal(t, a, b) +} + +func TestJSONDistinct(t *testing.T) { + a, err := JSON("hello") + require.NoError(t, err) + b, err := JSON("world") + require.NoError(t, err) + assert.NotEqual(t, a, b) +} + +func TestJSONKnownVector(t *testing.T) { + // sha256 of the JSON encoding of the string "hello", i.e. of the 7 bytes `"hello"`. + h, err := JSON("hello") + require.NoError(t, err) + assert.Equal(t, "5aa762ae383fbb727af3c7a36d4940a5b8c40a989452d2304fc958ff3f354e7a", h) +} + +func TestJSONUnmarshalableErrors(t *testing.T) { + // Channels cannot be JSON-encoded, so JSON must surface the marshal error. + _, err := JSON(make(chan int)) + assert.Error(t, err) +} From bc443768e84bdff56c153304c9cb6aebbd8a8501 Mon Sep 17 00:00:00 2001 From: Shreyas Goenka Date: Wed, 24 Jun 2026 11:14:37 +0000 Subject: [PATCH 12/13] acceptance: cover the --plan update cycle for dashboard hashed state Add an edit-and-redeploy cycle to dashboard-state-sha: re-render the dashboard, plan, and deploy (both in-memory and via a saved --plan file). This proves the local diff detects the change as a hash, the persisted state keeps only the new hash, and the update API call still carries the full new serialized_dashboard - identical across READPLAN variants, so the saved plan applies real content rather than the hash. Co-authored-by: Isaac --- .../dashboard-state-sha/dashboard.lvdash.json | 1 - .../dashboard-state-sha/dashboard.tmpl | 1 + .../out.create.serialized.json | 2 +- .../out.plan_create.direct.json | 2 +- .../out.plan_update.direct.json | 49 +++++++++++++++++++ .../out.state_after_update.direct.txt | 1 + .../out.update.serialized.json | 3 ++ .../resources/dashboard-state-sha/output.txt | 9 ++++ .../resources/dashboard-state-sha/script | 26 ++++++++-- .../resources/dashboard-state-sha/test.toml | 3 ++ 10 files changed, 90 insertions(+), 7 deletions(-) delete mode 100644 acceptance/bundle/resources/dashboard-state-sha/dashboard.lvdash.json create mode 100644 acceptance/bundle/resources/dashboard-state-sha/dashboard.tmpl create mode 100644 acceptance/bundle/resources/dashboard-state-sha/out.plan_update.direct.json create mode 100644 acceptance/bundle/resources/dashboard-state-sha/out.state_after_update.direct.txt create mode 100644 acceptance/bundle/resources/dashboard-state-sha/out.update.serialized.json diff --git a/acceptance/bundle/resources/dashboard-state-sha/dashboard.lvdash.json b/acceptance/bundle/resources/dashboard-state-sha/dashboard.lvdash.json deleted file mode 100644 index e92b987863c..00000000000 --- a/acceptance/bundle/resources/dashboard-state-sha/dashboard.lvdash.json +++ /dev/null @@ -1 +0,0 @@ -{"pages":[{"name":"main","displayName":"Sales Overview"}]} \ No newline at end of file diff --git a/acceptance/bundle/resources/dashboard-state-sha/dashboard.tmpl b/acceptance/bundle/resources/dashboard-state-sha/dashboard.tmpl new file mode 100644 index 00000000000..bf5c695c6d2 --- /dev/null +++ b/acceptance/bundle/resources/dashboard-state-sha/dashboard.tmpl @@ -0,0 +1 @@ +{"pages":[{"name":"main","displayName":"$DASH_TITLE"}]} diff --git a/acceptance/bundle/resources/dashboard-state-sha/out.create.serialized.json b/acceptance/bundle/resources/dashboard-state-sha/out.create.serialized.json index e3ad638d7d3..c21b142583a 100644 --- a/acceptance/bundle/resources/dashboard-state-sha/out.create.serialized.json +++ b/acceptance/bundle/resources/dashboard-state-sha/out.create.serialized.json @@ -1,3 +1,3 @@ [ - "{\"pages\":[{\"name\":\"main\",\"displayName\":\"Sales Overview\"}]}" + "{\"pages\":[{\"name\":\"main\",\"displayName\":\"Sales Overview\"}]}\n" ] diff --git a/acceptance/bundle/resources/dashboard-state-sha/out.plan_create.direct.json b/acceptance/bundle/resources/dashboard-state-sha/out.plan_create.direct.json index cd992ae4c25..8b6597ef8bb 100644 --- a/acceptance/bundle/resources/dashboard-state-sha/out.plan_create.direct.json +++ b/acceptance/bundle/resources/dashboard-state-sha/out.plan_create.direct.json @@ -10,7 +10,7 @@ "embed_credentials": true, "parent_path": "/Workspace/Users/[USERNAME]", "published": true, - "serialized_dashboard": "{\"pages\":[{\"name\":\"main\",\"displayName\":\"Sales Overview\"}]}", + "serialized_dashboard": "{\"pages\":[{\"name\":\"main\",\"displayName\":\"Sales Overview\"}]}\n", "warehouse_id": "[TEST_DEFAULT_WAREHOUSE_ID]" } } diff --git a/acceptance/bundle/resources/dashboard-state-sha/out.plan_update.direct.json b/acceptance/bundle/resources/dashboard-state-sha/out.plan_update.direct.json new file mode 100644 index 00000000000..2255cfe0224 --- /dev/null +++ b/acceptance/bundle/resources/dashboard-state-sha/out.plan_update.direct.json @@ -0,0 +1,49 @@ +{ + "plan_version": 2, + "cli_version": "[DEV_VERSION]", + "lineage": "[UUID]", + "serial": 1, + "plan": { + "resources.dashboards.dashboard1": { + "action": "update", + "new_state": { + "value": { + "display_name": "dashboard-state-sha [UUID]", + "embed_credentials": true, + "parent_path": "/Workspace/Users/[USERNAME]", + "published": true, + "serialized_dashboard": "{\"pages\":[{\"name\":\"main\",\"displayName\":\"Sales Overview v2\"}]}\n", + "warehouse_id": "[TEST_DEFAULT_WAREHOUSE_ID]" + } + }, + "remote_state": { + "create_time": "[TIMESTAMP]", + "dashboard_id": "[DASHBOARD_ID]", + "display_name": "dashboard-state-sha [UUID]", + "embed_credentials": true, + "etag": [ETAG], + "lifecycle_state": "ACTIVE", + "parent_path": "/Workspace/Users/[USERNAME]", + "path": "/Users/[USERNAME]/dashboard-state-sha [UUID].lvdash.json", + "published": true, + "serialized_dashboard": "{\"pages\":[{\"displayName\":\"Sales Overview\",\"name\":\"main\",\"pageType\":\"PAGE_TYPE_CANVAS\"}]}\n", + "update_time": "[TIMESTAMP]", + "warehouse_id": "[TEST_DEFAULT_WAREHOUSE_ID]" + }, + "changes": { + "etag": { + "action": "skip", + "reason": "custom", + "old": [ETAG], + "remote": [ETAG] + }, + "serialized_dashboard": { + "action": "update", + "old": "sha256:[HASH]", + "new": "sha256:[HASH]", + "remote": "sha256:[HASH]" + } + } + } + } +} diff --git a/acceptance/bundle/resources/dashboard-state-sha/out.state_after_update.direct.txt b/acceptance/bundle/resources/dashboard-state-sha/out.state_after_update.direct.txt new file mode 100644 index 00000000000..c04e637a0b6 --- /dev/null +++ b/acceptance/bundle/resources/dashboard-state-sha/out.state_after_update.direct.txt @@ -0,0 +1 @@ +json.state.resources.dashboards.dashboard1.state.serialized_dashboard = "sha256:[HASH]"; diff --git a/acceptance/bundle/resources/dashboard-state-sha/out.update.serialized.json b/acceptance/bundle/resources/dashboard-state-sha/out.update.serialized.json new file mode 100644 index 00000000000..5c8339e26b5 --- /dev/null +++ b/acceptance/bundle/resources/dashboard-state-sha/out.update.serialized.json @@ -0,0 +1,3 @@ +[ + "{\"pages\":[{\"name\":\"main\",\"displayName\":\"Sales Overview v2\"}]}\n" +] diff --git a/acceptance/bundle/resources/dashboard-state-sha/output.txt b/acceptance/bundle/resources/dashboard-state-sha/output.txt index 925d48c5e99..bd79441ffe3 100644 --- a/acceptance/bundle/resources/dashboard-state-sha/output.txt +++ b/acceptance/bundle/resources/dashboard-state-sha/output.txt @@ -1,12 +1,21 @@ +=== Create: plan keeps full content in new_state but reports the diff as a hash >>> [CLI] bundle plan -o json Uploading bundle files to /Workspace/Users/[USERNAME]/.bundle/dashboard-state-sha-[UNIQUE_NAME]/default/files... Deploying resources... Updating deployment state... Deployment complete! +=== Re-plan with no local change is a no-op (server normalization is ignored, etag_based) >>> [CLI] bundle plan -o json +=== Edit serialized_dashboard: the hash changes, so the local diff detects an update +>>> [CLI] bundle plan -o json +Uploading bundle files to /Workspace/Users/[USERNAME]/.bundle/dashboard-state-sha-[UNIQUE_NAME]/default/files... +Deploying resources... +Updating deployment state... +Deployment complete! + >>> [CLI] bundle destroy --auto-approve The following resources will be deleted: delete resources.dashboards.dashboard1 diff --git a/acceptance/bundle/resources/dashboard-state-sha/script b/acceptance/bundle/resources/dashboard-state-sha/script index 1a43a4f592b..39fb090585e 100644 --- a/acceptance/bundle/resources/dashboard-state-sha/script +++ b/acceptance/bundle/resources/dashboard-state-sha/script @@ -6,6 +6,10 @@ fi export DASHBOARD_DISPLAY_NAME envsubst < databricks.yml.tmpl > databricks.yml +render_dashboard() { + DASH_TITLE="$1" envsubst < dashboard.tmpl > dashboard.lvdash.json +} + cleanup() { trace $CLI bundle destroy --auto-approve rm -f out.requests.txt @@ -13,8 +17,8 @@ cleanup() { trap cleanup EXIT rm -f out.requests.txt -# The direct-engine plan keeps the FULL serialized_dashboard in new_state (so it can be -# deployed), and reports the diff against state as a content hash. +title "Create: plan keeps full content in new_state but reports the diff as a hash" +render_dashboard "Sales Overview" trace $CLI bundle plan -o json > out.plan_create.$DATABRICKS_BUNDLE_ENGINE.json # Deploy. With READPLAN=1 this applies the SAVED plan file instead of re-planning. The plan @@ -33,6 +37,20 @@ rm -f out.requests.txt # Persisted state holds ONLY the content hash, never the dashboard JSON. print_state.py | gron.py | grep serialized_dashboard > out.state.$DATABRICKS_BUNDLE_ENGINE.txt -# Re-plan with no local change: the server normalizes serialized_dashboard, but it is -# ignore_remote_changes (etag_based), so the resource is unchanged (no spurious update). +title "Re-plan with no local change is a no-op (server normalization is ignored, etag_based)" trace $CLI bundle plan -o json > out.plan_skip.$DATABRICKS_BUNDLE_ENGINE.json + +title "Edit serialized_dashboard: the hash changes, so the local diff detects an update" +render_dashboard "Sales Overview v2" +trace $CLI bundle plan -o json > out.plan_update.$DATABRICKS_BUNDLE_ENGINE.json + +# Apply the update (in-memory or from the saved plan). The update API call must carry the +# FULL new content; out.update.serialized.json is identical across READPLAN, proving the +# saved plan applies the real content for updates too. +$CLI bundle deploy $(readplanarg out.plan_update.direct.json) + +jq -s '[.[] | select(.method == "PATCH" and (.path | contains("/api/2.0/lakeview/dashboards")) and .body.serialized_dashboard != null) | .body.serialized_dashboard]' out.requests.txt > out.update.serialized.json +rm -f out.requests.txt + +# State holds the NEW content hash after the update. +print_state.py | gron.py | grep serialized_dashboard > out.state_after_update.$DATABRICKS_BUNDLE_ENGINE.txt diff --git a/acceptance/bundle/resources/dashboard-state-sha/test.toml b/acceptance/bundle/resources/dashboard-state-sha/test.toml index 3f7a4f7a0a6..ecdc1a0a77a 100644 --- a/acceptance/bundle/resources/dashboard-state-sha/test.toml +++ b/acceptance/bundle/resources/dashboard-state-sha/test.toml @@ -5,6 +5,9 @@ Local = true Cloud = true RequiresWarehouse = true +# dashboard.lvdash.json is rendered from dashboard.tmpl in the script (re-rendered for the edit). +Ignore = ["dashboard.lvdash.json"] + EnvMatrix.DATABRICKS_BUNDLE_ENGINE = ["direct"] EnvMatrix.READPLAN = ["", "1"] From 58037af29721a246a5b8916dc9cbb67b8cbb79e8 Mon Sep 17 00:00:00 2001 From: Shreyas Goenka Date: Wed, 24 Jun 2026 11:17:36 +0000 Subject: [PATCH 13/13] direct: pin the legacy full-content state migration invariant Assert that a pre-existing full-content state and a matching config compact to the same hash, so re-planning against hashed-on-read legacy state shows no spurious diff. Co-authored-by: Isaac --- .../dresources/state_compaction_test.go | 20 +++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/bundle/direct/dresources/state_compaction_test.go b/bundle/direct/dresources/state_compaction_test.go index 9fd6611cc8b..598f7bd0113 100644 --- a/bundle/direct/dresources/state_compaction_test.go +++ b/bundle/direct/dresources/state_compaction_test.go @@ -23,6 +23,26 @@ func TestCompactStateNoDeclaredFields(t *testing.T) { assert.Same(t, state, out.(*DashboardState)) } +func TestCompactStateMigratesLegacyFullContent(t *testing.T) { + // A pre-existing state still holds the full serialized_dashboard; the matching config + // holds identical content. Compaction must map both to the same hash, so a diff computed + // after the legacy state is hashed-on-read shows no spurious change (and the next save + // rewrites the state compactly). + content := `{"pages":[{"name":"p1"}]}` + legacy := &DashboardState{DashboardConfig: resources.DashboardConfig{SerializedDashboard: content}} + config := &DashboardState{DashboardConfig: resources.DashboardConfig{SerializedDashboard: content}} + + cfg := GetResourceConfig("dashboards") + compactedLegacy, err := CompactState(cfg, legacy) + require.NoError(t, err) + compactedConfig, err := CompactState(cfg, config) + require.NoError(t, err) + + legacyHash := compactedLegacy.(*DashboardState).SerializedDashboard + assert.Equal(t, compactedConfig.(*DashboardState).SerializedDashboard, legacyHash) + assert.True(t, strings.HasPrefix(legacyHash.(string), stateHashPrefix)) +} + func TestHashStateValue(t *testing.T) { stringHash, err := hashStateValue("hello") require.NoError(t, err)