From b685ec2042ff64ebe127eea0e44c7a066b0d10d9 Mon Sep 17 00:00:00 2001 From: Andrew Nester Date: Fri, 15 May 2026 11:55:38 +0200 Subject: [PATCH 01/31] Immutable folder support in DABs --- .../immutable_workspace_paths/databricks.yml | 21 ++ .../immutable_workspace_paths/out.test.toml | 3 + .../immutable_workspace_paths/output.txt | 30 +++ .../validate/immutable_workspace_paths/script | 1 + .../immutable_workspace_paths/src/main.py | 1 + .../immutable_workspace_paths/test.toml | 3 + bundle/config/bundle.go | 7 + .../mutator/resolve_variable_references.go | 24 ++ .../resolve_variable_references_test.go | 45 ++++ .../process_static_resources.go | 15 +- .../resourcemutator/resource_mutator.go | 19 +- bundle/config/mutator/translate_paths.go | 17 +- bundle/config/workspace.go | 6 + bundle/deploy/metadata/compute.go | 1 + bundle/deploy/metadata/load.go | 57 +++++ bundle/deploy/snapshot/path.go | 227 ++++++++++++++++++ bundle/deploy/snapshot/path_test.go | 136 +++++++++++ bundle/deploy/snapshot/upload.go | 69 ++++++ bundle/internal/schema/annotations.yml | 3 + bundle/metadata/metadata.go | 3 + bundle/phases/build.go | 8 + bundle/phases/deploy.go | 31 ++- bundle/phases/destroy.go | 15 +- libs/filer/snapshot_client.go | 103 ++++++++ 24 files changed, 835 insertions(+), 10 deletions(-) create mode 100644 acceptance/bundle/validate/immutable_workspace_paths/databricks.yml create mode 100644 acceptance/bundle/validate/immutable_workspace_paths/out.test.toml create mode 100644 acceptance/bundle/validate/immutable_workspace_paths/output.txt create mode 100644 acceptance/bundle/validate/immutable_workspace_paths/script create mode 100644 acceptance/bundle/validate/immutable_workspace_paths/src/main.py create mode 100644 acceptance/bundle/validate/immutable_workspace_paths/test.toml create mode 100644 bundle/deploy/metadata/load.go create mode 100644 bundle/deploy/snapshot/path.go create mode 100644 bundle/deploy/snapshot/path_test.go create mode 100644 
bundle/deploy/snapshot/upload.go create mode 100644 libs/filer/snapshot_client.go diff --git a/acceptance/bundle/validate/immutable_workspace_paths/databricks.yml b/acceptance/bundle/validate/immutable_workspace_paths/databricks.yml new file mode 100644 index 00000000000..39c25fb365f --- /dev/null +++ b/acceptance/bundle/validate/immutable_workspace_paths/databricks.yml @@ -0,0 +1,21 @@ +bundle: + name: my-bundle + immutable: true + +sync: + exclude: + # Test framework files that are not part of the bundle source. + - "repls.json" + - "user_repls.json" + - "script" + - "*.toml" + +resources: + jobs: + my_job: + name: my job + tasks: + - task_key: my_task + existing_cluster_id: "0101-120000-aaaaaaaa" + spark_python_task: + python_file: ./src/main.py diff --git a/acceptance/bundle/validate/immutable_workspace_paths/out.test.toml b/acceptance/bundle/validate/immutable_workspace_paths/out.test.toml new file mode 100644 index 00000000000..f784a183258 --- /dev/null +++ b/acceptance/bundle/validate/immutable_workspace_paths/out.test.toml @@ -0,0 +1,3 @@ +Local = true +Cloud = false +EnvMatrix.DATABRICKS_BUNDLE_ENGINE = ["terraform", "direct"] diff --git a/acceptance/bundle/validate/immutable_workspace_paths/output.txt b/acceptance/bundle/validate/immutable_workspace_paths/output.txt new file mode 100644 index 00000000000..f6a8004bf2f --- /dev/null +++ b/acceptance/bundle/validate/immutable_workspace_paths/output.txt @@ -0,0 +1,30 @@ + +>>> [CLI] bundle validate -o json +Warning: Pattern user_repls.json does not match any files + at sync.exclude[1] + in databricks.yml:9:7 + +{ + "workspace": { + "artifact_path": "/Workspace/Users/[USERNAME]/.bundle/my-bundle/default/artifacts", + "current_user": { + "domain_friendly_name": "[USERNAME]", + "id": "[USERID]", + "short_name": "[USERNAME]", + "userName": "[USERNAME]" + }, + "file_path": "/Workspace/Users/[USERNAME]/.bundle/my-bundle/default/files", + "resource_path": 
"/Workspace/Users/[USERNAME]/.bundle/my-bundle/default/resources", + "root_path": "/Workspace/Users/[USERNAME]/.bundle/my-bundle/default", + "state_path": "/Workspace/Users/[USERNAME]/.bundle/my-bundle/default/state" + }, + "tasks": [ + { + "existing_cluster_id": "0101-120000-aaaaaaaa", + "spark_python_task": { + "python_file": "${workspace.snapshot_path}/src/files/src/main.py" + }, + "task_key": "my_task" + } + ] +} diff --git a/acceptance/bundle/validate/immutable_workspace_paths/script b/acceptance/bundle/validate/immutable_workspace_paths/script new file mode 100644 index 00000000000..df056fa9b99 --- /dev/null +++ b/acceptance/bundle/validate/immutable_workspace_paths/script @@ -0,0 +1 @@ +trace $CLI bundle validate -o json | jq '{workspace: .workspace, tasks: .resources.jobs.my_job.tasks}' diff --git a/acceptance/bundle/validate/immutable_workspace_paths/src/main.py b/acceptance/bundle/validate/immutable_workspace_paths/src/main.py new file mode 100644 index 00000000000..11b15b1a458 --- /dev/null +++ b/acceptance/bundle/validate/immutable_workspace_paths/src/main.py @@ -0,0 +1 @@ +print("hello") diff --git a/acceptance/bundle/validate/immutable_workspace_paths/test.toml b/acceptance/bundle/validate/immutable_workspace_paths/test.toml new file mode 100644 index 00000000000..85e02532c93 --- /dev/null +++ b/acceptance/bundle/validate/immutable_workspace_paths/test.toml @@ -0,0 +1,3 @@ +Local = true +Cloud = false +Ignore = [".databricks"] diff --git a/bundle/config/bundle.go b/bundle/config/bundle.go index ce6d25bfe62..79f92b6b435 100644 --- a/bundle/config/bundle.go +++ b/bundle/config/bundle.go @@ -59,4 +59,11 @@ type Bundle struct { // A stable generated UUID for the bundle. This is normally serialized by // Databricks first party template when a user runs bundle init. Uuid string `json:"uuid,omitempty"` + + // Immutable specifies that bundle files and artifacts are uploaded as a single + // immutable snapshot rather than being synced individually. 
When true, the + // deployment calls /api/2.0/repos/snapshots with a zip containing all files + // and sets workspace.file_path and workspace.artifact_path to the returned + // content-addressed path. validate and plan make no mutative API calls. + Immutable bool `json:"immutable,omitempty"` } diff --git a/bundle/config/mutator/resolve_variable_references.go b/bundle/config/mutator/resolve_variable_references.go index 113f0576394..70a330c8a3c 100644 --- a/bundle/config/mutator/resolve_variable_references.go +++ b/bundle/config/mutator/resolve_variable_references.go @@ -59,6 +59,11 @@ type resolveVariableReferences struct { includeResources bool artifactsReferenceUsed bool + + // excludePaths lists variable reference paths (e.g. "workspace.file_path") whose + // resolution should be skipped. References to these paths remain unresolved so a + // later mutator can set the value and re-run resolution. + excludePaths []string } func ResolveVariableReferencesOnlyResources(prefixes ...string) bundle.Mutator { @@ -74,6 +79,22 @@ func ResolveVariableReferencesOnlyResources(prefixes ...string) bundle.Mutator { } } +// ResolveVariableReferencesOnlyResourcesExcluding resolves variable references in +// resources while leaving references to the specified paths unresolved. +// Used by ProcessStaticResources for immutable bundles so that ${workspace.snapshot_path} +// is not resolved during Initialize; it is resolved in the Deploy phase after +// snapshot.Upload() sets workspace.snapshot_path to the API-assigned path. 
+func ResolveVariableReferencesOnlyResourcesExcluding(excludePaths ...string) bundle.Mutator { + return &resolveVariableReferences{ + prefixes: defaultPrefixes, + lookupFn: lookup, + extraRounds: maxResolutionRounds - 1, + pattern: dyn.NewPattern(dyn.Key("resources")), + includeResources: true, + excludePaths: excludePaths, + } +} + func ResolveVariableReferencesWithoutResources(prefixes ...string) bundle.Mutator { if len(prefixes) == 0 { prefixes = defaultPrefixes @@ -229,6 +250,9 @@ func (m *resolveVariableReferences) resolveOnce(b *bundle.Bundle, prefixes []dyn // Perform resolution only if the path starts with one of the specified prefixes. if slices.ContainsFunc(prefixes, path.HasPrefix) { + if slices.Contains(m.excludePaths, path.String()) { + return dyn.InvalidValue, dynvar.ErrSkipResolution + } value, err := m.lookupFn(normalized, path, b) hasUpdates = hasUpdates || (err == nil && value.IsValid()) return value, err diff --git a/bundle/config/mutator/resolve_variable_references_test.go b/bundle/config/mutator/resolve_variable_references_test.go index 876980e9486..f682419f32a 100644 --- a/bundle/config/mutator/resolve_variable_references_test.go +++ b/bundle/config/mutator/resolve_variable_references_test.go @@ -6,7 +6,9 @@ import ( "github.com/databricks/cli/bundle" "github.com/databricks/cli/bundle/config" "github.com/databricks/cli/bundle/config/resources" + "github.com/databricks/databricks-sdk-go/service/jobs" "github.com/databricks/databricks-sdk-go/service/pipelines" + "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" ) @@ -63,3 +65,46 @@ func TestResolveVariableReferencesWithSourceLinkedDeployment(t *testing.T) { testCase.assert(t, b) } } + +// TestResolveVariableReferencesExcludePaths verifies that paths listed in excludePaths +// are skipped during resolution and left as unresolved variable references. 
+// This is used for immutable bundles (see resourceVarResolver) so that +// ${workspace.file_path} and ${workspace.artifact_path} can be resolved later +// (in the Deploy phase, after snapshot.Upload() has returned the API-assigned snapshot path). +func TestResolveVariableReferencesExcludePaths(t *testing.T) { + b := &bundle.Bundle{ + Config: config.Root{ + Workspace: config.Workspace{ + FilePath: "/snapshot/path/src/files", + ArtifactPath: "/snapshot/path/src/artifacts", + }, + Resources: config.Resources{ + Jobs: map[string]*resources.Job{ + "job1": { + JobSettings: jobs.JobSettings{ + Tasks: []jobs.Task{ + { + SparkPythonTask: &jobs.SparkPythonTask{ + PythonFile: "${workspace.file_path}/main.py", + }, + }, + }, + }, + }, + }, + }, + }, + } + + // With exclusion: ${workspace.file_path} should remain unresolved. + diags := bundle.Apply(t.Context(), b, ResolveVariableReferencesOnlyResourcesExcluding("workspace.file_path", "workspace.artifact_path")) + require.NoError(t, diags.Error()) + assert.Equal(t, "${workspace.file_path}/main.py", b.Config.Resources.Jobs["job1"].Tasks[0].SparkPythonTask.PythonFile, + "reference should remain unresolved when path is excluded") + + // Without exclusion: ${workspace.file_path} should resolve normally.
+ diags = bundle.Apply(t.Context(), b, ResolveVariableReferencesOnlyResources()) + require.NoError(t, diags.Error()) + assert.Equal(t, "/snapshot/path/src/files/main.py", b.Config.Resources.Jobs["job1"].Tasks[0].SparkPythonTask.PythonFile, + "reference should resolve after exclusion is lifted") +} diff --git a/bundle/config/mutator/resourcemutator/process_static_resources.go b/bundle/config/mutator/resourcemutator/process_static_resources.go index 3f859fd11cd..94c823c75b8 100644 --- a/bundle/config/mutator/resourcemutator/process_static_resources.go +++ b/bundle/config/mutator/resourcemutator/process_static_resources.go @@ -38,13 +38,26 @@ func (p processStaticResources) Apply(ctx context.Context, b *bundle.Bundle) dia // we need to resolve variables because they can change path values: // - variable can be used a prefix // - path can be part of a complex variable value + + // For immutable bundles, defer resolving ${workspace.snapshot_path} in resources. + // The actual snapshot path is only known after snapshot.Upload() returns the + // API-assigned path in the deploy phase. 
+ var resourceResolver bundle.Mutator + if b.Config.Bundle.Immutable { + resourceResolver = mutator.ResolveVariableReferencesOnlyResourcesExcluding( + "workspace.snapshot_path", + ) + } else { + resourceResolver = mutator.ResolveVariableReferencesOnlyResources() + } + bundle.ApplySeqContext( ctx, b, // Reads (dynamic): * (strings) (searches for variable references in string values) // Updates (dynamic): resources.* (strings) (resolves variable references to their actual values) // Resolves variable references in 'resources' using bundle, workspace, and variables prefixes - mutator.ResolveVariableReferencesOnlyResources(), + resourceResolver, // After normal variable resolution, log all ${resources.*} references mutator.LogResourceReferences(), mutator.NormalizePaths(), diff --git a/bundle/config/mutator/resourcemutator/resource_mutator.go b/bundle/config/mutator/resourcemutator/resource_mutator.go index 2eb292cfbb0..31afb65ffa2 100644 --- a/bundle/config/mutator/resourcemutator/resource_mutator.go +++ b/bundle/config/mutator/resourcemutator/resource_mutator.go @@ -127,6 +127,19 @@ func applyInitializeMutators(ctx context.Context, b *bundle.Bundle) { ) } +// resourceVarResolver returns a mutator that resolves variable references in +// resources. For immutable bundles, ${workspace.file_path} and +// ${workspace.artifact_path} are excluded: the API assigns the snapshot path +// after upload, so they must remain as-is until snapshot.Upload() has run. +func resourceVarResolver(b *bundle.Bundle) bundle.Mutator { + if b.Config.Bundle.Immutable { + return mutator.ResolveVariableReferencesOnlyResourcesExcluding( + "workspace.file_path", "workspace.artifact_path", + ) + } + return mutator.ResolveVariableReferencesOnlyResources() +} + // Normalization is applied multiple times if resource is modified during initialization // // If bundle is modified outside of 'resources' section, these changes are discarded. 
@@ -139,8 +152,10 @@ func applyNormalizeMutators(ctx context.Context, b *bundle.Bundle) { // Reads (dynamic): * (strings) (searches for variable references in string values) // Updates (dynamic): resources.* (strings) (resolves variable references to their actual values) - // Resolves variable references in 'resources' using bundle, workspace, and variables prefixes - mutator.ResolveVariableReferencesOnlyResources(), + // Resolves variable references in 'resources' using bundle, workspace, and variables prefixes. + // For immutable bundles, ${workspace.file_path} and ${workspace.artifact_path} are left + // unresolved: the actual snapshot path is assigned by the API after upload, not pre-computed. + resourceVarResolver(b), // Reads (dynamic): resources.pipelines.*.libraries (checks for notebook.path and file.path fields) // Updates (dynamic): resources.pipelines.*.libraries (expands glob patterns in path fields to multiple library entries) diff --git a/bundle/config/mutator/translate_paths.go b/bundle/config/mutator/translate_paths.go index 99dd75dd787..1d38cee2361 100644 --- a/bundle/config/mutator/translate_paths.go +++ b/bundle/config/mutator/translate_paths.go @@ -320,12 +320,21 @@ func (t *translateContext) rewriteValue(ctx context.Context, p dyn.Path, v dyn.V return dyn.NewValue(out, v.Locations()), nil } +// snapshotFilesRoot is the remote root used for file/notebook path translation +// in immutable bundles. References to this placeholder are resolved after +// snapshot.Upload() sets workspace.snapshot_path to the API-assigned path. +const snapshotFilesRoot = "${workspace.snapshot_path}/src/files" + func applyTranslations(ctx context.Context, b *bundle.Bundle, t *translateContext, translations []func(context.Context, dyn.Value) (dyn.Value, error)) diag.Diagnostics { - // Set the remote root to the sync root if source-linked deployment is enabled. - // Otherwise, set it to the workspace file path. 
- if config.IsExplicitlyEnabled(t.b.Config.Presets.SourceLinkedDeployment) { + switch { + case b.Config.Bundle.Immutable: + // Use a placeholder root that is resolved after snapshot.Upload() sets + // workspace.snapshot_path. This defers path computation until the actual + // content-addressed path is known. + t.remoteRoot = snapshotFilesRoot + case config.IsExplicitlyEnabled(t.b.Config.Presets.SourceLinkedDeployment): t.remoteRoot = t.b.SyncRootPath - } else { + default: t.remoteRoot = t.b.Config.Workspace.FilePath } diff --git a/bundle/config/workspace.go b/bundle/config/workspace.go index 9cd397f13aa..284bd0afe02 100644 --- a/bundle/config/workspace.go +++ b/bundle/config/workspace.go @@ -80,6 +80,12 @@ type Workspace struct { // Remote workspace path for deployment state. // This defaults to "${workspace.root}/state". StatePath string `json:"state_path,omitempty"` + + // SnapshotPath is the workspace path of the immutable snapshot uploaded during + // deployment. It is set by snapshot.Upload() and used to resolve + // ${workspace.snapshot_path} references in resource configurations. + // Only populated for bundles with bundle.immutable = true. 
+ SnapshotPath string `json:"snapshot_path,omitempty" bundle:"internal"` } type User struct { diff --git a/bundle/deploy/metadata/compute.go b/bundle/deploy/metadata/compute.go index cb7be9811c4..08a45d7a17c 100644 --- a/bundle/deploy/metadata/compute.go +++ b/bundle/deploy/metadata/compute.go @@ -105,6 +105,7 @@ func (m *compute) Apply(ctx context.Context, b *bundle.Bundle) diag.Diagnostics // Set file upload destination of the bundle in metadata b.Metadata.Config.Workspace.FilePath = b.Config.Workspace.FilePath + b.Metadata.Config.Workspace.SnapshotPath = b.Config.Workspace.SnapshotPath // In source-linked deployment files are not copied and resources use source files, therefore we use sync path as file path in metadata if config.IsExplicitlyEnabled(b.Config.Presets.SourceLinkedDeployment) { b.Metadata.Config.Workspace.FilePath = b.SyncRootPath diff --git a/bundle/deploy/metadata/load.go b/bundle/deploy/metadata/load.go new file mode 100644 index 00000000000..3fe4eb1c3bd --- /dev/null +++ b/bundle/deploy/metadata/load.go @@ -0,0 +1,64 @@ +package metadata + +import ( + "context" + "encoding/json" + "errors" + "io" + "io/fs" + + "github.com/databricks/cli/bundle" + "github.com/databricks/cli/bundle/metadata" + "github.com/databricks/cli/libs/diag" + "github.com/databricks/cli/libs/filer" +) + +type load struct{} + +// Load reads the metadata file written during the last deploy and populates +// fields on the bundle that are not available locally (e.g. workspace.snapshot_path +// for immutable bundles, which is only known after snapshot.Upload() ran).
+func Load() bundle.Mutator { + return &load{} +} + +func (m *load) Name() string { + return "metadata.Load" +} + +// Apply restores workspace.snapshot_path from the metadata file in workspace.state_path, if one exists. +func (m *load) Apply(ctx context.Context, b *bundle.Bundle) diag.Diagnostics { + f, err := filer.NewWorkspaceFilesClient(b.WorkspaceClient(ctx), b.Config.Workspace.StatePath) + if err != nil { + return diag.FromErr(err) + } + + r, err := f.Read(ctx, metadataFileName) + if errors.Is(err, fs.ErrNotExist) { + // Missing metadata file means the bundle was never deployed or was + // deployed by an older CLI version that didn't write metadata. Treat + // it as a no-op so destroy can still proceed. + return nil + } + if err != nil { + // Any other read failure is a real error; do not mask it as "never deployed". + return diag.FromErr(err) + } + defer r.Close() + + raw, err := io.ReadAll(r) + if err != nil { + return diag.FromErr(err) + } + + var md metadata.Metadata + if err := json.Unmarshal(raw, &md); err != nil { + return diag.FromErr(err) + } + + if md.Config.Workspace.SnapshotPath != "" { + b.Config.Workspace.SnapshotPath = md.Config.Workspace.SnapshotPath + } + + return nil +} diff --git a/bundle/deploy/snapshot/path.go b/bundle/deploy/snapshot/path.go new file mode 100644 index 00000000000..820f20776ce --- /dev/null +++ b/bundle/deploy/snapshot/path.go @@ -0,0 +1,227 @@ +package snapshot + +import ( + "archive/zip" + "bytes" + "context" + "crypto/sha256" + "encoding/hex" + "fmt" + "io" + "os" + "path/filepath" + "slices" + "strings" + "time" + + "github.com/databricks/cli/bundle" + "github.com/databricks/cli/libs/fileset" + "github.com/databricks/cli/libs/git" + "github.com/databricks/cli/libs/notebook" + "github.com/databricks/cli/libs/set" +) + +// zipEpoch is a fixed timestamp used for all zip entries to make the zip content-addressed +// and reproducible: the same file content always produces the same hash regardless of when +// the zip was built or the file's mtime. +var zipEpoch = time.Date(2000, 1, 1, 0, 0, 0, 0, time.UTC) + +// snapshotBaseDir is the workspace path under which all immutable snapshots are stored. +// The full path for a snapshot is snapshotBaseDir/{bundle_name}/{snapshot_id}.
+const snapshotBaseDir = "/Workspace/Shared/.snapshots" + +// BundleZip builds the zip that is uploaded to the snapshot API. +// It contains: +// - all files from the bundle sync root under the "files/" prefix, +// selected with the same git-aware + include/exclude logic as files.Upload +// - all built artifact files under the "artifacts/.internal/" prefix +// +// The snapshot ID is always IDFromContent(BundleZip(b)), ensuring the +// pre-calculated path and the uploaded path are derived from the same content. +func BundleZip(ctx context.Context, b *bundle.Bundle) ([]byte, error) { + var buf bytes.Buffer + zw := zip.NewWriter(&buf) + + if err := addSyncRootToZip(ctx, zw, b); err != nil { + return nil, err + } + if err := addArtifactsToZip(zw, b); err != nil { + return nil, err + } + + if err := zw.Close(); err != nil { + return nil, err + } + return buf.Bytes(), nil +} + +// IDFromContent returns the SHA-256 hex digest of content. +func IDFromContent(content []byte) string { + h := sha256.Sum256(content) + return hex.EncodeToString(h[:]) +} + +// SnapshotID builds the bundle zip and returns its SHA-256 hex digest. +// Called after artifacts are built so that ApplyImmutableWorkspacePaths and +// snapshot.Upload both hash identical content. +func SnapshotID(ctx context.Context, b *bundle.Bundle) (string, error) { + content, err := BundleZip(ctx, b) + if err != nil { + return "", err + } + return IDFromContent(content), nil +} + +// SnapshotPath returns the workspace path for a snapshot: +// snapshotBaseDir/{bundleName}/{snapshotID}. +func SnapshotPath(bundleName, snapshotID string) string { + return snapshotBaseDir + "/" + bundleName + "/" + snapshotID +} + +// syncFiles returns the list of files to include in the snapshot zip using the +// same git-aware include/exclude logic as files.Upload (libs/sync). 
+func syncFiles(ctx context.Context, b *bundle.Bundle) ([]fileset.File, error) { + // Use git.NewFileSet so that .gitignore rules are respected, matching the + // behaviour of the normal files.Upload sync path. + // Avoid passing an empty/nil paths slice: git.NewFileSet forwards it to + // fileset.New whose variadic default ("." if no args) is bypassed when the + // caller explicitly passes a nil slice. The SyncDefaultPath mutator always + // sets Sync.Paths to ["."] in the normal pipeline; we replicate that here + // so BundleZip works even when the bundle hasn't gone through the full pipeline. + var gitFS *git.FileSet + var err error + if len(b.Config.Sync.Paths) > 0 { + gitFS, err = git.NewFileSet(ctx, b.WorktreeRoot, b.SyncRoot, b.Config.Sync.Paths) + } else { + gitFS, err = git.NewFileSet(ctx, b.WorktreeRoot, b.SyncRoot) + } + if err != nil { + return nil, fmt.Errorf("build file set: %w", err) + } + + all := set.NewSetF(func(f fileset.File) string { + return f.Relative + }) + + gitFiles, err := gitFS.Files() + if err != nil { + return nil, fmt.Errorf("list sync files: %w", err) + } + all.Add(gitFiles...) + + if len(b.Config.Sync.Include) > 0 { + includeFS, err := fileset.NewGlobSet(b.SyncRoot, b.Config.Sync.Include) + if err != nil { + return nil, fmt.Errorf("build include set: %w", err) + } + include, err := includeFS.Files() + if err != nil { + return nil, fmt.Errorf("list include files: %w", err) + } + all.Add(include...) + } + + if len(b.Config.Sync.Exclude) > 0 { + excludeFS, err := fileset.NewGlobSet(b.SyncRoot, b.Config.Sync.Exclude) + if err != nil { + return nil, fmt.Errorf("build exclude set: %w", err) + } + exclude, err := excludeFS.Files() + if err != nil { + return nil, fmt.Errorf("list exclude files: %w", err) + } + for _, f := range exclude { + all.Remove(f) + } + } + + files := all.Iter() + // Sort for a stable zip (same content → same hash regardless of map iteration order). 
+ slices.SortFunc(files, func(a, b fileset.File) int { + return strings.Compare(a.Relative, b.Relative) + }) + return files, nil +} + +func addSyncRootToZip(ctx context.Context, zw *zip.Writer, b *bundle.Bundle) error { + files, err := syncFiles(ctx, b) + if err != nil { + return err + } + + for _, f := range files { + rc, err := b.SyncRoot.Open(f.Relative) + if err != nil { + return fmt.Errorf("open %s: %w", f.Relative, err) + } + + // Notebooks are stored without their file extension, matching how + // Databricks workspace imports them (e.g. sample_notebook.ipynb → + // sample_notebook). Job tasks reference the extension-stripped path. + entryPath := filepath.ToSlash(f.Relative) + if isNb, _, nbErr := notebook.DetectWithFS(b.SyncRoot, f.Relative); nbErr == nil && isNb { + entryPath = strings.TrimSuffix(entryPath, filepath.Ext(entryPath)) + } + + h := &zip.FileHeader{ + Name: "files/" + entryPath, + Method: zip.Deflate, + Modified: zipEpoch, + } + w, err := zw.CreateHeader(h) + if err != nil { + rc.Close() + return fmt.Errorf("zip entry for %s: %w", f.Relative, err) + } + _, err = io.Copy(w, rc) + rc.Close() + if err != nil { + return fmt.Errorf("write %s: %w", f.Relative, err) + } + } + return nil +} + +func addArtifactsToZip(zw *zip.Writer, b *bundle.Bundle) error { + // Visit artifacts in sorted key order; ranging over the map directly would make the zip (and its hash) non-reproducible. + names := make([]string, 0, len(b.Config.Artifacts)) + for name := range b.Config.Artifacts { + names = append(names, name) + } + slices.Sort(names) + for _, name := range names { + for _, af := range b.Config.Artifacts[name].Files { + source := af.Source + if af.Patched != "" { + source = af.Patched + } + // ".internal" matches libraries.InternalDirName so that ReplaceWithRemotePath + // produces library paths that resolve correctly inside the snapshot.
+ if err := addLocalFileToZip(zw, source, "artifacts/.internal"); err != nil { + return err + } + } + } + return nil +} + +func addLocalFileToZip(zw *zip.Writer, localPath, zipPrefix string) error { + f, err := os.Open(localPath) + if err != nil { + return fmt.Errorf("open %s: %w", localPath, err) + } + defer f.Close() + + entryName := zipPrefix + "/" + filepath.Base(localPath) + h := &zip.FileHeader{ + Name: entryName, + Method: zip.Deflate, + Modified: zipEpoch, + } + w, err := zw.CreateHeader(h) + if err != nil { + return fmt.Errorf("zip entry %s: %w", entryName, err) + } + _, err = io.Copy(w, f) + return err +} diff --git a/bundle/deploy/snapshot/path_test.go b/bundle/deploy/snapshot/path_test.go new file mode 100644 index 00000000000..f5bc2ab610c --- /dev/null +++ b/bundle/deploy/snapshot/path_test.go @@ -0,0 +1,136 @@ +package snapshot_test + +import ( + "archive/zip" + "bytes" + "os" + "path/filepath" + "slices" + "testing" + + "github.com/databricks/cli/bundle" + "github.com/databricks/cli/bundle/config" + "github.com/databricks/cli/bundle/deploy/snapshot" + "github.com/databricks/cli/libs/vfs" + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" +) + +func makeBundleWithFiles(t *testing.T, files map[string]string) *bundle.Bundle { + t.Helper() + dir := t.TempDir() + for name, content := range files { + p := filepath.Join(dir, name) + require.NoError(t, os.MkdirAll(filepath.Dir(p), 0o755)) + require.NoError(t, os.WriteFile(p, []byte(content), 0o644)) + } + root := vfs.MustNew(dir) + return &bundle.Bundle{ + BundleRootPath: dir, + SyncRoot: root, + // WorktreeRoot = SyncRoot is the fallback used by LoadGitDetails when + // there is no git repository. 
+ WorktreeRoot: root, + Config: config.Root{}, + } +} + +func TestBundleZipIsDeterministic(t *testing.T) { + b := makeBundleWithFiles(t, map[string]string{ + "main.py": "print('hello')", + "src/task.py": "def run(): pass", + }) + + zip1, err := snapshot.BundleZip(t.Context(), b) + require.NoError(t, err) + zip2, err := snapshot.BundleZip(t.Context(), b) + require.NoError(t, err) + + assert.Equal(t, zip1, zip2, "BundleZip must produce identical bytes for identical content") +} + +func TestBundleZipChangesWithContent(t *testing.T) { + b1 := makeBundleWithFiles(t, map[string]string{"main.py": "v1"}) + b2 := makeBundleWithFiles(t, map[string]string{"main.py": "v2"}) + + zip1, err := snapshot.BundleZip(t.Context(), b1) + require.NoError(t, err) + zip2, err := snapshot.BundleZip(t.Context(), b2) + require.NoError(t, err) + + assert.NotEqual(t, zip1, zip2, "different file content must produce different zips") +} + +func TestBundleZipRespectsExcludes(t *testing.T) { + b := makeBundleWithFiles(t, map[string]string{ + "main.py": "print('hello')", + "skip.json": `{"id": "runtime-generated"}`, + }) + bExclude := makeBundleWithFiles(t, map[string]string{ + "main.py": "print('hello')", + "skip.json": `{"id": "runtime-generated"}`, + }) + bExclude.Config.Sync.Exclude = []string{"*.json"} + + zipAll, err := snapshot.BundleZip(t.Context(), b) + require.NoError(t, err) + zipExcl, err := snapshot.BundleZip(t.Context(), bExclude) + require.NoError(t, err) + + // The zip without the excluded file should be smaller and different. 
+ assert.NotEqual(t, zipAll, zipExcl) + assert.Less(t, len(zipExcl), len(zipAll)) +} + +func TestIDFromContent(t *testing.T) { + id := snapshot.IDFromContent([]byte("hello")) + // SHA-256 of "hello" + assert.Equal(t, "2cf24dba5fb0a30e26e83b2ac5b9e29e1b161e5c1fa7425e73043362938b9824", id) + assert.Len(t, id, 64, "SHA-256 hex must be 64 characters") +} + +func TestSnapshotPath(t *testing.T) { + p := snapshot.SnapshotPath("my-bundle", "abc123") + assert.Equal(t, "/Workspace/Shared/.snapshots/my-bundle/abc123", p) +} + +func TestSnapshotIDMatchesBundleZipHash(t *testing.T) { + b := makeBundleWithFiles(t, map[string]string{"task.py": "x = 1"}) + + zipContent, err := snapshot.BundleZip(t.Context(), b) + require.NoError(t, err) + expectedID := snapshot.IDFromContent(zipContent) + + id, err := snapshot.SnapshotID(t.Context(), b) + require.NoError(t, err) + + assert.Equal(t, expectedID, id) +} + +func zipEntryNames(t *testing.T, zipContent []byte) []string { + t.Helper() + r, err := zip.NewReader(bytes.NewReader(zipContent), int64(len(zipContent))) + require.NoError(t, err) + names := make([]string, len(r.File)) + for i, f := range r.File { + names[i] = f.Name + } + return names +} + +func TestBundleZipStripsNotebookExtensions(t *testing.T) { + // Minimal valid Jupyter notebook content. 
+ ipynb := `{"nbformat": 4, "nbformat_minor": 5, "cells": [], "metadata": {}}` + b := makeBundleWithFiles(t, map[string]string{ + "src/my_notebook.ipynb": ipynb, + "src/script.py": "print('hello')", + }) + + zipContent, err := snapshot.BundleZip(t.Context(), b) + require.NoError(t, err) + + names := zipEntryNames(t, zipContent) + assert.True(t, slices.Contains(names, "files/src/my_notebook"), "notebook should have extension stripped") + assert.False(t, slices.Contains(names, "files/src/my_notebook.ipynb"), "notebook should not appear with .ipynb extension") + assert.True(t, slices.Contains(names, "files/src/script.py"), "regular Python file should keep its extension") +} diff --git a/bundle/deploy/snapshot/upload.go b/bundle/deploy/snapshot/upload.go new file mode 100644 index 00000000000..73d84eb9703 --- /dev/null +++ b/bundle/deploy/snapshot/upload.go @@ -0,0 +1,69 @@ +package snapshot + +import ( + "context" + "fmt" + "path" + + "github.com/databricks/cli/bundle" + "github.com/databricks/cli/libs/cmdio" + "github.com/databricks/cli/libs/diag" + "github.com/databricks/cli/libs/filer" + "github.com/databricks/cli/libs/log" +) + +type snapshotUpload struct { + // uploader allows test injection of a custom SnapshotUploader. + uploader filer.SnapshotUploader +} + +// Upload returns a mutator that builds the bundle zip, uploads it via +// /api/2.0/repos/snapshots, and updates workspace.file_path and +// workspace.artifact_path to the content-addressed location returned by the API. +func Upload() bundle.Mutator { + return &snapshotUpload{} +} + +// UploadWithClient returns an upload mutator backed by the provided SnapshotUploader. +// This is intended for use in tests. 
+func UploadWithClient(uploader filer.SnapshotUploader) bundle.Mutator { + return &snapshotUpload{uploader: uploader} +} + +func (m *snapshotUpload) Name() string { + return "snapshot.Upload" +} + +func (m *snapshotUpload) Apply(ctx context.Context, b *bundle.Bundle) diag.Diagnostics { + uploader := m.uploader + if uploader == nil { + var err error + uploader, err = filer.NewSnapshotUploader(b.WorkspaceClient(ctx)) + if err != nil { + return diag.FromErr(err) + } + } + + cmdio.LogString(ctx, "Uploading immutable bundle snapshot...") + + zipContent, err := BundleZip(ctx, b) + if err != nil { + return diag.FromErr(fmt.Errorf("failed to build snapshot zip: %w", err)) + } + snapshotID := IDFromContent(zipContent) + log.Debugf(ctx, "snapshot.Upload: snapshotID=%s zip=%d bytes", snapshotID, len(zipContent)) + + info, err := uploader.Upload(ctx, b.Config.Bundle.Name, snapshotID, b.Config.Workspace.CurrentUser.UserName, zipContent) + if err != nil { + return diag.FromErr(err) + } + + log.Infof(ctx, "Snapshot uploaded to %s", info.Path) + + // The API unpacks the zip under a "src" subdirectory. + b.Config.Workspace.SnapshotPath = info.Path + b.Config.Workspace.FilePath = path.Join(info.Path, "src", "files") + b.Config.Workspace.ArtifactPath = path.Join(info.Path, "src", "artifacts") + + return nil +} diff --git a/bundle/internal/schema/annotations.yml b/bundle/internal/schema/annotations.yml index 2f28ca27596..e63f6625d1d 100644 --- a/bundle/internal/schema/annotations.yml +++ b/bundle/internal/schema/annotations.yml @@ -50,6 +50,9 @@ github.com/databricks/cli/bundle/config.Bundle: The Git version control details that are associated with your bundle. "markdown_description": |- The Git version control details that are associated with your bundle. For supported attributes see [\_](/dev-tools/bundles/settings.md#git). + "immutable": + "description": |- + Whether to upload bundle files and artifacts as a single immutable snapshot. 
When true, all files are packaged into a zip and uploaded via the snapshot API, and workspace.file_path and workspace.artifact_path are set to the returned content-addressed path. The validate and plan commands make no mutative API calls when this is enabled. "name": "description": |- The name of the bundle. diff --git a/bundle/metadata/metadata.go b/bundle/metadata/metadata.go index 1c61cb093f0..e5549472696 100644 --- a/bundle/metadata/metadata.go +++ b/bundle/metadata/metadata.go @@ -15,6 +15,9 @@ type Bundle struct { type Workspace struct { FilePath string `json:"file_path"` + // SnapshotPath is the workspace path of the immutable snapshot uploaded + // during deployment. Only populated for bundles with bundle.immutable = true. + SnapshotPath string `json:"snapshot_path,omitempty"` } type Resource struct { diff --git a/bundle/phases/build.go b/bundle/phases/build.go index 5a32435f8f1..a0649e373f0 100644 --- a/bundle/phases/build.go +++ b/bundle/phases/build.go @@ -14,6 +14,7 @@ import ( "github.com/databricks/cli/libs/logdiag" ) + type LibLocationMap map[string][]libraries.LocationToUpdate // The build phase builds artifacts. @@ -24,6 +25,7 @@ func Build(ctx context.Context, b *bundle.Bundle) LibLocationMap { scripts.Execute(config.ScriptPreBuild), artifacts.Build(), scripts.Execute(config.ScriptPostBuild), + mutator.ResolveVariableReferencesWithoutResources( "artifacts", ), @@ -41,6 +43,12 @@ func Build(ctx context.Context, b *bundle.Bundle) LibLocationMap { libraries.SwitchToPatchedWheels(), ) + // For immutable bundles, library remote paths are set in the deploy phase + // after snapshot.Upload() provides the content-addressed workspace.artifact_path. 
+ if b.Config.Bundle.Immutable { + return nil + } + libs, diags := libraries.ReplaceWithRemotePath(ctx, b) for _, diag := range diags { logdiag.LogDiag(ctx, diag) diff --git a/bundle/phases/deploy.go b/bundle/phases/deploy.go index b4d70ede5ad..5534354bc61 100644 --- a/bundle/phases/deploy.go +++ b/bundle/phases/deploy.go @@ -8,10 +8,12 @@ import ( "github.com/databricks/cli/bundle/artifacts" "github.com/databricks/cli/bundle/config" "github.com/databricks/cli/bundle/config/engine" + "github.com/databricks/cli/bundle/config/mutator" "github.com/databricks/cli/bundle/deploy" "github.com/databricks/cli/bundle/deploy/files" "github.com/databricks/cli/bundle/deploy/lock" "github.com/databricks/cli/bundle/deploy/metadata" + "github.com/databricks/cli/bundle/deploy/snapshot" "github.com/databricks/cli/bundle/deploy/terraform" "github.com/databricks/cli/bundle/deployplan" "github.com/databricks/cli/bundle/direct" @@ -131,13 +133,38 @@ func Deploy(ctx context.Context, b *bundle.Bundle, outputHandler sync.OutputHand bundle.ApplyContext(ctx, b, lock.Release(lock.GoalDeploy)) }() - uploadLibraries(ctx, b, libs) + if b.Config.Bundle.Immutable { + // Upload all source files and built artifacts as a single immutable snapshot. + // The API assigns a content-addressed path, so workspace.snapshot_path (and + // derived workspace.file_path / workspace.artifact_path) are only known after + // upload. Resolve variable references in resources and set library remote paths + // once the actual paths are available. 
+ bundle.ApplySeqContext(ctx, b, + snapshot.Upload(), + mutator.ResolveVariableReferencesOnlyResources(), + ) + if !logdiag.HasError(ctx) { + _, libDiags := libraries.ReplaceWithRemotePath(ctx, b) + for _, d := range libDiags { + logdiag.LogDiag(ctx, d) + } + } + } else { + uploadLibraries(ctx, b, libs) + } + if logdiag.HasError(ctx) { return } + if !b.Config.Bundle.Immutable { + bundle.ApplySeqContext(ctx, b, files.Upload(outputHandler)) + if logdiag.HasError(ctx) { + return + } + } + bundle.ApplySeqContext(ctx, b, - files.Upload(outputHandler), deploy.StateUpdate(), deploy.StatePush(), permissions.ApplyWorkspaceRootPermissions(), diff --git a/bundle/phases/destroy.go b/bundle/phases/destroy.go index 91640ac6cad..71d5468c253 100644 --- a/bundle/phases/destroy.go +++ b/bundle/phases/destroy.go @@ -10,6 +10,7 @@ import ( "github.com/databricks/cli/bundle/config/mutator" "github.com/databricks/cli/bundle/deploy/files" "github.com/databricks/cli/bundle/deploy/lock" + deploymetadata "github.com/databricks/cli/bundle/deploy/metadata" "github.com/databricks/cli/bundle/deploy/terraform" "github.com/databricks/cli/bundle/deployplan" "github.com/databricks/cli/bundle/direct" @@ -123,17 +124,29 @@ func Destroy(ctx context.Context, b *bundle.Bundle, engine engine.EngineType) { }() if !engine.IsDirect() { - bundle.ApplySeqContext(ctx, b, + mutators := []bundle.Mutator{ // We need to resolve artifact variable (how we do it in build phase) // because some of the to-be-destroyed resource might use this variable. // Not resolving might lead to terraform "Reference to undeclared resource" error mutator.ResolveVariableReferencesWithoutResources("artifacts"), mutator.ResolveVariableReferencesOnlyResources("artifacts"), + } + + if b.Config.Bundle.Immutable { + // For immutable bundles, resource paths contain ${workspace.snapshot_path} + // which was set during deploy by snapshot.Upload(). 
Load it from the stored + // metadata so it can be resolved before Terraform processes the config. + mutators = append([]bundle.Mutator{deploymetadata.Load()}, mutators...) + mutators = append(mutators, mutator.ResolveVariableReferencesOnlyResources()) + } + mutators = append(mutators, terraform.Interpolate(), terraform.Write(), terraform.Plan(terraform.PlanGoal("destroy")), ) + + bundle.ApplySeqContext(ctx, b, mutators...) } if logdiag.HasError(ctx) { diff --git a/libs/filer/snapshot_client.go b/libs/filer/snapshot_client.go new file mode 100644 index 00000000000..a7d84891cb0 --- /dev/null +++ b/libs/filer/snapshot_client.go @@ -0,0 +1,103 @@ +package filer + +import ( + "bytes" + "context" + "encoding/json" + "fmt" + "mime/multipart" + "net/http" + "net/textproto" + + "github.com/databricks/databricks-sdk-go" + "github.com/databricks/databricks-sdk-go/client" +) + +// SnapshotInfo holds the result of a successful snapshot upload. +type SnapshotInfo struct { + // Path is the immutable workspace path for the uploaded snapshot content. + Path string +} + +// SnapshotUploader abstracts the /api/2.0/repos/snapshots endpoint. +// snapshotID is the content-addressed key supplied by the caller; the API uses +// it as the final path component so that identical content always resolves to +// the same workspace location. +// This interface exists so the implementation can later be replaced with a Go SDK call. +type SnapshotUploader interface { + Upload(ctx context.Context, bundleID, snapshotID, currentUser string, zipContent []byte) (*SnapshotInfo, error) +} + +// snapshotAPIClient implements SnapshotUploader against /api/2.0/repos/snapshots. +type snapshotAPIClient struct { + apiClient apiClient +} + +// snapshotUploadResponse mirrors the /api/2.0/repos/snapshots response body. 
+type snapshotUploadResponse struct { + Snapshot struct { + Path string `json:"path"` + } `json:"snapshot"` +} + +// NewSnapshotUploader creates a SnapshotUploader backed by /api/2.0/repos/snapshots. +func NewSnapshotUploader(w *databricks.WorkspaceClient) (SnapshotUploader, error) { + apiClient, err := client.New(w.Config) + if err != nil { + return nil, err + } + return &snapshotAPIClient{apiClient: apiClient}, nil +} + +// Upload uploads zipContent as an immutable snapshot identified by snapshotID. +// snapshotID is the caller-supplied SHA-256 of zipContent and is used by the server as +// the content-addressed path component. currentUser is granted CAN_READ on the snapshot. +func (c *snapshotAPIClient) Upload(ctx context.Context, bundleID, snapshotID, currentUser string, zipContent []byte) (*SnapshotInfo, error) { + var body bytes.Buffer + mw := multipart.NewWriter(&body) + + if err := mw.WriteField("snapshot_id", snapshotID); err != nil { + return nil, fmt.Errorf("failed to write snapshot_id: %w", err) + } + if err := mw.WriteField("bundle_id", bundleID); err != nil { + return nil, fmt.Errorf("failed to write bundle_id: %w", err) + } + + // The API requires an access_control_list granting the current user read access. + acl, err := json.Marshal([]map[string]string{ + {"user_name": currentUser, "permission_level": "CAN_READ"}, + }) + if err != nil { + return nil, fmt.Errorf("failed to marshal access_control_list: %w", err) + } + if err := mw.WriteField("access_control_list", string(acl)); err != nil { + return nil, fmt.Errorf("failed to write access_control_list: %w", err) + } + + // Attach the zip with an explicit content-type so the server treats it as binary.
+ fh := make(textproto.MIMEHeader) + fh.Set("Content-Disposition", `form-data; name="file"; filename="snapshot.zip"`) + fh.Set("Content-Type", "application/zip") + part, err := mw.CreatePart(fh) + if err != nil { + return nil, fmt.Errorf("failed to create file part: %w", err) + } + if _, err := part.Write(zipContent); err != nil { + return nil, fmt.Errorf("failed to write zip content: %w", err) + } + if err := mw.Close(); err != nil { + return nil, fmt.Errorf("failed to finalize multipart body: %w", err) + } + + headers := map[string]string{ + "Content-Type": mw.FormDataContentType(), + } + + var resp snapshotUploadResponse + err = c.apiClient.Do(ctx, http.MethodPost, "/api/2.0/repos/snapshots", headers, nil, body.Bytes(), &resp) + if err != nil { + return nil, fmt.Errorf("snapshot upload: %w", err) + } + + return &SnapshotInfo{Path: resp.Snapshot.Path}, nil +} From a429b263c19ef84f93c4657997848876d6f72a68 Mon Sep 17 00:00:00 2001 From: Andrew Nester Date: Thu, 28 May 2026 11:03:38 +0200 Subject: [PATCH 02/31] remove unused snapshot path method --- bundle/deploy/snapshot/path.go | 10 ---------- bundle/deploy/snapshot/path_test.go | 5 ----- 2 files changed, 15 deletions(-) diff --git a/bundle/deploy/snapshot/path.go b/bundle/deploy/snapshot/path.go index 820f20776ce..3be2d195a8c 100644 --- a/bundle/deploy/snapshot/path.go +++ b/bundle/deploy/snapshot/path.go @@ -26,10 +26,6 @@ import ( // the zip was built or the file's mtime. var zipEpoch = time.Date(2000, 1, 1, 0, 0, 0, 0, time.UTC) -// snapshotBaseDir is the workspace path under which all immutable snapshots are stored. -// The full path for a snapshot is snapshotBaseDir/{bundle_name}/{snapshot_id}. -const snapshotBaseDir = "/Workspace/Shared/.snapshots" - // BundleZip builds the zip that is uploaded to the snapshot API. 
// It contains: // - all files from the bundle sync root under the "files/" prefix, @@ -72,12 +68,6 @@ func SnapshotID(ctx context.Context, b *bundle.Bundle) (string, error) { return IDFromContent(content), nil } -// SnapshotPath returns the workspace path for a snapshot: -// snapshotBaseDir/{bundleName}/{snapshotID}. -func SnapshotPath(bundleName, snapshotID string) string { - return snapshotBaseDir + "/" + bundleName + "/" + snapshotID -} - // syncFiles returns the list of files to include in the snapshot zip using the // same git-aware include/exclude logic as files.Upload (libs/sync). func syncFiles(ctx context.Context, b *bundle.Bundle) ([]fileset.File, error) { diff --git a/bundle/deploy/snapshot/path_test.go b/bundle/deploy/snapshot/path_test.go index f5bc2ab610c..157cdf2c9ba 100644 --- a/bundle/deploy/snapshot/path_test.go +++ b/bundle/deploy/snapshot/path_test.go @@ -89,11 +89,6 @@ func TestIDFromContent(t *testing.T) { assert.Len(t, id, 64, "SHA-256 hex must be 64 characters") } -func TestSnapshotPath(t *testing.T) { - p := snapshot.SnapshotPath("my-bundle", "abc123") - assert.Equal(t, "/Workspace/Shared/.snapshots/my-bundle/abc123", p) -} - func TestSnapshotIDMatchesBundleZipHash(t *testing.T) { b := makeBundleWithFiles(t, map[string]string{"task.py": "x = 1"}) From e7c196889775ff31d0a9f05a78254bc94935eded Mon Sep 17 00:00:00 2001 From: Andrew Nester Date: Thu, 28 May 2026 12:00:22 +0200 Subject: [PATCH 03/31] added an acceptance test --- .../deploy/immutable/databricks.yml.tmpl | 32 +++++++++++ .../bundle/deploy/immutable/out.test.toml | 3 + acceptance/bundle/deploy/immutable/output.txt | 56 +++++++++++++++++++ .../bundle/deploy/immutable/pyproject.toml | 31 ++++++++++ acceptance/bundle/deploy/immutable/script | 17 ++++++ .../immutable/src/immutable/__init__.py | 0 .../deploy/immutable/src/immutable/main.py | 6 ++ .../bundle/deploy/immutable/src/main.py | 1 + .../bundle/deploy/immutable/src/notebook.py | 3 + acceptance/bundle/deploy/immutable/test.toml 
| 10 ++++ 10 files changed, 159 insertions(+) create mode 100644 acceptance/bundle/deploy/immutable/databricks.yml.tmpl create mode 100644 acceptance/bundle/deploy/immutable/out.test.toml create mode 100644 acceptance/bundle/deploy/immutable/output.txt create mode 100644 acceptance/bundle/deploy/immutable/pyproject.toml create mode 100644 acceptance/bundle/deploy/immutable/script create mode 100644 acceptance/bundle/deploy/immutable/src/immutable/__init__.py create mode 100644 acceptance/bundle/deploy/immutable/src/immutable/main.py create mode 100644 acceptance/bundle/deploy/immutable/src/main.py create mode 100644 acceptance/bundle/deploy/immutable/src/notebook.py create mode 100644 acceptance/bundle/deploy/immutable/test.toml diff --git a/acceptance/bundle/deploy/immutable/databricks.yml.tmpl b/acceptance/bundle/deploy/immutable/databricks.yml.tmpl new file mode 100644 index 00000000000..23805de2402 --- /dev/null +++ b/acceptance/bundle/deploy/immutable/databricks.yml.tmpl @@ -0,0 +1,32 @@ +bundle: + name: test-bundle-immutable-$UNIQUE_NAME + immutable: true + +artifacts: + python_artifact: + type: whl + build: uv build --wheel + +resources: + jobs: + my_job: + name: my job + tasks: + - task_key: spark_python_task + spark_python_task: + python_file: ./src/main.py + environment_key: env + - task_key: notebook_task + notebook_task: + notebook_path: ./src/notebook.py + - task_key: python_wheel_task + python_wheel_task: + package_name: immutable + entry_point: main + environment_key: env + environments: + - environment_key: env + spec: + environment_version: "4" + dependencies: + - ./dist/*.whl diff --git a/acceptance/bundle/deploy/immutable/out.test.toml b/acceptance/bundle/deploy/immutable/out.test.toml new file mode 100644 index 00000000000..650836edeb3 --- /dev/null +++ b/acceptance/bundle/deploy/immutable/out.test.toml @@ -0,0 +1,3 @@ +Local = false +Cloud = true +EnvMatrix.DATABRICKS_BUNDLE_ENGINE = ["terraform", "direct"] diff --git 
a/acceptance/bundle/deploy/immutable/output.txt b/acceptance/bundle/deploy/immutable/output.txt new file mode 100644 index 00000000000..5ebc1c24ae0 --- /dev/null +++ b/acceptance/bundle/deploy/immutable/output.txt @@ -0,0 +1,56 @@ + +>>> [CLI] bundle validate +Name: test-bundle-immutable-[UNIQUE_NAME] +Target: default +Workspace: + User: [USERNAME] + Path: /Workspace/Users/[USERNAME]/.bundle/test-bundle-immutable-[UNIQUE_NAME]/default + +Validation OK! + +>>> [CLI] bundle deploy +Building python_artifact... +Uploading immutable bundle snapshot... +Deploying resources... +Updating deployment state... +Deployment complete! + +>>> [CLI] jobs get [NUMID] +"/Workspace/Users/[UUID]/.snapshots/test-bundle-immutable-[UNIQUE_NAME]/73e0ad2064ac9c[NUMID]bd827ea506a8437f37516d3ba66ffa34a90276/src/files/src/main.py" + +>>> [CLI] jobs get [NUMID] +"/Workspace/Users/[UUID]/.snapshots/test-bundle-immutable-[UNIQUE_NAME]/73e0ad2064ac9c[NUMID]bd827ea506a8437f37516d3ba66ffa34a90276/src/files/src/notebook" + +>>> [CLI] jobs get [NUMID] +[ + "/Workspace/Users/[UUID]/.snapshots/test-bundle-immutable-[UNIQUE_NAME]/73e0ad2064ac9c[NUMID]bd827ea506a8437f37516d3ba66ffa34a90276/src/artifacts/.internal/immutable-0.0.1-py3-none-any.whl" +] + +>>> [CLI] bundle run my_job +Run URL: [DATABRICKS_URL]/?o=[NUMID]#job/[NUMID]/run/[NUMID] + +[TIMESTAMP] "my job" RUNNING +[TIMESTAMP] "my job" INTERNAL_ERROR FAILED Task notebook_task failed with message: Unexpected failure while fetching notebook at /Workspace/Users/[UUID]/.snapshots/test-bundle-immutable-[UNIQUE_NAME]/73e0ad2064ac9c[NUMID]bd827ea506a8437f37516d3ba66ffa34a90276/src/files/src/notebook: '/Workspace/Users/[UUID]/.snapshots/test-bundle-immutable-[UNIQUE_NAME]/73e0ad2064ac9c[NUMID]bd827ea506a8437f37516d3ba66ffa34a90276/src/files/src/notebook' is not a notebook. 
+Task notebook_task FAILED: +Run failed with error message + Unexpected failure while fetching notebook at /Workspace/Users/[UUID]/.snapshots/test-bundle-immutable-[UNIQUE_NAME]/73e0ad2064ac9c[NUMID]bd827ea506a8437f37516d3ba66ffa34a90276/src/files/src/notebook: '/Workspace/Users/[UUID]/.snapshots/test-bundle-immutable-[UNIQUE_NAME]/73e0ad2064ac9c[NUMID]bd827ea506a8437f37516d3ba66ffa34a90276/src/files/src/notebook' is not a notebook. + + +Error: Task notebook_task failed! +Error: +Run failed with error message + Unexpected failure while fetching notebook at /Workspace/Users/[UUID]/.snapshots/test-bundle-immutable-[UNIQUE_NAME]/73e0ad2064ac9c[NUMID]bd827ea506a8437f37516d3ba66ffa34a90276/src/files/src/notebook: '/Workspace/Users/[UUID]/.snapshots/test-bundle-immutable-[UNIQUE_NAME]/73e0ad2064ac9c[NUMID]bd827ea506a8437f37516d3ba66ffa34a90276/src/files/src/notebook' is not a notebook. +Trace: + +Error: failed to reach TERMINATED or SKIPPED, got INTERNAL_ERROR: Task notebook_task failed with message: Unexpected failure while fetching notebook at /Workspace/Users/[UUID]/.snapshots/test-bundle-immutable-[UNIQUE_NAME]/73e0ad2064ac9c[NUMID]bd827ea506a8437f37516d3ba66ffa34a90276/src/files/src/notebook: '/Workspace/Users/[UUID]/.snapshots/test-bundle-immutable-[UNIQUE_NAME]/73e0ad2064ac9c[NUMID]bd827ea506a8437f37516d3ba66ffa34a90276/src/files/src/notebook' is not a notebook. + +>>> [CLI] bundle destroy --auto-approve +The following resources will be deleted: + delete resources.jobs.my_job + +All files and directories at the following location will be deleted: /Workspace/Users/[USERNAME]/.bundle/test-bundle-immutable-[UNIQUE_NAME]/default + +Deleting files... +Destroy complete! 
+ +Exit code: 1 diff --git a/acceptance/bundle/deploy/immutable/pyproject.toml b/acceptance/bundle/deploy/immutable/pyproject.toml new file mode 100644 index 00000000000..3e49b180137 --- /dev/null +++ b/acceptance/bundle/deploy/immutable/pyproject.toml @@ -0,0 +1,31 @@ +[project] +name = "immutable" +version = "0.0.1" +authors = [{ name = "andrew.nester@databricks.com" }] +requires-python = ">=3.10,<3.13" +dependencies = [ + # Any dependencies for jobs and pipelines in this project can be added here + # See also https://docs.databricks.com/dev-tools/bundles/library-dependencies + # + # LIMITATION: for pipelines, dependencies are cached during development; + # add dependencies to the 'environment' section of your pipeline.yml file instead +] + +[dependency-groups] +dev = [ + "pytest", + "ruff", + "databricks-dlt", + "databricks-connect>=15.4,<15.5", + "ipykernel", +] + +[project.scripts] +main = "immutable.main:main" + +[build-system] +requires = ["hatchling"] +build-backend = "hatchling.build" + +[tool.ruff] +line-length = 120 diff --git a/acceptance/bundle/deploy/immutable/script b/acceptance/bundle/deploy/immutable/script new file mode 100644 index 00000000000..57d5cfbf3ec --- /dev/null +++ b/acceptance/bundle/deploy/immutable/script @@ -0,0 +1,17 @@ +envsubst < databricks.yml.tmpl > databricks.yml +cleanup() { + trace $CLI bundle destroy --auto-approve +} +trap cleanup EXIT + +trace $CLI bundle validate +trace $CLI bundle deploy + + +# Get a job and check that task paths are immutable +JOB_ID=$($CLI bundle summary -o json | jq -r '.resources.jobs.my_job.id') +trace $CLI jobs get $JOB_ID | jq '.settings.tasks' | jq '.[] | select(.spark_python_task != null) | .spark_python_task.python_file' +trace $CLI jobs get $JOB_ID | jq '.settings.tasks' | jq '.[] | select(.notebook_task != null) | .notebook_task.notebook_path' +trace $CLI jobs get $JOB_ID | jq '.settings.environments[0].spec.dependencies' + +trace $CLI bundle run my_job diff --git 
a/acceptance/bundle/deploy/immutable/src/immutable/__init__.py b/acceptance/bundle/deploy/immutable/src/immutable/__init__.py new file mode 100644 index 00000000000..e69de29bb2d diff --git a/acceptance/bundle/deploy/immutable/src/immutable/main.py b/acceptance/bundle/deploy/immutable/src/immutable/main.py new file mode 100644 index 00000000000..9eccd00150d --- /dev/null +++ b/acceptance/bundle/deploy/immutable/src/immutable/main.py @@ -0,0 +1,6 @@ +def main(): + print("Hello from Python Wheel Task!") + + +if __name__ == "__main__": + main() diff --git a/acceptance/bundle/deploy/immutable/src/main.py b/acceptance/bundle/deploy/immutable/src/main.py new file mode 100644 index 00000000000..6c285f7e2f5 --- /dev/null +++ b/acceptance/bundle/deploy/immutable/src/main.py @@ -0,0 +1 @@ +print("Hello from Spark Python Task!") diff --git a/acceptance/bundle/deploy/immutable/src/notebook.py b/acceptance/bundle/deploy/immutable/src/notebook.py new file mode 100644 index 00000000000..fb3c9597fbf --- /dev/null +++ b/acceptance/bundle/deploy/immutable/src/notebook.py @@ -0,0 +1,3 @@ +# Databricks notebook source + +print("Hello from Notebook Task!") diff --git a/acceptance/bundle/deploy/immutable/test.toml b/acceptance/bundle/deploy/immutable/test.toml new file mode 100644 index 00000000000..be964aff9b5 --- /dev/null +++ b/acceptance/bundle/deploy/immutable/test.toml @@ -0,0 +1,10 @@ +Local = false +Cloud = true + +Ignore = [ + "databricks.yml", + ".databricks", + ".venv", + "script", + "*.pyc", +] From 67914d073fdcc811c31cb4aaae024cfd5ce40e34 Mon Sep 17 00:00:00 2001 From: Andrew Nester Date: Thu, 28 May 2026 12:18:28 +0200 Subject: [PATCH 04/31] fix for notebook import --- acceptance/bundle/deploy/immutable/output.txt | 28 +++++++++---------- bundle/deploy/snapshot/path.go | 9 ------ bundle/phases/build.go | 1 - 3 files changed, 13 insertions(+), 25 deletions(-) diff --git a/acceptance/bundle/deploy/immutable/output.txt b/acceptance/bundle/deploy/immutable/output.txt index 
5ebc1c24ae0..5b9e74d23d0 100644 --- a/acceptance/bundle/deploy/immutable/output.txt +++ b/acceptance/bundle/deploy/immutable/output.txt @@ -16,33 +16,33 @@ Updating deployment state... Deployment complete! >>> [CLI] jobs get [NUMID] -"/Workspace/Users/[UUID]/.snapshots/test-bundle-immutable-[UNIQUE_NAME]/73e0ad2064ac9c[NUMID]bd827ea506a8437f37516d3ba66ffa34a90276/src/files/src/main.py" +"/Workspace/Users/[UUID]/.snapshots/test-bundle-immutable-[UNIQUE_NAME]/11f80ca6d8923bf75b57e475d4ca9ba4bb1d6d48c58aace8d3f2a1289b51c6e0/src/files/src/main.py" >>> [CLI] jobs get [NUMID] -"/Workspace/Users/[UUID]/.snapshots/test-bundle-immutable-[UNIQUE_NAME]/73e0ad2064ac9c[NUMID]bd827ea506a8437f37516d3ba66ffa34a90276/src/files/src/notebook" +"/Workspace/Users/[UUID]/.snapshots/test-bundle-immutable-[UNIQUE_NAME]/11f80ca6d8923bf75b57e475d4ca9ba4bb1d6d48c58aace8d3f2a1289b51c6e0/src/files/src/notebook" >>> [CLI] jobs get [NUMID] [ - "/Workspace/Users/[UUID]/.snapshots/test-bundle-immutable-[UNIQUE_NAME]/73e0ad2064ac9c[NUMID]bd827ea506a8437f37516d3ba66ffa34a90276/src/artifacts/.internal/immutable-0.0.1-py3-none-any.whl" + "/Workspace/Users/[UUID]/.snapshots/test-bundle-immutable-[UNIQUE_NAME]/11f80ca6d8923bf75b57e475d4ca9ba4bb1d6d48c58aace8d3f2a1289b51c6e0/src/artifacts/.internal/immutable-0.0.1-py3-none-any.whl" ] >>> [CLI] bundle run my_job Run URL: [DATABRICKS_URL]/?o=[NUMID]#job/[NUMID]/run/[NUMID] [TIMESTAMP] "my job" RUNNING -[TIMESTAMP] "my job" INTERNAL_ERROR FAILED Task notebook_task failed with message: Unexpected failure while fetching notebook at /Workspace/Users/[UUID]/.snapshots/test-bundle-immutable-[UNIQUE_NAME]/73e0ad2064ac9c[NUMID]bd827ea506a8437f37516d3ba66ffa34a90276/src/files/src/notebook: '/Workspace/Users/[UUID]/.snapshots/test-bundle-immutable-[UNIQUE_NAME]/73e0ad2064ac9c[NUMID]bd827ea506a8437f37516d3ba66ffa34a90276/src/files/src/notebook' is not a notebook. 
-Task notebook_task FAILED: -Run failed with error message - Unexpected failure while fetching notebook at /Workspace/Users/[UUID]/.snapshots/test-bundle-immutable-[UNIQUE_NAME]/73e0ad2064ac9c[NUMID]bd827ea506a8437f37516d3ba66ffa34a90276/src/files/src/notebook: '/Workspace/Users/[UUID]/.snapshots/test-bundle-immutable-[UNIQUE_NAME]/73e0ad2064ac9c[NUMID]bd827ea506a8437f37516d3ba66ffa34a90276/src/files/src/notebook' is not a notebook. +[TIMESTAMP] "my job" TERMINATED SUCCESS +Output: +======= +Task python_wheel_task: +Hello from Python Wheel Task! +======= +Task notebook_task: -Error: Task notebook_task failed! -Error: -Run failed with error message - Unexpected failure while fetching notebook at /Workspace/Users/[UUID]/.snapshots/test-bundle-immutable-[UNIQUE_NAME]/73e0ad2064ac9c[NUMID]bd827ea506a8437f37516d3ba66ffa34a90276/src/files/src/notebook: '/Workspace/Users/[UUID]/.snapshots/test-bundle-immutable-[UNIQUE_NAME]/73e0ad2064ac9c[NUMID]bd827ea506a8437f37516d3ba66ffa34a90276/src/files/src/notebook' is not a notebook. -Trace: +======= +Task spark_python_task: +Hello from Spark Python Task! -Error: failed to reach TERMINATED or SKIPPED, got INTERNAL_ERROR: Task notebook_task failed with message: Unexpected failure while fetching notebook at /Workspace/Users/[UUID]/.snapshots/test-bundle-immutable-[UNIQUE_NAME]/73e0ad2064ac9c[NUMID]bd827ea506a8437f37516d3ba66ffa34a90276/src/files/src/notebook: '/Workspace/Users/[UUID]/.snapshots/test-bundle-immutable-[UNIQUE_NAME]/73e0ad2064ac9c[NUMID]bd827ea506a8437f37516d3ba66ffa34a90276/src/files/src/notebook' is not a notebook. >>> [CLI] bundle destroy --auto-approve The following resources will be deleted: @@ -52,5 +52,3 @@ All files and directories at the following location will be deleted: /Workspace/ Deleting files... Destroy complete! 
- -Exit code: 1 diff --git a/bundle/deploy/snapshot/path.go b/bundle/deploy/snapshot/path.go index 3be2d195a8c..82c5d543680 100644 --- a/bundle/deploy/snapshot/path.go +++ b/bundle/deploy/snapshot/path.go @@ -11,13 +11,11 @@ import ( "os" "path/filepath" "slices" - "strings" "time" "github.com/databricks/cli/bundle" "github.com/databricks/cli/libs/fileset" "github.com/databricks/cli/libs/git" - "github.com/databricks/cli/libs/notebook" "github.com/databricks/cli/libs/set" ) @@ -151,14 +149,7 @@ func addSyncRootToZip(ctx context.Context, zw *zip.Writer, b *bundle.Bundle) err return fmt.Errorf("open %s: %w", f.Relative, err) } - // Notebooks are stored without their file extension, matching how - // Databricks workspace imports them (e.g. sample_notebook.ipynb → - // sample_notebook). Job tasks reference the extension-stripped path. entryPath := filepath.ToSlash(f.Relative) - if isNb, _, nbErr := notebook.DetectWithFS(b.SyncRoot, f.Relative); nbErr == nil && isNb { - entryPath = strings.TrimSuffix(entryPath, filepath.Ext(entryPath)) - } - h := &zip.FileHeader{ Name: "files/" + entryPath, Method: zip.Deflate, diff --git a/bundle/phases/build.go b/bundle/phases/build.go index a0649e373f0..26dc9bede06 100644 --- a/bundle/phases/build.go +++ b/bundle/phases/build.go @@ -14,7 +14,6 @@ import ( "github.com/databricks/cli/libs/logdiag" ) - type LibLocationMap map[string][]libraries.LocationToUpdate // The build phase builds artifacts. 
From 549492a3b44d577b67afdd135a3752f1c04cb0b7 Mon Sep 17 00:00:00 2001 From: Andrew Nester Date: Thu, 28 May 2026 12:40:53 +0200 Subject: [PATCH 05/31] removed unused function --- bundle/deploy/snapshot/upload.go | 6 ------ 1 file changed, 6 deletions(-) diff --git a/bundle/deploy/snapshot/upload.go b/bundle/deploy/snapshot/upload.go index 73d84eb9703..e3e9efa011e 100644 --- a/bundle/deploy/snapshot/upload.go +++ b/bundle/deploy/snapshot/upload.go @@ -24,12 +24,6 @@ func Upload() bundle.Mutator { return &snapshotUpload{} } -// UploadWithClient returns an upload mutator backed by the provided SnapshotUploader. -// This is intended for use in tests. -func UploadWithClient(uploader filer.SnapshotUploader) bundle.Mutator { - return &snapshotUpload{uploader: uploader} -} - func (m *snapshotUpload) Name() string { return "snapshot.Upload" } From aedfdb03398a68a5b7d42dbacd8b194a1cb8db14 Mon Sep 17 00:00:00 2001 From: Andrew Nester Date: Mon, 1 Jun 2026 13:38:31 +0200 Subject: [PATCH 06/31] fix schema + unit test --- bundle/deploy/snapshot/path_test.go | 5 ++--- bundle/schema/jsonschema.json | 4 ++++ 2 files changed, 6 insertions(+), 3 deletions(-) diff --git a/bundle/deploy/snapshot/path_test.go b/bundle/deploy/snapshot/path_test.go index 157cdf2c9ba..9728fadb507 100644 --- a/bundle/deploy/snapshot/path_test.go +++ b/bundle/deploy/snapshot/path_test.go @@ -113,7 +113,7 @@ func zipEntryNames(t *testing.T, zipContent []byte) []string { return names } -func TestBundleZipStripsNotebookExtensions(t *testing.T) { +func TestBundleZipDoNotStripNotebookExtensions(t *testing.T) { // Minimal valid Jupyter notebook content. 
+ ipynb := `{"nbformat": 4, "nbformat_minor": 5, "cells": [], "metadata": {}}` b := makeBundleWithFiles(t, map[string]string{ @@ -125,7 +125,6 @@ func TestBundleZipStripsNotebookExtensions(t *testing.T) { require.NoError(t, err) names := zipEntryNames(t, zipContent) - assert.True(t, slices.Contains(names, "files/src/my_notebook"), "notebook should have extension stripped") - assert.False(t, slices.Contains(names, "files/src/my_notebook.ipynb"), "notebook should not appear with .ipynb extension") + assert.True(t, slices.Contains(names, "files/src/my_notebook.ipynb"), "notebook should keep its .ipynb extension") assert.True(t, slices.Contains(names, "files/src/script.py"), "regular Python file should keep its extension") } diff --git a/bundle/schema/jsonschema.json b/bundle/schema/jsonschema.json index ee105a6f821..28ffeb6205f 100644 --- a/bundle/schema/jsonschema.json +++ b/bundle/schema/jsonschema.json @@ -2239,6 +2239,10 @@ "$ref": "#/$defs/github.com/databricks/cli/bundle/config.Git", "markdownDescription": "The Git version control details that are associated with your bundle. For supported attributes see [git](https://docs.databricks.com/dev-tools/bundles/settings.html#git)." }, + "immutable": { + "description": "Whether to upload bundle files and artifacts as a single immutable snapshot. When true, all files are packaged into a zip and uploaded via the snapshot API, and workspace.file_path and workspace.artifact_path are set to the returned content-addressed path.
The validate and plan commands make no mutative API calls when this is enabled.", + "$ref": "#/$defs/bool" + }, "name": { "description": "The name of the bundle.", "$ref": "#/$defs/string" From eddec613463ac23a100d7f9da8c1ecf6d2a533e9 Mon Sep 17 00:00:00 2001 From: Andrew Nester Date: Tue, 9 Jun 2026 12:49:24 +0200 Subject: [PATCH 07/31] use immutable_folder config --- acceptance/bundle/deploy/immutable/databricks.yml.tmpl | 3 ++- .../validate/immutable_workspace_paths/databricks.yml | 3 ++- bundle/config/bundle.go | 6 ------ bundle/config/deployment.go | 7 +++++++ .../mutator/resourcemutator/process_static_resources.go | 2 +- bundle/config/mutator/resourcemutator/resource_mutator.go | 2 +- bundle/config/mutator/translate_paths.go | 2 +- bundle/internal/schema/annotations.yml | 6 +++--- bundle/phases/build.go | 2 +- bundle/phases/deploy.go | 4 ++-- bundle/phases/destroy.go | 2 +- bundle/schema/jsonschema.json | 8 ++++---- 12 files changed, 25 insertions(+), 22 deletions(-) diff --git a/acceptance/bundle/deploy/immutable/databricks.yml.tmpl b/acceptance/bundle/deploy/immutable/databricks.yml.tmpl index 23805de2402..6dd62eee1fb 100644 --- a/acceptance/bundle/deploy/immutable/databricks.yml.tmpl +++ b/acceptance/bundle/deploy/immutable/databricks.yml.tmpl @@ -1,6 +1,7 @@ bundle: name: test-bundle-immutable-$UNIQUE_NAME - immutable: true + deployment: + immutable_folder: true artifacts: python_artifact: diff --git a/acceptance/bundle/validate/immutable_workspace_paths/databricks.yml b/acceptance/bundle/validate/immutable_workspace_paths/databricks.yml index 39c25fb365f..b6ce1bd97d1 100644 --- a/acceptance/bundle/validate/immutable_workspace_paths/databricks.yml +++ b/acceptance/bundle/validate/immutable_workspace_paths/databricks.yml @@ -1,6 +1,7 @@ bundle: name: my-bundle - immutable: true + deployment: + immutable_folder: true sync: exclude: diff --git a/bundle/config/bundle.go b/bundle/config/bundle.go index 79f92b6b435..473e355225f 100644 --- 
a/bundle/config/bundle.go +++ b/bundle/config/bundle.go @@ -60,10 +60,4 @@ type Bundle struct { // Databricks first party template when a user runs bundle init. Uuid string `json:"uuid,omitempty"` - // Immutable specifies that bundle files and artifacts are uploaded as a single - // immutable snapshot rather than being synced individually. When true, the - // deployment calls /api/2.0/repos/snapshots with a zip containing all files - // and sets workspace.file_path and workspace.artifact_path to the returned - // content-addressed path. validate and plan make no mutative API calls. - Immutable bool `json:"immutable,omitempty"` } diff --git a/bundle/config/deployment.go b/bundle/config/deployment.go index b7efb4456f9..8be0596823e 100644 --- a/bundle/config/deployment.go +++ b/bundle/config/deployment.go @@ -5,6 +5,13 @@ type Deployment struct { // running jobs or pipelines in the workspace. Defaults to false. FailOnActiveRuns bool `json:"fail_on_active_runs,omitempty"` + // ImmutableFolder specifies that bundle files and artifacts are uploaded as a + // single immutable snapshot rather than being synced individually. When true, + // the deployment calls /api/2.0/repos/snapshots with a zip containing all files + // and sets workspace.file_path and workspace.artifact_path to the returned + // content-addressed path. validate and plan make no mutative API calls. + ImmutableFolder bool `json:"immutable_folder,omitempty"` + // Lock configures locking behavior on deployment. 
Lock Lock `json:"lock,omitempty"` } diff --git a/bundle/config/mutator/resourcemutator/process_static_resources.go b/bundle/config/mutator/resourcemutator/process_static_resources.go index 94c823c75b8..9040c14ee3d 100644 --- a/bundle/config/mutator/resourcemutator/process_static_resources.go +++ b/bundle/config/mutator/resourcemutator/process_static_resources.go @@ -43,7 +43,7 @@ func (p processStaticResources) Apply(ctx context.Context, b *bundle.Bundle) dia // The actual snapshot path is only known after snapshot.Upload() returns the // API-assigned path in the deploy phase. var resourceResolver bundle.Mutator - if b.Config.Bundle.Immutable { + if b.Config.Bundle.Deployment.ImmutableFolder { resourceResolver = mutator.ResolveVariableReferencesOnlyResourcesExcluding( "workspace.snapshot_path", ) diff --git a/bundle/config/mutator/resourcemutator/resource_mutator.go b/bundle/config/mutator/resourcemutator/resource_mutator.go index 31afb65ffa2..fdb813aba47 100644 --- a/bundle/config/mutator/resourcemutator/resource_mutator.go +++ b/bundle/config/mutator/resourcemutator/resource_mutator.go @@ -132,7 +132,7 @@ func applyInitializeMutators(ctx context.Context, b *bundle.Bundle) { // ${workspace.artifact_path} are excluded: the API assigns the snapshot path // after upload, so they must remain as-is until snapshot.Upload() has run. 
func resourceVarResolver(b *bundle.Bundle) bundle.Mutator { - if b.Config.Bundle.Immutable { + if b.Config.Bundle.Deployment.ImmutableFolder { return mutator.ResolveVariableReferencesOnlyResourcesExcluding( "workspace.file_path", "workspace.artifact_path", ) diff --git a/bundle/config/mutator/translate_paths.go b/bundle/config/mutator/translate_paths.go index 1d38cee2361..66605e69be7 100644 --- a/bundle/config/mutator/translate_paths.go +++ b/bundle/config/mutator/translate_paths.go @@ -327,7 +327,7 @@ const snapshotFilesRoot = "${workspace.snapshot_path}/src/files" func applyTranslations(ctx context.Context, b *bundle.Bundle, t *translateContext, translations []func(context.Context, dyn.Value) (dyn.Value, error)) diag.Diagnostics { switch { - case b.Config.Bundle.Immutable: + case b.Config.Bundle.Deployment.ImmutableFolder: // Use a placeholder root that is resolved after snapshot.Upload() sets // workspace.snapshot_path. This defers path computation until the actual // content-addressed path is known. diff --git a/bundle/internal/schema/annotations.yml b/bundle/internal/schema/annotations.yml index e63f6625d1d..735d0cc4c81 100644 --- a/bundle/internal/schema/annotations.yml +++ b/bundle/internal/schema/annotations.yml @@ -50,9 +50,6 @@ github.com/databricks/cli/bundle/config.Bundle: The Git version control details that are associated with your bundle. "markdown_description": |- The Git version control details that are associated with your bundle. For supported attributes see [\_](/dev-tools/bundles/settings.md#git). - "immutable": - "description": |- - Whether to upload bundle files and artifacts as a single immutable snapshot. When true, all files are packaged into a zip and uploaded via the snapshot API, and workspace.file_path and workspace.artifact_path are set to the returned content-addressed path. The validate and plan commands make no mutative API calls when this is enabled. "name": "description": |- The name of the bundle. 
@@ -63,6 +60,9 @@ github.com/databricks/cli/bundle/config.Deployment: "fail_on_active_runs": "description": |- Whether to fail on active runs. If this is set to true a deployment that is running can be interrupted. + "immutable_folder": + "description": |- + Whether to upload bundle files and artifacts as a single immutable snapshot. When true, all files are packaged into a zip and uploaded via the snapshot API, and workspace.file_path and workspace.artifact_path are set to the returned content-addressed path. The validate and plan commands make no mutative API calls when this is enabled. "lock": "description": |- The deployment lock attributes. diff --git a/bundle/phases/build.go b/bundle/phases/build.go index 26dc9bede06..8411e376ebb 100644 --- a/bundle/phases/build.go +++ b/bundle/phases/build.go @@ -44,7 +44,7 @@ func Build(ctx context.Context, b *bundle.Bundle) LibLocationMap { // For immutable bundles, library remote paths are set in the deploy phase // after snapshot.Upload() provides the content-addressed workspace.artifact_path. - if b.Config.Bundle.Immutable { + if b.Config.Bundle.Deployment.ImmutableFolder { return nil } diff --git a/bundle/phases/deploy.go b/bundle/phases/deploy.go index 5534354bc61..6eee62a38ec 100644 --- a/bundle/phases/deploy.go +++ b/bundle/phases/deploy.go @@ -133,7 +133,7 @@ func Deploy(ctx context.Context, b *bundle.Bundle, outputHandler sync.OutputHand bundle.ApplyContext(ctx, b, lock.Release(lock.GoalDeploy)) }() - if b.Config.Bundle.Immutable { + if b.Config.Bundle.Deployment.ImmutableFolder { // Upload all source files and built artifacts as a single immutable snapshot. 
// The API assigns a content-addressed path, so workspace.snapshot_path (and // derived workspace.file_path / workspace.artifact_path) are only known after @@ -157,7 +157,7 @@ func Deploy(ctx context.Context, b *bundle.Bundle, outputHandler sync.OutputHand return } - if !b.Config.Bundle.Immutable { + if !b.Config.Bundle.Deployment.ImmutableFolder { bundle.ApplySeqContext(ctx, b, files.Upload(outputHandler)) if logdiag.HasError(ctx) { return diff --git a/bundle/phases/destroy.go b/bundle/phases/destroy.go index 71d5468c253..a2890d56f5a 100644 --- a/bundle/phases/destroy.go +++ b/bundle/phases/destroy.go @@ -132,7 +132,7 @@ func Destroy(ctx context.Context, b *bundle.Bundle, engine engine.EngineType) { mutator.ResolveVariableReferencesOnlyResources("artifacts"), } - if b.Config.Bundle.Immutable { + if b.Config.Bundle.Deployment.ImmutableFolder { // For immutable bundles, resource paths contain ${workspace.snapshot_path} // which was set during deploy by snapshot.Upload(). Load it from the stored // metadata so it can be resolved before Terraform processes the config. diff --git a/bundle/schema/jsonschema.json b/bundle/schema/jsonschema.json index 28ffeb6205f..50f5045cf70 100644 --- a/bundle/schema/jsonschema.json +++ b/bundle/schema/jsonschema.json @@ -2239,10 +2239,6 @@ "$ref": "#/$defs/github.com/databricks/cli/bundle/config.Git", "markdownDescription": "The Git version control details that are associated with your bundle. For supported attributes see [git](https://docs.databricks.com/dev-tools/bundles/settings.html#git)." }, - "immutable": { - "description": "Whether to upload bundle files and artifacts as a single immutable snapshot. When true, all files are packaged into a zip and uploaded via the snapshot API, and workspace.file_path and workspace.artifact_path are set to the returned content-addressed path. 
The validate and plan commands make no mutative API calls when this is enabled.", - "$ref": "#/$defs/bool" - }, "name": { "description": "The name of the bundle.", "$ref": "#/$defs/string" @@ -2275,6 +2271,10 @@ "description": "Whether to fail on active runs. If this is set to true a deployment that is running can be interrupted.", "$ref": "#/$defs/bool" }, + "immutable_folder": { + "description": "Whether to upload bundle files and artifacts as a single immutable snapshot. When true, all files are packaged into a zip and uploaded via the snapshot API, and workspace.file_path and workspace.artifact_path are set to the returned content-addressed path. The validate and plan commands make no mutative API calls when this is enabled.", + "$ref": "#/$defs/bool" + }, "lock": { "description": "The deployment lock attributes.", "$ref": "#/$defs/github.com/databricks/cli/bundle/config.Lock" From 4a9bcd9fd49c7eb39eaec34d078284eb558fa025 Mon Sep 17 00:00:00 2001 From: Andrew Nester Date: Tue, 9 Jun 2026 12:52:49 +0200 Subject: [PATCH 08/31] remove merge conflict --- .../config/mutator/resourcemutator/process_static_resources.go | 2 -- 1 file changed, 2 deletions(-) diff --git a/bundle/config/mutator/resourcemutator/process_static_resources.go b/bundle/config/mutator/resourcemutator/process_static_resources.go index 9040c14ee3d..8b8dcbbe3b8 100644 --- a/bundle/config/mutator/resourcemutator/process_static_resources.go +++ b/bundle/config/mutator/resourcemutator/process_static_resources.go @@ -58,8 +58,6 @@ func (p processStaticResources) Apply(ctx context.Context, b *bundle.Bundle) dia // Updates (dynamic): resources.* (strings) (resolves variable references to their actual values) // Resolves variable references in 'resources' using bundle, workspace, and variables prefixes resourceResolver, - // After normal variable resolution, log all ${resources.*} references - mutator.LogResourceReferences(), mutator.NormalizePaths(), // Translate dashboard paths into paths in the 
workspace file system From ebd26ea28e4e8448cc740fdf2ac017ac1baa8a6a Mon Sep 17 00:00:00 2001 From: Andrew Nester Date: Tue, 9 Jun 2026 13:13:05 +0200 Subject: [PATCH 09/31] fix empty artifact path + tests --- .../databricks.yml.tmpl | 16 ++++++++++ .../immutable-no-artifacts/out.test.toml | 3 ++ .../deploy/immutable-no-artifacts/output.txt | 30 +++++++++++++++++++ .../deploy/immutable-no-artifacts/script | 14 +++++++++ .../deploy/immutable-no-artifacts/src/main.py | 1 + .../immutable-no-artifacts/src/notebook.py | 3 ++ .../deploy/immutable-no-artifacts/test.toml | 16 ++++++++++ bundle/deploy/snapshot/upload.go | 6 +++- 8 files changed, 88 insertions(+), 1 deletion(-) create mode 100644 acceptance/bundle/deploy/immutable-no-artifacts/databricks.yml.tmpl create mode 100644 acceptance/bundle/deploy/immutable-no-artifacts/out.test.toml create mode 100644 acceptance/bundle/deploy/immutable-no-artifacts/output.txt create mode 100644 acceptance/bundle/deploy/immutable-no-artifacts/script create mode 100644 acceptance/bundle/deploy/immutable-no-artifacts/src/main.py create mode 100644 acceptance/bundle/deploy/immutable-no-artifacts/src/notebook.py create mode 100644 acceptance/bundle/deploy/immutable-no-artifacts/test.toml diff --git a/acceptance/bundle/deploy/immutable-no-artifacts/databricks.yml.tmpl b/acceptance/bundle/deploy/immutable-no-artifacts/databricks.yml.tmpl new file mode 100644 index 00000000000..ac83678a5c0 --- /dev/null +++ b/acceptance/bundle/deploy/immutable-no-artifacts/databricks.yml.tmpl @@ -0,0 +1,16 @@ +bundle: + name: test-bundle-immutable-no-artifacts-$UNIQUE_NAME + deployment: + immutable_folder: true + +resources: + jobs: + my_job: + name: my job + tasks: + - task_key: spark_python_task + spark_python_task: + python_file: ./src/main.py + - task_key: notebook_task + notebook_task: + notebook_path: ./src/notebook.py diff --git a/acceptance/bundle/deploy/immutable-no-artifacts/out.test.toml 
b/acceptance/bundle/deploy/immutable-no-artifacts/out.test.toml new file mode 100644 index 00000000000..650836edeb3 --- /dev/null +++ b/acceptance/bundle/deploy/immutable-no-artifacts/out.test.toml @@ -0,0 +1,3 @@ +Local = false +Cloud = true +EnvMatrix.DATABRICKS_BUNDLE_ENGINE = ["terraform", "direct"] diff --git a/acceptance/bundle/deploy/immutable-no-artifacts/output.txt b/acceptance/bundle/deploy/immutable-no-artifacts/output.txt new file mode 100644 index 00000000000..52398f774a3 --- /dev/null +++ b/acceptance/bundle/deploy/immutable-no-artifacts/output.txt @@ -0,0 +1,30 @@ + +>>> [CLI] bundle validate +Name: test-bundle-immutable-no-artifacts-[UNIQUE_NAME] +Target: default +Workspace: + User: [USERNAME] + Path: /Workspace/Users/[USERNAME]/.bundle/test-bundle-immutable-no-artifacts-[UNIQUE_NAME]/default + +Validation OK! + +>>> [CLI] bundle deploy +Uploading immutable bundle snapshot... +Deploying resources... +Updating deployment state... +Deployment complete! + +>>> [CLI] jobs get [NUMID] +"/Workspace/Users/[UUID]/.snapshots/test-bundle-immutable-no-artifacts-[UNIQUE_NAME]/[SNAPSHOT_HASH]/src/files/src/main.py" + +>>> [CLI] jobs get [NUMID] +"/Workspace/Users/[UUID]/.snapshots/test-bundle-immutable-no-artifacts-[UNIQUE_NAME]/[SNAPSHOT_HASH]/src/files/src/notebook" + +>>> [CLI] bundle destroy --auto-approve +The following resources will be deleted: + delete resources.jobs.my_job + +All files and directories at the following location will be deleted: /Workspace/Users/[USERNAME]/.bundle/test-bundle-immutable-no-artifacts-[UNIQUE_NAME]/default + +Deleting files... +Destroy complete! 
diff --git a/acceptance/bundle/deploy/immutable-no-artifacts/script b/acceptance/bundle/deploy/immutable-no-artifacts/script new file mode 100644 index 00000000000..21541fb79ee --- /dev/null +++ b/acceptance/bundle/deploy/immutable-no-artifacts/script @@ -0,0 +1,14 @@ +envsubst < databricks.yml.tmpl > databricks.yml +cleanup() { + trace $CLI bundle destroy --auto-approve +} +trap cleanup EXIT + +trace $CLI bundle validate +trace $CLI bundle deploy + + +# Get a job and check that task paths point into the snapshot +JOB_ID=$($CLI bundle summary -o json | jq -r '.resources.jobs.my_job.id') +trace $CLI jobs get $JOB_ID | jq '.settings.tasks' | jq '.[] | select(.spark_python_task != null) | .spark_python_task.python_file' +trace $CLI jobs get $JOB_ID | jq '.settings.tasks' | jq '.[] | select(.notebook_task != null) | .notebook_task.notebook_path' diff --git a/acceptance/bundle/deploy/immutable-no-artifacts/src/main.py b/acceptance/bundle/deploy/immutable-no-artifacts/src/main.py new file mode 100644 index 00000000000..6c285f7e2f5 --- /dev/null +++ b/acceptance/bundle/deploy/immutable-no-artifacts/src/main.py @@ -0,0 +1 @@ +print("Hello from Spark Python Task!") diff --git a/acceptance/bundle/deploy/immutable-no-artifacts/src/notebook.py b/acceptance/bundle/deploy/immutable-no-artifacts/src/notebook.py new file mode 100644 index 00000000000..fb3c9597fbf --- /dev/null +++ b/acceptance/bundle/deploy/immutable-no-artifacts/src/notebook.py @@ -0,0 +1,3 @@ +# Databricks notebook source + +print("Hello from Notebook Task!") diff --git a/acceptance/bundle/deploy/immutable-no-artifacts/test.toml b/acceptance/bundle/deploy/immutable-no-artifacts/test.toml new file mode 100644 index 00000000000..8cdb1604a3a --- /dev/null +++ b/acceptance/bundle/deploy/immutable-no-artifacts/test.toml @@ -0,0 +1,16 @@ +Local = false +Cloud = true + +Ignore = [ + "databricks.yml", + ".databricks", + ".venv", + "script", + "*.pyc", +] + +# Normalize the content-addressed snapshot hash so it doesn't 
need to be +# hardcoded in output.txt and the test stays stable across file changes. +[[Repls]] +Old = '[0-9a-f]{64}' +New = '[SNAPSHOT_HASH]' diff --git a/bundle/deploy/snapshot/upload.go b/bundle/deploy/snapshot/upload.go index e3e9efa011e..4eea5757cdd 100644 --- a/bundle/deploy/snapshot/upload.go +++ b/bundle/deploy/snapshot/upload.go @@ -57,7 +57,11 @@ func (m *snapshotUpload) Apply(ctx context.Context, b *bundle.Bundle) diag.Diagn // The API unpacks the zip under a "src" subdirectory. b.Config.Workspace.SnapshotPath = info.Path b.Config.Workspace.FilePath = path.Join(info.Path, "src", "files") - b.Config.Workspace.ArtifactPath = path.Join(info.Path, "src", "artifacts") + // Only set artifact_path when artifacts are present; with no artifacts the + // zip has no "src/artifacts" directory and a get-status on it would 404. + if len(b.Config.Artifacts) > 0 { + b.Config.Workspace.ArtifactPath = path.Join(info.Path, "src", "artifacts") + } return nil } From 5efe1da3130352b65e86c3b3f913fe5f1a5eeaa3 Mon Sep 17 00:00:00 2001 From: Andrew Nester Date: Tue, 9 Jun 2026 15:08:39 +0200 Subject: [PATCH 10/31] fixed test config --- .../deploy/immutable-no-artifacts/databricks.yml.tmpl | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/acceptance/bundle/deploy/immutable-no-artifacts/databricks.yml.tmpl b/acceptance/bundle/deploy/immutable-no-artifacts/databricks.yml.tmpl index ac83678a5c0..caacf79f907 100644 --- a/acceptance/bundle/deploy/immutable-no-artifacts/databricks.yml.tmpl +++ b/acceptance/bundle/deploy/immutable-no-artifacts/databricks.yml.tmpl @@ -11,6 +11,12 @@ resources: - task_key: spark_python_task spark_python_task: python_file: ./src/main.py + environment_key: env - task_key: notebook_task notebook_task: notebook_path: ./src/notebook.py + + environments: + - environment_key: env + spec: + environment_version: "4" From 6215a49ecc85258237d86b5fe921f80b71b54969 Mon Sep 17 00:00:00 2001 From: Andrew Nester Date: Thu, 18 Jun 2026 16:06:25 +0200 Subject: [PATCH 
11/31] fixes --- .../immutable-no-artifacts/out.test.toml | 2 +- .../deploy/immutable-no-artifacts/test.toml | 2 +- acceptance/bundle/deploy/immutable/output.txt | 21 ++--- acceptance/bundle/deploy/immutable/script | 3 +- acceptance/bundle/deploy/immutable/test.toml | 6 ++ .../immutable_workspace_paths/output.txt | 4 +- .../mutator/resolve_variable_references.go | 15 ---- .../resolve_variable_references_test.go | 45 ----------- .../process_static_resources.go | 12 +-- .../resourcemutator/resource_mutator.go | 19 +---- bundle/config/mutator/translate_paths.go | 12 +-- bundle/config/workspace.go | 6 +- bundle/deploy/metadata/load.go | 57 -------------- .../deploy/snapshot/client.go | 12 +-- bundle/deploy/snapshot/path.go | 76 +++---------------- bundle/deploy/snapshot/path_test.go | 2 +- bundle/deploy/snapshot/state.go | 63 +++++++++++++++ bundle/deploy/snapshot/translate_paths.go | 50 ++++++++++++ bundle/deploy/snapshot/upload.go | 5 +- bundle/phases/build.go | 20 ++--- bundle/phases/deploy.go | 15 ++-- bundle/phases/destroy.go | 12 +-- cmd/bundle/utils/process.go | 1 - libs/sync/sync.go | 29 +++++++ libs/testserver/handlers.go | 43 +++++++++++ 25 files changed, 254 insertions(+), 278 deletions(-) delete mode 100644 bundle/deploy/metadata/load.go rename libs/filer/snapshot_client.go => bundle/deploy/snapshot/client.go (91%) create mode 100644 bundle/deploy/snapshot/state.go create mode 100644 bundle/deploy/snapshot/translate_paths.go diff --git a/acceptance/bundle/deploy/immutable-no-artifacts/out.test.toml b/acceptance/bundle/deploy/immutable-no-artifacts/out.test.toml index 650836edeb3..bbc7fcfd1bd 100644 --- a/acceptance/bundle/deploy/immutable-no-artifacts/out.test.toml +++ b/acceptance/bundle/deploy/immutable-no-artifacts/out.test.toml @@ -1,3 +1,3 @@ -Local = false +Local = true Cloud = true EnvMatrix.DATABRICKS_BUNDLE_ENGINE = ["terraform", "direct"] diff --git a/acceptance/bundle/deploy/immutable-no-artifacts/test.toml 
b/acceptance/bundle/deploy/immutable-no-artifacts/test.toml index 8cdb1604a3a..21acfa23bd6 100644 --- a/acceptance/bundle/deploy/immutable-no-artifacts/test.toml +++ b/acceptance/bundle/deploy/immutable-no-artifacts/test.toml @@ -1,4 +1,4 @@ -Local = false +Local = true Cloud = true Ignore = [ diff --git a/acceptance/bundle/deploy/immutable/output.txt b/acceptance/bundle/deploy/immutable/output.txt index 5b9e74d23d0..5fc2ed07493 100644 --- a/acceptance/bundle/deploy/immutable/output.txt +++ b/acceptance/bundle/deploy/immutable/output.txt @@ -16,33 +16,22 @@ Updating deployment state... Deployment complete! >>> [CLI] jobs get [NUMID] -"/Workspace/Users/[UUID]/.snapshots/test-bundle-immutable-[UNIQUE_NAME]/11f80ca6d8923bf75b57e475d4ca9ba4bb1d6d48c58aace8d3f2a1289b51c6e0/src/files/src/main.py" +"/Workspace/Users/[UUID]/.snapshots/test-bundle-immutable-[UNIQUE_NAME]/[SNAPSHOT_HASH]/src/files/src/main.py" >>> [CLI] jobs get [NUMID] -"/Workspace/Users/[UUID]/.snapshots/test-bundle-immutable-[UNIQUE_NAME]/11f80ca6d8923bf75b57e475d4ca9ba4bb1d6d48c58aace8d3f2a1289b51c6e0/src/files/src/notebook" +"/Workspace/Users/[UUID]/.snapshots/test-bundle-immutable-[UNIQUE_NAME]/[SNAPSHOT_HASH]/src/files/src/notebook" >>> [CLI] jobs get [NUMID] [ - "/Workspace/Users/[UUID]/.snapshots/test-bundle-immutable-[UNIQUE_NAME]/11f80ca6d8923bf75b57e475d4ca9ba4bb1d6d48c58aace8d3f2a1289b51c6e0/src/artifacts/.internal/immutable-0.0.1-py3-none-any.whl" + "/Workspace/Users/[UUID]/.snapshots/test-bundle-immutable-[UNIQUE_NAME]/[SNAPSHOT_HASH]/src/artifacts/.internal/immutable-0.0.1-py3-none-any.whl" ] >>> [CLI] bundle run my_job +script: line 182: sort_lines: command not found Run URL: [DATABRICKS_URL]/?o=[NUMID]#job/[NUMID]/run/[NUMID] [TIMESTAMP] "my job" RUNNING [TIMESTAMP] "my job" TERMINATED SUCCESS -Output: -======= -Task python_wheel_task: -Hello from Python Wheel Task! - -======= -Task notebook_task: - -======= -Task spark_python_task: -Hello from Spark Python Task! 
- >>> [CLI] bundle destroy --auto-approve The following resources will be deleted: @@ -52,3 +41,5 @@ All files and directories at the following location will be deleted: /Workspace/ Deleting files... Destroy complete! + +Exit code: 127 diff --git a/acceptance/bundle/deploy/immutable/script b/acceptance/bundle/deploy/immutable/script index 57d5cfbf3ec..c783212308a 100644 --- a/acceptance/bundle/deploy/immutable/script +++ b/acceptance/bundle/deploy/immutable/script @@ -14,4 +14,5 @@ trace $CLI jobs get $JOB_ID | jq '.settings.tasks' | jq '.[] | select(.spark_pyt trace $CLI jobs get $JOB_ID | jq '.settings.tasks' | jq '.[] | select(.notebook_task != null) | .notebook_task.notebook_path' trace $CLI jobs get $JOB_ID | jq '.settings.environments[0].spec.dependencies' -trace $CLI bundle run my_job +# Sort output to make it stable +trace $CLI bundle run my_job | sort_lines diff --git a/acceptance/bundle/deploy/immutable/test.toml b/acceptance/bundle/deploy/immutable/test.toml index be964aff9b5..a97b714bea8 100644 --- a/acceptance/bundle/deploy/immutable/test.toml +++ b/acceptance/bundle/deploy/immutable/test.toml @@ -2,9 +2,15 @@ Local = false Cloud = true Ignore = [ + "dist", "databricks.yml", ".databricks", ".venv", "script", "*.pyc", ] + +[[Repls]] +# Replace snapshot hash with SNAPSHOT_HASH +Old = "[0-9a-f]{64}" +New = "[SNAPSHOT_HASH]" diff --git a/acceptance/bundle/validate/immutable_workspace_paths/output.txt b/acceptance/bundle/validate/immutable_workspace_paths/output.txt index f6a8004bf2f..da1a2f05768 100644 --- a/acceptance/bundle/validate/immutable_workspace_paths/output.txt +++ b/acceptance/bundle/validate/immutable_workspace_paths/output.txt @@ -2,7 +2,7 @@ >>> [CLI] bundle validate -o json Warning: Pattern user_repls.json does not match any files at sync.exclude[1] - in databricks.yml:9:7 + in databricks.yml:10:7 { "workspace": { @@ -22,7 +22,7 @@ Warning: Pattern user_repls.json does not match any files { "existing_cluster_id": "0101-120000-aaaaaaaa", 
"spark_python_task": { - "python_file": "${workspace.snapshot_path}/src/files/src/main.py" + "python_file": "[TEST_TMP_DIR]/src/main.py" }, "task_key": "my_task" } diff --git a/bundle/config/mutator/resolve_variable_references.go b/bundle/config/mutator/resolve_variable_references.go index 70a330c8a3c..4da02a31b04 100644 --- a/bundle/config/mutator/resolve_variable_references.go +++ b/bundle/config/mutator/resolve_variable_references.go @@ -79,21 +79,6 @@ func ResolveVariableReferencesOnlyResources(prefixes ...string) bundle.Mutator { } } -// ResolveVariableReferencesOnlyResourcesExcluding resolves variable references in -// resources while leaving references to the specified paths unresolved. -// Used by ProcessStaticResources for immutable bundles so that ${workspace.snapshot_path} -// is not resolved during Initialize; it is resolved in the Deploy phase after -// snapshot.Upload() sets workspace.snapshot_path to the API-assigned path. -func ResolveVariableReferencesOnlyResourcesExcluding(excludePaths ...string) bundle.Mutator { - return &resolveVariableReferences{ - prefixes: defaultPrefixes, - lookupFn: lookup, - extraRounds: maxResolutionRounds - 1, - pattern: dyn.NewPattern(dyn.Key("resources")), - includeResources: true, - excludePaths: excludePaths, - } -} func ResolveVariableReferencesWithoutResources(prefixes ...string) bundle.Mutator { if len(prefixes) == 0 { diff --git a/bundle/config/mutator/resolve_variable_references_test.go b/bundle/config/mutator/resolve_variable_references_test.go index f682419f32a..876980e9486 100644 --- a/bundle/config/mutator/resolve_variable_references_test.go +++ b/bundle/config/mutator/resolve_variable_references_test.go @@ -6,9 +6,7 @@ import ( "github.com/databricks/cli/bundle" "github.com/databricks/cli/bundle/config" "github.com/databricks/cli/bundle/config/resources" - "github.com/databricks/databricks-sdk-go/service/jobs" "github.com/databricks/databricks-sdk-go/service/pipelines" - "github.com/stretchr/testify/assert" 
"github.com/stretchr/testify/require" ) @@ -65,46 +63,3 @@ func TestResolveVariableReferencesWithSourceLinkedDeployment(t *testing.T) { testCase.assert(t, b) } } - -// TestResolveVariableReferencesExcludePaths verifies that paths listed in excludePaths -// are skipped during resolution and left as unresolved variable references. -// This is used by ProcessStaticResources for immutable bundles so that -// ${workspace.file_path} and ${workspace.artifact_path} can be resolved later -// (in the Build phase, after artifacts are built and the correct snapshot path is known). -func TestResolveVariableReferencesExcludePaths(t *testing.T) { - b := &bundle.Bundle{ - Config: config.Root{ - Workspace: config.Workspace{ - FilePath: "/snapshot/path/src/files", - ArtifactPath: "/snapshot/path/src/artifacts", - }, - Resources: config.Resources{ - Jobs: map[string]*resources.Job{ - "job1": { - JobSettings: jobs.JobSettings{ - Tasks: []jobs.Task{ - { - SparkPythonTask: &jobs.SparkPythonTask{ - PythonFile: "${workspace.file_path}/main.py", - }, - }, - }, - }, - }, - }, - }, - }, - } - - // With exclusion: ${workspace.file_path} should remain unresolved. - diags := bundle.Apply(t.Context(), b, ResolveVariableReferencesOnlyResourcesExcluding("workspace.file_path", "workspace.artifact_path")) - require.NoError(t, diags.Error()) - assert.Equal(t, "${workspace.file_path}/main.py", b.Config.Resources.Jobs["job1"].Tasks[0].SparkPythonTask.PythonFile, - "reference should remain unresolved when path is excluded") - - // Without exclusion: ${workspace.file_path} should resolve normally. 
- diags = bundle.Apply(t.Context(), b, ResolveVariableReferencesOnlyResources()) - require.NoError(t, diags.Error()) - assert.Equal(t, "/snapshot/path/src/files/main.py", b.Config.Resources.Jobs["job1"].Tasks[0].SparkPythonTask.PythonFile, - "reference should resolve after exclusion is lifted") -} diff --git a/bundle/config/mutator/resourcemutator/process_static_resources.go b/bundle/config/mutator/resourcemutator/process_static_resources.go index 8b8dcbbe3b8..5fcd54de33f 100644 --- a/bundle/config/mutator/resourcemutator/process_static_resources.go +++ b/bundle/config/mutator/resourcemutator/process_static_resources.go @@ -39,17 +39,7 @@ func (p processStaticResources) Apply(ctx context.Context, b *bundle.Bundle) dia // - variable can be used a prefix // - path can be part of a complex variable value - // For immutable bundles, defer resolving ${workspace.snapshot_path} in resources. - // The actual snapshot path is only known after snapshot.Upload() returns the - // API-assigned path in the deploy phase. - var resourceResolver bundle.Mutator - if b.Config.Bundle.Deployment.ImmutableFolder { - resourceResolver = mutator.ResolveVariableReferencesOnlyResourcesExcluding( - "workspace.snapshot_path", - ) - } else { - resourceResolver = mutator.ResolveVariableReferencesOnlyResources() - } + resourceResolver := mutator.ResolveVariableReferencesOnlyResources() bundle.ApplySeqContext( ctx, diff --git a/bundle/config/mutator/resourcemutator/resource_mutator.go b/bundle/config/mutator/resourcemutator/resource_mutator.go index ebfa964d8bf..209bbcb06a0 100644 --- a/bundle/config/mutator/resourcemutator/resource_mutator.go +++ b/bundle/config/mutator/resourcemutator/resource_mutator.go @@ -127,19 +127,6 @@ func applyInitializeMutators(ctx context.Context, b *bundle.Bundle) { ) } -// resourceVarResolver returns a mutator that resolves variable references in -// resources. 
For immutable bundles, ${workspace.file_path} and -// ${workspace.artifact_path} are excluded: the API assigns the snapshot path -// after upload, so they must remain as-is until snapshot.Upload() has run. -func resourceVarResolver(b *bundle.Bundle) bundle.Mutator { - if b.Config.Bundle.Deployment.ImmutableFolder { - return mutator.ResolveVariableReferencesOnlyResourcesExcluding( - "workspace.file_path", "workspace.artifact_path", - ) - } - return mutator.ResolveVariableReferencesOnlyResources() -} - // Normalization is applied multiple times if resource is modified during initialization // // If bundle is modified outside of 'resources' section, these changes are discarded. @@ -152,10 +139,8 @@ func applyNormalizeMutators(ctx context.Context, b *bundle.Bundle) { // Reads (dynamic): * (strings) (searches for variable references in string values) // Updates (dynamic): resources.* (strings) (resolves variable references to their actual values) - // Resolves variable references in 'resources' using bundle, workspace, and variables prefixes. - // For immutable bundles, ${workspace.file_path} and ${workspace.artifact_path} are left - // unresolved: the actual snapshot path is assigned by the API after upload, not pre-computed. 
- resourceVarResolver(b), + // Resolves variable references in 'resources' using bundle, workspace, and variables prefixes + mutator.ResolveVariableReferencesOnlyResources(), // Reads (dynamic): resources.pipelines.*.libraries (checks for notebook.path and file.path fields) // Updates (dynamic): resources.pipelines.*.libraries (expands glob patterns in path fields to multiple library entries) diff --git a/bundle/config/mutator/translate_paths.go b/bundle/config/mutator/translate_paths.go index 44292a8dd6f..7b58d16b161 100644 --- a/bundle/config/mutator/translate_paths.go +++ b/bundle/config/mutator/translate_paths.go @@ -319,18 +319,12 @@ func (t *translateContext) rewriteValue(ctx context.Context, p dyn.Path, v dyn.V return dyn.NewValue(out, v.Locations()), nil } -// snapshotFilesRoot is the remote root used for file/notebook path translation -// in immutable bundles. References to this placeholder are resolved after -// snapshot.Upload() sets workspace.snapshot_path to the API-assigned path. -const snapshotFilesRoot = "${workspace.snapshot_path}/src/files" - func applyTranslations(ctx context.Context, b *bundle.Bundle, t *translateContext, translations []func(context.Context, dyn.Value) (dyn.Value, error)) diag.Diagnostics { switch { case b.Config.Bundle.Deployment.ImmutableFolder: - // Use a placeholder root that is resolved after snapshot.Upload() sets - // workspace.snapshot_path. This defers path computation until the actual - // content-addressed path is known. - t.remoteRoot = snapshotFilesRoot + // Keep paths as local absolute paths during validate. snapshot.TranslateResourcePaths() + // replaces this local prefix with the actual snapshot path after upload. 
+ t.remoteRoot = t.b.SyncRootPath case config.IsExplicitlyEnabled(t.b.Config.Presets.SourceLinkedDeployment): t.remoteRoot = t.b.SyncRootPath default: diff --git a/bundle/config/workspace.go b/bundle/config/workspace.go index 284bd0afe02..b07545ac6c2 100644 --- a/bundle/config/workspace.go +++ b/bundle/config/workspace.go @@ -82,9 +82,9 @@ type Workspace struct { StatePath string `json:"state_path,omitempty"` // SnapshotPath is the workspace path of the immutable snapshot uploaded during - // deployment. It is set by snapshot.Upload() and used to resolve - // ${workspace.snapshot_path} references in resource configurations. - // Only populated for bundles with bundle.immutable = true. + // deployment. It is set by snapshot.Upload() and persisted to local state so + // that snapshot.LoadState() can restore workspace.file_path for destroy. + // Only populated for bundles with deployment.immutable_folder = true. SnapshotPath string `json:"snapshot_path,omitempty" bundle:"internal"` } diff --git a/bundle/deploy/metadata/load.go b/bundle/deploy/metadata/load.go deleted file mode 100644 index 3fe4eb1c3bd..00000000000 --- a/bundle/deploy/metadata/load.go +++ /dev/null @@ -1,57 +0,0 @@ -package metadata - -import ( - "context" - "encoding/json" - "io" - - "github.com/databricks/cli/bundle" - "github.com/databricks/cli/bundle/metadata" - "github.com/databricks/cli/libs/diag" - "github.com/databricks/cli/libs/filer" -) - -type load struct{} - -// Load reads the metadata file written during the last deploy and populates -// fields on the bundle that are not available locally (e.g. workspace.snapshot_path -// for immutable bundles, which is only known after snapshot.Upload() ran). 
-func Load() bundle.Mutator { - return &load{} -} - -func (m *load) Name() string { - return "metadata.Load" -} - -func (m *load) Apply(ctx context.Context, b *bundle.Bundle) diag.Diagnostics { - f, err := filer.NewWorkspaceFilesClient(b.WorkspaceClient(ctx), b.Config.Workspace.StatePath) - if err != nil { - return diag.FromErr(err) - } - - r, err := f.Read(ctx, metadataFileName) - if err != nil { - // Missing metadata file means the bundle was never deployed or was - // deployed by an older CLI version that didn't write metadata. Treat - // it as a no-op so destroy can still proceed. - return nil - } - defer r.Close() - - raw, err := io.ReadAll(r) - if err != nil { - return diag.FromErr(err) - } - - var md metadata.Metadata - if err := json.Unmarshal(raw, &md); err != nil { - return diag.FromErr(err) - } - - if md.Config.Workspace.SnapshotPath != "" { - b.Config.Workspace.SnapshotPath = md.Config.Workspace.SnapshotPath - } - - return nil -} diff --git a/libs/filer/snapshot_client.go b/bundle/deploy/snapshot/client.go similarity index 91% rename from libs/filer/snapshot_client.go rename to bundle/deploy/snapshot/client.go index a7d84891cb0..3a720cb2150 100644 --- a/libs/filer/snapshot_client.go +++ b/bundle/deploy/snapshot/client.go @@ -1,4 +1,4 @@ -package filer +package snapshot import ( "bytes" @@ -10,7 +10,7 @@ import ( "net/textproto" "github.com/databricks/databricks-sdk-go" - "github.com/databricks/databricks-sdk-go/client" + databricksclient "github.com/databricks/databricks-sdk-go/client" ) // SnapshotInfo holds the result of a successful snapshot upload. @@ -30,7 +30,7 @@ type SnapshotUploader interface { // snapshotAPIClient implements SnapshotUploader against /api/2.0/repos/snapshots. type snapshotAPIClient struct { - apiClient apiClient + client *databricksclient.DatabricksClient } // snapshotUploadResponse mirrors the /api/2.0/repos/snapshots response body. 
@@ -42,11 +42,11 @@ type snapshotUploadResponse struct { // NewSnapshotUploader creates a SnapshotUploader backed by /api/2.0/repos/snapshots. func NewSnapshotUploader(w *databricks.WorkspaceClient) (SnapshotUploader, error) { - apiClient, err := client.New(w.Config) + c, err := databricksclient.New(w.Config) if err != nil { return nil, err } - return &snapshotAPIClient{apiClient: apiClient}, nil + return &snapshotAPIClient{client: c}, nil } // Upload uploads zipContent as an immutable snapshot identified by snapshotID. @@ -94,7 +94,7 @@ func (c *snapshotAPIClient) Upload(ctx context.Context, bundleID, snapshotID, cu } var resp snapshotUploadResponse - err = c.apiClient.Do(ctx, http.MethodPost, "/api/2.0/repos/snapshots", headers, nil, body.Bytes(), &resp) + err = c.client.Do(ctx, http.MethodPost, "/api/2.0/repos/snapshots", headers, nil, body.Bytes(), &resp) if err != nil { return nil, fmt.Errorf("snapshot upload: %w", err) } diff --git a/bundle/deploy/snapshot/path.go b/bundle/deploy/snapshot/path.go index 82c5d543680..680461d2658 100644 --- a/bundle/deploy/snapshot/path.go +++ b/bundle/deploy/snapshot/path.go @@ -15,8 +15,7 @@ import ( "github.com/databricks/cli/bundle" "github.com/databricks/cli/libs/fileset" - "github.com/databricks/cli/libs/git" - "github.com/databricks/cli/libs/set" + libsync "github.com/databricks/cli/libs/sync" ) // zipEpoch is a fixed timestamp used for all zip entries to make the zip content-addressed @@ -66,65 +65,18 @@ func SnapshotID(ctx context.Context, b *bundle.Bundle) (string, error) { return IDFromContent(content), nil } -// syncFiles returns the list of files to include in the snapshot zip using the -// same git-aware include/exclude logic as files.Upload (libs/sync). -func syncFiles(ctx context.Context, b *bundle.Bundle) ([]fileset.File, error) { - // Use git.NewFileSet so that .gitignore rules are respected, matching the - // behaviour of the normal files.Upload sync path. 
- // Avoid passing an empty/nil paths slice: git.NewFileSet forwards it to - // fileset.New whose variadic default ("." if no args) is bypassed when the - // caller explicitly passes a nil slice. The SyncDefaultPath mutator always - // sets Sync.Paths to ["."] in the normal pipeline; we replicate that here - // so BundleZip works even when the bundle hasn't gone through the full pipeline. - var gitFS *git.FileSet - var err error - if len(b.Config.Sync.Paths) > 0 { - gitFS, err = git.NewFileSet(ctx, b.WorktreeRoot, b.SyncRoot, b.Config.Sync.Paths) - } else { - gitFS, err = git.NewFileSet(ctx, b.WorktreeRoot, b.SyncRoot) - } - if err != nil { - return nil, fmt.Errorf("build file set: %w", err) - } - - all := set.NewSetF(func(f fileset.File) string { - return f.Relative +func addSyncRootToZip(ctx context.Context, zw *zip.Writer, b *bundle.Bundle) error { + files, err := libsync.GetFileList(ctx, libsync.SyncOptions{ + WorktreeRoot: b.WorktreeRoot, + LocalRoot: b.SyncRoot, + Paths: b.Config.Sync.Paths, + Include: b.Config.Sync.Include, + Exclude: b.Config.Sync.Exclude, }) - - gitFiles, err := gitFS.Files() if err != nil { - return nil, fmt.Errorf("list sync files: %w", err) - } - all.Add(gitFiles...) - - if len(b.Config.Sync.Include) > 0 { - includeFS, err := fileset.NewGlobSet(b.SyncRoot, b.Config.Sync.Include) - if err != nil { - return nil, fmt.Errorf("build include set: %w", err) - } - include, err := includeFS.Files() - if err != nil { - return nil, fmt.Errorf("list include files: %w", err) - } - all.Add(include...) 
- } - - if len(b.Config.Sync.Exclude) > 0 { - excludeFS, err := fileset.NewGlobSet(b.SyncRoot, b.Config.Sync.Exclude) - if err != nil { - return nil, fmt.Errorf("build exclude set: %w", err) - } - exclude, err := excludeFS.Files() - if err != nil { - return nil, fmt.Errorf("list exclude files: %w", err) - } - for _, f := range exclude { - all.Remove(f) - } + return err } - - files := all.Iter() - // Sort for a stable zip (same content → same hash regardless of map iteration order). + // Sort for a stable zip (same content → same hash regardless of iteration order). slices.SortFunc(files, func(a, b fileset.File) int { if a.Relative < b.Relative { return -1 @@ -134,14 +86,6 @@ func syncFiles(ctx context.Context, b *bundle.Bundle) ([]fileset.File, error) { } return 0 }) - return files, nil -} - -func addSyncRootToZip(ctx context.Context, zw *zip.Writer, b *bundle.Bundle) error { - files, err := syncFiles(ctx, b) - if err != nil { - return err - } for _, f := range files { rc, err := b.SyncRoot.Open(f.Relative) diff --git a/bundle/deploy/snapshot/path_test.go b/bundle/deploy/snapshot/path_test.go index 9728fadb507..9b87182eab1 100644 --- a/bundle/deploy/snapshot/path_test.go +++ b/bundle/deploy/snapshot/path_test.go @@ -125,6 +125,6 @@ func TestBundleZipDoNotStripNotebookExtensions(t *testing.T) { require.NoError(t, err) names := zipEntryNames(t, zipContent) - assert.True(t, slices.Contains(names, "files/src/my_notebook.ipynb"), "notebook should have extension stripped") + assert.True(t, slices.Contains(names, "files/src/my_notebook.ipynb"), "notebook should keep its extension") assert.True(t, slices.Contains(names, "files/src/script.py"), "regular Python file should keep its extension") } diff --git a/bundle/deploy/snapshot/state.go b/bundle/deploy/snapshot/state.go new file mode 100644 index 00000000000..e1f47f55665 --- /dev/null +++ b/bundle/deploy/snapshot/state.go @@ -0,0 +1,63 @@ +package snapshot + +import ( + "context" + "os" + "path" + "path/filepath" + 
"strings" + + "github.com/databricks/cli/bundle" + "github.com/databricks/cli/libs/diag" +) + +const snapshotPathStateFile = "snapshot_path" + +type saveState struct{} +type loadState struct{} + +// SaveState writes the snapshot path to the local deployment state directory +// so it can be recovered during destroy without reading metadata.json. +func SaveState() bundle.Mutator { + return &saveState{} +} + +// LoadState reads the snapshot path from the local deployment state directory +// and sets workspace.snapshot_path. Missing state is treated as a no-op so +// destroy can proceed against bundles deployed before this feature was added. +func LoadState() bundle.Mutator { + return &loadState{} +} + +func (s *saveState) Name() string { return "snapshot.SaveState" } +func (s *loadState) Name() string { return "snapshot.LoadState" } + +func (s *saveState) Apply(ctx context.Context, b *bundle.Bundle) diag.Diagnostics { + if b.Config.Workspace.SnapshotPath == "" { + return nil + } + dir, err := b.LocalStateDir(ctx) + if err != nil { + return diag.FromErr(err) + } + p := filepath.Join(dir, snapshotPathStateFile) + return diag.FromErr(os.WriteFile(p, []byte(b.Config.Workspace.SnapshotPath), 0o600)) +} + +func (s *loadState) Apply(ctx context.Context, b *bundle.Bundle) diag.Diagnostics { + dir := b.GetLocalStateDir(ctx) + data, err := os.ReadFile(filepath.Join(dir, snapshotPathStateFile)) + if os.IsNotExist(err) { + return nil + } + if err != nil { + return diag.FromErr(err) + } + snapshotPath := strings.TrimSpace(string(data)) + b.Config.Workspace.SnapshotPath = snapshotPath + // Restore FilePath and ArtifactPath so that TranslateResourcePaths() can + // rewrite local absolute paths to snapshot paths during destroy. 
+ b.Config.Workspace.FilePath = path.Join(snapshotPath, "src", "files") + b.Config.Workspace.ArtifactPath = path.Join(snapshotPath, "src", "artifacts") + return nil +} diff --git a/bundle/deploy/snapshot/translate_paths.go b/bundle/deploy/snapshot/translate_paths.go new file mode 100644 index 00000000000..1576197e7f4 --- /dev/null +++ b/bundle/deploy/snapshot/translate_paths.go @@ -0,0 +1,50 @@ +package snapshot + +import ( + "context" + "strings" + + "github.com/databricks/cli/bundle" + "github.com/databricks/cli/libs/diag" + "github.com/databricks/cli/libs/dyn" +) + +type translateResourcePaths struct{} + +// TranslateResourcePaths replaces local absolute paths in resource configs with the +// remote snapshot path. It must run after snapshot.Upload() has set +// b.Config.Workspace.FilePath to the content-addressed snapshot location. +// +// translate_paths.go uses b.SyncRootPath as the remote root for immutable bundles, +// so resource paths are stored as local absolute paths until this mutator rewrites them. +func TranslateResourcePaths() bundle.Mutator { + return &translateResourcePaths{} +} + +func (m *translateResourcePaths) Name() string { return "snapshot.TranslateResourcePaths" } + +func (m *translateResourcePaths) Apply(_ context.Context, b *bundle.Bundle) diag.Diagnostics { + localPrefix := b.SyncRootPath + "/" + remotePrefix := b.Config.Workspace.FilePath + "/" + + err := b.Config.Mutate(func(root dyn.Value) (dyn.Value, error) { + return dyn.Walk(root, func(p dyn.Path, v dyn.Value) (dyn.Value, error) { + if len(p) == 0 { + return v, nil + } + // Only rewrite paths inside the resources section. 
+ if p[0] != dyn.Key("resources") { + return v, dyn.ErrSkip + } + str, ok := v.AsString() + if !ok { + return v, nil + } + if !strings.HasPrefix(str, localPrefix) { + return v, nil + } + return dyn.NewValue(remotePrefix+strings.TrimPrefix(str, localPrefix), v.Locations()), nil + }) + }) + return diag.FromErr(err) +} diff --git a/bundle/deploy/snapshot/upload.go b/bundle/deploy/snapshot/upload.go index 4eea5757cdd..86e8160ebfd 100644 --- a/bundle/deploy/snapshot/upload.go +++ b/bundle/deploy/snapshot/upload.go @@ -8,13 +8,12 @@ import ( "github.com/databricks/cli/bundle" "github.com/databricks/cli/libs/cmdio" "github.com/databricks/cli/libs/diag" - "github.com/databricks/cli/libs/filer" "github.com/databricks/cli/libs/log" ) type snapshotUpload struct { // uploader allows test injection of a custom SnapshotUploader. - uploader filer.SnapshotUploader + uploader SnapshotUploader } // Upload returns a mutator that builds the bundle zip, uploads it via @@ -32,7 +31,7 @@ func (m *snapshotUpload) Apply(ctx context.Context, b *bundle.Bundle) diag.Diagn uploader := m.uploader if uploader == nil { var err error - uploader, err = filer.NewSnapshotUploader(b.WorkspaceClient(ctx)) + uploader, err = NewSnapshotUploader(b.WorkspaceClient(ctx)) if err != nil { return diag.FromErr(err) } diff --git a/bundle/phases/build.go b/bundle/phases/build.go index 8411e376ebb..c60db5235bc 100644 --- a/bundle/phases/build.go +++ b/bundle/phases/build.go @@ -14,9 +14,11 @@ import ( "github.com/databricks/cli/libs/logdiag" ) +// LibLocationMap maps artifact names to library locations that need uploading. +// Computed by Build and consumed by Deploy to upload the right files. type LibLocationMap map[string][]libraries.LocationToUpdate -// The build phase builds artifacts. +// Build runs the build phase, which builds artifacts. 
func Build(ctx context.Context, b *bundle.Bundle) LibLocationMap { log.Info(ctx, "Phase: build") @@ -42,6 +44,10 @@ func Build(ctx context.Context, b *bundle.Bundle) LibLocationMap { libraries.SwitchToPatchedWheels(), ) + if logdiag.HasError(ctx) { + return nil + } + // For immutable bundles, library remote paths are set in the deploy phase // after snapshot.Upload() provides the content-addressed workspace.artifact_path. if b.Config.Bundle.Deployment.ImmutableFolder { @@ -49,15 +55,9 @@ func Build(ctx context.Context, b *bundle.Bundle) LibLocationMap { } libs, diags := libraries.ReplaceWithRemotePath(ctx, b) - for _, diag := range diags { - logdiag.LogDiag(ctx, diag) + for _, d := range diags { + logdiag.LogDiag(ctx, d) } - - bundle.ApplyContext(ctx, b, - // TransformWheelTask must be run after ReplaceWithRemotePath so we can use correct remote path in the - // transformed notebook - trampoline.TransformWheelTask(), - ) - + bundle.ApplyContext(ctx, b, trampoline.TransformWheelTask()) return libs } diff --git a/bundle/phases/deploy.go b/bundle/phases/deploy.go index 5c9b7979ae5..e8c22d56ffa 100644 --- a/bundle/phases/deploy.go +++ b/bundle/phases/deploy.go @@ -8,8 +8,7 @@ import ( "github.com/databricks/cli/bundle/artifacts" "github.com/databricks/cli/bundle/config" "github.com/databricks/cli/bundle/config/engine" - "github.com/databricks/cli/bundle/config/mutator" - "github.com/databricks/cli/bundle/deploy" +"github.com/databricks/cli/bundle/deploy" "github.com/databricks/cli/bundle/deploy/files" "github.com/databricks/cli/bundle/deploy/lock" "github.com/databricks/cli/bundle/deploy/metadata" @@ -22,7 +21,7 @@ import ( "github.com/databricks/cli/bundle/permissions" "github.com/databricks/cli/bundle/scripts" "github.com/databricks/cli/bundle/statemgmt" - "github.com/databricks/cli/libs/agent" +"github.com/databricks/cli/libs/agent" "github.com/databricks/cli/libs/cmdio" "github.com/databricks/cli/libs/log" "github.com/databricks/cli/libs/logdiag" @@ -145,13 +144,13 
@@ func Deploy(ctx context.Context, b *bundle.Bundle, outputHandler sync.OutputHand if b.Config.Bundle.Deployment.ImmutableFolder { // Upload all source files and built artifacts as a single immutable snapshot. - // The API assigns a content-addressed path, so workspace.snapshot_path (and - // derived workspace.file_path / workspace.artifact_path) are only known after - // upload. Resolve variable references in resources and set library remote paths - // once the actual paths are available. + // The API assigns a content-addressed workspace.file_path; snapshot.TranslateResourcePaths() + // then replaces the local absolute paths (written by translate_paths during validate) + // with the actual snapshot remote paths. bundle.ApplySeqContext(ctx, b, snapshot.Upload(), - mutator.ResolveVariableReferencesOnlyResources(), + snapshot.TranslateResourcePaths(), + snapshot.SaveState(), ) if !logdiag.HasError(ctx) { _, libDiags := libraries.ReplaceWithRemotePath(ctx, b) diff --git a/bundle/phases/destroy.go b/bundle/phases/destroy.go index 6ab84ad7787..3dccf770275 100644 --- a/bundle/phases/destroy.go +++ b/bundle/phases/destroy.go @@ -10,7 +10,7 @@ import ( "github.com/databricks/cli/bundle/config/mutator" "github.com/databricks/cli/bundle/deploy/files" "github.com/databricks/cli/bundle/deploy/lock" - deploymetadata "github.com/databricks/cli/bundle/deploy/metadata" + "github.com/databricks/cli/bundle/deploy/snapshot" "github.com/databricks/cli/bundle/deploy/terraform" "github.com/databricks/cli/bundle/deployplan" "github.com/databricks/cli/bundle/direct" @@ -140,11 +140,11 @@ func Destroy(ctx context.Context, b *bundle.Bundle, engine engine.EngineType) { } if b.Config.Bundle.Deployment.ImmutableFolder { - // For immutable bundles, resource paths contain ${workspace.snapshot_path} - // which was set during deploy by snapshot.Upload(). Load it from the stored - // metadata so it can be resolved before Terraform processes the config. 
- mutators = append([]bundle.Mutator{deploymetadata.Load()}, mutators...) - mutators = append(mutators, mutator.ResolveVariableReferencesOnlyResources()) + // For immutable bundles, resource paths are local absolute paths after + // translate_paths. Restore workspace.file_path from the local state file + // and replace the local prefix with the snapshot remote path before + // Terraform processes the config. + mutators = append([]bundle.Mutator{snapshot.LoadState(), snapshot.TranslateResourcePaths()}, mutators...) } mutators = append(mutators, diff --git a/cmd/bundle/utils/process.go b/cmd/bundle/utils/process.go index d61c4525530..683477c465f 100644 --- a/cmd/bundle/utils/process.go +++ b/cmd/bundle/utils/process.go @@ -297,7 +297,6 @@ func ProcessBundleRet(cmd *cobra.Command, opts ProcessOptions) (b *bundle.Bundle } var libs phases.LibLocationMap - if opts.Build { t2 := time.Now() libs = phases.Build(ctx, b) diff --git a/libs/sync/sync.go b/libs/sync/sync.go index c65b49eb775..c7a1428c764 100644 --- a/libs/sync/sync.go +++ b/libs/sync/sync.go @@ -245,6 +245,35 @@ func (s *Sync) GetFileList(ctx context.Context) ([]fileset.File, error) { return all.Iter(), nil } +// GetFileList returns the list of files that would be synced given opts, +// applying the same git-aware include/exclude logic as RunOnce. +// Unlike New, it does not verify the remote path or load a sync snapshot. 
+func GetFileList(ctx context.Context, opts SyncOptions) ([]fileset.File, error) { + paths := opts.Paths + if len(paths) == 0 { + paths = []string{"."} + } + fileSet, err := git.NewFileSet(ctx, opts.WorktreeRoot, opts.LocalRoot, paths) + if err != nil { + return nil, fmt.Errorf("build file set: %w", err) + } + includeFileSet, err := fileset.NewGlobSet(opts.LocalRoot, opts.Include) + if err != nil { + return nil, err + } + excludeFileSet, err := fileset.NewGlobSet(opts.LocalRoot, opts.Exclude) + if err != nil { + return nil, err + } + s := &Sync{ + SyncOptions: &opts, + fileSet: fileSet, + includeFileSet: includeFileSet, + excludeFileSet: excludeFileSet, + } + return s.GetFileList(ctx) +} + func (s *Sync) RunContinuous(ctx context.Context) error { ticker := time.NewTicker(s.PollInterval) defer ticker.Stop() diff --git a/libs/testserver/handlers.go b/libs/testserver/handlers.go index b1ec9b2e3d8..c98d3b826ec 100644 --- a/libs/testserver/handlers.go +++ b/libs/testserver/handlers.go @@ -1,9 +1,13 @@ package testserver import ( + "bytes" "encoding/base64" "encoding/json" "fmt" + "io" + "mime" + "mime/multipart" "net/http" "path" "strings" @@ -537,6 +541,45 @@ func AddDefaultHandlers(server *Server) { return req.Workspace.ReposDelete(req) }) + server.Handle("POST", "/api/2.0/repos/snapshots", func(req Request) any { + contentType := req.Headers.Get("Content-Type") + mediaType, params, err := mime.ParseMediaType(contentType) + if err != nil || !strings.HasPrefix(mediaType, "multipart/") { + return Response{StatusCode: http.StatusBadRequest} + } + + mr := multipart.NewReader(bytes.NewReader(req.Body), params["boundary"]) + var bundleID, snapshotID string + for { + p, err := mr.NextPart() + if err == io.EOF { + break + } + if err != nil { + return Response{StatusCode: http.StatusInternalServerError} + } + data, err := io.ReadAll(p) + if err != nil { + return Response{StatusCode: http.StatusInternalServerError} + } + switch p.FormName() { + case "bundle_id": + bundleID = 
string(data) + case "snapshot_id": + snapshotID = string(data) + } + } + + // The real API uses the workspace user UUID (not email) in the snapshot path, + // matching service-principal identities used in cloud acceptance tests. + snapshotPath := fmt.Sprintf("/Workspace/Users/%s/.snapshots/%s/%s", TestUserSP.UserName, bundleID, snapshotID) + return map[string]any{ + "snapshot": map[string]any{ + "path": snapshotPath, + }, + } + }) + // SQL Warehouses: server.Handle("GET", "/api/2.0/sql/warehouses/{warehouse_id}", func(req Request) any { From be6dec3a6fa107d503bd30b77649e750e8a7aab8 Mon Sep 17 00:00:00 2001 From: Andrew Nester Date: Thu, 18 Jun 2026 18:49:26 +0200 Subject: [PATCH 12/31] fix fmt --- bundle/config/bundle.go | 1 - bundle/config/mutator/resolve_variable_references.go | 1 - bundle/deploy/snapshot/state.go | 6 ++++-- bundle/phases/deploy.go | 4 ++-- 4 files changed, 6 insertions(+), 6 deletions(-) diff --git a/bundle/config/bundle.go b/bundle/config/bundle.go index 473e355225f..ce6d25bfe62 100644 --- a/bundle/config/bundle.go +++ b/bundle/config/bundle.go @@ -59,5 +59,4 @@ type Bundle struct { // A stable generated UUID for the bundle. This is normally serialized by // Databricks first party template when a user runs bundle init. 
Uuid string `json:"uuid,omitempty"` - } diff --git a/bundle/config/mutator/resolve_variable_references.go b/bundle/config/mutator/resolve_variable_references.go index 4da02a31b04..fab5dc218f1 100644 --- a/bundle/config/mutator/resolve_variable_references.go +++ b/bundle/config/mutator/resolve_variable_references.go @@ -79,7 +79,6 @@ func ResolveVariableReferencesOnlyResources(prefixes ...string) bundle.Mutator { } } - func ResolveVariableReferencesWithoutResources(prefixes ...string) bundle.Mutator { if len(prefixes) == 0 { prefixes = defaultPrefixes diff --git a/bundle/deploy/snapshot/state.go b/bundle/deploy/snapshot/state.go index e1f47f55665..681da1b94fc 100644 --- a/bundle/deploy/snapshot/state.go +++ b/bundle/deploy/snapshot/state.go @@ -13,8 +13,10 @@ import ( const snapshotPathStateFile = "snapshot_path" -type saveState struct{} -type loadState struct{} +type ( + saveState struct{} + loadState struct{} +) // SaveState writes the snapshot path to the local deployment state directory // so it can be recovered during destroy without reading metadata.json. 
diff --git a/bundle/phases/deploy.go b/bundle/phases/deploy.go index 6202870f700..851cac81d9a 100644 --- a/bundle/phases/deploy.go +++ b/bundle/phases/deploy.go @@ -8,7 +8,7 @@ import ( "github.com/databricks/cli/bundle/artifacts" "github.com/databricks/cli/bundle/config" "github.com/databricks/cli/bundle/config/engine" -"github.com/databricks/cli/bundle/deploy" + "github.com/databricks/cli/bundle/deploy" "github.com/databricks/cli/bundle/deploy/files" "github.com/databricks/cli/bundle/deploy/lock" "github.com/databricks/cli/bundle/deploy/metadata" @@ -21,7 +21,7 @@ import ( "github.com/databricks/cli/bundle/permissions" "github.com/databricks/cli/bundle/scripts" "github.com/databricks/cli/bundle/statemgmt" -"github.com/databricks/cli/libs/agent" + "github.com/databricks/cli/libs/agent" "github.com/databricks/cli/libs/cmdio" "github.com/databricks/cli/libs/log" "github.com/databricks/cli/libs/logdiag" From 0f1ed5201a4400130ecb0ea46d7f2669a6d8809a Mon Sep 17 00:00:00 2001 From: Andrew Nester Date: Thu, 18 Jun 2026 18:50:04 +0200 Subject: [PATCH 13/31] fix annotations --- bundle/internal/schema/annotations.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/bundle/internal/schema/annotations.yml b/bundle/internal/schema/annotations.yml index 234e099dd41..08a9be3086b 100644 --- a/bundle/internal/schema/annotations.yml +++ b/bundle/internal/schema/annotations.yml @@ -90,8 +90,8 @@ bundle: "description": |- Whether to fail on active runs. If this is set to true a deployment that is running can be interrupted. "immutable_folder": - "description": |- - Whether to upload bundle files and artifacts as a single immutable snapshot. When true, all files are packaged into a zip and uploaded via the snapshot API, and workspace.file_path and workspace.artifact_path are set to the returned content-addressed path. The validate and plan commands make no mutative API calls when this is enabled. 
+ "description": |- + Whether to upload bundle files and artifacts as a single immutable snapshot. When true, all files are packaged into a zip and uploaded via the snapshot API, and workspace.file_path and workspace.artifact_path are set to the returned content-addressed path. The validate and plan commands make no mutative API calls when this is enabled. "lock": "description": |- The deployment lock attributes. From d9785598008fc65cfb3a619fc715c326ba18eafe Mon Sep 17 00:00:00 2001 From: Andrew Nester Date: Thu, 18 Jun 2026 18:56:19 +0200 Subject: [PATCH 14/31] fix lint --- bundle/deploy/snapshot/state.go | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/bundle/deploy/snapshot/state.go b/bundle/deploy/snapshot/state.go index 681da1b94fc..b53fa106638 100644 --- a/bundle/deploy/snapshot/state.go +++ b/bundle/deploy/snapshot/state.go @@ -2,6 +2,8 @@ package snapshot import ( "context" + "errors" + "io/fs" "os" "path" "path/filepath" @@ -38,10 +40,12 @@ func (s *saveState) Apply(ctx context.Context, b *bundle.Bundle) diag.Diagnostic if b.Config.Workspace.SnapshotPath == "" { return nil } + dir, err := b.LocalStateDir(ctx) if err != nil { return diag.FromErr(err) } + p := filepath.Join(dir, snapshotPathStateFile) return diag.FromErr(os.WriteFile(p, []byte(b.Config.Workspace.SnapshotPath), 0o600)) } @@ -49,14 +53,18 @@ func (s *saveState) Apply(ctx context.Context, b *bundle.Bundle) diag.Diagnostic func (s *loadState) Apply(ctx context.Context, b *bundle.Bundle) diag.Diagnostics { dir := b.GetLocalStateDir(ctx) data, err := os.ReadFile(filepath.Join(dir, snapshotPathStateFile)) - if os.IsNotExist(err) { + + if errors.Is(err, fs.ErrNotExist) { return nil } + if err != nil { return diag.FromErr(err) } + snapshotPath := strings.TrimSpace(string(data)) b.Config.Workspace.SnapshotPath = snapshotPath + // Restore FilePath and ArtifactPath so that TranslateResourcePaths() can // rewrite local absolute paths to snapshot paths during destroy. 
b.Config.Workspace.FilePath = path.Join(snapshotPath, "src", "files") From 9a6c898cc7f63c1c9575c40b89643ac138e5a208 Mon Sep 17 00:00:00 2001 From: Andrew Nester Date: Thu, 18 Jun 2026 18:59:36 +0200 Subject: [PATCH 15/31] do not call set permissions on immutable ws root --- bundle/permissions/workspace_root.go | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/bundle/permissions/workspace_root.go b/bundle/permissions/workspace_root.go index 9bb9065fe80..f33e223ed1b 100644 --- a/bundle/permissions/workspace_root.go +++ b/bundle/permissions/workspace_root.go @@ -28,6 +28,11 @@ func (*workspaceRootPermissions) Name() string { // Apply implements bundle.Mutator. func (*workspaceRootPermissions) Apply(ctx context.Context, b *bundle.Bundle) diag.Diagnostics { + // If the bundle is immutable, we don't need to apply any permissions to the workspace root. + if b.Config.Bundle.Deployment.ImmutableFolder { + return nil + } + stateFolderPermissions, err := giveAccessForWorkspaceRoot(ctx, b) if err != nil { return diag.FromErr(err) From 079998b1c488d4b8fe3d145de28764ae2ec261c6 Mon Sep 17 00:00:00 2001 From: Andrew Nester Date: Mon, 22 Jun 2026 10:39:59 +0200 Subject: [PATCH 16/31] addressed feedback --- .../deploy/immutable-no-artifacts/output.txt | 4 + .../deploy/immutable-no-artifacts/script | 10 ++- .../deploy/immutable-no-artifacts/test.toml | 1 + .../bundle/deploy/immutable/out.test.toml | 2 +- acceptance/bundle/deploy/immutable/output.txt | 5 +- acceptance/bundle/deploy/immutable/script | 3 +- acceptance/bundle/deploy/immutable/test.toml | 2 +- .../mutator/override_immutable_folder.go | 31 +++++++ .../mutator/override_immutable_folder_test.go | 37 +++++++++ .../process_static_resources.go | 4 +- bundle/config/mutator/translate_paths.go | 10 +++ bundle/deploy/snapshot/client.go | 9 +++ bundle/deploy/snapshot/delete.go | 44 ++++++++++ bundle/deploy/snapshot/state.go | 80 +++++++++++-------- bundle/deploy/state.go | 4 + bundle/deploy/state_update.go | 3 + 
bundle/internal/schema/annotations.yml | 2 +- bundle/permissions/workspace_root.go | 19 +++-- bundle/phases/deploy.go | 1 - bundle/phases/destroy.go | 26 ++++-- bundle/phases/initialize.go | 6 ++ libs/testserver/handlers.go | 4 + 22 files changed, 247 insertions(+), 60 deletions(-) create mode 100644 bundle/config/mutator/override_immutable_folder.go create mode 100644 bundle/config/mutator/override_immutable_folder_test.go create mode 100644 bundle/deploy/snapshot/delete.go diff --git a/acceptance/bundle/deploy/immutable-no-artifacts/output.txt b/acceptance/bundle/deploy/immutable-no-artifacts/output.txt index 52398f774a3..1409648d48e 100644 --- a/acceptance/bundle/deploy/immutable-no-artifacts/output.txt +++ b/acceptance/bundle/deploy/immutable-no-artifacts/output.txt @@ -28,3 +28,7 @@ All files and directories at the following location will be deleted: /Workspace/ Deleting files... Destroy complete! +{ + "method": "DELETE", + "path": "/api/2.0/repos/snapshots/test-bundle-immutable-no-artifacts-[UNIQUE_NAME]" +} diff --git a/acceptance/bundle/deploy/immutable-no-artifacts/script b/acceptance/bundle/deploy/immutable-no-artifacts/script index 21541fb79ee..b4c95578d80 100644 --- a/acceptance/bundle/deploy/immutable-no-artifacts/script +++ b/acceptance/bundle/deploy/immutable-no-artifacts/script @@ -1,8 +1,4 @@ envsubst < databricks.yml.tmpl > databricks.yml -cleanup() { - trace $CLI bundle destroy --auto-approve -} -trap cleanup EXIT trace $CLI bundle validate trace $CLI bundle deploy @@ -12,3 +8,9 @@ trace $CLI bundle deploy JOB_ID=$($CLI bundle summary -o json | jq -r '.resources.jobs.my_job.id') trace $CLI jobs get $JOB_ID | jq '.settings.tasks' | jq '.[] | select(.spark_python_task != null) | .spark_python_task.python_file' trace $CLI jobs get $JOB_ID | jq '.settings.tasks' | jq '.[] | select(.notebook_task != null) | .notebook_task.notebook_path' + +trace $CLI bundle destroy --auto-approve + +# Assert that bundle destroy called the snapshot API to delete the 
bundle's snapshots. +# Use a trailing slash to match only the bundle-specific DELETE path, not the POST upload path. +print_requests.py //api/2.0/repos/snapshots/ diff --git a/acceptance/bundle/deploy/immutable-no-artifacts/test.toml b/acceptance/bundle/deploy/immutable-no-artifacts/test.toml index 21acfa23bd6..b5c9fe29f68 100644 --- a/acceptance/bundle/deploy/immutable-no-artifacts/test.toml +++ b/acceptance/bundle/deploy/immutable-no-artifacts/test.toml @@ -1,5 +1,6 @@ Local = true Cloud = true +RecordRequests = true Ignore = [ "databricks.yml", diff --git a/acceptance/bundle/deploy/immutable/out.test.toml b/acceptance/bundle/deploy/immutable/out.test.toml index 650836edeb3..bbc7fcfd1bd 100644 --- a/acceptance/bundle/deploy/immutable/out.test.toml +++ b/acceptance/bundle/deploy/immutable/out.test.toml @@ -1,3 +1,3 @@ -Local = false +Local = true Cloud = true EnvMatrix.DATABRICKS_BUNDLE_ENGINE = ["terraform", "direct"] diff --git a/acceptance/bundle/deploy/immutable/output.txt b/acceptance/bundle/deploy/immutable/output.txt index 5fc2ed07493..f828d6ac4ed 100644 --- a/acceptance/bundle/deploy/immutable/output.txt +++ b/acceptance/bundle/deploy/immutable/output.txt @@ -27,8 +27,7 @@ Deployment complete! ] >>> [CLI] bundle run my_job -script: line 182: sort_lines: command not found -Run URL: [DATABRICKS_URL]/?o=[NUMID]#job/[NUMID]/run/[NUMID] +Run URL: [DATABRICKS_URL]/jobs/[NUMID]/runs/[NUMID]?o=[NUMID] [TIMESTAMP] "my job" RUNNING [TIMESTAMP] "my job" TERMINATED SUCCESS @@ -41,5 +40,3 @@ All files and directories at the following location will be deleted: /Workspace/ Deleting files... Destroy complete! 
- -Exit code: 127 diff --git a/acceptance/bundle/deploy/immutable/script b/acceptance/bundle/deploy/immutable/script index c783212308a..57d5cfbf3ec 100644 --- a/acceptance/bundle/deploy/immutable/script +++ b/acceptance/bundle/deploy/immutable/script @@ -14,5 +14,4 @@ trace $CLI jobs get $JOB_ID | jq '.settings.tasks' | jq '.[] | select(.spark_pyt trace $CLI jobs get $JOB_ID | jq '.settings.tasks' | jq '.[] | select(.notebook_task != null) | .notebook_task.notebook_path' trace $CLI jobs get $JOB_ID | jq '.settings.environments[0].spec.dependencies' -# Sort output to make it stable -trace $CLI bundle run my_job | sort_lines +trace $CLI bundle run my_job diff --git a/acceptance/bundle/deploy/immutable/test.toml b/acceptance/bundle/deploy/immutable/test.toml index a97b714bea8..12815871ead 100644 --- a/acceptance/bundle/deploy/immutable/test.toml +++ b/acceptance/bundle/deploy/immutable/test.toml @@ -1,4 +1,4 @@ -Local = false +Local = true Cloud = true Ignore = [ diff --git a/bundle/config/mutator/override_immutable_folder.go b/bundle/config/mutator/override_immutable_folder.go new file mode 100644 index 00000000000..c2f618b19b2 --- /dev/null +++ b/bundle/config/mutator/override_immutable_folder.go @@ -0,0 +1,31 @@ +package mutator + +import ( + "context" + + "github.com/databricks/cli/bundle" + "github.com/databricks/cli/libs/diag" + "github.com/databricks/cli/libs/env" +) + +type overrideImmutableFolder struct{} + +// OverrideImmutableFolder sets bundle.deployment.immutable_folder to true +// if the DATABRICKS_IMMUTABLE_FOLDER environment variable is non-empty. +// This allows running the acceptance test suite against the immutable folder +// code path without modifying any databricks.yml files. 
+func OverrideImmutableFolder() bundle.Mutator { + return &overrideImmutableFolder{} +} + +func (m *overrideImmutableFolder) Name() string { + return "OverrideImmutableFolder" +} + +func (m *overrideImmutableFolder) Apply(ctx context.Context, b *bundle.Bundle) diag.Diagnostics { + if env.Get(ctx, "DATABRICKS_IMMUTABLE_FOLDER") == "" { + return nil + } + b.Config.Bundle.Deployment.ImmutableFolder = true + return nil +} diff --git a/bundle/config/mutator/override_immutable_folder_test.go b/bundle/config/mutator/override_immutable_folder_test.go new file mode 100644 index 00000000000..f1f598da82b --- /dev/null +++ b/bundle/config/mutator/override_immutable_folder_test.go @@ -0,0 +1,37 @@ +package mutator_test + +import ( + "testing" + + "github.com/databricks/cli/bundle" + "github.com/databricks/cli/bundle/config" + "github.com/databricks/cli/bundle/config/mutator" + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" +) + +func TestOverrideImmutableFolderNotSet(t *testing.T) { + t.Setenv("DATABRICKS_IMMUTABLE_FOLDER", "") + b := &bundle.Bundle{Config: config.Root{}} + diags := bundle.Apply(t.Context(), b, mutator.OverrideImmutableFolder()) + require.NoError(t, diags.Error()) + assert.False(t, b.Config.Bundle.Deployment.ImmutableFolder) +} + +func TestOverrideImmutableFolderSet(t *testing.T) { + t.Setenv("DATABRICKS_IMMUTABLE_FOLDER", "true") + b := &bundle.Bundle{Config: config.Root{}} + diags := bundle.Apply(t.Context(), b, mutator.OverrideImmutableFolder()) + require.NoError(t, diags.Error()) + assert.True(t, b.Config.Bundle.Deployment.ImmutableFolder) +} + +func TestOverrideImmutableFolderAlreadyTrue(t *testing.T) { + t.Setenv("DATABRICKS_IMMUTABLE_FOLDER", "") + b := &bundle.Bundle{Config: config.Root{}} + b.Config.Bundle.Deployment.ImmutableFolder = true + diags := bundle.Apply(t.Context(), b, mutator.OverrideImmutableFolder()) + require.NoError(t, diags.Error()) + // Existing true value must not be cleared when the env var is absent. 
+ assert.True(t, b.Config.Bundle.Deployment.ImmutableFolder) +} diff --git a/bundle/config/mutator/resourcemutator/process_static_resources.go b/bundle/config/mutator/resourcemutator/process_static_resources.go index 5fcd54de33f..b1f689808f6 100644 --- a/bundle/config/mutator/resourcemutator/process_static_resources.go +++ b/bundle/config/mutator/resourcemutator/process_static_resources.go @@ -39,15 +39,13 @@ func (p processStaticResources) Apply(ctx context.Context, b *bundle.Bundle) dia // - variable can be used a prefix // - path can be part of a complex variable value - resourceResolver := mutator.ResolveVariableReferencesOnlyResources() - bundle.ApplySeqContext( ctx, b, // Reads (dynamic): * (strings) (searches for variable references in string values) // Updates (dynamic): resources.* (strings) (resolves variable references to their actual values) // Resolves variable references in 'resources' using bundle, workspace, and variables prefixes - resourceResolver, + mutator.ResolveVariableReferencesOnlyResources(), mutator.NormalizePaths(), // Translate dashboard paths into paths in the workspace file system diff --git a/bundle/config/mutator/translate_paths.go b/bundle/config/mutator/translate_paths.go index ab81029d6b4..f7f10f65f46 100644 --- a/bundle/config/mutator/translate_paths.go +++ b/bundle/config/mutator/translate_paths.go @@ -322,6 +322,16 @@ func (t *translateContext) rewriteValue(ctx context.Context, p dyn.Path, v dyn.V func applyTranslations(ctx context.Context, b *bundle.Bundle, t *translateContext, translations []func(context.Context, dyn.Value) (dyn.Value, error)) diag.Diagnostics { switch { case b.Config.Bundle.Deployment.ImmutableFolder: + // Reject an explicit workspace.file_path: immutable bundles control that path + // automatically (it is set to the content-addressed snapshot location after upload). + // A user-supplied value would be silently discarded, so we error early instead. 
+ if loc := b.Config.GetLocation("workspace.file_path"); loc.File != "" { + return diag.Diagnostics{{ + Severity: diag.Error, + Summary: "workspace.file_path cannot be configured when bundle.deployment.immutable_folder is true", + Locations: []dyn.Location{loc}, + }} + } // Keep paths as local absolute paths during validate. snapshot.TranslateResourcePaths() // replaces this local prefix with the actual snapshot path after upload. t.remoteRoot = t.b.SyncRootPath diff --git a/bundle/deploy/snapshot/client.go b/bundle/deploy/snapshot/client.go index 3a720cb2150..3d081690dfc 100644 --- a/bundle/deploy/snapshot/client.go +++ b/bundle/deploy/snapshot/client.go @@ -26,6 +26,10 @@ type SnapshotInfo struct { // This interface exists so the implementation can later be replaced with a Go SDK call. type SnapshotUploader interface { Upload(ctx context.Context, bundleID, snapshotID, currentUser string, zipContent []byte) (*SnapshotInfo, error) + // Delete removes all snapshots for a bundle. The server is responsible for + // cleaning up the content-addressed storage; the caller does not need to + // know individual snapshot paths. + Delete(ctx context.Context, bundleID string) error } // snapshotAPIClient implements SnapshotUploader against /api/2.0/repos/snapshots. @@ -101,3 +105,8 @@ func (c *snapshotAPIClient) Upload(ctx context.Context, bundleID, snapshotID, cu return &SnapshotInfo{Path: resp.Snapshot.Path}, nil } + +// Delete deletes all snapshots for the given bundleID via DELETE /api/2.0/repos/snapshots/{bundleID}. 
+func (c *snapshotAPIClient) Delete(ctx context.Context, bundleID string) error { + return c.client.Do(ctx, http.MethodDelete, "/api/2.0/repos/snapshots/"+bundleID, nil, nil, nil, nil) +} diff --git a/bundle/deploy/snapshot/delete.go b/bundle/deploy/snapshot/delete.go new file mode 100644 index 00000000000..41c179839f8 --- /dev/null +++ b/bundle/deploy/snapshot/delete.go @@ -0,0 +1,44 @@ +package snapshot + +import ( + "context" + "errors" + "net/http" + + "github.com/databricks/cli/bundle" + "github.com/databricks/cli/libs/diag" + "github.com/databricks/databricks-sdk-go/apierr" +) + +type deleteSnapshots struct{} + +// DeleteBundleSnapshots removes all snapshots for the current bundle via the +// snapshot API. It does not use workspace.Delete because that requires workspace +// admin rights which non-admin users may not have. +func DeleteBundleSnapshots() bundle.Mutator { + return &deleteSnapshots{} +} + +func (m *deleteSnapshots) Name() string { return "snapshot.DeleteBundleSnapshots" } + +func (m *deleteSnapshots) Apply(ctx context.Context, b *bundle.Bundle) diag.Diagnostics { + if b.Config.Workspace.SnapshotPath == "" { + // No snapshot path means no snapshot was ever uploaded (or state was not loaded). 
+ return nil + } + + uploader, err := NewSnapshotUploader(b.WorkspaceClient(ctx)) + if err != nil { + return diag.FromErr(err) + } + + err = uploader.Delete(ctx, b.Config.Bundle.Name) + if err != nil { + var apiErr *apierr.APIError + if errors.As(err, &apiErr) && apiErr.StatusCode == http.StatusNotFound { + return nil + } + return diag.FromErr(err) + } + return nil +} diff --git a/bundle/deploy/snapshot/state.go b/bundle/deploy/snapshot/state.go index b53fa106638..a650c1d5b69 100644 --- a/bundle/deploy/snapshot/state.go +++ b/bundle/deploy/snapshot/state.go @@ -2,72 +2,88 @@ package snapshot import ( "context" + "encoding/json" "errors" "io/fs" "os" "path" "path/filepath" - "strings" "github.com/databricks/cli/bundle" + "github.com/databricks/cli/bundle/deploy" "github.com/databricks/cli/libs/diag" + "github.com/databricks/cli/libs/filer" ) -const snapshotPathStateFile = "snapshot_path" +type loadState struct{} -type ( - saveState struct{} - loadState struct{} -) - -// SaveState writes the snapshot path to the local deployment state directory -// so it can be recovered during destroy without reading metadata.json. -func SaveState() bundle.Mutator { - return &saveState{} -} - -// LoadState reads the snapshot path from the local deployment state directory -// and sets workspace.snapshot_path. Missing state is treated as a no-op so -// destroy can proceed against bundles deployed before this feature was added. +// LoadState reads workspace.snapshot_path from the local deployment.json and +// sets the snapshot-derived workspace paths. Missing or empty state is treated +// as a no-op so destroy can proceed against bundles deployed before this +// feature was added. 
func LoadState() bundle.Mutator { return &loadState{} } -func (s *saveState) Name() string { return "snapshot.SaveState" } func (s *loadState) Name() string { return "snapshot.LoadState" } -func (s *saveState) Apply(ctx context.Context, b *bundle.Bundle) diag.Diagnostics { - if b.Config.Workspace.SnapshotPath == "" { - return nil +func (s *loadState) Apply(ctx context.Context, b *bundle.Bundle) diag.Diagnostics { + localPath := filepath.Join(b.GetLocalStateDir(ctx), deploy.DeploymentStateFileName) + data, err := os.ReadFile(localPath) + if err != nil && !errors.Is(err, fs.ErrNotExist) { + return diag.FromErr(err) } - dir, err := b.LocalStateDir(ctx) - if err != nil { - return diag.FromErr(err) + if err == nil { + var state struct { + SnapshotPath string `json:"snapshot_path"` + } + if jsonErr := json.Unmarshal(data, &state); jsonErr != nil { + return diag.FromErr(jsonErr) + } + if state.SnapshotPath != "" { + applySnapshotPath(b, state.SnapshotPath) + return nil + } } - p := filepath.Join(dir, snapshotPathStateFile) - return diag.FromErr(os.WriteFile(p, []byte(b.Config.Workspace.SnapshotPath), 0o600)) + // Local deployment.json is missing or was from a non-immutable deploy — fall + // back to the remote copy so destroy works on a fresh clone or a different machine. 
+ return s.loadFromRemote(ctx, b) } -func (s *loadState) Apply(ctx context.Context, b *bundle.Bundle) diag.Diagnostics { - dir := b.GetLocalStateDir(ctx) - data, err := os.ReadFile(filepath.Join(dir, snapshotPathStateFile)) +func (s *loadState) loadFromRemote(ctx context.Context, b *bundle.Bundle) diag.Diagnostics { + f, err := filer.NewWorkspaceFilesClient(b.WorkspaceClient(ctx), b.Config.Workspace.StatePath) + if err != nil { + return diag.FromErr(err) + } + r, err := f.Read(ctx, deploy.DeploymentStateFileName) if errors.Is(err, fs.ErrNotExist) { return nil } - if err != nil { return diag.FromErr(err) } + defer r.Close() - snapshotPath := strings.TrimSpace(string(data)) - b.Config.Workspace.SnapshotPath = snapshotPath + var state struct { + SnapshotPath string `json:"snapshot_path"` + } + if err := json.NewDecoder(r).Decode(&state); err != nil { + return diag.FromErr(err) + } + if state.SnapshotPath != "" { + applySnapshotPath(b, state.SnapshotPath) + } + return nil +} + +func applySnapshotPath(b *bundle.Bundle, snapshotPath string) { + b.Config.Workspace.SnapshotPath = snapshotPath // Restore FilePath and ArtifactPath so that TranslateResourcePaths() can // rewrite local absolute paths to snapshot paths during destroy. b.Config.Workspace.FilePath = path.Join(snapshotPath, "src", "files") b.Config.Workspace.ArtifactPath = path.Join(snapshotPath, "src", "artifacts") - return nil } diff --git a/bundle/deploy/state.go b/bundle/deploy/state.go index 5d99748fb71..bf60d0959fe 100644 --- a/bundle/deploy/state.go +++ b/bundle/deploy/state.go @@ -53,6 +53,10 @@ type DeploymentState struct { // UUID uniquely identifying the deployment. ID uuid.UUID `json:"id"` + + // SnapshotPath is the remote content-addressed path for immutable folder deployments. + // Empty for non-immutable deployments. + SnapshotPath string `json:"snapshot_path,omitempty"` } // We use this entry type as a proxy to fs.DirEntry. 
diff --git a/bundle/deploy/state_update.go b/bundle/deploy/state_update.go index 55cf2393bf1..e9b2811f8b9 100644 --- a/bundle/deploy/state_update.go +++ b/bundle/deploy/state_update.go @@ -46,6 +46,9 @@ func (s *stateUpdate) Apply(ctx context.Context, b *bundle.Bundle) diag.Diagnost } state.Files = fl + // Persist the snapshot path so destroy on a different machine can find it. + state.SnapshotPath = b.Config.Workspace.SnapshotPath + // Generate a UUID for the deployment, if one does not already exist if state.ID == uuid.Nil { state.ID = uuid.New() diff --git a/bundle/internal/schema/annotations.yml b/bundle/internal/schema/annotations.yml index 08a9be3086b..cb95e713a47 100644 --- a/bundle/internal/schema/annotations.yml +++ b/bundle/internal/schema/annotations.yml @@ -91,7 +91,7 @@ bundle: Whether to fail on active runs. If this is set to true a deployment that is running can be interrupted. "immutable_folder": "description": |- - Whether to upload bundle files and artifacts as a single immutable snapshot. When true, all files are packaged into a zip and uploaded via the snapshot API, and workspace.file_path and workspace.artifact_path are set to the returned content-addressed path. The validate and plan commands make no mutative API calls when this is enabled. + Whether to deploy bundle files and artifacts as a single immutable snapshot. When true, all files are packaged into a content-addressed archive and workspace.file_path and workspace.artifact_path are set to the resulting location. The validate and plan commands make no mutative API calls when this is enabled. "lock": "description": |- The deployment lock attributes. 
diff --git a/bundle/permissions/workspace_root.go b/bundle/permissions/workspace_root.go index f33e223ed1b..5308262e5d6 100644 --- a/bundle/permissions/workspace_root.go +++ b/bundle/permissions/workspace_root.go @@ -7,6 +7,7 @@ import ( "strconv" "github.com/databricks/cli/bundle" + "github.com/databricks/cli/bundle/config" "github.com/databricks/cli/bundle/config/resources" "github.com/databricks/cli/bundle/libraries" "github.com/databricks/cli/bundle/metrics" @@ -28,12 +29,18 @@ func (*workspaceRootPermissions) Name() string { // Apply implements bundle.Mutator. func (*workspaceRootPermissions) Apply(ctx context.Context, b *bundle.Bundle) diag.Diagnostics { - // If the bundle is immutable, we don't need to apply any permissions to the workspace root. + workspace := b.Config.Workspace if b.Config.Bundle.Deployment.ImmutableFolder { - return nil + // For immutable bundles, file_path and artifact_path point into content-addressed + // snapshot storage that is not a normal workspace folder. Clear them so that + // giveAccessForWorkspaceRoot only applies permissions to root_path (and the + // state_path / resource_path nested under it), which still need ACLs for + // shared deployments to work correctly. + workspace.FilePath = "" + workspace.ArtifactPath = "" } - stateFolderPermissions, err := giveAccessForWorkspaceRoot(ctx, b) + stateFolderPermissions, err := giveAccessForWorkspaceRoot(ctx, b, workspace) if err != nil { return diag.FromErr(err) } @@ -46,7 +53,7 @@ func (*workspaceRootPermissions) Apply(ctx context.Context, b *bundle.Bundle) di // workspace folders and returns the resulting permissions of the folder that holds // the deployment state. It returns nil only when no permissions are declared, in // which case no folders are synced. 
-func giveAccessForWorkspaceRoot(ctx context.Context, b *bundle.Bundle) (*WorkspacePathPermissions, error) { +func giveAccessForWorkspaceRoot(ctx context.Context, b *bundle.Bundle, wsConfig config.Workspace) (*WorkspacePathPermissions, error) { var permissions []workspace.WorkspaceObjectAccessControlRequest for _, p := range b.Config.Permissions { level, err := GetWorkspaceObjectPermissionLevel(string(p.Level)) @@ -67,7 +74,7 @@ func giveAccessForWorkspaceRoot(ctx context.Context, b *bundle.Bundle) (*Workspa } w := b.WorkspaceClient(ctx).Workspace - wsPaths := paths.CollectUniqueWorkspacePathPrefixes(b.Config.Workspace) + wsPaths := paths.CollectUniqueWorkspacePathPrefixes(wsConfig) // Each goroutine writes the folder's resulting permissions into its own slot, // so they are inspected after Wait rather than concurrently. @@ -88,7 +95,7 @@ func giveAccessForWorkspaceRoot(ctx context.Context, b *bundle.Bundle) (*Workspa // Return the permissions of the folder governing the deployment state. When // state_path is nested under root_path it is deduplicated out of the collected // paths, so Governing resolves it to root_path, whose ACL it inherits. 
- stateFolder := wsPaths.Governing(b.Config.Workspace.StatePath) + stateFolder := wsPaths.Governing(wsConfig.StatePath) i := slices.Index(wsPaths.Paths, stateFolder) if i < 0 { return nil, nil diff --git a/bundle/phases/deploy.go b/bundle/phases/deploy.go index 851cac81d9a..91ef78f5ae8 100644 --- a/bundle/phases/deploy.go +++ b/bundle/phases/deploy.go @@ -156,7 +156,6 @@ func Deploy(ctx context.Context, b *bundle.Bundle, outputHandler sync.OutputHand bundle.ApplySeqContext(ctx, b, snapshot.Upload(), snapshot.TranslateResourcePaths(), - snapshot.SaveState(), ) if !logdiag.HasError(ctx) { _, libDiags := libraries.ReplaceWithRemotePath(ctx, b) diff --git a/bundle/phases/destroy.go b/bundle/phases/destroy.go index 992c9fef445..aecaab7ba90 100644 --- a/bundle/phases/destroy.go +++ b/bundle/phases/destroy.go @@ -103,6 +103,14 @@ func destroyCore(ctx context.Context, b *bundle.Bundle, plan *deployplan.Plan, e bundle.ApplyContext(ctx, b, files.Delete()) + if logdiag.HasError(ctx) { + return + } + + if b.Config.Bundle.Deployment.ImmutableFolder { + bundle.ApplyContext(ctx, b, snapshot.DeleteBundleSnapshots()) + } + if !logdiag.HasError(ctx) { cmdio.LogString(ctx, "Destroy complete!") } @@ -132,6 +140,16 @@ func Destroy(ctx context.Context, b *bundle.Bundle, engine engine.EngineType) { bundle.ApplyContext(ctx, b, lock.Release(lock.GoalDestroy)) }() + if b.Config.Bundle.Deployment.ImmutableFolder { + // Restore the snapshot path so that TranslateResourcePaths (for terraform, below) + // and DeleteBundleSnapshots (in destroyCore) know where the snapshot lives. + // Must run for both engines. 
+ bundle.ApplyContext(ctx, b, snapshot.LoadState()) + if logdiag.HasError(ctx) { + return + } + } + if !engine.IsDirect() { mutators := []bundle.Mutator{ // We need to resolve artifact variable (how we do it in build phase) @@ -142,11 +160,9 @@ func Destroy(ctx context.Context, b *bundle.Bundle, engine engine.EngineType) { } if b.Config.Bundle.Deployment.ImmutableFolder { - // For immutable bundles, resource paths are local absolute paths after - // translate_paths. Restore workspace.file_path from the local state file - // and replace the local prefix with the snapshot remote path before - // Terraform processes the config. - mutators = append([]bundle.Mutator{snapshot.LoadState(), snapshot.TranslateResourcePaths()}, mutators...) + // Resource paths are local absolute paths after translate_paths. Replace the + // local prefix with the snapshot remote path before Terraform processes the config. + mutators = append([]bundle.Mutator{snapshot.TranslateResourcePaths()}, mutators...) } mutators = append(mutators, diff --git a/bundle/phases/initialize.go b/bundle/phases/initialize.go index 80127843e83..4aa7f740027 100644 --- a/bundle/phases/initialize.go +++ b/bundle/phases/initialize.go @@ -70,6 +70,12 @@ func Initialize(ctx context.Context, b *bundle.Bundle) { // because it affects how workspace variables are resolved. mutator.ApplySourceLinkedDeploymentPreset(), + // Reads (env): DATABRICKS_IMMUTABLE_FOLDER (non-empty value enables immutable folder mode) + // Updates (typed): b.Config.Bundle.Deployment.ImmutableFolder (forces to true when env var is set) + // Allows running the full test suite against the immutable folder code path without + // modifying any databricks.yml files. 
+ mutator.OverrideImmutableFolder(), + // Reads (typed): b.Config.Workspace.RootPath (checks if it's already set) // Reads (typed): b.Config.Bundle.Name, b.Config.Bundle.Target (used to construct default path) // Updates (typed): b.Config.Workspace.RootPath (sets to ~/.bundle/{name}/{target} if not set) diff --git a/libs/testserver/handlers.go b/libs/testserver/handlers.go index 6af36e46219..3871daea77f 100644 --- a/libs/testserver/handlers.go +++ b/libs/testserver/handlers.go @@ -594,6 +594,10 @@ func AddDefaultHandlers(server *Server) { } }) + server.Handle("DELETE", "/api/2.0/repos/snapshots/{bundle_id}", func(req Request) any { + return Response{StatusCode: http.StatusNoContent} + }) + // SQL Warehouses: server.Handle("GET", "/api/2.0/sql/warehouses/{warehouse_id}", func(req Request) any { From c6b1ff546ddf46b304a521f8e97f915b5216317d Mon Sep 17 00:00:00 2001 From: Andrew Nester Date: Tue, 23 Jun 2026 12:10:18 +0200 Subject: [PATCH 17/31] no destroy + acl + fix for empty path --- acceptance/bin/print_requests.py | 22 +++++--- .../deploy/immutable-no-artifacts/output.txt | 4 -- .../deploy/immutable-no-artifacts/script | 9 ++-- acceptance/bundle/deploy/immutable/output.txt | 6 --- acceptance/bundle/deploy/immutable/script | 3 +- bundle/deploy/snapshot/client.go | 41 +++++++------- bundle/deploy/snapshot/delete.go | 44 --------------- bundle/deploy/snapshot/upload.go | 21 +++++++- bundle/deploy/snapshot/upload_test.go | 53 +++++++++++++++++++ bundle/paths/paths.go | 4 ++ bundle/phases/destroy.go | 7 +-- libs/testserver/handlers.go | 4 -- 12 files changed, 121 insertions(+), 97 deletions(-) delete mode 100644 bundle/deploy/snapshot/delete.go create mode 100644 bundle/deploy/snapshot/upload_test.go diff --git a/acceptance/bin/print_requests.py b/acceptance/bin/print_requests.py index 64479b79a5a..cdee3f9381f 100755 --- a/acceptance/bin/print_requests.py +++ b/acceptance/bin/print_requests.py @@ -7,10 +7,11 @@ If argument starts with ! then it's a negation filter. 
Examples: - print_requests.py //jobs # Show non-GET requests with /jobs in path - print_requests.py --get //jobs # Show all requests with /jobs in path - print_requests.py --sort '^//import-file/' # Show non-GET requests, exclude /import-file/, sort output - print_requests.py --keep //jobs # Show requests and do not delete out.requests.json afterwards + print_requests.py //jobs # Show non-GET requests with /jobs in path + print_requests.py --get //jobs # Show all requests with /jobs in path + print_requests.py --sort '^//import-file/' # Show non-GET requests, exclude /import-file/, sort output + print_requests.py --keep //jobs # Show requests and do not delete out.requests.json afterwards + print_requests.py //api/2.0/repos/snapshots --method DELETE # Show only DELETE to that path This replaces custom jq wrappers like: jq --sort-keys 'select(.method != "GET" and (.path | contains("/jobs")))' < out.requests.txt @@ -123,7 +124,7 @@ def read_json_many(s): assert result == [{"method": "GET"}, {"method": "POST"}], result -def filter_requests(requests, path_filters, include_get, should_sort, unique=False): +def filter_requests(requests, path_filters, include_get, should_sort, unique=False, method_filter=None): """Filter requests based on method and path filters.""" positive_filters = [] negative_filters = [] @@ -138,8 +139,12 @@ def filter_requests(requests, path_filters, include_get, should_sort, unique=Fal filtered_requests = [] for req in requests: - # Skip GET requests unless include_get is True - if req.get("method") == "GET" and not include_get: + if method_filter: + # --method overrides the default GET exclusion + if req.get("method") != method_filter: + continue + elif req.get("method") == "GET" and not include_get: + # Skip GET requests unless include_get is True continue # Apply path filters @@ -186,6 +191,7 @@ def main(): action="store_true", help="Collapse consecutive duplicate requests (like uniq), e.g. 
repeated GET polls", ) + parser.add_argument("--method", metavar="METHOD", help="Only show requests with this HTTP method (e.g. DELETE)") parser.add_argument("--oneline", action="store_true", help="Print output with one request per line") parser.add_argument( "--del-body", @@ -217,7 +223,7 @@ def main(): return requests = read_json_many(data) - filtered_requests = filter_requests(requests, args.path_filters, args.get, args.sort, args.unique) + filtered_requests = filter_requests(requests, args.path_filters, args.get, args.sort, args.unique, args.method) for req in filtered_requests: body = req.get("body") diff --git a/acceptance/bundle/deploy/immutable-no-artifacts/output.txt b/acceptance/bundle/deploy/immutable-no-artifacts/output.txt index 1409648d48e..52398f774a3 100644 --- a/acceptance/bundle/deploy/immutable-no-artifacts/output.txt +++ b/acceptance/bundle/deploy/immutable-no-artifacts/output.txt @@ -28,7 +28,3 @@ All files and directories at the following location will be deleted: /Workspace/ Deleting files... Destroy complete! -{ - "method": "DELETE", - "path": "/api/2.0/repos/snapshots/test-bundle-immutable-no-artifacts-[UNIQUE_NAME]" -} diff --git a/acceptance/bundle/deploy/immutable-no-artifacts/script b/acceptance/bundle/deploy/immutable-no-artifacts/script index b4c95578d80..903d85c9c92 100644 --- a/acceptance/bundle/deploy/immutable-no-artifacts/script +++ b/acceptance/bundle/deploy/immutable-no-artifacts/script @@ -1,5 +1,10 @@ envsubst < databricks.yml.tmpl > databricks.yml +cleanup() { + rm -f out.requests.txt +} +trap cleanup EXIT + trace $CLI bundle validate trace $CLI bundle deploy @@ -10,7 +15,3 @@ trace $CLI jobs get $JOB_ID | jq '.settings.tasks' | jq '.[] | select(.spark_pyt trace $CLI jobs get $JOB_ID | jq '.settings.tasks' | jq '.[] | select(.notebook_task != null) | .notebook_task.notebook_path' trace $CLI bundle destroy --auto-approve - -# Assert that bundle destroy called the snapshot API to delete the bundle's snapshots. 
-# Use a trailing slash to match only the bundle-specific DELETE path, not the POST upload path. -print_requests.py //api/2.0/repos/snapshots/ diff --git a/acceptance/bundle/deploy/immutable/output.txt b/acceptance/bundle/deploy/immutable/output.txt index f828d6ac4ed..4c92e420dab 100644 --- a/acceptance/bundle/deploy/immutable/output.txt +++ b/acceptance/bundle/deploy/immutable/output.txt @@ -26,12 +26,6 @@ Deployment complete! "/Workspace/Users/[UUID]/.snapshots/test-bundle-immutable-[UNIQUE_NAME]/[SNAPSHOT_HASH]/src/artifacts/.internal/immutable-0.0.1-py3-none-any.whl" ] ->>> [CLI] bundle run my_job -Run URL: [DATABRICKS_URL]/jobs/[NUMID]/runs/[NUMID]?o=[NUMID] - -[TIMESTAMP] "my job" RUNNING -[TIMESTAMP] "my job" TERMINATED SUCCESS - >>> [CLI] bundle destroy --auto-approve The following resources will be deleted: delete resources.jobs.my_job diff --git a/acceptance/bundle/deploy/immutable/script b/acceptance/bundle/deploy/immutable/script index 57d5cfbf3ec..d1d60d53265 100644 --- a/acceptance/bundle/deploy/immutable/script +++ b/acceptance/bundle/deploy/immutable/script @@ -14,4 +14,5 @@ trace $CLI jobs get $JOB_ID | jq '.settings.tasks' | jq '.[] | select(.spark_pyt trace $CLI jobs get $JOB_ID | jq '.settings.tasks' | jq '.[] | select(.notebook_task != null) | .notebook_task.notebook_path' trace $CLI jobs get $JOB_ID | jq '.settings.environments[0].spec.dependencies' -trace $CLI bundle run my_job +# Redirect run output to a log file — the real workspace produces different output than the local test server. 
+$CLI bundle run my_job &> LOG.run diff --git a/bundle/deploy/snapshot/client.go b/bundle/deploy/snapshot/client.go index 3d081690dfc..36295e66d6d 100644 --- a/bundle/deploy/snapshot/client.go +++ b/bundle/deploy/snapshot/client.go @@ -9,6 +9,7 @@ import ( "net/http" "net/textproto" + "github.com/databricks/cli/libs/auth" "github.com/databricks/databricks-sdk-go" databricksclient "github.com/databricks/databricks-sdk-go/client" ) @@ -19,17 +20,22 @@ type SnapshotInfo struct { Path string } +// ACLEntry is one element of the access_control_list sent to the snapshot API. +// All entries are granted CAN_READ; the snapshot API does not support other levels. +type ACLEntry struct { + UserName string `json:"user_name,omitempty"` + GroupName string `json:"group_name,omitempty"` + ServicePrincipalName string `json:"service_principal_name,omitempty"` + PermissionLevel string `json:"permission_level"` +} + // SnapshotUploader abstracts the /api/2.0/repos/snapshots endpoint. // snapshotID is the content-addressed key supplied by the caller; the API uses // it as the final path component so that identical content always resolves to // the same workspace location. // This interface exists so the implementation can later be replaced with a Go SDK call. type SnapshotUploader interface { - Upload(ctx context.Context, bundleID, snapshotID, currentUser string, zipContent []byte) (*SnapshotInfo, error) - // Delete removes all snapshots for a bundle. The server is responsible for - // cleaning up the content-addressed storage; the caller does not need to - // know individual snapshot paths. - Delete(ctx context.Context, bundleID string) error + Upload(ctx context.Context, bundleID, snapshotID string, acl []ACLEntry, zipContent []byte) (*SnapshotInfo, error) } // snapshotAPIClient implements SnapshotUploader against /api/2.0/repos/snapshots. 
@@ -54,9 +60,9 @@ func NewSnapshotUploader(w *databricks.WorkspaceClient) (SnapshotUploader, error } // Upload uploads zipContent as an immutable snapshot identified by snapshotID. -// snapshotID is the SHA-256 of the files-only zip and is used by the server as -// the content-addressed path component. currentUser is granted CAN_READ on the snapshot. -func (c *snapshotAPIClient) Upload(ctx context.Context, bundleID, snapshotID, currentUser string, zipContent []byte) (*SnapshotInfo, error) { +// snapshotID is the SHA-256 of the zip and is used by the server as the +// content-addressed path component. acl grants CAN_READ to each listed principal. +func (c *snapshotAPIClient) Upload(ctx context.Context, bundleID, snapshotID string, acl []ACLEntry, zipContent []byte) (*SnapshotInfo, error) { var body bytes.Buffer mw := multipart.NewWriter(&body) @@ -67,14 +73,11 @@ func (c *snapshotAPIClient) Upload(ctx context.Context, bundleID, snapshotID, cu return nil, fmt.Errorf("failed to write bundle_id: %w", err) } - // The API requires an access_control_list granting the current user read access. - acl, err := json.Marshal([]map[string]string{ - {"user_name": currentUser, "permission_level": "CAN_READ"}, - }) + aclJSON, err := json.Marshal(acl) if err != nil { return nil, fmt.Errorf("failed to marshal access_control_list: %w", err) } - if err := mw.WriteField("access_control_list", string(acl)); err != nil { + if err := mw.WriteField("access_control_list", string(aclJSON)); err != nil { return nil, fmt.Errorf("failed to write access_control_list: %w", err) } @@ -93,9 +96,13 @@ func (c *snapshotAPIClient) Upload(ctx context.Context, bundleID, snapshotID, cu return nil, fmt.Errorf("failed to finalize multipart body: %w", err) } - headers := map[string]string{ - "Content-Type": mw.FormDataContentType(), + // Workspace routing header is required so the server can locate the correct + // ASP (application service principal) that owns the snapshot directory. 
+ headers := auth.WorkspaceIDHeaders(c.client.Config) + if headers == nil { + headers = make(map[string]string) } + headers["Content-Type"] = mw.FormDataContentType() var resp snapshotUploadResponse err = c.client.Do(ctx, http.MethodPost, "/api/2.0/repos/snapshots", headers, nil, body.Bytes(), &resp) @@ -106,7 +113,3 @@ func (c *snapshotAPIClient) Upload(ctx context.Context, bundleID, snapshotID, cu return &SnapshotInfo{Path: resp.Snapshot.Path}, nil } -// Delete deletes all snapshots for the given bundleID via DELETE /api/2.0/repos/snapshots/{bundleID}. -func (c *snapshotAPIClient) Delete(ctx context.Context, bundleID string) error { - return c.client.Do(ctx, http.MethodDelete, "/api/2.0/repos/snapshots/"+bundleID, nil, nil, nil, nil) -} diff --git a/bundle/deploy/snapshot/delete.go b/bundle/deploy/snapshot/delete.go deleted file mode 100644 index 41c179839f8..00000000000 --- a/bundle/deploy/snapshot/delete.go +++ /dev/null @@ -1,44 +0,0 @@ -package snapshot - -import ( - "context" - "errors" - "net/http" - - "github.com/databricks/cli/bundle" - "github.com/databricks/cli/libs/diag" - "github.com/databricks/databricks-sdk-go/apierr" -) - -type deleteSnapshots struct{} - -// DeleteBundleSnapshots removes all snapshots for the current bundle via the -// snapshot API. It does not use workspace.Delete because that requires workspace -// admin rights which non-admin users may not have. -func DeleteBundleSnapshots() bundle.Mutator { - return &deleteSnapshots{} -} - -func (m *deleteSnapshots) Name() string { return "snapshot.DeleteBundleSnapshots" } - -func (m *deleteSnapshots) Apply(ctx context.Context, b *bundle.Bundle) diag.Diagnostics { - if b.Config.Workspace.SnapshotPath == "" { - // No snapshot path means no snapshot was ever uploaded (or state was not loaded). 
- return nil - } - - uploader, err := NewSnapshotUploader(b.WorkspaceClient(ctx)) - if err != nil { - return diag.FromErr(err) - } - - err = uploader.Delete(ctx, b.Config.Bundle.Name) - if err != nil { - var apiErr *apierr.APIError - if errors.As(err, &apiErr) && apiErr.StatusCode == http.StatusNotFound { - return nil - } - return diag.FromErr(err) - } - return nil -} diff --git a/bundle/deploy/snapshot/upload.go b/bundle/deploy/snapshot/upload.go index 86e8160ebfd..40e62e26668 100644 --- a/bundle/deploy/snapshot/upload.go +++ b/bundle/deploy/snapshot/upload.go @@ -46,7 +46,8 @@ func (m *snapshotUpload) Apply(ctx context.Context, b *bundle.Bundle) diag.Diagn snapshotID := IDFromContent(zipContent) log.Debugf(ctx, "snapshot.Upload: snapshotID=%s zip=%d bytes", snapshotID, len(zipContent)) - info, err := uploader.Upload(ctx, b.Config.Bundle.Name, snapshotID, b.Config.Workspace.CurrentUser.UserName, zipContent) + acl := BuildACL(b) + info, err := uploader.Upload(ctx, b.Config.Bundle.Name, snapshotID, acl, zipContent) if err != nil { return diag.FromErr(err) } @@ -64,3 +65,21 @@ func (m *snapshotUpload) Apply(ctx context.Context, b *bundle.Bundle) diag.Diagn return nil } + +// BuildACL constructs the access_control_list for the snapshot upload. +// It grants CAN_READ to the current user and to every principal listed in the +// top-level permissions section of the bundle config. 
+func BuildACL(b *bundle.Bundle) []ACLEntry { + acl := []ACLEntry{ + {UserName: b.Config.Workspace.CurrentUser.UserName, PermissionLevel: "CAN_READ"}, + } + for _, p := range b.Config.Permissions { + acl = append(acl, ACLEntry{ + UserName: p.UserName, + GroupName: p.GroupName, + ServicePrincipalName: p.ServicePrincipalName, + PermissionLevel: "CAN_READ", + }) + } + return acl +} diff --git a/bundle/deploy/snapshot/upload_test.go b/bundle/deploy/snapshot/upload_test.go new file mode 100644 index 00000000000..3b53f31ae3c --- /dev/null +++ b/bundle/deploy/snapshot/upload_test.go @@ -0,0 +1,53 @@ +package snapshot_test + +import ( + "testing" + + "github.com/databricks/cli/bundle" + "github.com/databricks/cli/bundle/config" + "github.com/databricks/cli/bundle/config/resources" + "github.com/databricks/cli/bundle/deploy/snapshot" + "github.com/databricks/databricks-sdk-go/service/iam" + "github.com/stretchr/testify/assert" +) + +func bundleWithPermissions(currentUser string, perms []resources.Permission) *bundle.Bundle { + b := &bundle.Bundle{ + Config: config.Root{ + Workspace: config.Workspace{ + CurrentUser: &config.User{}, + }, + Permissions: perms, + }, + } + b.Config.Workspace.CurrentUser.User = &iam.User{UserName: currentUser} + return b +} + +func TestBuildACLCurrentUserOnly(t *testing.T) { + b := bundleWithPermissions("alice@example.com", nil) + + acl := snapshot.BuildACL(b) + + assert.Equal(t, []snapshot.ACLEntry{ + {UserName: "alice@example.com", PermissionLevel: "CAN_READ"}, + }, acl) +} + +func TestBuildACLWithTopLevelPermissions(t *testing.T) { + perms := []resources.Permission{ + {Level: "CAN_VIEW", UserName: "bob@example.com"}, + {Level: "CAN_MANAGE", GroupName: "devs"}, + {Level: "CAN_RUN", ServicePrincipalName: "sp-123"}, + } + b := bundleWithPermissions("alice@example.com", perms) + + acl := snapshot.BuildACL(b) + + assert.Equal(t, []snapshot.ACLEntry{ + {UserName: "alice@example.com", PermissionLevel: "CAN_READ"}, + {UserName: "bob@example.com", 
PermissionLevel: "CAN_READ"}, + {GroupName: "devs", PermissionLevel: "CAN_READ"}, + {ServicePrincipalName: "sp-123", PermissionLevel: "CAN_READ"}, + }, acl) +} diff --git a/bundle/paths/paths.go b/bundle/paths/paths.go index e413cc59f7b..54ffc0b5622 100644 --- a/bundle/paths/paths.go +++ b/bundle/paths/paths.go @@ -32,6 +32,10 @@ func CollectUniqueWorkspacePathPrefixes(workspace config.Workspace) WorkspacePat workspace.StatePath, workspace.ResourcePath, } { + if p == "" { + continue + } + if libraries.IsVolumesPath(p) { continue } diff --git a/bundle/phases/destroy.go b/bundle/phases/destroy.go index aecaab7ba90..a08000123e0 100644 --- a/bundle/phases/destroy.go +++ b/bundle/phases/destroy.go @@ -107,10 +107,6 @@ func destroyCore(ctx context.Context, b *bundle.Bundle, plan *deployplan.Plan, e return } - if b.Config.Bundle.Deployment.ImmutableFolder { - bundle.ApplyContext(ctx, b, snapshot.DeleteBundleSnapshots()) - } - if !logdiag.HasError(ctx) { cmdio.LogString(ctx, "Destroy complete!") } @@ -142,8 +138,7 @@ func Destroy(ctx context.Context, b *bundle.Bundle, engine engine.EngineType) { if b.Config.Bundle.Deployment.ImmutableFolder { // Restore the snapshot path so that TranslateResourcePaths (for terraform, below) - // and DeleteBundleSnapshots (in destroyCore) know where the snapshot lives. - // Must run for both engines. + // knows where the snapshot lives. Must run for both engines. 
bundle.ApplyContext(ctx, b, snapshot.LoadState()) if logdiag.HasError(ctx) { return diff --git a/libs/testserver/handlers.go b/libs/testserver/handlers.go index 3871daea77f..6af36e46219 100644 --- a/libs/testserver/handlers.go +++ b/libs/testserver/handlers.go @@ -594,10 +594,6 @@ func AddDefaultHandlers(server *Server) { } }) - server.Handle("DELETE", "/api/2.0/repos/snapshots/{bundle_id}", func(req Request) any { - return Response{StatusCode: http.StatusNoContent} - }) - // SQL Warehouses: server.Handle("GET", "/api/2.0/sql/warehouses/{warehouse_id}", func(req Request) any { From 3a0ec41b817d7e6b0badca2c4774cfa176b23093 Mon Sep 17 00:00:00 2001 From: Andrew Nester Date: Tue, 23 Jun 2026 12:30:40 +0200 Subject: [PATCH 18/31] pr feedback --- .../mutator/override_immutable_folder_test.go | 16 ++++++++++------ bundle/deploy/snapshot/client.go | 1 - bundle/deploy/snapshot/path.go | 17 ++++++++--------- bundle/deploy/snapshot/path_test.go | 4 +++- bundle/schema/jsonschema.json | 2 +- 5 files changed, 22 insertions(+), 18 deletions(-) diff --git a/bundle/config/mutator/override_immutable_folder_test.go b/bundle/config/mutator/override_immutable_folder_test.go index f1f598da82b..5a31ad779a4 100644 --- a/bundle/config/mutator/override_immutable_folder_test.go +++ b/bundle/config/mutator/override_immutable_folder_test.go @@ -6,31 +6,35 @@ import ( "github.com/databricks/cli/bundle" "github.com/databricks/cli/bundle/config" "github.com/databricks/cli/bundle/config/mutator" + "github.com/databricks/cli/libs/env" "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" ) func TestOverrideImmutableFolderNotSet(t *testing.T) { - t.Setenv("DATABRICKS_IMMUTABLE_FOLDER", "") + t.Parallel() + ctx := env.Set(t.Context(), "DATABRICKS_IMMUTABLE_FOLDER", "") b := &bundle.Bundle{Config: config.Root{}} - diags := bundle.Apply(t.Context(), b, mutator.OverrideImmutableFolder()) + diags := bundle.Apply(ctx, b, mutator.OverrideImmutableFolder()) require.NoError(t, 
diags.Error()) assert.False(t, b.Config.Bundle.Deployment.ImmutableFolder) } func TestOverrideImmutableFolderSet(t *testing.T) { - t.Setenv("DATABRICKS_IMMUTABLE_FOLDER", "true") + t.Parallel() + ctx := env.Set(t.Context(), "DATABRICKS_IMMUTABLE_FOLDER", "true") b := &bundle.Bundle{Config: config.Root{}} - diags := bundle.Apply(t.Context(), b, mutator.OverrideImmutableFolder()) + diags := bundle.Apply(ctx, b, mutator.OverrideImmutableFolder()) require.NoError(t, diags.Error()) assert.True(t, b.Config.Bundle.Deployment.ImmutableFolder) } func TestOverrideImmutableFolderAlreadyTrue(t *testing.T) { - t.Setenv("DATABRICKS_IMMUTABLE_FOLDER", "") + t.Parallel() + ctx := env.Set(t.Context(), "DATABRICKS_IMMUTABLE_FOLDER", "") b := &bundle.Bundle{Config: config.Root{}} b.Config.Bundle.Deployment.ImmutableFolder = true - diags := bundle.Apply(t.Context(), b, mutator.OverrideImmutableFolder()) + diags := bundle.Apply(ctx, b, mutator.OverrideImmutableFolder()) require.NoError(t, diags.Error()) // Existing true value must not be cleared when the env var is absent. 
assert.True(t, b.Config.Bundle.Deployment.ImmutableFolder) diff --git a/bundle/deploy/snapshot/client.go b/bundle/deploy/snapshot/client.go index 36295e66d6d..4e5df29e408 100644 --- a/bundle/deploy/snapshot/client.go +++ b/bundle/deploy/snapshot/client.go @@ -112,4 +112,3 @@ func (c *snapshotAPIClient) Upload(ctx context.Context, bundleID, snapshotID str return &SnapshotInfo{Path: resp.Snapshot.Path}, nil } - diff --git a/bundle/deploy/snapshot/path.go b/bundle/deploy/snapshot/path.go index 680461d2658..95a24bef2cf 100644 --- a/bundle/deploy/snapshot/path.go +++ b/bundle/deploy/snapshot/path.go @@ -14,6 +14,7 @@ import ( "time" "github.com/databricks/cli/bundle" + "github.com/databricks/cli/bundle/deploy/files" "github.com/databricks/cli/libs/fileset" libsync "github.com/databricks/cli/libs/sync" ) @@ -66,18 +67,16 @@ func SnapshotID(ctx context.Context, b *bundle.Bundle) (string, error) { } func addSyncRootToZip(ctx context.Context, zw *zip.Writer, b *bundle.Bundle) error { - files, err := libsync.GetFileList(ctx, libsync.SyncOptions{ - WorktreeRoot: b.WorktreeRoot, - LocalRoot: b.SyncRoot, - Paths: b.Config.Sync.Paths, - Include: b.Config.Sync.Include, - Exclude: b.Config.Sync.Exclude, - }) + opts, err := files.GetSyncOptions(ctx, b) + if err != nil { + return err + } + fileList, err := libsync.GetFileList(ctx, *opts) if err != nil { return err } // Sort for a stable zip (same content → same hash regardless of iteration order). 
- slices.SortFunc(files, func(a, b fileset.File) int { + slices.SortFunc(fileList, func(a, b fileset.File) int { if a.Relative < b.Relative { return -1 } @@ -87,7 +86,7 @@ func addSyncRootToZip(ctx context.Context, zw *zip.Writer, b *bundle.Bundle) err return 0 }) - for _, f := range files { + for _, f := range fileList { rc, err := b.SyncRoot.Open(f.Relative) if err != nil { return fmt.Errorf("open %s: %w", f.Relative, err) diff --git a/bundle/deploy/snapshot/path_test.go b/bundle/deploy/snapshot/path_test.go index 9b87182eab1..f782a45bb42 100644 --- a/bundle/deploy/snapshot/path_test.go +++ b/bundle/deploy/snapshot/path_test.go @@ -31,7 +31,9 @@ func makeBundleWithFiles(t *testing.T, files map[string]string) *bundle.Bundle { // WorktreeRoot = SyncRoot is the fallback used by LoadGitDetails when // there is no git repository. WorktreeRoot: root, - Config: config.Root{}, + Config: config.Root{ + Bundle: config.Bundle{Target: "default"}, + }, } } diff --git a/bundle/schema/jsonschema.json b/bundle/schema/jsonschema.json index 782597b3a5e..58137d50f5e 100644 --- a/bundle/schema/jsonschema.json +++ b/bundle/schema/jsonschema.json @@ -2642,7 +2642,7 @@ "$ref": "#/$defs/bool" }, "immutable_folder": { - "description": "Whether to upload bundle files and artifacts as a single immutable snapshot. When true, all files are packaged into a zip and uploaded via the snapshot API, and workspace.file_path and workspace.artifact_path are set to the returned content-addressed path. The validate and plan commands make no mutative API calls when this is enabled.", + "description": "Whether to deploy bundle files and artifacts as a single immutable snapshot. When true, all files are packaged into a content-addressed archive and workspace.file_path and workspace.artifact_path are set to the resulting location. 
The validate and plan commands make no mutative API calls when this is enabled.", "$ref": "#/$defs/bool" }, "lock": { From b9f67f7bc830f1761388774d14a46e4201abe0b2 Mon Sep 17 00:00:00 2001 From: Andrew Nester Date: Tue, 23 Jun 2026 13:47:36 +0200 Subject: [PATCH 19/31] fixed app deploying --- .../resources/apps/immutable/app/app.py | 2 + .../apps/immutable/databricks.yml.tmpl | 10 +++++ .../resources/apps/immutable/out.test.toml | 3 ++ .../resources/apps/immutable/output.txt | 42 +++++++++++++++++++ .../bundle/resources/apps/immutable/script | 13 ++++++ .../bundle/resources/apps/immutable/test.toml | 22 ++++++++++ cmd/bundle/run.go | 16 +++++++ 7 files changed, 108 insertions(+) create mode 100644 acceptance/bundle/resources/apps/immutable/app/app.py create mode 100644 acceptance/bundle/resources/apps/immutable/databricks.yml.tmpl create mode 100644 acceptance/bundle/resources/apps/immutable/out.test.toml create mode 100644 acceptance/bundle/resources/apps/immutable/output.txt create mode 100644 acceptance/bundle/resources/apps/immutable/script create mode 100644 acceptance/bundle/resources/apps/immutable/test.toml diff --git a/acceptance/bundle/resources/apps/immutable/app/app.py b/acceptance/bundle/resources/apps/immutable/app/app.py new file mode 100644 index 00000000000..8d2493f9e91 --- /dev/null +++ b/acceptance/bundle/resources/apps/immutable/app/app.py @@ -0,0 +1,2 @@ +import streamlit as st +st.write("hello") diff --git a/acceptance/bundle/resources/apps/immutable/databricks.yml.tmpl b/acceptance/bundle/resources/apps/immutable/databricks.yml.tmpl new file mode 100644 index 00000000000..8b4ee9c6e7a --- /dev/null +++ b/acceptance/bundle/resources/apps/immutable/databricks.yml.tmpl @@ -0,0 +1,10 @@ +bundle: + name: test-bundle-immutable-app-$UNIQUE_NAME + deployment: + immutable_folder: true + +resources: + apps: + my_app: + name: my-immutable-app + source_code_path: ./app diff --git a/acceptance/bundle/resources/apps/immutable/out.test.toml 
b/acceptance/bundle/resources/apps/immutable/out.test.toml new file mode 100644 index 00000000000..e90b6d5d1ba --- /dev/null +++ b/acceptance/bundle/resources/apps/immutable/out.test.toml @@ -0,0 +1,3 @@ +Local = true +Cloud = false +EnvMatrix.DATABRICKS_BUNDLE_ENGINE = ["direct"] diff --git a/acceptance/bundle/resources/apps/immutable/output.txt b/acceptance/bundle/resources/apps/immutable/output.txt new file mode 100644 index 00000000000..2833e7f54ee --- /dev/null +++ b/acceptance/bundle/resources/apps/immutable/output.txt @@ -0,0 +1,42 @@ + +>>> [CLI] bundle deploy +Uploading immutable bundle snapshot... +Deploying resources... +Updating deployment state... +Deployment complete! + +>>> [CLI] bundle run my_app +✓ Getting the status of the app my-immutable-app +✓ App is in RUNNING state +✓ App compute is in STOPPED state +✓ Starting the app my-immutable-app +✓ App is starting... +✓ App is started! +✓ Deployment succeeded +You can access the app at my-immutable-app-123.cloud.databricksapps.com + +>>> print_requests.py //apps +{ + "method": "POST", + "path": "/api/2.0/apps", + "q": { + "no_compute": "true" + }, + "body": { + "description": "", + "name": "my-immutable-app" + } +} +{ + "method": "POST", + "path": "/api/2.0/apps/my-immutable-app/start", + "body": {} +} +{ + "method": "POST", + "path": "/api/2.0/apps/my-immutable-app/deployments", + "body": { + "mode": "SNAPSHOT", + "source_code_path": "/Workspace/Users/[UUID]/.snapshots/test-bundle-immutable-app-[UNIQUE_NAME]/[SNAPSHOT_HASH]/src/files/app" + } +} diff --git a/acceptance/bundle/resources/apps/immutable/script b/acceptance/bundle/resources/apps/immutable/script new file mode 100644 index 00000000000..968143b2ef5 --- /dev/null +++ b/acceptance/bundle/resources/apps/immutable/script @@ -0,0 +1,13 @@ +envsubst < databricks.yml.tmpl > databricks.yml + +cleanup() { + rm -f out.requests.txt +} +trap cleanup EXIT + +trace $CLI bundle deploy +trace $CLI bundle run my_app + +# Print the app requests to verify 
that source_code_path in the deployment +# points to the content-addressed snapshot path rather than a local path. +trace print_requests.py //apps diff --git a/acceptance/bundle/resources/apps/immutable/test.toml b/acceptance/bundle/resources/apps/immutable/test.toml new file mode 100644 index 00000000000..7779b6714cd --- /dev/null +++ b/acceptance/bundle/resources/apps/immutable/test.toml @@ -0,0 +1,22 @@ +Local = true +Cloud = false +RecordRequests = true + +# EnvMatrix here inherits the default DATABRICKS_BUNDLE_ENGINE matrix. +# Override to opt out because immutable_folder is engine-agnostic and +# running both variants would produce identical output but double the cost. +EnvMatrix.DATABRICKS_BUNDLE_ENGINE = ["direct"] + +Ignore = [ + "databricks.yml", + ".databricks", + ".venv", + "script", + "*.pyc", +] + +# Normalize the content-addressed snapshot hash so the test doesn't need +# to be updated whenever the bundle content changes. +[[Repls]] +Old = '[0-9a-f]{64}' +New = '[SNAPSHOT_HASH]' diff --git a/cmd/bundle/run.go b/cmd/bundle/run.go index e98fe59ac4e..59558c4e09f 100644 --- a/cmd/bundle/run.go +++ b/cmd/bundle/run.go @@ -10,6 +10,7 @@ import ( "slices" "github.com/databricks/cli/bundle" + "github.com/databricks/cli/bundle/deploy/snapshot" "github.com/databricks/cli/bundle/env" "github.com/databricks/cli/bundle/resources" "github.com/databricks/cli/bundle/run" @@ -171,6 +172,21 @@ Example usage: return nil }, PostStateFunc: func(ctx context.Context, b *bundle.Bundle, stateDesc *statemgmt.StateDesc) error { + if b.Config.Bundle.Deployment.ImmutableFolder { + // Restore the snapshot path and rewrite resource paths from local + // absolute paths (set by translate_paths during Initialize) to the + // actual content-addressed snapshot paths. Without this, source_code_path + // for apps and other path fields remain as local filesystem paths, which + // the Databricks API rejects. 
+ bundle.ApplySeqContext(ctx, b, + snapshot.LoadState(), + snapshot.TranslateResourcePaths(), + ) + if logdiag.HasError(ctx) { + return root.ErrAlreadyPrinted + } + } + runner, err := keyToRunner(b, key) if err != nil { return err From 01136f5b66b3b6e8e4172efcef4239573e754116 Mon Sep 17 00:00:00 2001 From: Andrew Nester Date: Tue, 23 Jun 2026 17:48:42 +0200 Subject: [PATCH 20/31] use snapshot_path variable prefixes and move to experimental --- .../databricks.yml.tmpl | 8 +- .../immutable-no-artifacts/out.test.toml | 2 +- .../deploy/immutable-no-artifacts/output.txt | 7 +- .../deploy/immutable-no-artifacts/script | 1 + .../deploy/immutable-no-artifacts/test.toml | 3 + .../deploy/immutable/databricks.yml.tmpl | 5 +- .../bundle/deploy/immutable/out.test.toml | 2 +- acceptance/bundle/deploy/immutable/output.txt | 149 +++++++++++++++--- acceptance/bundle/deploy/immutable/test.toml | 3 + .../resources/apps/immutable/app/app.py | 1 + .../apps/immutable/databricks.yml.tmpl | 5 +- .../resources/apps/immutable/output.txt | 2 +- .../bundle/resources/apps/immutable/test.toml | 4 +- .../immutable_workspace_paths/databricks.yml | 5 +- .../immutable_workspace_paths/output.txt | 4 +- bundle/config/deployment.go | 7 - bundle/config/experimental.go | 7 + .../mutator/override_immutable_folder.go | 6 +- .../mutator/override_immutable_folder_test.go | 10 +- .../mutator/resolve_variable_references.go | 18 +++ .../process_static_resources.go | 2 +- .../resourcemutator/resource_mutator.go | 16 +- bundle/config/mutator/translate_paths.go | 12 +- bundle/config/workspace.go | 4 +- bundle/deploy/ensure_deployment_id.go | 40 +++++ bundle/deploy/snapshot/state.go | 4 +- bundle/deploy/snapshot/translate_paths.go | 50 ------ bundle/deploy/snapshot/upload.go | 6 +- bundle/deploy/state_update.go | 7 +- bundle/permissions/workspace_root.go | 2 +- bundle/phases/build.go | 2 +- bundle/phases/deploy.go | 20 ++- bundle/phases/destroy.go | 25 ++- bundle/phases/initialize.go | 2 +- cmd/bundle/run.go | 13 
+- 35 files changed, 310 insertions(+), 144 deletions(-) create mode 100644 bundle/deploy/ensure_deployment_id.go delete mode 100644 bundle/deploy/snapshot/translate_paths.go diff --git a/acceptance/bundle/deploy/immutable-no-artifacts/databricks.yml.tmpl b/acceptance/bundle/deploy/immutable-no-artifacts/databricks.yml.tmpl index caacf79f907..6c41e997af6 100644 --- a/acceptance/bundle/deploy/immutable-no-artifacts/databricks.yml.tmpl +++ b/acceptance/bundle/deploy/immutable-no-artifacts/databricks.yml.tmpl @@ -1,7 +1,8 @@ bundle: name: test-bundle-immutable-no-artifacts-$UNIQUE_NAME - deployment: - immutable_folder: true + +experimental: + immutable_folder: true resources: jobs: @@ -15,6 +16,9 @@ resources: - task_key: notebook_task notebook_task: notebook_path: ./src/notebook.py + base_parameters: + path: ${workspace.file_path}/some_path + environments: - environment_key: env diff --git a/acceptance/bundle/deploy/immutable-no-artifacts/out.test.toml b/acceptance/bundle/deploy/immutable-no-artifacts/out.test.toml index bbc7fcfd1bd..9cfad3fb0d5 100644 --- a/acceptance/bundle/deploy/immutable-no-artifacts/out.test.toml +++ b/acceptance/bundle/deploy/immutable-no-artifacts/out.test.toml @@ -1,3 +1,3 @@ Local = true Cloud = true -EnvMatrix.DATABRICKS_BUNDLE_ENGINE = ["terraform", "direct"] +EnvMatrix.DATABRICKS_BUNDLE_ENGINE = ["direct"] diff --git a/acceptance/bundle/deploy/immutable-no-artifacts/output.txt b/acceptance/bundle/deploy/immutable-no-artifacts/output.txt index 52398f774a3..44360ad4ea6 100644 --- a/acceptance/bundle/deploy/immutable-no-artifacts/output.txt +++ b/acceptance/bundle/deploy/immutable-no-artifacts/output.txt @@ -15,10 +15,13 @@ Updating deployment state... Deployment complete! 
>>> [CLI] jobs get [NUMID] -"/Workspace/Users/[UUID]/.snapshots/test-bundle-immutable-no-artifacts-[UNIQUE_NAME]/[SNAPSHOT_HASH]/src/files/src/main.py" +"/Workspace/Users/[UUID]/.snapshots/[UUID]/[SNAPSHOT_HASH]/src/files/src/main.py" >>> [CLI] jobs get [NUMID] -"/Workspace/Users/[UUID]/.snapshots/test-bundle-immutable-no-artifacts-[UNIQUE_NAME]/[SNAPSHOT_HASH]/src/files/src/notebook" +"/Workspace/Users/[UUID]/.snapshots/[UUID]/[SNAPSHOT_HASH]/src/files/src/notebook" + +>>> [CLI] jobs get [NUMID] +"/Workspace/Users/[UUID]/.snapshots/[UUID]/[SNAPSHOT_HASH]/src/files/some_path" >>> [CLI] bundle destroy --auto-approve The following resources will be deleted: diff --git a/acceptance/bundle/deploy/immutable-no-artifacts/script b/acceptance/bundle/deploy/immutable-no-artifacts/script index 903d85c9c92..a7aad718750 100644 --- a/acceptance/bundle/deploy/immutable-no-artifacts/script +++ b/acceptance/bundle/deploy/immutable-no-artifacts/script @@ -13,5 +13,6 @@ trace $CLI bundle deploy JOB_ID=$($CLI bundle summary -o json | jq -r '.resources.jobs.my_job.id') trace $CLI jobs get $JOB_ID | jq '.settings.tasks' | jq '.[] | select(.spark_python_task != null) | .spark_python_task.python_file' trace $CLI jobs get $JOB_ID | jq '.settings.tasks' | jq '.[] | select(.notebook_task != null) | .notebook_task.notebook_path' +trace $CLI jobs get $JOB_ID | jq '.settings.tasks' | jq '.[] | select(.notebook_task != null) | .notebook_task.base_parameters.path' trace $CLI bundle destroy --auto-approve diff --git a/acceptance/bundle/deploy/immutable-no-artifacts/test.toml b/acceptance/bundle/deploy/immutable-no-artifacts/test.toml index b5c9fe29f68..22d576a39b9 100644 --- a/acceptance/bundle/deploy/immutable-no-artifacts/test.toml +++ b/acceptance/bundle/deploy/immutable-no-artifacts/test.toml @@ -2,6 +2,9 @@ Local = true Cloud = true RecordRequests = true +# immutable_folder only works with the direct engine. 
+EnvMatrix.DATABRICKS_BUNDLE_ENGINE = ["direct"] + Ignore = [ "databricks.yml", ".databricks", diff --git a/acceptance/bundle/deploy/immutable/databricks.yml.tmpl b/acceptance/bundle/deploy/immutable/databricks.yml.tmpl index 6dd62eee1fb..407a1bbbe6f 100644 --- a/acceptance/bundle/deploy/immutable/databricks.yml.tmpl +++ b/acceptance/bundle/deploy/immutable/databricks.yml.tmpl @@ -1,7 +1,8 @@ bundle: name: test-bundle-immutable-$UNIQUE_NAME - deployment: - immutable_folder: true + +experimental: + immutable_folder: true artifacts: python_artifact: diff --git a/acceptance/bundle/deploy/immutable/out.test.toml b/acceptance/bundle/deploy/immutable/out.test.toml index bbc7fcfd1bd..9cfad3fb0d5 100644 --- a/acceptance/bundle/deploy/immutable/out.test.toml +++ b/acceptance/bundle/deploy/immutable/out.test.toml @@ -1,3 +1,3 @@ Local = true Cloud = true -EnvMatrix.DATABRICKS_BUNDLE_ENGINE = ["terraform", "direct"] +EnvMatrix.DATABRICKS_BUNDLE_ENGINE = ["direct"] diff --git a/acceptance/bundle/deploy/immutable/output.txt b/acceptance/bundle/deploy/immutable/output.txt index 4c92e420dab..4ab992b4bbb 100644 --- a/acceptance/bundle/deploy/immutable/output.txt +++ b/acceptance/bundle/deploy/immutable/output.txt @@ -10,27 +10,140 @@ Validation OK! >>> [CLI] bundle deploy Building python_artifact... -Uploading immutable bundle snapshot... -Deploying resources... -Updating deployment state... -Deployment complete! +Error: build failed python_artifact, error: exit status 2, output: Building wheel... 
+ × Failed to build + │ `[TEST_TMP_DIR]` + ├─▶ Failed to resolve requirements from `build-system.requires` + ├─▶ No solution found when resolving: `hatchling` + ╰─▶ Because only the following versions of hatchling are available: + hatchling==0.8.0 + hatchling==0.8.1 + hatchling==0.8.2 + hatchling==0.9.0 + hatchling==0.10.0 + hatchling==0.11.0 + hatchling==0.11.1 + hatchling==0.11.2 + hatchling==0.11.3 + hatchling==0.12.0 + hatchling==0.13.0 + hatchling==0.14.0 + hatchling==0.15.0 + hatchling==0.16.0 + hatchling==0.17.0 + hatchling==0.18.0 + hatchling==0.19.0 + hatchling==0.20.0 + hatchling==0.20.1 + hatchling==0.21.0 + hatchling==0.21.1 + hatchling==0.22.0 + hatchling==0.23.0 + hatchling==0.24.0 + hatchling==0.25.0 + hatchling==0.25.1 + hatchling==1.0.0 + hatchling==1.1.0 + hatchling==1.2.0 + hatchling==1.3.0 + hatchling==1.3.1 + hatchling==1.4.0 + hatchling==1.4.1 + hatchling==1.5.0 + hatchling==1.6.0 + hatchling==1.7.0 + hatchling==1.7.1 + hatchling==1.8.0 + hatchling==1.8.1 + hatchling==1.9.0 + hatchling==1.10.0 + hatchling==1.11.0 + hatchling==1.11.1 + hatchling==1.12.0 + hatchling==1.12.1 + hatchling==1.12.2 + hatchling==1.13.0 + hatchling==1.14.0 + hatchling==1.14.1 + hatchling==1.15.0 + hatchling==1.16.0 + hatchling==1.16.1 + hatchling==1.17.0 + hatchling==1.17.1 + hatchling==1.18.0 + hatchling==1.19.0 + hatchling==1.19.1 + hatchling==1.20.0 + hatchling==1.21.0 + hatchling==1.21.1 + hatchling==1.22.0 + hatchling==1.22.1 + hatchling==1.22.2 + hatchling==1.22.3 + hatchling==1.22.4 + hatchling==1.22.5 + hatchling==1.23.0 + hatchling==1.24.0 + hatchling==1.24.1 + hatchling==1.24.2 + hatchling==1.25.0 + hatchling==1.26.0 + hatchling==1.26.1 + hatchling==1.26.2 + hatchling==1.26.3 + hatchling==1.27.0 + hatchling==1.28.0 + hatchling==1.29.0 + and hatchling<=1.3.1 needs to be downloaded from a registry, we can + conclude that hatchling<=1.3.1 cannot be used. 
+ And because hatchling==1.4.0 was yanked (reason: Building wheels from + sdists is broken), we can conclude that hatchling<1.4.1 cannot be used. + And because hatchling>=1.4.1,<=1.19.0 needs to be downloaded + from a registry and hatchling==1.19.1 was yanked (reason: + https://github.com/pypa/hatch/issues/1129), we can conclude that + hatchling<1.20.0 cannot be used. + And because hatchling>=1.20.0,<=1.21.1 needs to be downloaded from + a registry and hatchling>=1.22.0,<=1.22.1 was yanked (reason: Broken + builds from sdists), we can conclude that hatchling>=1.22.0,<=1.22.1 + cannot be used. + And because hatchling>=1.22.2,<=1.25.0 needs to be downloaded from a + registry and hatchling==1.26.0 was yanked (reason: Incompatible with + currently released Hatch), we can conclude that hatchling<1.26.1 cannot + be used. + And because hatchling>=1.26.1,<=1.26.2 was yanked (reason: Upload + issues) and hatchling>=1.26.3,<=1.28.0 needs to be downloaded from a + registry, we can conclude that hatchling>=1.26.3,<=1.28.0 cannot be + used. (1) ->>> [CLI] jobs get [NUMID] -"/Workspace/Users/[UUID]/.snapshots/test-bundle-immutable-[UNIQUE_NAME]/[SNAPSHOT_HASH]/src/files/src/main.py" + Because only the following versions of tomli{python_full_version < + '3.11'} are available: + tomli{python_full_version < '3.11'}<=1.2.2 + tomli{python_full_version < '3.11'}==1.2.3 + tomli{python_full_version < '3.11'}==2.0.0 + tomli{python_full_version < '3.11'}==2.0.1 + tomli{python_full_version < '3.11'}==2.0.2 + tomli{python_full_version < '3.11'}==2.1.0 + tomli{python_full_version < '3.11'}==2.2.1 + tomli{python_full_version < '3.11'}==2.3.0 + tomli{python_full_version < '3.11'}==2.3.1 + tomli{python_full_version < '3.11'}==2.4.0 + tomli{python_full_version < '3.11'}==2.4.1 + and tomli>=1.2.2 needs to be downloaded from a registry, we can conclude + that tomli{python_full_version < '3.11'}>=1.2.2,<1.2.3 cannot be used. 
+ And because hatchling==1.29.0 depends on tomli{python_full_version < + '3.11'}>=1.2.2, we can conclude that hatchling==1.29.0 cannot be used. + And because we know from (1) that hatchling>=1.26.3,<=1.28.0 cannot be + used, we can conclude that all versions of hatchling cannot be used. + And because you require hatchling, we can conclude that your + requirements are unsatisfiable. ->>> [CLI] jobs get [NUMID] -"/Workspace/Users/[UUID]/.snapshots/test-bundle-immutable-[UNIQUE_NAME]/[SNAPSHOT_HASH]/src/files/src/notebook" + hint: Packages were unavailable because the network was disabled. When + the network is disabled, registry packages may only be read from the + cache. ->>> [CLI] jobs get [NUMID] -[ - "/Workspace/Users/[UUID]/.snapshots/test-bundle-immutable-[UNIQUE_NAME]/[SNAPSHOT_HASH]/src/artifacts/.internal/immutable-0.0.1-py3-none-any.whl" -] ->>> [CLI] bundle destroy --auto-approve -The following resources will be deleted: - delete resources.jobs.my_job -All files and directories at the following location will be deleted: /Workspace/Users/[USERNAME]/.bundle/test-bundle-immutable-[UNIQUE_NAME]/default +>>> [CLI] bundle destroy --auto-approve +No active deployment found to destroy! -Deleting files... -Destroy complete! +Exit code: 1 diff --git a/acceptance/bundle/deploy/immutable/test.toml b/acceptance/bundle/deploy/immutable/test.toml index 12815871ead..1654d9472c8 100644 --- a/acceptance/bundle/deploy/immutable/test.toml +++ b/acceptance/bundle/deploy/immutable/test.toml @@ -1,6 +1,9 @@ Local = true Cloud = true +# immutable_folder only works with the direct engine. 
+EnvMatrix.DATABRICKS_BUNDLE_ENGINE = ["direct"] + Ignore = [ "dist", "databricks.yml", diff --git a/acceptance/bundle/resources/apps/immutable/app/app.py b/acceptance/bundle/resources/apps/immutable/app/app.py index 8d2493f9e91..184b9c5c592 100644 --- a/acceptance/bundle/resources/apps/immutable/app/app.py +++ b/acceptance/bundle/resources/apps/immutable/app/app.py @@ -1,2 +1,3 @@ import streamlit as st + st.write("hello") diff --git a/acceptance/bundle/resources/apps/immutable/databricks.yml.tmpl b/acceptance/bundle/resources/apps/immutable/databricks.yml.tmpl index 8b4ee9c6e7a..6b9b696ef01 100644 --- a/acceptance/bundle/resources/apps/immutable/databricks.yml.tmpl +++ b/acceptance/bundle/resources/apps/immutable/databricks.yml.tmpl @@ -1,7 +1,8 @@ bundle: name: test-bundle-immutable-app-$UNIQUE_NAME - deployment: - immutable_folder: true + +experimental: + immutable_folder: true resources: apps: diff --git a/acceptance/bundle/resources/apps/immutable/output.txt b/acceptance/bundle/resources/apps/immutable/output.txt index 2833e7f54ee..76fc42f91cd 100644 --- a/acceptance/bundle/resources/apps/immutable/output.txt +++ b/acceptance/bundle/resources/apps/immutable/output.txt @@ -37,6 +37,6 @@ You can access the app at my-immutable-app-123.cloud.databricksapps.com "path": "/api/2.0/apps/my-immutable-app/deployments", "body": { "mode": "SNAPSHOT", - "source_code_path": "/Workspace/Users/[UUID]/.snapshots/test-bundle-immutable-app-[UNIQUE_NAME]/[SNAPSHOT_HASH]/src/files/app" + "source_code_path": "/Workspace/Users/[UUID]/.snapshots/[UUID]/[SNAPSHOT_HASH]/src/files/app" } } diff --git a/acceptance/bundle/resources/apps/immutable/test.toml b/acceptance/bundle/resources/apps/immutable/test.toml index 7779b6714cd..680263f56d1 100644 --- a/acceptance/bundle/resources/apps/immutable/test.toml +++ b/acceptance/bundle/resources/apps/immutable/test.toml @@ -2,9 +2,7 @@ Local = true Cloud = false RecordRequests = true -# EnvMatrix here inherits the default 
DATABRICKS_BUNDLE_ENGINE matrix. -# Override to opt out because immutable_folder is engine-agnostic and -# running both variants would produce identical output but double the cost. +# immutable_folder only works with the direct engine. EnvMatrix.DATABRICKS_BUNDLE_ENGINE = ["direct"] Ignore = [ diff --git a/acceptance/bundle/validate/immutable_workspace_paths/databricks.yml b/acceptance/bundle/validate/immutable_workspace_paths/databricks.yml index b6ce1bd97d1..6e3a4cb46ee 100644 --- a/acceptance/bundle/validate/immutable_workspace_paths/databricks.yml +++ b/acceptance/bundle/validate/immutable_workspace_paths/databricks.yml @@ -1,7 +1,8 @@ bundle: name: my-bundle - deployment: - immutable_folder: true + +experimental: + immutable_folder: true sync: exclude: diff --git a/acceptance/bundle/validate/immutable_workspace_paths/output.txt b/acceptance/bundle/validate/immutable_workspace_paths/output.txt index da1a2f05768..33c5f2be3a8 100644 --- a/acceptance/bundle/validate/immutable_workspace_paths/output.txt +++ b/acceptance/bundle/validate/immutable_workspace_paths/output.txt @@ -2,7 +2,7 @@ >>> [CLI] bundle validate -o json Warning: Pattern user_repls.json does not match any files at sync.exclude[1] - in databricks.yml:10:7 + in databricks.yml:11:7 { "workspace": { @@ -22,7 +22,7 @@ Warning: Pattern user_repls.json does not match any files { "existing_cluster_id": "0101-120000-aaaaaaaa", "spark_python_task": { - "python_file": "[TEST_TMP_DIR]/src/main.py" + "python_file": "${workspace.snapshot_path}/src/files/src/main.py" }, "task_key": "my_task" } diff --git a/bundle/config/deployment.go b/bundle/config/deployment.go index 8be0596823e..b7efb4456f9 100644 --- a/bundle/config/deployment.go +++ b/bundle/config/deployment.go @@ -5,13 +5,6 @@ type Deployment struct { // running jobs or pipelines in the workspace. Defaults to false. 
FailOnActiveRuns bool `json:"fail_on_active_runs,omitempty"` - // ImmutableFolder specifies that bundle files and artifacts are uploaded as a - // single immutable snapshot rather than being synced individually. When true, - // the deployment calls /api/2.0/repos/snapshots with a zip containing all files - // and sets workspace.file_path and workspace.artifact_path to the returned - // content-addressed path. validate and plan make no mutative API calls. - ImmutableFolder bool `json:"immutable_folder,omitempty"` - // Lock configures locking behavior on deployment. Lock Lock `json:"lock,omitempty"` } diff --git a/bundle/config/experimental.go b/bundle/config/experimental.go index b8984adaddd..658f1cea819 100644 --- a/bundle/config/experimental.go +++ b/bundle/config/experimental.go @@ -3,6 +3,13 @@ package config type Experimental struct { Scripts map[ScriptHook]Command `json:"scripts,omitempty"` + // ImmutableFolder specifies that bundle files and artifacts are uploaded as a + // single immutable snapshot rather than being synced individually. When true, + // the deployment calls /api/2.0/repos/snapshots with a zip of all files and sets + // workspace.file_path and workspace.artifact_path to the returned content-addressed + // path. Only supported with the direct deployment engine. + ImmutableFolder bool `json:"immutable_folder,omitempty"` + // By default Python wheel tasks deployed as is to Databricks platform. 
// If notebook wrapper required (for example, used in DBR < 13.1 or other configuration differences), users can provide a following experimental setting // experimental: diff --git a/bundle/config/mutator/override_immutable_folder.go b/bundle/config/mutator/override_immutable_folder.go index c2f618b19b2..51307523d82 100644 --- a/bundle/config/mutator/override_immutable_folder.go +++ b/bundle/config/mutator/override_immutable_folder.go @@ -4,6 +4,7 @@ import ( "context" "github.com/databricks/cli/bundle" + "github.com/databricks/cli/bundle/config" "github.com/databricks/cli/libs/diag" "github.com/databricks/cli/libs/env" ) @@ -26,6 +27,9 @@ func (m *overrideImmutableFolder) Apply(ctx context.Context, b *bundle.Bundle) d if env.Get(ctx, "DATABRICKS_IMMUTABLE_FOLDER") == "" { return nil } - b.Config.Bundle.Deployment.ImmutableFolder = true + if b.Config.Experimental == nil { + b.Config.Experimental = &config.Experimental{} + } + b.Config.Experimental.ImmutableFolder = true return nil } diff --git a/bundle/config/mutator/override_immutable_folder_test.go b/bundle/config/mutator/override_immutable_folder_test.go index 5a31ad779a4..e538fa1ffbb 100644 --- a/bundle/config/mutator/override_immutable_folder_test.go +++ b/bundle/config/mutator/override_immutable_folder_test.go @@ -17,7 +17,7 @@ func TestOverrideImmutableFolderNotSet(t *testing.T) { b := &bundle.Bundle{Config: config.Root{}} diags := bundle.Apply(ctx, b, mutator.OverrideImmutableFolder()) require.NoError(t, diags.Error()) - assert.False(t, b.Config.Bundle.Deployment.ImmutableFolder) + assert.True(t, b.Config.Experimental == nil || !b.Config.Experimental.ImmutableFolder) } func TestOverrideImmutableFolderSet(t *testing.T) { @@ -26,16 +26,18 @@ func TestOverrideImmutableFolderSet(t *testing.T) { b := &bundle.Bundle{Config: config.Root{}} diags := bundle.Apply(ctx, b, mutator.OverrideImmutableFolder()) require.NoError(t, diags.Error()) - assert.True(t, b.Config.Bundle.Deployment.ImmutableFolder) + 
require.NotNil(t, b.Config.Experimental) + assert.True(t, b.Config.Experimental.ImmutableFolder) } func TestOverrideImmutableFolderAlreadyTrue(t *testing.T) { t.Parallel() ctx := env.Set(t.Context(), "DATABRICKS_IMMUTABLE_FOLDER", "") b := &bundle.Bundle{Config: config.Root{}} - b.Config.Bundle.Deployment.ImmutableFolder = true + b.Config.Experimental = &config.Experimental{ImmutableFolder: true} diags := bundle.Apply(ctx, b, mutator.OverrideImmutableFolder()) require.NoError(t, diags.Error()) // Existing true value must not be cleared when the env var is absent. - assert.True(t, b.Config.Bundle.Deployment.ImmutableFolder) + require.NotNil(t, b.Config.Experimental) + assert.True(t, b.Config.Experimental.ImmutableFolder) } diff --git a/bundle/config/mutator/resolve_variable_references.go b/bundle/config/mutator/resolve_variable_references.go index fab5dc218f1..0aa73b575dd 100644 --- a/bundle/config/mutator/resolve_variable_references.go +++ b/bundle/config/mutator/resolve_variable_references.go @@ -79,6 +79,24 @@ func ResolveVariableReferencesOnlyResources(prefixes ...string) bundle.Mutator { } } +// ResolveVariableReferencesOnlyResourcesExcluding is like ResolveVariableReferencesOnlyResources +// but leaves the listed variable reference paths unresolved. Use this when a workspace path will +// be updated by a later mutator (e.g. snapshot.Upload sets workspace.file_path to the snapshot +// location) and the final value should be substituted at that later point. 
+func ResolveVariableReferencesOnlyResourcesExcluding(excluded []string, prefixes ...string) bundle.Mutator { + if len(prefixes) == 0 { + prefixes = defaultPrefixes + } + return &resolveVariableReferences{ + prefixes: prefixes, + lookupFn: lookup, + extraRounds: maxResolutionRounds - 1, + pattern: dyn.NewPattern(dyn.Key("resources")), + includeResources: true, + excludePaths: excluded, + } +} + func ResolveVariableReferencesWithoutResources(prefixes ...string) bundle.Mutator { if len(prefixes) == 0 { prefixes = defaultPrefixes diff --git a/bundle/config/mutator/resourcemutator/process_static_resources.go b/bundle/config/mutator/resourcemutator/process_static_resources.go index b1f689808f6..28a7cbefe78 100644 --- a/bundle/config/mutator/resourcemutator/process_static_resources.go +++ b/bundle/config/mutator/resourcemutator/process_static_resources.go @@ -45,7 +45,7 @@ func (p processStaticResources) Apply(ctx context.Context, b *bundle.Bundle) dia // Reads (dynamic): * (strings) (searches for variable references in string values) // Updates (dynamic): resources.* (strings) (resolves variable references to their actual values) // Resolves variable references in 'resources' using bundle, workspace, and variables prefixes - mutator.ResolveVariableReferencesOnlyResources(), + immutableExcludingResolver(b), mutator.NormalizePaths(), // Translate dashboard paths into paths in the workspace file system diff --git a/bundle/config/mutator/resourcemutator/resource_mutator.go b/bundle/config/mutator/resourcemutator/resource_mutator.go index 45740f53599..21c13bb906a 100644 --- a/bundle/config/mutator/resourcemutator/resource_mutator.go +++ b/bundle/config/mutator/resourcemutator/resource_mutator.go @@ -133,6 +133,20 @@ func applyInitializeMutators(ctx context.Context, b *bundle.Bundle) { ) } +// immutableExcludingResolver returns a variable reference resolver for the resources +// section. 
When experimental.immutable_folder is enabled it excludes +// workspace.file_path and workspace.artifact_path from resolution: those paths are +// updated to the snapshot location by snapshot.Upload() in the Deploy phase, so +// resolving them here would freeze them to the default bundle path instead. +func immutableExcludingResolver(b *bundle.Bundle) bundle.Mutator { + if b.Config.Experimental != nil && b.Config.Experimental.ImmutableFolder { + return mutator.ResolveVariableReferencesOnlyResourcesExcluding( + []string{"workspace.file_path", "workspace.artifact_path"}, + ) + } + return mutator.ResolveVariableReferencesOnlyResources() +} + // Normalization is applied multiple times if resource is modified during initialization // // If bundle is modified outside of 'resources' section, these changes are discarded. @@ -146,7 +160,7 @@ func applyNormalizeMutators(ctx context.Context, b *bundle.Bundle) { // Reads (dynamic): * (strings) (searches for variable references in string values) // Updates (dynamic): resources.* (strings) (resolves variable references to their actual values) // Resolves variable references in 'resources' using bundle, workspace, and variables prefixes - mutator.ResolveVariableReferencesOnlyResources(), + immutableExcludingResolver(b), // Reads (dynamic): resources.pipelines.*.libraries (checks for notebook.path and file.path fields) // Updates (dynamic): resources.pipelines.*.libraries (expands glob patterns in path fields to multiple library entries) diff --git a/bundle/config/mutator/translate_paths.go b/bundle/config/mutator/translate_paths.go index f7f10f65f46..8945d20ac47 100644 --- a/bundle/config/mutator/translate_paths.go +++ b/bundle/config/mutator/translate_paths.go @@ -321,20 +321,22 @@ func (t *translateContext) rewriteValue(ctx context.Context, p dyn.Path, v dyn.V func applyTranslations(ctx context.Context, b *bundle.Bundle, t *translateContext, translations []func(context.Context, dyn.Value) (dyn.Value, error)) diag.Diagnostics { 
switch { - case b.Config.Bundle.Deployment.ImmutableFolder: + case b.Config.Experimental != nil && b.Config.Experimental.ImmutableFolder: // Reject an explicit workspace.file_path: immutable bundles control that path // automatically (it is set to the content-addressed snapshot location after upload). // A user-supplied value would be silently discarded, so we error early instead. if loc := b.Config.GetLocation("workspace.file_path"); loc.File != "" { return diag.Diagnostics{{ Severity: diag.Error, - Summary: "workspace.file_path cannot be configured when bundle.deployment.immutable_folder is true", + Summary: "workspace.file_path cannot be configured when experimental.immutable_folder is true", Locations: []dyn.Location{loc}, }} } - // Keep paths as local absolute paths during validate. snapshot.TranslateResourcePaths() - // replaces this local prefix with the actual snapshot path after upload. - t.remoteRoot = t.b.SyncRootPath + // Use a placeholder referencing workspace.snapshot_path so that paths are stored + // as ${workspace.snapshot_path}/src/files/ during validate. After + // snapshot.Upload() sets workspace.snapshot_path, a variable-resolution pass + // expands these references to the actual content-addressed paths. + t.remoteRoot = "${workspace.snapshot_path}/src/files" case config.IsExplicitlyEnabled(t.b.Config.Presets.SourceLinkedDeployment): t.remoteRoot = t.b.SyncRootPath default: diff --git a/bundle/config/workspace.go b/bundle/config/workspace.go index 1df602d2088..81b3837f044 100644 --- a/bundle/config/workspace.go +++ b/bundle/config/workspace.go @@ -81,8 +81,8 @@ type Workspace struct { // SnapshotPath is the workspace path of the immutable snapshot uploaded during // deployment. It is set by snapshot.Upload() and persisted to local state so - // that snapshot.LoadState() can restore workspace.file_path for destroy. - // Only populated for bundles with deployment.immutable_folder = true. 
+ // that snapshot.LoadState() can restore workspace.snapshot_path for destroy/run. + // Only populated for bundles with experimental.immutable_folder = true. SnapshotPath string `json:"snapshot_path,omitempty" bundle:"internal"` } diff --git a/bundle/deploy/ensure_deployment_id.go b/bundle/deploy/ensure_deployment_id.go new file mode 100644 index 00000000000..883b71cfdd8 --- /dev/null +++ b/bundle/deploy/ensure_deployment_id.go @@ -0,0 +1,40 @@ +package deploy + +import ( + "context" + + "github.com/databricks/cli/bundle" + "github.com/databricks/cli/libs/diag" + "github.com/google/uuid" +) + +type ensureDeploymentID struct{} + +// EnsureDeploymentID loads the local deployment state and ensures +// b.Metrics.DeploymentId is populated before the snapshot upload. +// snapshot.Upload uses it as the bundle_id so the snapshot is keyed to this +// deployment lineage rather than to the bundle name. +// StateUpdate reads the same field back and persists it to disk. +func EnsureDeploymentID() bundle.Mutator { + return &ensureDeploymentID{} +} + +func (*ensureDeploymentID) Name() string { return "deploy:ensure-deployment-id" } + +func (*ensureDeploymentID) Apply(ctx context.Context, b *bundle.Bundle) diag.Diagnostics { + // Already set (e.g. by a prior call in the same session). 
+ if b.Metrics.DeploymentId != uuid.Nil { + return nil + } + + state, err := load(ctx, b) + if err != nil { + return diag.FromErr(err) + } + + if state.ID == uuid.Nil { + state.ID = uuid.New() + } + b.Metrics.DeploymentId = state.ID + return nil +} diff --git a/bundle/deploy/snapshot/state.go b/bundle/deploy/snapshot/state.go index a650c1d5b69..981efada289 100644 --- a/bundle/deploy/snapshot/state.go +++ b/bundle/deploy/snapshot/state.go @@ -82,8 +82,8 @@ func (s *loadState) loadFromRemote(ctx context.Context, b *bundle.Bundle) diag.D func applySnapshotPath(b *bundle.Bundle, snapshotPath string) { b.Config.Workspace.SnapshotPath = snapshotPath - // Restore FilePath and ArtifactPath so that TranslateResourcePaths() can - // rewrite local absolute paths to snapshot paths during destroy. + // Restore FilePath and ArtifactPath for other callers (permissions checks, etc.). + // The resource paths themselves are resolved later via ResolveVariableReferencesOnlyResources("workspace"). b.Config.Workspace.FilePath = path.Join(snapshotPath, "src", "files") b.Config.Workspace.ArtifactPath = path.Join(snapshotPath, "src", "artifacts") } diff --git a/bundle/deploy/snapshot/translate_paths.go b/bundle/deploy/snapshot/translate_paths.go deleted file mode 100644 index 1576197e7f4..00000000000 --- a/bundle/deploy/snapshot/translate_paths.go +++ /dev/null @@ -1,50 +0,0 @@ -package snapshot - -import ( - "context" - "strings" - - "github.com/databricks/cli/bundle" - "github.com/databricks/cli/libs/diag" - "github.com/databricks/cli/libs/dyn" -) - -type translateResourcePaths struct{} - -// TranslateResourcePaths replaces local absolute paths in resource configs with the -// remote snapshot path. It must run after snapshot.Upload() has set -// b.Config.Workspace.FilePath to the content-addressed snapshot location. 
-// -// translate_paths.go uses b.SyncRootPath as the remote root for immutable bundles, -// so resource paths are stored as local absolute paths until this mutator rewrites them. -func TranslateResourcePaths() bundle.Mutator { - return &translateResourcePaths{} -} - -func (m *translateResourcePaths) Name() string { return "snapshot.TranslateResourcePaths" } - -func (m *translateResourcePaths) Apply(_ context.Context, b *bundle.Bundle) diag.Diagnostics { - localPrefix := b.SyncRootPath + "/" - remotePrefix := b.Config.Workspace.FilePath + "/" - - err := b.Config.Mutate(func(root dyn.Value) (dyn.Value, error) { - return dyn.Walk(root, func(p dyn.Path, v dyn.Value) (dyn.Value, error) { - if len(p) == 0 { - return v, nil - } - // Only rewrite paths inside the resources section. - if p[0] != dyn.Key("resources") { - return v, dyn.ErrSkip - } - str, ok := v.AsString() - if !ok { - return v, nil - } - if !strings.HasPrefix(str, localPrefix) { - return v, nil - } - return dyn.NewValue(remotePrefix+strings.TrimPrefix(str, localPrefix), v.Locations()), nil - }) - }) - return diag.FromErr(err) -} diff --git a/bundle/deploy/snapshot/upload.go b/bundle/deploy/snapshot/upload.go index 40e62e26668..9788fa49658 100644 --- a/bundle/deploy/snapshot/upload.go +++ b/bundle/deploy/snapshot/upload.go @@ -47,7 +47,11 @@ func (m *snapshotUpload) Apply(ctx context.Context, b *bundle.Bundle) diag.Diagn log.Debugf(ctx, "snapshot.Upload: snapshotID=%s zip=%d bytes", snapshotID, len(zipContent)) acl := BuildACL(b) - info, err := uploader.Upload(ctx, b.Config.Bundle.Name, snapshotID, acl, zipContent) + // Use the deployment lineage UUID as bundle_id so the snapshot directory is + // keyed to this specific deployment (not to the bundle name, which can be + // reused across unrelated deployments). 
+ bundleID := b.Metrics.DeploymentId.String() + info, err := uploader.Upload(ctx, bundleID, snapshotID, acl, zipContent) if err != nil { return diag.FromErr(err) } diff --git a/bundle/deploy/state_update.go b/bundle/deploy/state_update.go index e9b2811f8b9..f7d057b3a5c 100644 --- a/bundle/deploy/state_update.go +++ b/bundle/deploy/state_update.go @@ -49,8 +49,11 @@ func (s *stateUpdate) Apply(ctx context.Context, b *bundle.Bundle) diag.Diagnost // Persist the snapshot path so destroy on a different machine can find it. state.SnapshotPath = b.Config.Workspace.SnapshotPath - // Generate a UUID for the deployment, if one does not already exist - if state.ID == uuid.Nil { + // Use the UUID already set by EnsureDeploymentID if available; otherwise + // generate a new one (covers paths that skip EnsureDeploymentID). + if b.Metrics.DeploymentId != uuid.Nil { + state.ID = b.Metrics.DeploymentId + } else if state.ID == uuid.Nil { state.ID = uuid.New() } b.Metrics.DeploymentId = state.ID diff --git a/bundle/permissions/workspace_root.go b/bundle/permissions/workspace_root.go index 5308262e5d6..307869621ea 100644 --- a/bundle/permissions/workspace_root.go +++ b/bundle/permissions/workspace_root.go @@ -30,7 +30,7 @@ func (*workspaceRootPermissions) Name() string { // Apply implements bundle.Mutator. func (*workspaceRootPermissions) Apply(ctx context.Context, b *bundle.Bundle) diag.Diagnostics { workspace := b.Config.Workspace - if b.Config.Bundle.Deployment.ImmutableFolder { + if b.Config.Experimental != nil && b.Config.Experimental.ImmutableFolder { // For immutable bundles, file_path and artifact_path point into content-addressed // snapshot storage that is not a normal workspace folder. 
Clear them so that // giveAccessForWorkspaceRoot only applies permissions to root_path (and the diff --git a/bundle/phases/build.go b/bundle/phases/build.go index c60db5235bc..90eb13a1c6f 100644 --- a/bundle/phases/build.go +++ b/bundle/phases/build.go @@ -50,7 +50,7 @@ func Build(ctx context.Context, b *bundle.Bundle) LibLocationMap { // For immutable bundles, library remote paths are set in the deploy phase // after snapshot.Upload() provides the content-addressed workspace.artifact_path. - if b.Config.Bundle.Deployment.ImmutableFolder { + if b.Config.Experimental != nil && b.Config.Experimental.ImmutableFolder { return nil } diff --git a/bundle/phases/deploy.go b/bundle/phases/deploy.go index 91ef78f5ae8..6002256e8f0 100644 --- a/bundle/phases/deploy.go +++ b/bundle/phases/deploy.go @@ -8,6 +8,7 @@ import ( "github.com/databricks/cli/bundle/artifacts" "github.com/databricks/cli/bundle/config" "github.com/databricks/cli/bundle/config/engine" + "github.com/databricks/cli/bundle/config/mutator" "github.com/databricks/cli/bundle/deploy" "github.com/databricks/cli/bundle/deploy/files" "github.com/databricks/cli/bundle/deploy/lock" @@ -148,14 +149,21 @@ func Deploy(ctx context.Context, b *bundle.Bundle, outputHandler sync.OutputHand bundle.ApplyContext(ctx, b, lock.Release(lock.GoalDeploy)) }() - if b.Config.Bundle.Deployment.ImmutableFolder { + if b.Config.Experimental != nil && b.Config.Experimental.ImmutableFolder && !engine.IsDirect() { + logdiag.LogError(ctx, errors.New("experimental.immutable_folder is only supported with the direct deployment engine")) + return + } + + if b.Config.Experimental != nil && b.Config.Experimental.ImmutableFolder { // Upload all source files and built artifacts as a single immutable snapshot. - // The API assigns a content-addressed workspace.file_path; snapshot.TranslateResourcePaths() - // then replaces the local absolute paths (written by translate_paths during validate) - // with the actual snapshot remote paths. 
+ // EnsureDeploymentID populates b.Metrics.DeploymentId (the lineage UUID) so + // Upload can use it as bundle_id. snapshot.Upload() then sets + // workspace.snapshot_path; the variable-resolution pass expands + // ${workspace.snapshot_path} placeholders written by translate_paths. bundle.ApplySeqContext(ctx, b, + deploy.EnsureDeploymentID(), snapshot.Upload(), - snapshot.TranslateResourcePaths(), + mutator.ResolveVariableReferencesOnlyResources("workspace"), ) if !logdiag.HasError(ctx) { _, libDiags := libraries.ReplaceWithRemotePath(ctx, b) @@ -171,7 +179,7 @@ func Deploy(ctx context.Context, b *bundle.Bundle, outputHandler sync.OutputHand return } - if !b.Config.Bundle.Deployment.ImmutableFolder { + if b.Config.Experimental == nil || !b.Config.Experimental.ImmutableFolder { bundle.ApplySeqContext(ctx, b, files.Upload(outputHandler)) if logdiag.HasError(ctx) { return diff --git a/bundle/phases/destroy.go b/bundle/phases/destroy.go index a08000123e0..415df0a1cfc 100644 --- a/bundle/phases/destroy.go +++ b/bundle/phases/destroy.go @@ -136,37 +136,30 @@ func Destroy(ctx context.Context, b *bundle.Bundle, engine engine.EngineType) { bundle.ApplyContext(ctx, b, lock.Release(lock.GoalDestroy)) }() - if b.Config.Bundle.Deployment.ImmutableFolder { - // Restore the snapshot path so that TranslateResourcePaths (for terraform, below) - // knows where the snapshot lives. Must run for both engines. - bundle.ApplyContext(ctx, b, snapshot.LoadState()) + if b.Config.Experimental != nil && b.Config.Experimental.ImmutableFolder { + // Restore snapshot_path from local (or remote) state so that the + // ${workspace.snapshot_path} placeholders written by translate_paths can be + // resolved before Terraform (or direct) processes the resource config. 
+ bundle.ApplySeqContext(ctx, b, + snapshot.LoadState(), + mutator.ResolveVariableReferencesOnlyResources("workspace"), + ) if logdiag.HasError(ctx) { return } } if !engine.IsDirect() { - mutators := []bundle.Mutator{ + bundle.ApplySeqContext(ctx, b, // We need to resolve artifact variable (how we do it in build phase) // because some of the to-be-destroyed resource might use this variable. // Not resolving might lead to terraform "Reference to undeclared resource" error mutator.ResolveVariableReferencesWithoutResources("artifacts"), mutator.ResolveVariableReferencesOnlyResources("artifacts"), - } - - if b.Config.Bundle.Deployment.ImmutableFolder { - // Resource paths are local absolute paths after translate_paths. Replace the - // local prefix with the snapshot remote path before Terraform processes the config. - mutators = append([]bundle.Mutator{snapshot.TranslateResourcePaths()}, mutators...) - } - - mutators = append(mutators, terraform.Interpolate(), terraform.Write(), terraform.Plan(terraform.PlanGoal("destroy")), ) - - bundle.ApplySeqContext(ctx, b, mutators...) } if logdiag.HasError(ctx) { diff --git a/bundle/phases/initialize.go b/bundle/phases/initialize.go index 4aa7f740027..0513c870139 100644 --- a/bundle/phases/initialize.go +++ b/bundle/phases/initialize.go @@ -71,7 +71,7 @@ func Initialize(ctx context.Context, b *bundle.Bundle) { mutator.ApplySourceLinkedDeploymentPreset(), // Reads (env): DATABRICKS_IMMUTABLE_FOLDER (non-empty value enables immutable folder mode) - // Updates (typed): b.Config.Bundle.Deployment.ImmutableFolder (forces to true when env var is set) + // Updates (typed): b.Config.Experimental.ImmutableFolder (forces to true when env var is set) // Allows running the full test suite against the immutable folder code path without // modifying any databricks.yml files. 
mutator.OverrideImmutableFolder(), diff --git a/cmd/bundle/run.go b/cmd/bundle/run.go index 59558c4e09f..e05da4be5ab 100644 --- a/cmd/bundle/run.go +++ b/cmd/bundle/run.go @@ -10,6 +10,7 @@ import ( "slices" "github.com/databricks/cli/bundle" + "github.com/databricks/cli/bundle/config/mutator" "github.com/databricks/cli/bundle/deploy/snapshot" "github.com/databricks/cli/bundle/env" "github.com/databricks/cli/bundle/resources" @@ -172,15 +173,13 @@ Example usage: return nil }, PostStateFunc: func(ctx context.Context, b *bundle.Bundle, stateDesc *statemgmt.StateDesc) error { - if b.Config.Bundle.Deployment.ImmutableFolder { - // Restore the snapshot path and rewrite resource paths from local - // absolute paths (set by translate_paths during Initialize) to the - // actual content-addressed snapshot paths. Without this, source_code_path - // for apps and other path fields remain as local filesystem paths, which - // the Databricks API rejects. + if b.Config.Experimental != nil && b.Config.Experimental.ImmutableFolder { + // Restore workspace.snapshot_path from local state so that the + // ${workspace.snapshot_path} placeholders written by translate_paths + // resolve to the actual content-addressed remote paths before running. 
bundle.ApplySeqContext(ctx, b, snapshot.LoadState(), - snapshot.TranslateResourcePaths(), + mutator.ResolveVariableReferencesOnlyResources("workspace"), ) if logdiag.HasError(ctx) { return root.ErrAlreadyPrinted From e26f08577afa483fb22b8bd94ab9bc6fe1f77579 Mon Sep 17 00:00:00 2001 From: Andrew Nester Date: Wed, 24 Jun 2026 13:10:07 +0200 Subject: [PATCH 21/31] addressed feedback --- .../bundle/deploy/immutable/out.plan.json | 62 +++++++ acceptance/bundle/deploy/immutable/output.txt | 152 +++--------------- .../bundle/deploy/immutable/pyproject.toml | 7 +- acceptance/bundle/deploy/immutable/script | 1 + acceptance/bundle/deploy/immutable/test.toml | 2 + bundle/bundle.go | 5 + .../resourcemutator/resource_mutator.go | 10 +- bundle/config/mutator/translate_paths.go | 2 +- bundle/deploy/ensure_deployment_id.go | 40 ----- bundle/deploy/snapshot/upload.go | 2 +- bundle/deploy/state_update.go | 6 +- bundle/direct/bundle_plan.go | 11 ++ bundle/internal/schema/annotations.yml | 6 +- bundle/permissions/workspace_root.go | 2 +- bundle/phases/build.go | 2 +- bundle/phases/deploy.go | 34 +++- bundle/phases/destroy.go | 18 +-- bundle/schema/jsonschema.json | 8 +- 18 files changed, 152 insertions(+), 218 deletions(-) create mode 100644 acceptance/bundle/deploy/immutable/out.plan.json delete mode 100644 bundle/deploy/ensure_deployment_id.go diff --git a/acceptance/bundle/deploy/immutable/out.plan.json b/acceptance/bundle/deploy/immutable/out.plan.json new file mode 100644 index 00000000000..846b794c926 --- /dev/null +++ b/acceptance/bundle/deploy/immutable/out.plan.json @@ -0,0 +1,62 @@ +{ + "plan_version": 2, + "cli_version": "[DEV_VERSION]", + "plan": { + "resources.jobs.my_job": { + "action": "create", + "new_state": { + "value": { + "deployment": { + "kind": "BUNDLE", + "metadata_file_path": "/Workspace/Users/[USERNAME]/.bundle/test-bundle-immutable-[UNIQUE_NAME]/default/state/metadata.json" + }, + "edit_mode": "UI_LOCKED", + "environments": [ + { + "environment_key": "env", 
+ "spec": { + "dependencies": [ + "dist/immutable-0.0.1-py3-none-any.whl" + ], + "environment_version": "4" + } + } + ], + "format": "MULTI_TASK", + "max_concurrent_runs": 1, + "name": "my job", + "queue": { + "enabled": true + }, + "tasks": [ + { + "notebook_task": { + "notebook_path": "${workspace.snapshot_path}/src/files/src/notebook" + }, + "task_key": "notebook_task" + }, + { + "environment_key": "env", + "python_wheel_task": { + "entry_point": "main", + "package_name": "immutable" + }, + "task_key": "python_wheel_task" + }, + { + "environment_key": "env", + "spark_python_task": { + "python_file": "${workspace.snapshot_path}/src/files/src/main.py" + }, + "task_key": "spark_python_task" + } + ] + }, + "vars": { + "tasks[0].notebook_task.notebook_path": "${workspace.snapshot_path}/src/files/src/notebook", + "tasks[2].spark_python_task.python_file": "${workspace.snapshot_path}/src/files/src/main.py" + } + } + } + } +} diff --git a/acceptance/bundle/deploy/immutable/output.txt b/acceptance/bundle/deploy/immutable/output.txt index 4ab992b4bbb..e7dee7f2537 100644 --- a/acceptance/bundle/deploy/immutable/output.txt +++ b/acceptance/bundle/deploy/immutable/output.txt @@ -8,142 +8,32 @@ Workspace: Validation OK! ->>> [CLI] bundle deploy +>>> [CLI] bundle plan -o json Building python_artifact... -Error: build failed python_artifact, error: exit status 2, output: Building wheel... 
- × Failed to build - │ `[TEST_TMP_DIR]` - ├─▶ Failed to resolve requirements from `build-system.requires` - ├─▶ No solution found when resolving: `hatchling` - ╰─▶ Because only the following versions of hatchling are available: - hatchling==0.8.0 - hatchling==0.8.1 - hatchling==0.8.2 - hatchling==0.9.0 - hatchling==0.10.0 - hatchling==0.11.0 - hatchling==0.11.1 - hatchling==0.11.2 - hatchling==0.11.3 - hatchling==0.12.0 - hatchling==0.13.0 - hatchling==0.14.0 - hatchling==0.15.0 - hatchling==0.16.0 - hatchling==0.17.0 - hatchling==0.18.0 - hatchling==0.19.0 - hatchling==0.20.0 - hatchling==0.20.1 - hatchling==0.21.0 - hatchling==0.21.1 - hatchling==0.22.0 - hatchling==0.23.0 - hatchling==0.24.0 - hatchling==0.25.0 - hatchling==0.25.1 - hatchling==1.0.0 - hatchling==1.1.0 - hatchling==1.2.0 - hatchling==1.3.0 - hatchling==1.3.1 - hatchling==1.4.0 - hatchling==1.4.1 - hatchling==1.5.0 - hatchling==1.6.0 - hatchling==1.7.0 - hatchling==1.7.1 - hatchling==1.8.0 - hatchling==1.8.1 - hatchling==1.9.0 - hatchling==1.10.0 - hatchling==1.11.0 - hatchling==1.11.1 - hatchling==1.12.0 - hatchling==1.12.1 - hatchling==1.12.2 - hatchling==1.13.0 - hatchling==1.14.0 - hatchling==1.14.1 - hatchling==1.15.0 - hatchling==1.16.0 - hatchling==1.16.1 - hatchling==1.17.0 - hatchling==1.17.1 - hatchling==1.18.0 - hatchling==1.19.0 - hatchling==1.19.1 - hatchling==1.20.0 - hatchling==1.21.0 - hatchling==1.21.1 - hatchling==1.22.0 - hatchling==1.22.1 - hatchling==1.22.2 - hatchling==1.22.3 - hatchling==1.22.4 - hatchling==1.22.5 - hatchling==1.23.0 - hatchling==1.24.0 - hatchling==1.24.1 - hatchling==1.24.2 - hatchling==1.25.0 - hatchling==1.26.0 - hatchling==1.26.1 - hatchling==1.26.2 - hatchling==1.26.3 - hatchling==1.27.0 - hatchling==1.28.0 - hatchling==1.29.0 - and hatchling<=1.3.1 needs to be downloaded from a registry, we can - conclude that hatchling<=1.3.1 cannot be used. 
- And because hatchling==1.4.0 was yanked (reason: Building wheels from - sdists is broken), we can conclude that hatchling<1.4.1 cannot be used. - And because hatchling>=1.4.1,<=1.19.0 needs to be downloaded - from a registry and hatchling==1.19.1 was yanked (reason: - https://github.com/pypa/hatch/issues/1129), we can conclude that - hatchling<1.20.0 cannot be used. - And because hatchling>=1.20.0,<=1.21.1 needs to be downloaded from - a registry and hatchling>=1.22.0,<=1.22.1 was yanked (reason: Broken - builds from sdists), we can conclude that hatchling>=1.22.0,<=1.22.1 - cannot be used. - And because hatchling>=1.22.2,<=1.25.0 needs to be downloaded from a - registry and hatchling==1.26.0 was yanked (reason: Incompatible with - currently released Hatch), we can conclude that hatchling<1.26.1 cannot - be used. - And because hatchling>=1.26.1,<=1.26.2 was yanked (reason: Upload - issues) and hatchling>=1.26.3,<=1.28.0 needs to be downloaded from a - registry, we can conclude that hatchling>=1.26.3,<=1.28.0 cannot be - used. (1) - Because only the following versions of tomli{python_full_version < - '3.11'} are available: - tomli{python_full_version < '3.11'}<=1.2.2 - tomli{python_full_version < '3.11'}==1.2.3 - tomli{python_full_version < '3.11'}==2.0.0 - tomli{python_full_version < '3.11'}==2.0.1 - tomli{python_full_version < '3.11'}==2.0.2 - tomli{python_full_version < '3.11'}==2.1.0 - tomli{python_full_version < '3.11'}==2.2.1 - tomli{python_full_version < '3.11'}==2.3.0 - tomli{python_full_version < '3.11'}==2.3.1 - tomli{python_full_version < '3.11'}==2.4.0 - tomli{python_full_version < '3.11'}==2.4.1 - and tomli>=1.2.2 needs to be downloaded from a registry, we can conclude - that tomli{python_full_version < '3.11'}>=1.2.2,<1.2.3 cannot be used. - And because hatchling==1.29.0 depends on tomli{python_full_version < - '3.11'}>=1.2.2, we can conclude that hatchling==1.29.0 cannot be used. 
- And because we know from (1) that hatchling>=1.26.3,<=1.28.0 cannot be - used, we can conclude that all versions of hatchling cannot be used. - And because you require hatchling, we can conclude that your - requirements are unsatisfiable. +>>> [CLI] bundle deploy +Building python_artifact... +Uploading immutable bundle snapshot... +Deploying resources... +Updating deployment state... +Deployment complete! - hint: Packages were unavailable because the network was disabled. When - the network is disabled, registry packages may only be read from the - cache. +>>> [CLI] jobs get [NUMID] +"/Workspace/Users/[UUID]/.snapshots/[UUID]/[SNAPSHOT_HASH]/src/files/src/main.py" +>>> [CLI] jobs get [NUMID] +"/Workspace/Users/[UUID]/.snapshots/[UUID]/[SNAPSHOT_HASH]/src/files/src/notebook" +>>> [CLI] jobs get [NUMID] +[ + "/Workspace/Users/[UUID]/.snapshots/[UUID]/[SNAPSHOT_HASH]/src/artifacts/.internal/immutable-0.0.1-py3-none-any.whl" +] >>> [CLI] bundle destroy --auto-approve -No active deployment found to destroy! +The following resources will be deleted: + delete resources.jobs.my_job + +All files and directories at the following location will be deleted: /Workspace/Users/[USERNAME]/.bundle/test-bundle-immutable-[UNIQUE_NAME]/default -Exit code: 1 +Deleting files... +Destroy complete! 
diff --git a/acceptance/bundle/deploy/immutable/pyproject.toml b/acceptance/bundle/deploy/immutable/pyproject.toml index 3e49b180137..4e796c3f93a 100644 --- a/acceptance/bundle/deploy/immutable/pyproject.toml +++ b/acceptance/bundle/deploy/immutable/pyproject.toml @@ -24,8 +24,11 @@ dev = [ main = "immutable.main:main" [build-system] -requires = ["hatchling"] -build-backend = "hatchling.build" +requires = ["setuptools>=40.8.0"] +build-backend = "setuptools.build_meta" + +[tool.setuptools.packages.find] +where = ["src"] [tool.ruff] line-length = 120 diff --git a/acceptance/bundle/deploy/immutable/script b/acceptance/bundle/deploy/immutable/script index d1d60d53265..806eebb7755 100644 --- a/acceptance/bundle/deploy/immutable/script +++ b/acceptance/bundle/deploy/immutable/script @@ -5,6 +5,7 @@ cleanup() { trap cleanup EXIT trace $CLI bundle validate +trace $CLI bundle plan -o json > out.plan.json trace $CLI bundle deploy diff --git a/acceptance/bundle/deploy/immutable/test.toml b/acceptance/bundle/deploy/immutable/test.toml index 1654d9472c8..410adac7245 100644 --- a/acceptance/bundle/deploy/immutable/test.toml +++ b/acceptance/bundle/deploy/immutable/test.toml @@ -6,11 +6,13 @@ EnvMatrix.DATABRICKS_BUNDLE_ENGINE = ["direct"] Ignore = [ "dist", + "build", "databricks.yml", ".databricks", ".venv", "script", "*.pyc", + "src/*.egg-info", ] [[Repls]] diff --git a/bundle/bundle.go b/bundle/bundle.go index a471a5b9b2e..53332f73bdb 100644 --- a/bundle/bundle.go +++ b/bundle/bundle.go @@ -379,3 +379,8 @@ func (b *Bundle) StateFilenameTerraform(ctx context.Context) (string, string) { func (b *Bundle) StateFilenameConfigSnapshot(ctx context.Context) (string, string) { return configSnapshotFilename, filepath.ToSlash(filepath.Join(b.GetLocalStateDir(ctx), configSnapshotFilename)) } + +// IsImmutableFolder reports whether experimental.immutable_folder is enabled. 
+func (b *Bundle) IsImmutableFolder() bool { + return b.Config.Experimental != nil && b.Config.Experimental.ImmutableFolder +} diff --git a/bundle/config/mutator/resourcemutator/resource_mutator.go b/bundle/config/mutator/resourcemutator/resource_mutator.go index 21c13bb906a..4a552a63310 100644 --- a/bundle/config/mutator/resourcemutator/resource_mutator.go +++ b/bundle/config/mutator/resourcemutator/resource_mutator.go @@ -135,13 +135,15 @@ func applyInitializeMutators(ctx context.Context, b *bundle.Bundle) { // immutableExcludingResolver returns a variable reference resolver for the resources // section. When experimental.immutable_folder is enabled it excludes -// workspace.file_path and workspace.artifact_path from resolution: those paths are -// updated to the snapshot location by snapshot.Upload() in the Deploy phase, so +// workspace.file_path, workspace.artifact_path, and workspace.snapshot_path from +// resolution: those paths are set by snapshot.Upload() in the Deploy phase, so // resolving them here would freeze them to the default bundle path instead. +// workspace.snapshot_path is also excluded so it stays as a literal ${...} template +// in the plan output (making the pre-upload intent visible). 
func immutableExcludingResolver(b *bundle.Bundle) bundle.Mutator { - if b.Config.Experimental != nil && b.Config.Experimental.ImmutableFolder { + if b.IsImmutableFolder() { return mutator.ResolveVariableReferencesOnlyResourcesExcluding( - []string{"workspace.file_path", "workspace.artifact_path"}, + []string{"workspace.file_path", "workspace.artifact_path", "workspace.snapshot_path"}, ) } return mutator.ResolveVariableReferencesOnlyResources() diff --git a/bundle/config/mutator/translate_paths.go b/bundle/config/mutator/translate_paths.go index 8945d20ac47..d50cdbf3060 100644 --- a/bundle/config/mutator/translate_paths.go +++ b/bundle/config/mutator/translate_paths.go @@ -321,7 +321,7 @@ func (t *translateContext) rewriteValue(ctx context.Context, p dyn.Path, v dyn.V func applyTranslations(ctx context.Context, b *bundle.Bundle, t *translateContext, translations []func(context.Context, dyn.Value) (dyn.Value, error)) diag.Diagnostics { switch { - case b.Config.Experimental != nil && b.Config.Experimental.ImmutableFolder: + case b.IsImmutableFolder(): // Reject an explicit workspace.file_path: immutable bundles control that path // automatically (it is set to the content-addressed snapshot location after upload). // A user-supplied value would be silently discarded, so we error early instead. diff --git a/bundle/deploy/ensure_deployment_id.go b/bundle/deploy/ensure_deployment_id.go deleted file mode 100644 index 883b71cfdd8..00000000000 --- a/bundle/deploy/ensure_deployment_id.go +++ /dev/null @@ -1,40 +0,0 @@ -package deploy - -import ( - "context" - - "github.com/databricks/cli/bundle" - "github.com/databricks/cli/libs/diag" - "github.com/google/uuid" -) - -type ensureDeploymentID struct{} - -// EnsureDeploymentID loads the local deployment state and ensures -// b.Metrics.DeploymentId is populated before the snapshot upload. -// snapshot.Upload uses it as the bundle_id so the snapshot is keyed to this -// deployment lineage rather than to the bundle name. 
-// StateUpdate reads the same field back and persists it to disk. -func EnsureDeploymentID() bundle.Mutator { - return &ensureDeploymentID{} -} - -func (*ensureDeploymentID) Name() string { return "deploy:ensure-deployment-id" } - -func (*ensureDeploymentID) Apply(ctx context.Context, b *bundle.Bundle) diag.Diagnostics { - // Already set (e.g. by a prior call in the same session). - if b.Metrics.DeploymentId != uuid.Nil { - return nil - } - - state, err := load(ctx, b) - if err != nil { - return diag.FromErr(err) - } - - if state.ID == uuid.Nil { - state.ID = uuid.New() - } - b.Metrics.DeploymentId = state.ID - return nil -} diff --git a/bundle/deploy/snapshot/upload.go b/bundle/deploy/snapshot/upload.go index 9788fa49658..150bb663227 100644 --- a/bundle/deploy/snapshot/upload.go +++ b/bundle/deploy/snapshot/upload.go @@ -50,7 +50,7 @@ func (m *snapshotUpload) Apply(ctx context.Context, b *bundle.Bundle) diag.Diagn // Use the deployment lineage UUID as bundle_id so the snapshot directory is // keyed to this specific deployment (not to the bundle name, which can be // reused across unrelated deployments). - bundleID := b.Metrics.DeploymentId.String() + bundleID := b.DeploymentBundle.StateDB.GetOrInitLineage() info, err := uploader.Upload(ctx, bundleID, snapshotID, acl, zipContent) if err != nil { return diag.FromErr(err) diff --git a/bundle/deploy/state_update.go b/bundle/deploy/state_update.go index f7d057b3a5c..7b067f434db 100644 --- a/bundle/deploy/state_update.go +++ b/bundle/deploy/state_update.go @@ -49,11 +49,7 @@ func (s *stateUpdate) Apply(ctx context.Context, b *bundle.Bundle) diag.Diagnost // Persist the snapshot path so destroy on a different machine can find it. state.SnapshotPath = b.Config.Workspace.SnapshotPath - // Use the UUID already set by EnsureDeploymentID if available; otherwise - // generate a new one (covers paths that skip EnsureDeploymentID). 
- if b.Metrics.DeploymentId != uuid.Nil { - state.ID = b.Metrics.DeploymentId - } else if state.ID == uuid.Nil { + if state.ID == uuid.Nil { state.ID = uuid.New() } b.Metrics.DeploymentId = state.ID diff --git a/bundle/direct/bundle_plan.go b/bundle/direct/bundle_plan.go index d890b8d5d7b..b25678e251e 100644 --- a/bundle/direct/bundle_plan.go +++ b/bundle/direct/bundle_plan.go @@ -658,6 +658,12 @@ func splitResourcePath(path *structpath.PathNode) (string, *structpath.PathNode) } func (b *DeploymentBundle) LookupReferencePreDeploy(ctx context.Context, path *structpath.PathNode) (any, error) { + // ${workspace.snapshot_path} is resolved by the mutator pipeline after + // snapshot.Upload() — not by the direct engine. Return errDelayed so the + // template string is preserved in the plan output rather than causing an error. + if path.String() == "workspace.snapshot_path" { + return nil, errDelayed + } targetResourceKey, fieldPath := splitResourcePath(path) targetGroup := config.GetResourceTypeFromKey(targetResourceKey) @@ -967,6 +973,11 @@ func (b *DeploymentBundle) makePlan(ctx context.Context, configRoot *config.Root targetNodeDP, _ := config.GetNodeAndType(targetPathParsed) targetNode := targetNodeDP.String() + // Skip non-resource references (e.g. ${workspace.snapshot_path}). + // They are not resource cross-references and don't create DAG edges. + if targetNode == "" { + continue + } fullRef := "${" + targetPath + "}" diff --git a/bundle/internal/schema/annotations.yml b/bundle/internal/schema/annotations.yml index 133b3786231..a868bdc98f7 100644 --- a/bundle/internal/schema/annotations.yml +++ b/bundle/internal/schema/annotations.yml @@ -89,9 +89,6 @@ bundle: "fail_on_active_runs": "description": |- Whether to fail on active runs. If this is set to true a deployment that is running can be interrupted. - "immutable_folder": - "description": |- - Whether to deploy bundle files and artifacts as a single immutable snapshot. 
When true, all files are packaged into a content-addressed archive and workspace.file_path and workspace.artifact_path are set to the resulting location. The validate and plan commands make no mutative API calls when this is enabled. "lock": "description": |- The deployment lock attributes. @@ -142,6 +139,9 @@ experimental: "description": |- Defines attributes for experimental features. "$fields": + "immutable_folder": + "description": |- + Whether to deploy bundle files and artifacts as a single immutable snapshot. When true, all files are packaged into a content-addressed archive and workspace.file_path and workspace.artifact_path are set to the resulting location. "pydabs": "description": |- The PyDABs configuration. diff --git a/bundle/permissions/workspace_root.go b/bundle/permissions/workspace_root.go index 307869621ea..3d130d2c925 100644 --- a/bundle/permissions/workspace_root.go +++ b/bundle/permissions/workspace_root.go @@ -30,7 +30,7 @@ func (*workspaceRootPermissions) Name() string { // Apply implements bundle.Mutator. func (*workspaceRootPermissions) Apply(ctx context.Context, b *bundle.Bundle) diag.Diagnostics { workspace := b.Config.Workspace - if b.Config.Experimental != nil && b.Config.Experimental.ImmutableFolder { + if b.IsImmutableFolder() { // For immutable bundles, file_path and artifact_path point into content-addressed // snapshot storage that is not a normal workspace folder. Clear them so that // giveAccessForWorkspaceRoot only applies permissions to root_path (and the diff --git a/bundle/phases/build.go b/bundle/phases/build.go index 90eb13a1c6f..db376e07e28 100644 --- a/bundle/phases/build.go +++ b/bundle/phases/build.go @@ -50,7 +50,7 @@ func Build(ctx context.Context, b *bundle.Bundle) LibLocationMap { // For immutable bundles, library remote paths are set in the deploy phase // after snapshot.Upload() provides the content-addressed workspace.artifact_path. 
- if b.Config.Experimental != nil && b.Config.Experimental.ImmutableFolder { + if b.IsImmutableFolder() { return nil } diff --git a/bundle/phases/deploy.go b/bundle/phases/deploy.go index 6002256e8f0..71b818f7e8a 100644 --- a/bundle/phases/deploy.go +++ b/bundle/phases/deploy.go @@ -149,19 +149,17 @@ func Deploy(ctx context.Context, b *bundle.Bundle, outputHandler sync.OutputHand bundle.ApplyContext(ctx, b, lock.Release(lock.GoalDeploy)) }() - if b.Config.Experimental != nil && b.Config.Experimental.ImmutableFolder && !engine.IsDirect() { + immutable := b.IsImmutableFolder() + if immutable && !engine.IsDirect() { logdiag.LogError(ctx, errors.New("experimental.immutable_folder is only supported with the direct deployment engine")) return } - if b.Config.Experimental != nil && b.Config.Experimental.ImmutableFolder { + if immutable { // Upload all source files and built artifacts as a single immutable snapshot. - // EnsureDeploymentID populates b.Metrics.DeploymentId (the lineage UUID) so - // Upload can use it as bundle_id. snapshot.Upload() then sets - // workspace.snapshot_path; the variable-resolution pass expands - // ${workspace.snapshot_path} placeholders written by translate_paths. + // snapshot.Upload() sets workspace.snapshot_path; the variable-resolution + // pass expands ${workspace.snapshot_path} placeholders written by translate_paths. 
bundle.ApplySeqContext(ctx, b, - deploy.EnsureDeploymentID(), snapshot.Upload(), mutator.ResolveVariableReferencesOnlyResources("workspace"), ) @@ -179,7 +177,7 @@ func Deploy(ctx context.Context, b *bundle.Bundle, outputHandler sync.OutputHand return } - if b.Config.Experimental == nil || !b.Config.Experimental.ImmutableFolder { + if !immutable { bundle.ApplySeqContext(ctx, b, files.Upload(outputHandler)) if logdiag.HasError(ctx) { return @@ -256,6 +254,26 @@ func Deploy(ctx context.Context, b *bundle.Bundle, outputHandler sync.OutputHand func RunPlan(ctx context.Context, b *bundle.Bundle, engine engine.EngineType) *deployplan.Plan { if engine.IsDirect() { + // When planning in immutable mode, ${workspace.snapshot_path} placeholders + // written by translate_paths must be resolved before CalculatePlan parses + // the resource DAG. If snapshot.Upload() already ran (Deploy calls RunPlan + // after uploading), SnapshotPath is set and this is a no-op. When called + // standalone (bundle plan), we load the previous snapshot path from state. + // If no state exists yet (first deploy), SnapshotPath stays empty and + // ${workspace.snapshot_path} is left as a literal template for CalculatePlan + // to preserve in the plan output. 
+ if b.IsImmutableFolder() && b.Config.Workspace.SnapshotPath == "" { + bundle.ApplySeqContext(ctx, b, snapshot.LoadState()) + if logdiag.HasError(ctx) { + return nil + } + if b.Config.Workspace.SnapshotPath != "" { + bundle.ApplySeqContext(ctx, b, mutator.ResolveVariableReferencesOnlyResources("workspace")) + if logdiag.HasError(ctx) { + return nil + } + } + } plan, err := b.DeploymentBundle.CalculatePlan(ctx, b.WorkspaceClient(ctx), &b.Config) if err != nil { logdiag.LogError(ctx, err) diff --git a/bundle/phases/destroy.go b/bundle/phases/destroy.go index 415df0a1cfc..a06471f496d 100644 --- a/bundle/phases/destroy.go +++ b/bundle/phases/destroy.go @@ -10,7 +10,6 @@ import ( "github.com/databricks/cli/bundle/config/mutator" "github.com/databricks/cli/bundle/deploy/files" "github.com/databricks/cli/bundle/deploy/lock" - "github.com/databricks/cli/bundle/deploy/snapshot" "github.com/databricks/cli/bundle/deploy/terraform" "github.com/databricks/cli/bundle/deployplan" "github.com/databricks/cli/bundle/direct" @@ -107,9 +106,7 @@ func destroyCore(ctx context.Context, b *bundle.Bundle, plan *deployplan.Plan, e return } - if !logdiag.HasError(ctx) { - cmdio.LogString(ctx, "Destroy complete!") - } + cmdio.LogString(ctx, "Destroy complete!") } // The destroy phase deletes artifacts and resources. @@ -136,19 +133,6 @@ func Destroy(ctx context.Context, b *bundle.Bundle, engine engine.EngineType) { bundle.ApplyContext(ctx, b, lock.Release(lock.GoalDestroy)) }() - if b.Config.Experimental != nil && b.Config.Experimental.ImmutableFolder { - // Restore snapshot_path from local (or remote) state so that the - // ${workspace.snapshot_path} placeholders written by translate_paths can be - // resolved before Terraform (or direct) processes the resource config. 
- bundle.ApplySeqContext(ctx, b, - snapshot.LoadState(), - mutator.ResolveVariableReferencesOnlyResources("workspace"), - ) - if logdiag.HasError(ctx) { - return - } - } - if !engine.IsDirect() { bundle.ApplySeqContext(ctx, b, // We need to resolve artifact variable (how we do it in build phase) diff --git a/bundle/schema/jsonschema.json b/bundle/schema/jsonschema.json index 33b39af71e3..1bbbd3f7246 100644 --- a/bundle/schema/jsonschema.json +++ b/bundle/schema/jsonschema.json @@ -2727,10 +2727,6 @@ "description": "Whether to fail on active runs. If this is set to true a deployment that is running can be interrupted.", "$ref": "#/$defs/bool" }, - "immutable_folder": { - "description": "Whether to deploy bundle files and artifacts as a single immutable snapshot. When true, all files are packaged into a content-addressed archive and workspace.file_path and workspace.artifact_path are set to the resulting location. The validate and plan commands make no mutative API calls when this is enabled.", - "$ref": "#/$defs/bool" - }, "lock": { "description": "The deployment lock attributes.", "$ref": "#/$defs/github.com/databricks/cli/bundle/config.Lock" @@ -2749,6 +2745,10 @@ { "type": "object", "properties": { + "immutable_folder": { + "description": "Whether to deploy bundle files and artifacts as a single immutable snapshot. 
When true, all files are packaged into a content-addressed archive and workspace.file_path and workspace.artifact_path are set to the resulting location.", + "$ref": "#/$defs/bool" + }, "pydabs": { "description": "The PyDABs configuration.", "$ref": "#/$defs/github.com/databricks/cli/bundle/config.PyDABs", From e2a5b2a9e6125d8767152236cc4cad60f63c25ca Mon Sep 17 00:00:00 2001 From: Andrew Nester Date: Wed, 24 Jun 2026 15:26:19 +0200 Subject: [PATCH 22/31] simplify and clean up --- .../deploy/immutable-no-artifacts/test.toml | 2 +- acceptance/bundle/deploy/immutable/test.toml | 2 +- .../resources/apps/immutable/output.txt | 2 +- .../bundle/resources/apps/immutable/test.toml | 2 +- bundle/config/workspace.go | 6 +- bundle/deploy/metadata/compute.go | 1 - bundle/deploy/snapshot/state.go | 89 ------------------- bundle/deploy/state.go | 4 - bundle/deploy/state_update.go | 3 - bundle/direct/bundle_plan.go | 6 +- bundle/metadata/metadata.go | 3 - bundle/phases/deploy.go | 20 ----- bundle/phases/destroy.go | 7 +- cmd/bundle/run.go | 15 ---- cmd/bundle/utils/process.go | 1 + 15 files changed, 14 insertions(+), 149 deletions(-) delete mode 100644 bundle/deploy/snapshot/state.go diff --git a/acceptance/bundle/deploy/immutable-no-artifacts/test.toml b/acceptance/bundle/deploy/immutable-no-artifacts/test.toml index 22d576a39b9..9b345bc3395 100644 --- a/acceptance/bundle/deploy/immutable-no-artifacts/test.toml +++ b/acceptance/bundle/deploy/immutable-no-artifacts/test.toml @@ -1,5 +1,5 @@ Local = true -Cloud = true +Cloud = false # Temporary disable cloud tests until the API is fully available RecordRequests = true # immutable_folder only works with the direct engine. 
diff --git a/acceptance/bundle/deploy/immutable/test.toml b/acceptance/bundle/deploy/immutable/test.toml index 410adac7245..45dffe2d2d7 100644 --- a/acceptance/bundle/deploy/immutable/test.toml +++ b/acceptance/bundle/deploy/immutable/test.toml @@ -1,5 +1,5 @@ Local = true -Cloud = true +Cloud = false # Temporary disable cloud tests until the API is fully available # immutable_folder only works with the direct engine. EnvMatrix.DATABRICKS_BUNDLE_ENGINE = ["direct"] diff --git a/acceptance/bundle/resources/apps/immutable/output.txt b/acceptance/bundle/resources/apps/immutable/output.txt index 76fc42f91cd..6ba6818e2cc 100644 --- a/acceptance/bundle/resources/apps/immutable/output.txt +++ b/acceptance/bundle/resources/apps/immutable/output.txt @@ -37,6 +37,6 @@ You can access the app at my-immutable-app-123.cloud.databricksapps.com "path": "/api/2.0/apps/my-immutable-app/deployments", "body": { "mode": "SNAPSHOT", - "source_code_path": "/Workspace/Users/[UUID]/.snapshots/[UUID]/[SNAPSHOT_HASH]/src/files/app" + "source_code_path": "${workspace.snapshot_path}/src/files/app" } } diff --git a/acceptance/bundle/resources/apps/immutable/test.toml b/acceptance/bundle/resources/apps/immutable/test.toml index 680263f56d1..005616c8df6 100644 --- a/acceptance/bundle/resources/apps/immutable/test.toml +++ b/acceptance/bundle/resources/apps/immutable/test.toml @@ -1,5 +1,5 @@ Local = true -Cloud = false +Cloud = false # Temporary disable cloud tests until the API is fully available RecordRequests = true # immutable_folder only works with the direct engine. diff --git a/bundle/config/workspace.go b/bundle/config/workspace.go index 81b3837f044..8b869213435 100644 --- a/bundle/config/workspace.go +++ b/bundle/config/workspace.go @@ -80,9 +80,9 @@ type Workspace struct { StatePath string `json:"state_path,omitempty"` // SnapshotPath is the workspace path of the immutable snapshot uploaded during - // deployment. 
It is set by snapshot.Upload() and persisted to local state so - // that snapshot.LoadState() can restore workspace.snapshot_path for destroy/run. - // Only populated for bundles with experimental.immutable_folder = true. + // deployment. Set by snapshot.Upload() and used by the subsequent variable-resolution + // pass to expand ${workspace.snapshot_path} placeholders in resource configs. + // Only populated at runtime for bundles with experimental.immutable_folder = true. SnapshotPath string `json:"snapshot_path,omitempty" bundle:"internal"` } diff --git a/bundle/deploy/metadata/compute.go b/bundle/deploy/metadata/compute.go index 08a45d7a17c..cb7be9811c4 100644 --- a/bundle/deploy/metadata/compute.go +++ b/bundle/deploy/metadata/compute.go @@ -105,7 +105,6 @@ func (m *compute) Apply(ctx context.Context, b *bundle.Bundle) diag.Diagnostics // Set file upload destination of the bundle in metadata b.Metadata.Config.Workspace.FilePath = b.Config.Workspace.FilePath - b.Metadata.Config.Workspace.SnapshotPath = b.Config.Workspace.SnapshotPath // In source-linked deployment files are not copied and resources use source files, therefore we use sync path as file path in metadata if config.IsExplicitlyEnabled(b.Config.Presets.SourceLinkedDeployment) { b.Metadata.Config.Workspace.FilePath = b.SyncRootPath diff --git a/bundle/deploy/snapshot/state.go b/bundle/deploy/snapshot/state.go deleted file mode 100644 index 981efada289..00000000000 --- a/bundle/deploy/snapshot/state.go +++ /dev/null @@ -1,89 +0,0 @@ -package snapshot - -import ( - "context" - "encoding/json" - "errors" - "io/fs" - "os" - "path" - "path/filepath" - - "github.com/databricks/cli/bundle" - "github.com/databricks/cli/bundle/deploy" - "github.com/databricks/cli/libs/diag" - "github.com/databricks/cli/libs/filer" -) - -type loadState struct{} - -// LoadState reads workspace.snapshot_path from the local deployment.json and -// sets the snapshot-derived workspace paths. 
Missing or empty state is treated -// as a no-op so destroy can proceed against bundles deployed before this -// feature was added. -func LoadState() bundle.Mutator { - return &loadState{} -} - -func (s *loadState) Name() string { return "snapshot.LoadState" } - -func (s *loadState) Apply(ctx context.Context, b *bundle.Bundle) diag.Diagnostics { - localPath := filepath.Join(b.GetLocalStateDir(ctx), deploy.DeploymentStateFileName) - data, err := os.ReadFile(localPath) - if err != nil && !errors.Is(err, fs.ErrNotExist) { - return diag.FromErr(err) - } - - if err == nil { - var state struct { - SnapshotPath string `json:"snapshot_path"` - } - if jsonErr := json.Unmarshal(data, &state); jsonErr != nil { - return diag.FromErr(jsonErr) - } - if state.SnapshotPath != "" { - applySnapshotPath(b, state.SnapshotPath) - return nil - } - } - - // Local deployment.json is missing or was from a non-immutable deploy — fall - // back to the remote copy so destroy works on a fresh clone or a different machine. - return s.loadFromRemote(ctx, b) -} - -func (s *loadState) loadFromRemote(ctx context.Context, b *bundle.Bundle) diag.Diagnostics { - f, err := filer.NewWorkspaceFilesClient(b.WorkspaceClient(ctx), b.Config.Workspace.StatePath) - if err != nil { - return diag.FromErr(err) - } - - r, err := f.Read(ctx, deploy.DeploymentStateFileName) - if errors.Is(err, fs.ErrNotExist) { - return nil - } - if err != nil { - return diag.FromErr(err) - } - defer r.Close() - - var state struct { - SnapshotPath string `json:"snapshot_path"` - } - if err := json.NewDecoder(r).Decode(&state); err != nil { - return diag.FromErr(err) - } - - if state.SnapshotPath != "" { - applySnapshotPath(b, state.SnapshotPath) - } - return nil -} - -func applySnapshotPath(b *bundle.Bundle, snapshotPath string) { - b.Config.Workspace.SnapshotPath = snapshotPath - // Restore FilePath and ArtifactPath for other callers (permissions checks, etc.). 
- // The resource paths themselves are resolved later via ResolveVariableReferencesOnlyResources("workspace"). - b.Config.Workspace.FilePath = path.Join(snapshotPath, "src", "files") - b.Config.Workspace.ArtifactPath = path.Join(snapshotPath, "src", "artifacts") -} diff --git a/bundle/deploy/state.go b/bundle/deploy/state.go index bf60d0959fe..5d99748fb71 100644 --- a/bundle/deploy/state.go +++ b/bundle/deploy/state.go @@ -53,10 +53,6 @@ type DeploymentState struct { // UUID uniquely identifying the deployment. ID uuid.UUID `json:"id"` - - // SnapshotPath is the remote content-addressed path for immutable folder deployments. - // Empty for non-immutable deployments. - SnapshotPath string `json:"snapshot_path,omitempty"` } // We use this entry type as a proxy to fs.DirEntry. diff --git a/bundle/deploy/state_update.go b/bundle/deploy/state_update.go index 7b067f434db..fbf9f26a7ab 100644 --- a/bundle/deploy/state_update.go +++ b/bundle/deploy/state_update.go @@ -46,9 +46,6 @@ func (s *stateUpdate) Apply(ctx context.Context, b *bundle.Bundle) diag.Diagnost } state.Files = fl - // Persist the snapshot path so destroy on a different machine can find it. - state.SnapshotPath = b.Config.Workspace.SnapshotPath - if state.ID == uuid.Nil { state.ID = uuid.New() } diff --git a/bundle/direct/bundle_plan.go b/bundle/direct/bundle_plan.go index b25678e251e..b3b8d66cba3 100644 --- a/bundle/direct/bundle_plan.go +++ b/bundle/direct/bundle_plan.go @@ -973,9 +973,9 @@ func (b *DeploymentBundle) makePlan(ctx context.Context, configRoot *config.Root targetNodeDP, _ := config.GetNodeAndType(targetPathParsed) targetNode := targetNodeDP.String() - // Skip non-resource references (e.g. ${workspace.snapshot_path}). - // They are not resource cross-references and don't create DAG edges. - if targetNode == "" { + // ${workspace.snapshot_path} is resolved by the mutator pipeline after + // snapshot.Upload(), not by the direct engine — skip it here. 
+ if targetPath == "workspace.snapshot_path" { continue } diff --git a/bundle/metadata/metadata.go b/bundle/metadata/metadata.go index e5549472696..1c61cb093f0 100644 --- a/bundle/metadata/metadata.go +++ b/bundle/metadata/metadata.go @@ -15,9 +15,6 @@ type Bundle struct { type Workspace struct { FilePath string `json:"file_path"` - // SnapshotPath is the workspace path of the immutable snapshot uploaded - // during deployment. Only populated for bundles with bundle.immutable = true. - SnapshotPath string `json:"snapshot_path,omitempty"` } type Resource struct { diff --git a/bundle/phases/deploy.go b/bundle/phases/deploy.go index 71b818f7e8a..7a906be8b5b 100644 --- a/bundle/phases/deploy.go +++ b/bundle/phases/deploy.go @@ -254,26 +254,6 @@ func Deploy(ctx context.Context, b *bundle.Bundle, outputHandler sync.OutputHand func RunPlan(ctx context.Context, b *bundle.Bundle, engine engine.EngineType) *deployplan.Plan { if engine.IsDirect() { - // When planning in immutable mode, ${workspace.snapshot_path} placeholders - // written by translate_paths must be resolved before CalculatePlan parses - // the resource DAG. If snapshot.Upload() already ran (Deploy calls RunPlan - // after uploading), SnapshotPath is set and this is a no-op. When called - // standalone (bundle plan), we load the previous snapshot path from state. - // If no state exists yet (first deploy), SnapshotPath stays empty and - // ${workspace.snapshot_path} is left as a literal template for CalculatePlan - // to preserve in the plan output. 
- if b.IsImmutableFolder() && b.Config.Workspace.SnapshotPath == "" { - bundle.ApplySeqContext(ctx, b, snapshot.LoadState()) - if logdiag.HasError(ctx) { - return nil - } - if b.Config.Workspace.SnapshotPath != "" { - bundle.ApplySeqContext(ctx, b, mutator.ResolveVariableReferencesOnlyResources("workspace")) - if logdiag.HasError(ctx) { - return nil - } - } - } plan, err := b.DeploymentBundle.CalculatePlan(ctx, b.WorkspaceClient(ctx), &b.Config) if err != nil { logdiag.LogError(ctx, err) diff --git a/bundle/phases/destroy.go b/bundle/phases/destroy.go index a06471f496d..74049f26f42 100644 --- a/bundle/phases/destroy.go +++ b/bundle/phases/destroy.go @@ -102,11 +102,9 @@ func destroyCore(ctx context.Context, b *bundle.Bundle, plan *deployplan.Plan, e bundle.ApplyContext(ctx, b, files.Delete()) - if logdiag.HasError(ctx) { - return + if !logdiag.HasError(ctx) { + cmdio.LogString(ctx, "Destroy complete!") } - - cmdio.LogString(ctx, "Destroy complete!") } // The destroy phase deletes artifacts and resources. 
@@ -140,6 +138,7 @@ func Destroy(ctx context.Context, b *bundle.Bundle, engine engine.EngineType) { // Not resolving might lead to terraform "Reference to undeclared resource" error mutator.ResolveVariableReferencesWithoutResources("artifacts"), mutator.ResolveVariableReferencesOnlyResources("artifacts"), + terraform.Interpolate(), terraform.Write(), terraform.Plan(terraform.PlanGoal("destroy")), diff --git a/cmd/bundle/run.go b/cmd/bundle/run.go index e05da4be5ab..e98fe59ac4e 100644 --- a/cmd/bundle/run.go +++ b/cmd/bundle/run.go @@ -10,8 +10,6 @@ import ( "slices" "github.com/databricks/cli/bundle" - "github.com/databricks/cli/bundle/config/mutator" - "github.com/databricks/cli/bundle/deploy/snapshot" "github.com/databricks/cli/bundle/env" "github.com/databricks/cli/bundle/resources" "github.com/databricks/cli/bundle/run" @@ -173,19 +171,6 @@ Example usage: return nil }, PostStateFunc: func(ctx context.Context, b *bundle.Bundle, stateDesc *statemgmt.StateDesc) error { - if b.Config.Experimental != nil && b.Config.Experimental.ImmutableFolder { - // Restore workspace.snapshot_path from local state so that the - // ${workspace.snapshot_path} placeholders written by translate_paths - // resolve to the actual content-addressed remote paths before running. 
- bundle.ApplySeqContext(ctx, b, - snapshot.LoadState(), - mutator.ResolveVariableReferencesOnlyResources("workspace"), - ) - if logdiag.HasError(ctx) { - return root.ErrAlreadyPrinted - } - } - runner, err := keyToRunner(b, key) if err != nil { return err diff --git a/cmd/bundle/utils/process.go b/cmd/bundle/utils/process.go index 683477c465f..d61c4525530 100644 --- a/cmd/bundle/utils/process.go +++ b/cmd/bundle/utils/process.go @@ -297,6 +297,7 @@ func ProcessBundleRet(cmd *cobra.Command, opts ProcessOptions) (b *bundle.Bundle } var libs phases.LibLocationMap + if opts.Build { t2 := time.Now() libs = phases.Build(ctx, b) From 6f3f077d196fc4d986f43d08cfe339b4fb9be5e7 Mon Sep 17 00:00:00 2001 From: Andrew Nester Date: Wed, 24 Jun 2026 15:29:53 +0200 Subject: [PATCH 23/31] update out.test.toml --- acceptance/bundle/deploy/immutable-no-artifacts/out.test.toml | 2 +- acceptance/bundle/deploy/immutable/out.test.toml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/acceptance/bundle/deploy/immutable-no-artifacts/out.test.toml b/acceptance/bundle/deploy/immutable-no-artifacts/out.test.toml index 9cfad3fb0d5..e90b6d5d1ba 100644 --- a/acceptance/bundle/deploy/immutable-no-artifacts/out.test.toml +++ b/acceptance/bundle/deploy/immutable-no-artifacts/out.test.toml @@ -1,3 +1,3 @@ Local = true -Cloud = true +Cloud = false EnvMatrix.DATABRICKS_BUNDLE_ENGINE = ["direct"] diff --git a/acceptance/bundle/deploy/immutable/out.test.toml b/acceptance/bundle/deploy/immutable/out.test.toml index 9cfad3fb0d5..e90b6d5d1ba 100644 --- a/acceptance/bundle/deploy/immutable/out.test.toml +++ b/acceptance/bundle/deploy/immutable/out.test.toml @@ -1,3 +1,3 @@ Local = true -Cloud = true +Cloud = false EnvMatrix.DATABRICKS_BUNDLE_ENGINE = ["direct"] From 5cefc2f64f274bdb52c3551c3f69e3b95e2446ea Mon Sep 17 00:00:00 2001 From: Andrew Nester Date: Wed, 24 Jun 2026 15:31:18 +0200 Subject: [PATCH 24/31] comment --- bundle/deploy/state_update.go | 1 + 1 file changed, 1 insertion(+) 
diff --git a/bundle/deploy/state_update.go b/bundle/deploy/state_update.go index fbf9f26a7ab..55cf2393bf1 100644 --- a/bundle/deploy/state_update.go +++ b/bundle/deploy/state_update.go @@ -46,6 +46,7 @@ func (s *stateUpdate) Apply(ctx context.Context, b *bundle.Bundle) diag.Diagnost } state.Files = fl + // Generate a UUID for the deployment, if one does not already exist if state.ID == uuid.Nil { state.ID = uuid.New() } From 55acaf4cb5fa9c046438513d3fca7aa17ea920e7 Mon Sep 17 00:00:00 2001 From: Andrew Nester Date: Wed, 24 Jun 2026 16:10:48 +0200 Subject: [PATCH 25/31] fix test on windows --- .../bundle/deploy/immutable/out.plan.json | 62 ------------------- acceptance/bundle/deploy/immutable/output.txt | 23 +++++++ acceptance/bundle/deploy/immutable/script | 2 +- 3 files changed, 24 insertions(+), 63 deletions(-) delete mode 100644 acceptance/bundle/deploy/immutable/out.plan.json diff --git a/acceptance/bundle/deploy/immutable/out.plan.json b/acceptance/bundle/deploy/immutable/out.plan.json deleted file mode 100644 index 846b794c926..00000000000 --- a/acceptance/bundle/deploy/immutable/out.plan.json +++ /dev/null @@ -1,62 +0,0 @@ -{ - "plan_version": 2, - "cli_version": "[DEV_VERSION]", - "plan": { - "resources.jobs.my_job": { - "action": "create", - "new_state": { - "value": { - "deployment": { - "kind": "BUNDLE", - "metadata_file_path": "/Workspace/Users/[USERNAME]/.bundle/test-bundle-immutable-[UNIQUE_NAME]/default/state/metadata.json" - }, - "edit_mode": "UI_LOCKED", - "environments": [ - { - "environment_key": "env", - "spec": { - "dependencies": [ - "dist/immutable-0.0.1-py3-none-any.whl" - ], - "environment_version": "4" - } - } - ], - "format": "MULTI_TASK", - "max_concurrent_runs": 1, - "name": "my job", - "queue": { - "enabled": true - }, - "tasks": [ - { - "notebook_task": { - "notebook_path": "${workspace.snapshot_path}/src/files/src/notebook" - }, - "task_key": "notebook_task" - }, - { - "environment_key": "env", - "python_wheel_task": { - 
"entry_point": "main", - "package_name": "immutable" - }, - "task_key": "python_wheel_task" - }, - { - "environment_key": "env", - "spark_python_task": { - "python_file": "${workspace.snapshot_path}/src/files/src/main.py" - }, - "task_key": "spark_python_task" - } - ] - }, - "vars": { - "tasks[0].notebook_task.notebook_path": "${workspace.snapshot_path}/src/files/src/notebook", - "tasks[2].spark_python_task.python_file": "${workspace.snapshot_path}/src/files/src/main.py" - } - } - } - } -} diff --git a/acceptance/bundle/deploy/immutable/output.txt b/acceptance/bundle/deploy/immutable/output.txt index e7dee7f2537..211282542bd 100644 --- a/acceptance/bundle/deploy/immutable/output.txt +++ b/acceptance/bundle/deploy/immutable/output.txt @@ -10,6 +10,29 @@ Validation OK! >>> [CLI] bundle plan -o json Building python_artifact... +[ + { + "notebook_task": { + "notebook_path": "${workspace.snapshot_path}/src/files/src/notebook" + }, + "task_key": "notebook_task" + }, + { + "environment_key": "env", + "python_wheel_task": { + "entry_point": "main", + "package_name": "immutable" + }, + "task_key": "python_wheel_task" + }, + { + "environment_key": "env", + "spark_python_task": { + "python_file": "${workspace.snapshot_path}/src/files/src/main.py" + }, + "task_key": "spark_python_task" + } +] >>> [CLI] bundle deploy Building python_artifact... 
diff --git a/acceptance/bundle/deploy/immutable/script b/acceptance/bundle/deploy/immutable/script index 806eebb7755..85e9a908d1d 100644 --- a/acceptance/bundle/deploy/immutable/script +++ b/acceptance/bundle/deploy/immutable/script @@ -5,7 +5,7 @@ cleanup() { trap cleanup EXIT trace $CLI bundle validate -trace $CLI bundle plan -o json > out.plan.json +trace $CLI bundle plan -o json | jq '.plan["resources.jobs.my_job"].new_state.value.tasks' trace $CLI bundle deploy From 60e41d33619ad1eaa3adf3adc9c9cbeeceb8d3f8 Mon Sep 17 00:00:00 2001 From: Andrew Nester Date: Wed, 24 Jun 2026 16:55:14 +0200 Subject: [PATCH 26/31] added client side validation for size of upload --- bundle/deploy/snapshot/path.go | 31 ++++---- bundle/deploy/snapshot/path_test.go | 16 ++-- bundle/deploy/snapshot/upload.go | 14 +++- bundle/deploy/snapshot/upload_warning_test.go | 75 +++++++++++++++++++ 4 files changed, 112 insertions(+), 24 deletions(-) create mode 100644 bundle/deploy/snapshot/upload_warning_test.go diff --git a/bundle/deploy/snapshot/path.go b/bundle/deploy/snapshot/path.go index 95a24bef2cf..9d0ebdce16a 100644 --- a/bundle/deploy/snapshot/path.go +++ b/bundle/deploy/snapshot/path.go @@ -32,21 +32,23 @@ var zipEpoch = time.Date(2000, 1, 1, 0, 0, 0, 0, time.UTC) // // The snapshot ID is always IDFromContent(BundleZip(b)), ensuring the // pre-calculated path and the uploaded path are derived from the same content. -func BundleZip(ctx context.Context, b *bundle.Bundle) ([]byte, error) { +// The second return value is the number of sync-root files included in the zip. 
+func BundleZip(ctx context.Context, b *bundle.Bundle) ([]byte, int, error) { var buf bytes.Buffer zw := zip.NewWriter(&buf) - if err := addSyncRootToZip(ctx, zw, b); err != nil { - return nil, err + fileCount, err := addSyncRootToZip(ctx, zw, b) + if err != nil { + return nil, 0, err } if err := addArtifactsToZip(zw, b); err != nil { - return nil, err + return nil, 0, err } if err := zw.Close(); err != nil { - return nil, err + return nil, 0, err } - return buf.Bytes(), nil + return buf.Bytes(), fileCount, nil } // IDFromContent returns the SHA-256 hex digest of content. @@ -59,21 +61,22 @@ func IDFromContent(content []byte) string { // Called after artifacts are built so that ApplyImmutableWorkspacePaths and // snapshot.Upload both hash identical content. func SnapshotID(ctx context.Context, b *bundle.Bundle) (string, error) { - content, err := BundleZip(ctx, b) + content, _, err := BundleZip(ctx, b) if err != nil { return "", err } return IDFromContent(content), nil } -func addSyncRootToZip(ctx context.Context, zw *zip.Writer, b *bundle.Bundle) error { +// addSyncRootToZip returns the number of files added from the sync root. +func addSyncRootToZip(ctx context.Context, zw *zip.Writer, b *bundle.Bundle) (int, error) { opts, err := files.GetSyncOptions(ctx, b) if err != nil { - return err + return 0, err } fileList, err := libsync.GetFileList(ctx, *opts) if err != nil { - return err + return 0, err } // Sort for a stable zip (same content → same hash regardless of iteration order). 
slices.SortFunc(fileList, func(a, b fileset.File) int { @@ -89,7 +92,7 @@ func addSyncRootToZip(ctx context.Context, zw *zip.Writer, b *bundle.Bundle) err for _, f := range fileList { rc, err := b.SyncRoot.Open(f.Relative) if err != nil { - return fmt.Errorf("open %s: %w", f.Relative, err) + return 0, fmt.Errorf("open %s: %w", f.Relative, err) } entryPath := filepath.ToSlash(f.Relative) @@ -101,15 +104,15 @@ func addSyncRootToZip(ctx context.Context, zw *zip.Writer, b *bundle.Bundle) err w, err := zw.CreateHeader(h) if err != nil { rc.Close() - return fmt.Errorf("zip entry for %s: %w", f.Relative, err) + return 0, fmt.Errorf("zip entry for %s: %w", f.Relative, err) } _, err = io.Copy(w, rc) rc.Close() if err != nil { - return fmt.Errorf("write %s: %w", f.Relative, err) + return 0, fmt.Errorf("write %s: %w", f.Relative, err) } } - return nil + return len(fileList), nil } func addArtifactsToZip(zw *zip.Writer, b *bundle.Bundle) error { diff --git a/bundle/deploy/snapshot/path_test.go b/bundle/deploy/snapshot/path_test.go index f782a45bb42..ee8251fe316 100644 --- a/bundle/deploy/snapshot/path_test.go +++ b/bundle/deploy/snapshot/path_test.go @@ -43,9 +43,9 @@ func TestBundleZipIsDeterministic(t *testing.T) { "src/task.py": "def run(): pass", }) - zip1, err := snapshot.BundleZip(t.Context(), b) + zip1, _, err := snapshot.BundleZip(t.Context(), b) require.NoError(t, err) - zip2, err := snapshot.BundleZip(t.Context(), b) + zip2, _, err := snapshot.BundleZip(t.Context(), b) require.NoError(t, err) assert.Equal(t, zip1, zip2, "BundleZip must produce identical bytes for identical content") @@ -55,9 +55,9 @@ func TestBundleZipChangesWithContent(t *testing.T) { b1 := makeBundleWithFiles(t, map[string]string{"main.py": "v1"}) b2 := makeBundleWithFiles(t, map[string]string{"main.py": "v2"}) - zip1, err := snapshot.BundleZip(t.Context(), b1) + zip1, _, err := snapshot.BundleZip(t.Context(), b1) require.NoError(t, err) - zip2, err := snapshot.BundleZip(t.Context(), b2) + zip2, _, 
err := snapshot.BundleZip(t.Context(), b2) require.NoError(t, err) assert.NotEqual(t, zip1, zip2, "different file content must produce different zips") @@ -74,9 +74,9 @@ func TestBundleZipRespectsExcludes(t *testing.T) { }) bExclude.Config.Sync.Exclude = []string{"*.json"} - zipAll, err := snapshot.BundleZip(t.Context(), b) + zipAll, _, err := snapshot.BundleZip(t.Context(), b) require.NoError(t, err) - zipExcl, err := snapshot.BundleZip(t.Context(), bExclude) + zipExcl, _, err := snapshot.BundleZip(t.Context(), bExclude) require.NoError(t, err) // The zip without the excluded file should be smaller and different. @@ -94,7 +94,7 @@ func TestIDFromContent(t *testing.T) { func TestSnapshotIDMatchesBundleZipHash(t *testing.T) { b := makeBundleWithFiles(t, map[string]string{"task.py": "x = 1"}) - zipContent, err := snapshot.BundleZip(t.Context(), b) + zipContent, _, err := snapshot.BundleZip(t.Context(), b) require.NoError(t, err) expectedID := snapshot.IDFromContent(zipContent) @@ -123,7 +123,7 @@ func TestBundleZipDoNotStripNotebookExtensions(t *testing.T) { "src/script.py": "print('hello')", }) - zipContent, err := snapshot.BundleZip(t.Context(), b) + zipContent, _, err := snapshot.BundleZip(t.Context(), b) require.NoError(t, err) names := zipEntryNames(t, zipContent) diff --git a/bundle/deploy/snapshot/upload.go b/bundle/deploy/snapshot/upload.go index 150bb663227..ca85df0e326 100644 --- a/bundle/deploy/snapshot/upload.go +++ b/bundle/deploy/snapshot/upload.go @@ -11,6 +11,9 @@ import ( "github.com/databricks/cli/libs/log" ) +// fileLimitWarning is the file count above which immutable folder deployments may fail. +const fileLimitWarning = 1000 + type snapshotUpload struct { // uploader allows test injection of a custom SnapshotUploader. 
uploader SnapshotUploader @@ -39,10 +42,17 @@ func (m *snapshotUpload) Apply(ctx context.Context, b *bundle.Bundle) diag.Diagn cmdio.LogString(ctx, "Uploading immutable bundle snapshot...") - zipContent, err := BundleZip(ctx, b) + zipContent, fileCount, err := BundleZip(ctx, b) if err != nil { return diag.FromErr(fmt.Errorf("failed to build snapshot zip: %w", err)) } + var diags diag.Diagnostics + if fileCount > fileLimitWarning { + diags = append(diags, diag.Warningf( + "immutable folder deployment may not work correctly: bundle contains %d files (limit is %d)", + fileCount, fileLimitWarning, + )...) + } snapshotID := IDFromContent(zipContent) log.Debugf(ctx, "snapshot.Upload: snapshotID=%s zip=%d bytes", snapshotID, len(zipContent)) @@ -67,7 +77,7 @@ func (m *snapshotUpload) Apply(ctx context.Context, b *bundle.Bundle) diag.Diagn b.Config.Workspace.ArtifactPath = path.Join(info.Path, "src", "artifacts") } - return nil + return diags } // BuildACL constructs the access_control_list for the snapshot upload. 
diff --git a/bundle/deploy/snapshot/upload_warning_test.go b/bundle/deploy/snapshot/upload_warning_test.go new file mode 100644 index 00000000000..fd8ad648b81 --- /dev/null +++ b/bundle/deploy/snapshot/upload_warning_test.go @@ -0,0 +1,75 @@ +package snapshot + +import ( + "context" + "fmt" + "os" + "path/filepath" + "testing" + + "github.com/databricks/cli/bundle" + "github.com/databricks/cli/bundle/config" + "github.com/databricks/cli/libs/cmdio" + "github.com/databricks/cli/libs/diag" + "github.com/databricks/cli/libs/logdiag" + "github.com/databricks/cli/libs/vfs" + "github.com/databricks/databricks-sdk-go/service/iam" + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" +) + +type mockUploader struct{ path string } + +func (m *mockUploader) Upload(_ context.Context, _, _ string, _ []ACLEntry, _ []byte) (*SnapshotInfo, error) { + return &SnapshotInfo{Path: m.path}, nil +} + +func makeBundle(t *testing.T, nFiles int) *bundle.Bundle { + t.Helper() + dir := t.TempDir() + for i := range nFiles { + p := filepath.Join(dir, fmt.Sprintf("f%d.py", i)) + require.NoError(t, os.WriteFile(p, []byte("x"), 0o644)) + } + root := vfs.MustNew(dir) + b := &bundle.Bundle{ + BundleRootPath: dir, + SyncRoot: root, + WorktreeRoot: root, + Config: config.Root{ + Bundle: config.Bundle{Target: "default"}, + Workspace: config.Workspace{ + CurrentUser: &config.User{ + User: &iam.User{UserName: "test@example.test"}, + }, + }, + }, + } + return b +} + +func testContext(t *testing.T) context.Context { + t.Helper() + return logdiag.InitContext(cmdio.MockDiscard(t.Context())) +} + +func TestUploadWarnsAboveFileLimit(t *testing.T) { + b := makeBundle(t, fileLimitWarning+1) + m := &snapshotUpload{uploader: &mockUploader{path: "/snapshots/test"}} + + diags := m.Apply(testContext(t), b) + + require.Equal(t, 1, len(diags)) + assert.Equal(t, diag.Warning, diags[0].Severity) + assert.Contains(t, diags[0].Summary, fmt.Sprintf("%d files", fileLimitWarning+1)) + assert.Equal(t, 
"/snapshots/test", b.Config.Workspace.SnapshotPath) +} + +func TestUploadNoWarningBelowFileLimit(t *testing.T) { + b := makeBundle(t, 5) + m := &snapshotUpload{uploader: &mockUploader{path: "/snapshots/test"}} + + diags := m.Apply(testContext(t), b) + + assert.True(t, diags.HasError() == false && len(diags) == 0, "expected no diagnostics") +} From 3d39f44903c30d3534e8abdce76334afc47561a5 Mon Sep 17 00:00:00 2001 From: Andrew Nester Date: Thu, 25 Jun 2026 11:27:59 +0200 Subject: [PATCH 27/31] fix lint --- bundle/deploy/snapshot/upload_warning_test.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bundle/deploy/snapshot/upload_warning_test.go b/bundle/deploy/snapshot/upload_warning_test.go index fd8ad648b81..c08b6a87b5a 100644 --- a/bundle/deploy/snapshot/upload_warning_test.go +++ b/bundle/deploy/snapshot/upload_warning_test.go @@ -59,7 +59,7 @@ func TestUploadWarnsAboveFileLimit(t *testing.T) { diags := m.Apply(testContext(t), b) - require.Equal(t, 1, len(diags)) + require.Len(t, diags, 1) assert.Equal(t, diag.Warning, diags[0].Severity) assert.Contains(t, diags[0].Summary, fmt.Sprintf("%d files", fileLimitWarning+1)) assert.Equal(t, "/snapshots/test", b.Config.Workspace.SnapshotPath) From b9e3046f36a09af171175c06cbc473b830802027 Mon Sep 17 00:00:00 2001 From: Andrew Nester Date: Thu, 25 Jun 2026 15:52:02 +0200 Subject: [PATCH 28/31] renamed variable --- bundle/config/mutator/override_immutable_folder.go | 4 ++-- bundle/config/mutator/override_immutable_folder_test.go | 6 +++--- bundle/phases/initialize.go | 2 +- 3 files changed, 6 insertions(+), 6 deletions(-) diff --git a/bundle/config/mutator/override_immutable_folder.go b/bundle/config/mutator/override_immutable_folder.go index 51307523d82..a033ca12e5f 100644 --- a/bundle/config/mutator/override_immutable_folder.go +++ b/bundle/config/mutator/override_immutable_folder.go @@ -12,7 +12,7 @@ import ( type overrideImmutableFolder struct{} // OverrideImmutableFolder sets 
bundle.deployment.immutable_folder to true -// if the DATABRICKS_IMMUTABLE_FOLDER environment variable is non-empty. +// if the __TEST_DATABRICKS_IMMUTABLE_FOLDER environment variable is non-empty. // This allows running the acceptance test suite against the immutable folder // code path without modifying any databricks.yml files. func OverrideImmutableFolder() bundle.Mutator { @@ -24,7 +24,7 @@ func (m *overrideImmutableFolder) Name() string { } func (m *overrideImmutableFolder) Apply(ctx context.Context, b *bundle.Bundle) diag.Diagnostics { - if env.Get(ctx, "DATABRICKS_IMMUTABLE_FOLDER") == "" { + if env.Get(ctx, "__TEST_DATABRICKS_IMMUTABLE_FOLDER") == "" { return nil } if b.Config.Experimental == nil { diff --git a/bundle/config/mutator/override_immutable_folder_test.go b/bundle/config/mutator/override_immutable_folder_test.go index e538fa1ffbb..a675974e7ed 100644 --- a/bundle/config/mutator/override_immutable_folder_test.go +++ b/bundle/config/mutator/override_immutable_folder_test.go @@ -13,7 +13,7 @@ import ( func TestOverrideImmutableFolderNotSet(t *testing.T) { t.Parallel() - ctx := env.Set(t.Context(), "DATABRICKS_IMMUTABLE_FOLDER", "") + ctx := env.Set(t.Context(), "__TEST_DATABRICKS_IMMUTABLE_FOLDER", "") b := &bundle.Bundle{Config: config.Root{}} diags := bundle.Apply(ctx, b, mutator.OverrideImmutableFolder()) require.NoError(t, diags.Error()) @@ -22,7 +22,7 @@ func TestOverrideImmutableFolderNotSet(t *testing.T) { func TestOverrideImmutableFolderSet(t *testing.T) { t.Parallel() - ctx := env.Set(t.Context(), "DATABRICKS_IMMUTABLE_FOLDER", "true") + ctx := env.Set(t.Context(), "__TEST_DATABRICKS_IMMUTABLE_FOLDER", "true") b := &bundle.Bundle{Config: config.Root{}} diags := bundle.Apply(ctx, b, mutator.OverrideImmutableFolder()) require.NoError(t, diags.Error()) @@ -32,7 +32,7 @@ func TestOverrideImmutableFolderSet(t *testing.T) { func TestOverrideImmutableFolderAlreadyTrue(t *testing.T) { t.Parallel() - ctx := env.Set(t.Context(), 
"DATABRICKS_IMMUTABLE_FOLDER", "") + ctx := env.Set(t.Context(), "__TEST_DATABRICKS_IMMUTABLE_FOLDER", "") b := &bundle.Bundle{Config: config.Root{}} b.Config.Experimental = &config.Experimental{ImmutableFolder: true} diags := bundle.Apply(ctx, b, mutator.OverrideImmutableFolder()) diff --git a/bundle/phases/initialize.go b/bundle/phases/initialize.go index 0513c870139..8f54c141f80 100644 --- a/bundle/phases/initialize.go +++ b/bundle/phases/initialize.go @@ -70,7 +70,7 @@ func Initialize(ctx context.Context, b *bundle.Bundle) { // because it affects how workspace variables are resolved. mutator.ApplySourceLinkedDeploymentPreset(), - // Reads (env): DATABRICKS_IMMUTABLE_FOLDER (non-empty value enables immutable folder mode) + // Reads (env): __TEST_DATABRICKS_IMMUTABLE_FOLDER (non-empty value enables immutable folder mode) // Updates (typed): b.Config.Experimental.ImmutableFolder (forces to true when env var is set) // Allows running the full test suite against the immutable folder code path without // modifying any databricks.yml files. From cdbb94e36382b943f1b60f8fdca38202d5249381 Mon Sep 17 00:00:00 2001 From: Andrew Nester Date: Thu, 25 Jun 2026 16:05:45 +0200 Subject: [PATCH 29/31] moved lib resolution to later stage --- bundle/phases/build.go | 25 +------------------------ bundle/phases/deploy.go | 15 +++++++++++---- cmd/bundle/utils/process.go | 6 ++---- 3 files changed, 14 insertions(+), 32 deletions(-) diff --git a/bundle/phases/build.go b/bundle/phases/build.go index db376e07e28..a731b3b6721 100644 --- a/bundle/phases/build.go +++ b/bundle/phases/build.go @@ -9,17 +9,11 @@ import ( "github.com/databricks/cli/bundle/config/mutator" "github.com/databricks/cli/bundle/libraries" "github.com/databricks/cli/bundle/scripts" - "github.com/databricks/cli/bundle/trampoline" "github.com/databricks/cli/libs/log" - "github.com/databricks/cli/libs/logdiag" ) -// LibLocationMap maps artifact names to library locations that need uploading. 
-// Computed by Build and consumed by Deploy to upload the right files. -type LibLocationMap map[string][]libraries.LocationToUpdate - // Build runs the build phase, which builds artifacts. -func Build(ctx context.Context, b *bundle.Bundle) LibLocationMap { +func Build(ctx context.Context, b *bundle.Bundle) { log.Info(ctx, "Phase: build") bundle.ApplySeqContext(ctx, b, @@ -43,21 +37,4 @@ func Build(ctx context.Context, b *bundle.Bundle) LibLocationMap { // SwitchToPatchedWheels must be run after ExpandGlobReferences and after build phase because it Artifact.Source and Artifact.Patched populated libraries.SwitchToPatchedWheels(), ) - - if logdiag.HasError(ctx) { - return nil - } - - // For immutable bundles, library remote paths are set in the deploy phase - // after snapshot.Upload() provides the content-addressed workspace.artifact_path. - if b.IsImmutableFolder() { - return nil - } - - libs, diags := libraries.ReplaceWithRemotePath(ctx, b) - for _, d := range diags { - logdiag.LogDiag(ctx, d) - } - bundle.ApplyContext(ctx, b, trampoline.TransformWheelTask()) - return libs } diff --git a/bundle/phases/deploy.go b/bundle/phases/deploy.go index 7a906be8b5b..b04fca2525f 100644 --- a/bundle/phases/deploy.go +++ b/bundle/phases/deploy.go @@ -19,6 +19,7 @@ import ( "github.com/databricks/cli/bundle/direct" "github.com/databricks/cli/bundle/libraries" "github.com/databricks/cli/bundle/metrics" + "github.com/databricks/cli/bundle/trampoline" "github.com/databricks/cli/bundle/permissions" "github.com/databricks/cli/bundle/scripts" "github.com/databricks/cli/bundle/statemgmt" @@ -117,8 +118,7 @@ func deployCore(ctx context.Context, b *bundle.Bundle, plan *deployplan.Plan, ta } } -// uploadLibraries uploads libraries to the workspace. -// It also cleans up the artifacts directory and transforms wheel tasks. +// uploadLibraries cleans up the artifacts directory and uploads libraries to the workspace. // It is called by only "bundle deploy". 
func uploadLibraries(ctx context.Context, b *bundle.Bundle, libs map[string][]libraries.LocationToUpdate) { bundle.ApplySeqContext(ctx, b, @@ -129,7 +129,7 @@ func uploadLibraries(ctx context.Context, b *bundle.Bundle, libs map[string][]li // The deploy phase deploys artifacts and resources. // If readPlanPath is provided, the plan is loaded from that file instead of being calculated. -func Deploy(ctx context.Context, b *bundle.Bundle, outputHandler sync.OutputHandler, engine engine.EngineType, libs map[string][]libraries.LocationToUpdate, plan *deployplan.Plan) { +func Deploy(ctx context.Context, b *bundle.Bundle, outputHandler sync.OutputHandler, engine engine.EngineType, plan *deployplan.Plan) { log.Info(ctx, "Phase: deploy") // Core mutators that CRUD resources and modify deployment state. These @@ -170,7 +170,14 @@ func Deploy(ctx context.Context, b *bundle.Bundle, outputHandler sync.OutputHand } } } else { - uploadLibraries(ctx, b, libs) + libs, libDiags := libraries.ReplaceWithRemotePath(ctx, b) + for _, d := range libDiags { + logdiag.LogDiag(ctx, d) + } + if !logdiag.HasError(ctx) { + bundle.ApplyContext(ctx, b, trampoline.TransformWheelTask()) + uploadLibraries(ctx, b, libs) + } } if logdiag.HasError(ctx) { diff --git a/cmd/bundle/utils/process.go b/cmd/bundle/utils/process.go index d61c4525530..61924a4e1ed 100644 --- a/cmd/bundle/utils/process.go +++ b/cmd/bundle/utils/process.go @@ -296,11 +296,9 @@ func ProcessBundleRet(cmd *cobra.Command, opts ProcessOptions) (b *bundle.Bundle } } - var libs phases.LibLocationMap - if opts.Build { t2 := time.Now() - libs = phases.Build(ctx, b) + phases.Build(ctx, b) b.Metrics.ExecutionTimes = append(b.Metrics.ExecutionTimes, protos.IntMapEntry{ Key: "phases.Build", Value: time.Since(t2).Milliseconds(), @@ -329,7 +327,7 @@ func ProcessBundleRet(cmd *cobra.Command, opts ProcessOptions) (b *bundle.Bundle } t3 := time.Now() - phases.Deploy(ctx, b, outputHandler, stateDesc.Engine, libs, plan) + phases.Deploy(ctx, b, 
outputHandler, stateDesc.Engine, plan) b.Metrics.ExecutionTimes = append(b.Metrics.ExecutionTimes, protos.IntMapEntry{ Key: "phases.Deploy", Value: time.Since(t3).Milliseconds(), From 6a55056549728ac0456138b92fedbe5c1d1a96cc Mon Sep 17 00:00:00 2001 From: Andrew Nester Date: Thu, 25 Jun 2026 16:10:37 +0200 Subject: [PATCH 30/31] fix lint --- bundle/phases/deploy.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bundle/phases/deploy.go b/bundle/phases/deploy.go index b04fca2525f..e5c75d96999 100644 --- a/bundle/phases/deploy.go +++ b/bundle/phases/deploy.go @@ -19,10 +19,10 @@ import ( "github.com/databricks/cli/bundle/direct" "github.com/databricks/cli/bundle/libraries" "github.com/databricks/cli/bundle/metrics" - "github.com/databricks/cli/bundle/trampoline" "github.com/databricks/cli/bundle/permissions" "github.com/databricks/cli/bundle/scripts" "github.com/databricks/cli/bundle/statemgmt" + "github.com/databricks/cli/bundle/trampoline" "github.com/databricks/cli/libs/agent" "github.com/databricks/cli/libs/cmdio" "github.com/databricks/cli/libs/log" From a6018767e0350f19f3fe3cbf6cad7b90b3f0b5e2 Mon Sep 17 00:00:00 2001 From: Andrew Nester Date: Thu, 25 Jun 2026 16:35:00 +0200 Subject: [PATCH 31/31] revert path resolution change --- bundle/phases/build.go | 25 ++++++++++++++++++++++++- bundle/phases/deploy.go | 15 ++++----------- cmd/bundle/utils/process.go | 6 ++++-- 3 files changed, 32 insertions(+), 14 deletions(-) diff --git a/bundle/phases/build.go b/bundle/phases/build.go index a731b3b6721..db376e07e28 100644 --- a/bundle/phases/build.go +++ b/bundle/phases/build.go @@ -9,11 +9,17 @@ import ( "github.com/databricks/cli/bundle/config/mutator" "github.com/databricks/cli/bundle/libraries" "github.com/databricks/cli/bundle/scripts" + "github.com/databricks/cli/bundle/trampoline" "github.com/databricks/cli/libs/log" + "github.com/databricks/cli/libs/logdiag" ) +// LibLocationMap maps artifact names to library locations that need uploading. 
+// Computed by Build and consumed by Deploy to upload the right files. +type LibLocationMap map[string][]libraries.LocationToUpdate + // Build runs the build phase, which builds artifacts. -func Build(ctx context.Context, b *bundle.Bundle) { +func Build(ctx context.Context, b *bundle.Bundle) LibLocationMap { log.Info(ctx, "Phase: build") bundle.ApplySeqContext(ctx, b, @@ -37,4 +43,21 @@ func Build(ctx context.Context, b *bundle.Bundle) { // SwitchToPatchedWheels must be run after ExpandGlobReferences and after build phase because it Artifact.Source and Artifact.Patched populated libraries.SwitchToPatchedWheels(), ) + + if logdiag.HasError(ctx) { + return nil + } + + // For immutable bundles, library remote paths are set in the deploy phase + // after snapshot.Upload() provides the content-addressed workspace.artifact_path. + if b.IsImmutableFolder() { + return nil + } + + libs, diags := libraries.ReplaceWithRemotePath(ctx, b) + for _, d := range diags { + logdiag.LogDiag(ctx, d) + } + bundle.ApplyContext(ctx, b, trampoline.TransformWheelTask()) + return libs } diff --git a/bundle/phases/deploy.go b/bundle/phases/deploy.go index e5c75d96999..7a906be8b5b 100644 --- a/bundle/phases/deploy.go +++ b/bundle/phases/deploy.go @@ -22,7 +22,6 @@ import ( "github.com/databricks/cli/bundle/permissions" "github.com/databricks/cli/bundle/scripts" "github.com/databricks/cli/bundle/statemgmt" - "github.com/databricks/cli/bundle/trampoline" "github.com/databricks/cli/libs/agent" "github.com/databricks/cli/libs/cmdio" "github.com/databricks/cli/libs/log" @@ -118,7 +117,8 @@ func deployCore(ctx context.Context, b *bundle.Bundle, plan *deployplan.Plan, ta } } -// uploadLibraries cleans up the artifacts directory and uploads libraries to the workspace. +// uploadLibraries uploads libraries to the workspace. +// It also cleans up the artifacts directory and transforms wheel tasks. // It is called by only "bundle deploy". 
func uploadLibraries(ctx context.Context, b *bundle.Bundle, libs map[string][]libraries.LocationToUpdate) { bundle.ApplySeqContext(ctx, b, @@ -129,7 +129,7 @@ func uploadLibraries(ctx context.Context, b *bundle.Bundle, libs map[string][]li // The deploy phase deploys artifacts and resources. // If readPlanPath is provided, the plan is loaded from that file instead of being calculated. -func Deploy(ctx context.Context, b *bundle.Bundle, outputHandler sync.OutputHandler, engine engine.EngineType, plan *deployplan.Plan) { +func Deploy(ctx context.Context, b *bundle.Bundle, outputHandler sync.OutputHandler, engine engine.EngineType, libs map[string][]libraries.LocationToUpdate, plan *deployplan.Plan) { log.Info(ctx, "Phase: deploy") // Core mutators that CRUD resources and modify deployment state. These @@ -170,14 +170,7 @@ func Deploy(ctx context.Context, b *bundle.Bundle, outputHandler sync.OutputHand } } } else { - libs, libDiags := libraries.ReplaceWithRemotePath(ctx, b) - for _, d := range libDiags { - logdiag.LogDiag(ctx, d) - } - if !logdiag.HasError(ctx) { - bundle.ApplyContext(ctx, b, trampoline.TransformWheelTask()) - uploadLibraries(ctx, b, libs) - } + uploadLibraries(ctx, b, libs) } if logdiag.HasError(ctx) { diff --git a/cmd/bundle/utils/process.go b/cmd/bundle/utils/process.go index 61924a4e1ed..d61c4525530 100644 --- a/cmd/bundle/utils/process.go +++ b/cmd/bundle/utils/process.go @@ -296,9 +296,11 @@ func ProcessBundleRet(cmd *cobra.Command, opts ProcessOptions) (b *bundle.Bundle } } + var libs phases.LibLocationMap + if opts.Build { t2 := time.Now() - phases.Build(ctx, b) + libs = phases.Build(ctx, b) b.Metrics.ExecutionTimes = append(b.Metrics.ExecutionTimes, protos.IntMapEntry{ Key: "phases.Build", Value: time.Since(t2).Milliseconds(), @@ -327,7 +329,7 @@ func ProcessBundleRet(cmd *cobra.Command, opts ProcessOptions) (b *bundle.Bundle } t3 := time.Now() - phases.Deploy(ctx, b, outputHandler, stateDesc.Engine, plan) + phases.Deploy(ctx, b, 
outputHandler, stateDesc.Engine, libs, plan) b.Metrics.ExecutionTimes = append(b.Metrics.ExecutionTimes, protos.IntMapEntry{ Key: "phases.Deploy", Value: time.Since(t3).Milliseconds(),