From c84146535f0a1d8781ca728776a2e4d8c16d87a7 Mon Sep 17 00:00:00 2001 From: Corey Christous Date: Thu, 18 Dec 2025 09:31:20 -0500 Subject: [PATCH 1/6] silence file collision warnings in cache restore --- cache-cli/pkg/archive/archiver_test.go | 47 +++++++++++++++++++++ cache-cli/pkg/archive/shell_out_archiver.go | 13 +++++- 2 files changed, 58 insertions(+), 2 deletions(-) diff --git a/cache-cli/pkg/archive/archiver_test.go b/cache-cli/pkg/archive/archiver_test.go index 8714de06..e096c8a9 100644 --- a/cache-cli/pkg/archive/archiver_test.go +++ b/cache-cli/pkg/archive/archiver_test.go @@ -273,6 +273,53 @@ func Test__Compress(t *testing.T) { assert.NoError(t, os.Remove(tempFile.Name())) assert.NoError(t, os.Remove(compressedFileName)) }) + + t.Run(archiverType+" skips existing files without error", func(t *testing.T) { + // This test only applies to shell-out archiver which uses tar -k flag + if archiverType != "shell-out" { + t.Skip("Test only applies to shell-out archiver") + } + + cwd, _ := os.Getwd() + tempDir, _ := ioutil.TempDir(cwd, "*") + tempFile1, _ := ioutil.TempFile(tempDir, "*") + tempFile2, _ := ioutil.TempFile(tempDir, "*") + + // Write content to both files + originalContent := []byte("original content") + cachedContent := []byte("cached content") + _, _ = tempFile1.Write(originalContent) + _, _ = tempFile2.Write(cachedContent) + _ = tempFile1.Close() + _ = tempFile2.Close() + + tempDirBase := filepath.Base(tempDir) + + // Create archive with both files + compressedFileName := tmpFileNameWithPrefix("abc0008") + err := archiver.Compress(compressedFileName, tempDirBase) + assert.NoError(t, err) + + // Delete only tempFile2, keep tempFile1 to simulate existing file + assert.NoError(t, os.Remove(tempFile2.Name())) + + // Decompress - should skip tempFile1 (already exists) and restore tempFile2 + unpackedAt, err := archiver.Decompress(compressedFileName) + assert.NoError(t, err) + assert.Equal(t, tempDirBase+string(os.PathSeparator), unpackedAt) + + // Verify tempFile1 still has original content (was not overwritten) + content1, err := ioutil.ReadFile(tempFile1.Name()) + assert.NoError(t, err) + assert.Equal(t, originalContent, content1) + + // Verify tempFile2 was restored + _, err = os.Stat(tempFile2.Name()) + assert.NoError(t, err) + + assert.NoError(t, os.RemoveAll(tempDirBase)) + assert.NoError(t, os.Remove(compressedFileName)) + }) }) } diff --git a/cache-cli/pkg/archive/shell_out_archiver.go b/cache-cli/pkg/archive/shell_out_archiver.go index 00c0e2e4..a1654d5c 100644 --- a/cache-cli/pkg/archive/shell_out_archiver.go +++ b/cache-cli/pkg/archive/shell_out_archiver.go @@ -67,11 +67,20 @@ func (a *ShellOutArchiver) compressionCommand(dst, src string) *exec.Cmd { } func (a *ShellOutArchiver) decompressionCmd(dst, tempFile string) *exec.Cmd { + args := []string{} + if filepath.IsAbs(dst) { - return exec.Command("tar", "xzPf", tempFile, "-C", ".") + args = append(args, "xzPf", tempFile, "-C", ".") + } else { + args = append(args, "xzf", tempFile, "-C", ".") } - return exec.Command("tar", "xzf", tempFile, "-C", ".") + // Use -k to keep existing files (don't overwrite) + // This is supported on both GNU tar (Linux) and BSD tar (macOS) + // and handles the case where mise pre-populates Go modules before cache restore + args = append(args, "-k") + + return exec.Command("tar", args...) } func (a *ShellOutArchiver) findRestorationPath(src string) (string, error) { From e597370555571bcb6b676e437cec516c05349e0f Mon Sep 17 00:00:00 2001 From: Corey Christous Date: Mon, 22 Dec 2025 11:37:09 -0500 Subject: [PATCH 2/6] fix comment --- cache-cli/pkg/archive/shell_out_archiver.go | 1 - 1 file changed, 1 deletion(-) diff --git a/cache-cli/pkg/archive/shell_out_archiver.go b/cache-cli/pkg/archive/shell_out_archiver.go index a1654d5c..e90d2e5c 100644 --- a/cache-cli/pkg/archive/shell_out_archiver.go +++ b/cache-cli/pkg/archive/shell_out_archiver.go @@ -77,7 +77,6 @@ func (a *ShellOutArchiver) decompressionCmd(dst, tempFile string) *exec.Cmd { // Use -k to keep existing files (don't overwrite) // This is supported on both GNU tar (Linux) and BSD tar (macOS) - // and handles the case where mise pre-populates Go modules before cache restore args = append(args, "-k") return exec.Command("tar", args...) From 8fd6c5add3c7619febcd18bf7610b58746b127b9 Mon Sep 17 00:00:00 2001 From: Corey Christous Date: Wed, 28 Jan 2026 23:10:01 -0500 Subject: [PATCH 3/6] Add --ignore-collisions flag to cache restore This adds an opt-in flag to silently ignore file collisions during cache restore. When enabled, existing files are preserved rather than overwritten, and no error is raised. The implementation handles platform differences: - GNU tar: uses --skip-old-files (silent, exit 0) - BSD tar: uses -k (silent, exit 0) - Native archiver: checks file existence and skips --- cache-cli/cmd/restore.go | 7 ++++- cache-cli/pkg/archive/archiver.go | 14 +++++++-- cache-cli/pkg/archive/archiver_test.go | 20 +++++++++---- cache-cli/pkg/archive/native_archiver.go | 27 ++++++++++++++++- cache-cli/pkg/archive/shell_out_archiver.go | 33 +++++++++++++++++++-- 5 files changed, 87 insertions(+), 14 deletions(-) diff --git a/cache-cli/cmd/restore.go b/cache-cli/cmd/restore.go index 037505e5..77899100 100644 --- a/cache-cli/cmd/restore.go +++ b/cache-cli/cmd/restore.go @@ -16,6 +16,8 @@ import ( "github.com/spf13/cobra" ) +var ignoreCollisions bool + var restoreCmd = &cobra.Command{ Use: "restore [keys]", Short: "Restore keys from the cache.", @@ -39,7 +41,9 @@ func RunRestore(cmd *cobra.Command, args []string) { metricsManager, err := metrics.InitMetricsManager(metrics.LocalBackend) utils.Check(err) - archiver := archive.NewArchiver(metricsManager) + archiver := archive.NewArchiverWithOptions(metricsManager, archive.ArchiverOptions{ + SkipExisting: ignoreCollisions, + }) if len(args) == 0 { lookupResults := files.Lookup(files.LookupOptions{ @@ -165,5 +169,6 @@ func publishMetrics(metricsManager metrics.MetricsManager, fileInfo fs.FileInfo, } func init() { + restoreCmd.Flags().BoolVar(&ignoreCollisions, "ignore-collisions", false, "Silently ignore file collisions, keeping existing files") RootCmd.AddCommand(restoreCmd) } diff --git a/cache-cli/pkg/archive/archiver.go b/cache-cli/pkg/archive/archiver.go index 197cf316..9c1e6afd 100644 --- a/cache-cli/pkg/archive/archiver.go +++ b/cache-cli/pkg/archive/archiver.go @@ -11,14 +11,22 @@ type Archiver interface { Decompress(src string) (string, error) } +type ArchiverOptions struct { + SkipExisting bool +} + func NewArchiver(metricsManager metrics.MetricsManager) Archiver { + return NewArchiverWithOptions(metricsManager, ArchiverOptions{}) +} + +func NewArchiverWithOptions(metricsManager metrics.MetricsManager, opts ArchiverOptions) Archiver { method := os.Getenv("SEMAPHORE_CACHE_ARCHIVE_METHOD") switch method { case "native": - return NewNativeArchiver(metricsManager, false) + return NewNativeArchiverWithOptions(metricsManager, false, opts) case "native-parallel": - return NewNativeArchiver(metricsManager, true) + return NewNativeArchiverWithOptions(metricsManager, true, opts) default: - return NewShellOutArchiver(metricsManager) + return NewShellOutArchiverWithOptions(metricsManager, opts) } } diff --git a/cache-cli/pkg/archive/archiver_test.go b/cache-cli/pkg/archive/archiver_test.go index e096c8a9..9120320c 100644 --- a/cache-cli/pkg/archive/archiver_test.go +++ b/cache-cli/pkg/archive/archiver_test.go @@ -275,11 +275,6 @@ func Test__Compress(t *testing.T) { }) t.Run(archiverType+" skips existing files without error", func(t *testing.T) { - // This test only applies to shell-out archiver which uses tar -k flag - if archiverType != "shell-out" { - t.Skip("Test only applies to shell-out archiver") - } - cwd, _ := os.Getwd() tempDir, _ := ioutil.TempDir(cwd, "*") tempFile1, _ := ioutil.TempFile(tempDir, "*") @@ -303,8 +298,21 @@ func Test__Compress(t *testing.T) { // Delete only tempFile2, keep tempFile1 to simulate existing file assert.NoError(t, os.Remove(tempFile2.Name())) + // Create an archiver with SkipExisting enabled for decompression + metricsManager := metrics.NewNoOpMetricsManager() + opts := ArchiverOptions{SkipExisting: true} + var skipArchiver Archiver + switch archiverType { + case "shell-out": + skipArchiver = NewShellOutArchiverWithOptions(metricsManager, opts) + case "native": + skipArchiver = NewNativeArchiverWithOptions(metricsManager, false, opts) + case "native-parallel": + skipArchiver = NewNativeArchiverWithOptions(metricsManager, true, opts) + } + // Decompress - should skip tempFile1 (already exists) and restore tempFile2 - unpackedAt, err := archiver.Decompress(compressedFileName) + unpackedAt, err := skipArchiver.Decompress(compressedFileName) assert.NoError(t, err) assert.Equal(t, tempDirBase+string(os.PathSeparator), unpackedAt) diff --git a/cache-cli/pkg/archive/native_archiver.go b/cache-cli/pkg/archive/native_archiver.go index c380dce8..740943b7 100644 --- a/cache-cli/pkg/archive/native_archiver.go +++ b/cache-cli/pkg/archive/native_archiver.go @@ -19,6 +19,7 @@ import ( type NativeArchiver struct { MetricsManager metrics.MetricsManager UseParallelism bool + SkipExisting bool } func NewNativeArchiver(metricsManager metrics.MetricsManager, useParallelism bool) *NativeArchiver { @@ -28,6 +29,14 @@ func NewNativeArchiver(metricsManager metrics.MetricsManager, useParallelism boo } } +func NewNativeArchiverWithOptions(metricsManager metrics.MetricsManager, useParallelism bool, opts ArchiverOptions) *NativeArchiver { + return &NativeArchiver{ + MetricsManager: metricsManager, + UseParallelism: useParallelism, + SkipExisting: opts.SkipExisting, + } +} + func (a *NativeArchiver) Compress(dst, src string) error { if _, err := os.Stat(src); err != nil { return fmt.Errorf("error finding '%s': %v", src, err) @@ -206,6 +215,16 @@ func (a *NativeArchiver) Decompress(src string) (string, error) { continue } + // nil outFile means the file should be skipped (e.g., SkipExisting is enabled) + if outFile == nil { + // Discard the file contents from the tar reader + if _, err := io.Copy(io.Discard, tarReader); err != nil { + log.Errorf("Error skipping file '%s': %v", header.Name, err) + hadError = true + } + continue + } + // #nosec _, err = io.Copy(outFile, tarReader) if err != nil { @@ -243,6 +262,8 @@ func (a *NativeArchiver) Decompress(src string) (string, error) { return restorationPath, nil } +// openFile opens a file for writing during decompression. +// Returns (nil, nil) if the file should be skipped (e.g., when SkipExisting is true and file exists). func (a *NativeArchiver) openFile(header *tar.Header, tarReader *tar.Reader) (*os.File, error) { outFile, err := os.OpenFile(header.Name, os.O_RDWR|os.O_CREATE|os.O_EXCL, header.FileInfo().Mode()) @@ -252,8 +273,12 @@ func (a *NativeArchiver) openFile(header *tar.Header, tarReader *tar.Reader) (*o } // Since we are using O_EXCL, this error could mean that the file already exists. - // If that is the case, we attempt to remove it before attempting to open it again. if errors.Is(err, os.ErrExist) { + // If SkipExisting is enabled, skip this file silently. + if a.SkipExisting { + return nil, nil + } + // Otherwise, attempt to remove it before opening again. if err := os.Remove(header.Name); err != nil { return nil, fmt.Errorf("file '%s' already exists and can't be removed: %v", header.Name, err) } diff --git a/cache-cli/pkg/archive/shell_out_archiver.go b/cache-cli/pkg/archive/shell_out_archiver.go index e90d2e5c..de8158aa 100644 --- a/cache-cli/pkg/archive/shell_out_archiver.go +++ b/cache-cli/pkg/archive/shell_out_archiver.go @@ -8,6 +8,7 @@ import ( "os" "os/exec" "path/filepath" + "strings" "github.com/semaphoreci/toolbox/cache-cli/pkg/metrics" log "github.com/sirupsen/logrus" @@ -15,12 +16,20 @@ import ( type ShellOutArchiver struct { metricsManager metrics.MetricsManager + skipExisting bool } func NewShellOutArchiver(metricsManager metrics.MetricsManager) *ShellOutArchiver { return &ShellOutArchiver{metricsManager: metricsManager} } +func NewShellOutArchiverWithOptions(metricsManager metrics.MetricsManager, opts ArchiverOptions) *ShellOutArchiver { + return &ShellOutArchiver{ + metricsManager: metricsManager, + skipExisting: opts.SkipExisting, + } +} + func (a *ShellOutArchiver) Compress(dst, src string) error { if _, err := os.Stat(src); err != nil { return fmt.Errorf("error finding '%s': %v", src, err) @@ -75,13 +84,31 @@ func (a *ShellOutArchiver) decompressionCmd(dst, tempFile string) *exec.Cmd { args = append(args, "xzf", tempFile, "-C", ".") } - // Use -k to keep existing files (don't overwrite) - // This is supported on both GNU tar (Linux) and BSD tar (macOS) - args = append(args, "-k") + // When skipExisting is enabled, skip existing files without overwriting. + // GNU tar uses --skip-old-files (silent, exit 0). + // BSD tar uses -k (silent, exit 0). + if a.skipExisting { + if isGNUTar() { + args = append(args, "--skip-old-files") + } else { + args = append(args, "-k") + } + } return exec.Command("tar", args...) } +// isGNUTar returns true if the system tar is GNU tar. +// GNU tar includes "GNU tar" in its --version output. +func isGNUTar() bool { + cmd := exec.Command("tar", "--version") + output, err := cmd.Output() + if err != nil { + return false + } + return strings.Contains(string(output), "GNU tar") +} + func (a *ShellOutArchiver) findRestorationPath(src string) (string, error) { // #nosec file, err := os.Open(src) From 64a77715cec289016174a7c5e3269890945bd32a Mon Sep 17 00:00:00 2001 From: Corey Christous Date: Wed, 28 Jan 2026 23:19:23 -0500 Subject: [PATCH 4/6] Address code review feedback MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Cache isGNUTar() result using sync.Once to avoid repeated subprocess calls - Align internal naming: SkipExisting → IgnoreCollisions - Add assertion for restored file content in test Co-Authored-By: Claude --- cache-cli/cmd/restore.go | 2 +- cache-cli/pkg/archive/archiver.go | 2 +- cache-cli/pkg/archive/archiver_test.go | 9 +++--- cache-cli/pkg/archive/native_archiver.go | 20 ++++++------ cache-cli/pkg/archive/shell_out_archiver.go | 35 ++++++++++++++------- 5 files changed, 40 insertions(+), 28 deletions(-) diff --git a/cache-cli/cmd/restore.go b/cache-cli/cmd/restore.go index 77899100..06d25351 100644 --- a/cache-cli/cmd/restore.go +++ b/cache-cli/cmd/restore.go @@ -42,7 +42,7 @@ func RunRestore(cmd *cobra.Command, args []string) { utils.Check(err) archiver := archive.NewArchiverWithOptions(metricsManager, archive.ArchiverOptions{ - SkipExisting: ignoreCollisions, + IgnoreCollisions: ignoreCollisions, }) if len(args) == 0 { diff --git a/cache-cli/pkg/archive/archiver.go b/cache-cli/pkg/archive/archiver.go index 9c1e6afd..1c87e573 100644 --- a/cache-cli/pkg/archive/archiver.go +++ b/cache-cli/pkg/archive/archiver.go @@ -12,7 +12,7 @@ type Archiver interface { } type ArchiverOptions struct { - SkipExisting bool + IgnoreCollisions bool } func NewArchiver(metricsManager metrics.MetricsManager) Archiver { diff --git a/cache-cli/pkg/archive/archiver_test.go b/cache-cli/pkg/archive/archiver_test.go index 9120320c..96785cf5 100644 --- a/cache-cli/pkg/archive/archiver_test.go +++ b/cache-cli/pkg/archive/archiver_test.go @@ -298,9 +298,9 @@ func Test__Compress(t *testing.T) { // Delete only tempFile2, keep tempFile1 to simulate existing file assert.NoError(t, os.Remove(tempFile2.Name())) - // Create an archiver with SkipExisting enabled for decompression + // Create an archiver with IgnoreCollisions enabled for decompression metricsManager := metrics.NewNoOpMetricsManager() - opts := ArchiverOptions{SkipExisting: true} + opts := ArchiverOptions{IgnoreCollisions: true} var skipArchiver Archiver switch archiverType { case "shell-out": @@ -321,9 +321,10 @@ func Test__Compress(t *testing.T) { assert.NoError(t, err) assert.Equal(t, originalContent, content1) - // Verify tempFile2 was restored - _, err = os.Stat(tempFile2.Name()) + // Verify tempFile2 was restored with correct content + content2, err := ioutil.ReadFile(tempFile2.Name()) assert.NoError(t, err) + assert.Equal(t, cachedContent, content2) assert.NoError(t, os.RemoveAll(tempDirBase)) assert.NoError(t, os.Remove(compressedFileName)) diff --git a/cache-cli/pkg/archive/native_archiver.go b/cache-cli/pkg/archive/native_archiver.go index 740943b7..685abbfc 100644 --- a/cache-cli/pkg/archive/native_archiver.go +++ b/cache-cli/pkg/archive/native_archiver.go @@ -17,9 +17,9 @@ import ( ) type NativeArchiver struct { - MetricsManager metrics.MetricsManager - UseParallelism bool - SkipExisting bool + MetricsManager metrics.MetricsManager + UseParallelism bool + IgnoreCollisions bool } func NewNativeArchiver(metricsManager metrics.MetricsManager, useParallelism bool) *NativeArchiver { @@ -31,9 +31,9 @@ func NewNativeArchiver(metricsManager metrics.MetricsManager, useParallelism boo func NewNativeArchiverWithOptions(metricsManager metrics.MetricsManager, useParallelism bool, opts ArchiverOptions) *NativeArchiver { return &NativeArchiver{ - MetricsManager: metricsManager, - UseParallelism: useParallelism, - SkipExisting: opts.SkipExisting, + MetricsManager: metricsManager, + UseParallelism: useParallelism, + IgnoreCollisions: opts.IgnoreCollisions, } } @@ -215,7 +215,7 @@ func (a *NativeArchiver) Decompress(src string) (string, error) { continue } - // nil outFile means the file should be skipped (e.g., SkipExisting is enabled) + // nil outFile means the file should be skipped (e.g., IgnoreCollisions is enabled) if outFile == nil { // Discard the file contents from the tar reader if _, err := io.Copy(io.Discard, tarReader); err != nil { @@ -263,7 +263,7 @@ func (a *NativeArchiver) Decompress(src string) (string, error) { } // openFile opens a file for writing during decompression. -// Returns (nil, nil) if the file should be skipped (e.g., when SkipExisting is true and file exists). +// Returns (nil, nil) if the file should be skipped (e.g., when IgnoreCollisions is true and file exists). func (a *NativeArchiver) openFile(header *tar.Header, tarReader *tar.Reader) (*os.File, error) { outFile, err := os.OpenFile(header.Name, os.O_RDWR|os.O_CREATE|os.O_EXCL, header.FileInfo().Mode()) @@ -274,8 +274,8 @@ func (a *NativeArchiver) openFile(header *tar.Header, tarReader *tar.Reader) (*o // Since we are using O_EXCL, this error could mean that the file already exists. if errors.Is(err, os.ErrExist) { - // If SkipExisting is enabled, skip this file silently. - if a.SkipExisting { + // If IgnoreCollisions is enabled, skip this file silently. + if a.IgnoreCollisions { return nil, nil } // Otherwise, attempt to remove it before opening again. diff --git a/cache-cli/pkg/archive/shell_out_archiver.go b/cache-cli/pkg/archive/shell_out_archiver.go index de8158aa..bd246a84 100644 --- a/cache-cli/pkg/archive/shell_out_archiver.go +++ b/cache-cli/pkg/archive/shell_out_archiver.go @@ -9,14 +9,15 @@ import ( "os/exec" "path/filepath" "strings" + "sync" "github.com/semaphoreci/toolbox/cache-cli/pkg/metrics" log "github.com/sirupsen/logrus" ) type ShellOutArchiver struct { - metricsManager metrics.MetricsManager - skipExisting bool + metricsManager metrics.MetricsManager + ignoreCollisions bool } func NewShellOutArchiver(metricsManager metrics.MetricsManager) *ShellOutArchiver { @@ -25,8 +26,8 @@ func NewShellOutArchiver(metricsManager metrics.MetricsManager) *ShellOutArchive func NewShellOutArchiverWithOptions(metricsManager metrics.MetricsManager, opts ArchiverOptions) *ShellOutArchiver { return &ShellOutArchiver{ - metricsManager: metricsManager, - skipExisting: opts.SkipExisting, + metricsManager: metricsManager, + ignoreCollisions: opts.IgnoreCollisions, } } @@ -84,10 +85,10 @@ func (a *ShellOutArchiver) decompressionCmd(dst, tempFile string) *exec.Cmd { args = append(args, "xzf", tempFile, "-C", ".") } - // When skipExisting is enabled, skip existing files without overwriting. + // When ignoreCollisions is enabled, skip existing files without overwriting. // GNU tar uses --skip-old-files (silent, exit 0). // BSD tar uses -k (silent, exit 0). - if a.skipExisting { + if a.ignoreCollisions { if isGNUTar() { args = append(args, "--skip-old-files") } else { @@ -98,15 +99,25 @@ func (a *ShellOutArchiver) decompressionCmd(dst, tempFile string) *exec.Cmd { return exec.Command("tar", args...) } +var ( + gnuTarOnce sync.Once + gnuTarCached bool +) + // isGNUTar returns true if the system tar is GNU tar. // GNU tar includes "GNU tar" in its --version output. +// The result is cached to avoid repeated subprocess calls. func isGNUTar() bool { - cmd := exec.Command("tar", "--version") - output, err := cmd.Output() - if err != nil { - return false - } - return strings.Contains(string(output), "GNU tar") + gnuTarOnce.Do(func() { + cmd := exec.Command("tar", "--version") + output, err := cmd.Output() + if err != nil { + gnuTarCached = false + return + } + gnuTarCached = strings.Contains(string(output), "GNU tar") + }) + return gnuTarCached } func (a *ShellOutArchiver) findRestorationPath(src string) (string, error) { From 1c4cbeb9c06c7a516f234d4453fee4bbcab1bf7f Mon Sep 17 00:00:00 2001 From: Corey Christous Date: Fri, 6 Mar 2026 23:55:22 -0500 Subject: [PATCH 5/6] Address code review feedback - Add // #nosec to io.Copy(io.Discard, ...) in native_archiver.go - Hardcode exec.Command args in decompressionCmd to satisfy gosec CWE 78 --- cache-cli/pkg/archive/native_archiver.go | 1 + cache-cli/pkg/archive/shell_out_archiver.go | 23 +++++++++------------ 2 files changed, 11 insertions(+), 13 deletions(-) diff --git a/cache-cli/pkg/archive/native_archiver.go b/cache-cli/pkg/archive/native_archiver.go index 685abbfc..a665adea 100644 --- a/cache-cli/pkg/archive/native_archiver.go +++ b/cache-cli/pkg/archive/native_archiver.go @@ -218,6 +218,7 @@ func (a *NativeArchiver) Decompress(src string) (string, error) { // nil outFile means the file should be skipped (e.g., IgnoreCollisions is enabled) if outFile == nil { // Discard the file contents from the tar reader + // #nosec if _, err := io.Copy(io.Discard, tarReader); err != nil { log.Errorf("Error skipping file '%s': %v", header.Name, err) hadError = true diff --git a/cache-cli/pkg/archive/shell_out_archiver.go b/cache-cli/pkg/archive/shell_out_archiver.go index bd246a84..f211a150 100644 --- a/cache-cli/pkg/archive/shell_out_archiver.go +++ b/cache-cli/pkg/archive/shell_out_archiver.go @@ -77,26 +77,23 @@ func (a *ShellOutArchiver) compressionCommand(dst, src string) *exec.Cmd { } func (a *ShellOutArchiver) decompressionCmd(dst, tempFile string) *exec.Cmd { - args := []string{} - if filepath.IsAbs(dst) { - args = append(args, "xzPf", tempFile, "-C", ".") - } else { - args = append(args, "xzf", tempFile, "-C", ".") + if a.ignoreCollisions { + if isGNUTar() { + return exec.Command("tar", "xzPf", tempFile, "-C", ".", "--skip-old-files") + } + return exec.Command("tar", "xzPf", tempFile, "-C", ".", "-k") + } + return exec.Command("tar", "xzPf", tempFile, "-C", ".") } - // When ignoreCollisions is enabled, skip existing files without overwriting. - // GNU tar uses --skip-old-files (silent, exit 0). - // BSD tar uses -k (silent, exit 0). if a.ignoreCollisions { if isGNUTar() { - args = append(args, "--skip-old-files") - } else { - args = append(args, "-k") + return exec.Command("tar", "xzf", tempFile, "-C", ".", "--skip-old-files") } + return exec.Command("tar", "xzf", tempFile, "-C", ".", "-k") } - - return exec.Command("tar", args...) + return exec.Command("tar", "xzf", tempFile, "-C", ".") } var ( From ff4e873e9410209d75aeb976ab5ae4aa54987e05 Mon Sep 17 00:00:00 2001 From: Corey Christous Date: Sat, 7 Mar 2026 00:03:35 -0500 Subject: [PATCH 6/6] Address PR review findings - Fix symlinks being overwritten when IgnoreCollisions is enabled in NativeArchiver - Add warning log when isGNUTar() fails to detect tar version - Add doc comment to decompressionCmd explaining GNU/BSD tar behavior difference - Improve openFile doc comment to reflect skip behavior - Improve error message when draining tar entry for skipped files - Add doc comment to ArchiverOptions struct - Add test for default overwrite behavior (IgnoreCollisions=false) - Add test for symlink collision skipping with IgnoreCollisions --- cache-cli/pkg/archive/archiver.go | 2 + cache-cli/pkg/archive/archiver_test.go | 82 +++++++++++++++++++++ cache-cli/pkg/archive/native_archiver.go | 17 +++-- cache-cli/pkg/archive/shell_out_archiver.go | 5 ++ 4 files changed, 100 insertions(+), 6 deletions(-) diff --git a/cache-cli/pkg/archive/archiver.go b/cache-cli/pkg/archive/archiver.go index 1c87e573..eca1aa23 100644 --- a/cache-cli/pkg/archive/archiver.go +++ b/cache-cli/pkg/archive/archiver.go @@ -11,7 +11,9 @@ type Archiver interface { Decompress(src string) (string, error) } +// ArchiverOptions configures optional behaviors for archive decompression. type ArchiverOptions struct { + // IgnoreCollisions skips extracting files that already exist on disk. IgnoreCollisions bool } diff --git a/cache-cli/pkg/archive/archiver_test.go b/cache-cli/pkg/archive/archiver_test.go index 96785cf5..e23c4d44 100644 --- a/cache-cli/pkg/archive/archiver_test.go +++ b/cache-cli/pkg/archive/archiver_test.go @@ -274,6 +274,88 @@ func Test__Compress(t *testing.T) { assert.NoError(t, os.Remove(compressedFileName)) }) + t.Run(archiverType+" overwrites existing files by default", func(t *testing.T) { + cwd, _ := os.Getwd() + tempDir, _ := ioutil.TempDir(cwd, "*") + tempFile1, _ := ioutil.TempFile(tempDir, "*") + + originalContent := []byte("original content") + cachedContent := []byte("cached content") + _, _ = tempFile1.Write(cachedContent) + _ = tempFile1.Close() + + tempDirBase := filepath.Base(tempDir) + + // Create archive with cached content + compressedFileName := tmpFileNameWithPrefix("abc0009") + err := archiver.Compress(compressedFileName, tempDirBase) + assert.NoError(t, err) + + // Overwrite with different content to simulate existing file + assert.NoError(t, ioutil.WriteFile(tempFile1.Name(), originalContent, 0600)) + + // Decompress with default archiver (no IgnoreCollisions) - should overwrite + _, err = archiver.Decompress(compressedFileName) + assert.NoError(t, err) + + // Verify file was overwritten with cached content + content, err := ioutil.ReadFile(tempFile1.Name()) + assert.NoError(t, err) + assert.Equal(t, cachedContent, content) + + assert.NoError(t, os.RemoveAll(tempDirBase)) + assert.NoError(t, os.Remove(compressedFileName)) + }) + + t.Run(archiverType+" skips existing symlinks with ignore collisions", func(t *testing.T) { + if archiverType == "shell-out" { + t.Skip("shell-out archiver delegates symlink handling to tar") + } + + cwd, _ := os.Getwd() + tempDir, _ := ioutil.TempDir(cwd, "*") + tempFile1, _ := ioutil.TempFile(tempDir, "*") + _ = tempFile1.Close() + + symlinkName := tempFile1.Name() + "-link" + assert.NoError(t, os.Symlink(tempFile1.Name(), symlinkName)) + + tempDirBase := filepath.Base(tempDir) + + // Create archive containing the symlink + compressedFileName := tmpFileNameWithPrefix("abc0010") + err := archiver.Compress(compressedFileName, tempDirBase) + assert.NoError(t, err) + + // Change the symlink target to a different file + altTarget := tempFile1.Name() + "-alt" + assert.NoError(t, ioutil.WriteFile(altTarget, []byte("alt"), 0600)) + assert.NoError(t, os.Remove(symlinkName)) + assert.NoError(t, os.Symlink(altTarget, symlinkName)) + + // Decompress with IgnoreCollisions - symlink should not be overwritten + metricsManager := metrics.NewNoOpMetricsManager() + opts := ArchiverOptions{IgnoreCollisions: true} + var skipArchiver Archiver + switch archiverType { + case "native": + skipArchiver = NewNativeArchiverWithOptions(metricsManager, false, opts) + case "native-parallel": + skipArchiver = NewNativeArchiverWithOptions(metricsManager, true, opts) + } + + _, err = skipArchiver.Decompress(compressedFileName) + assert.NoError(t, err) + + // Verify symlink still points to the alt target (was not overwritten) + target, err := os.Readlink(symlinkName) + assert.NoError(t, err) + assert.Equal(t, altTarget, target) + + assert.NoError(t, os.RemoveAll(tempDirBase)) + assert.NoError(t, os.Remove(compressedFileName)) + }) + t.Run(archiverType+" skips existing files without error", func(t *testing.T) { cwd, _ := os.Getwd() tempDir, _ := ioutil.TempDir(cwd, "*") diff --git a/cache-cli/pkg/archive/native_archiver.go b/cache-cli/pkg/archive/native_archiver.go index a665adea..7f331b1a 100644 --- a/cache-cli/pkg/archive/native_archiver.go +++ b/cache-cli/pkg/archive/native_archiver.go @@ -195,9 +195,12 @@ func (a *NativeArchiver) Decompress(src string) (string, error) { } case tar.TypeSymlink: - // we have to remove the symlink first, if it exists. - // Otherwise os.Symlink will complain. + // If the symlink already exists, either skip it (IgnoreCollisions) + // or remove it before recreating (os.Symlink requires no existing file). if _, err := os.Lstat(header.Name); err == nil { + if a.IgnoreCollisions { + continue + } _ = os.Remove(header.Name) } @@ -217,10 +220,11 @@ func (a *NativeArchiver) Decompress(src string) (string, error) { // nil outFile means the file should be skipped (e.g., IgnoreCollisions is enabled) if outFile == nil { - // Discard the file contents from the tar reader + // The tar reader is sequential; we must consume this entry's bytes + // before advancing to the next header. // #nosec if _, err := io.Copy(io.Discard, tarReader); err != nil { - log.Errorf("Error skipping file '%s': %v", header.Name, err) + log.Errorf("Error draining tar entry for '%s' (skipped due to existing file): %v", header.Name, err) hadError = true } continue @@ -263,8 +267,9 @@ func (a *NativeArchiver) Decompress(src string) (string, error) { return restorationPath, nil } -// openFile opens a file for writing during decompression. -// Returns (nil, nil) if the file should be skipped (e.g., when IgnoreCollisions is true and file exists). +// openFile attempts to open a file for writing during decompression, or signals +// that the file should be skipped by returning (nil, nil) when IgnoreCollisions +// is true and the file already exists. func (a *NativeArchiver) openFile(header *tar.Header, tarReader *tar.Reader) (*os.File, error) { outFile, err := os.OpenFile(header.Name, os.O_RDWR|os.O_CREATE|os.O_EXCL, header.FileInfo().Mode()) diff --git a/cache-cli/pkg/archive/shell_out_archiver.go b/cache-cli/pkg/archive/shell_out_archiver.go index f211a150..efa440ff 100644 --- a/cache-cli/pkg/archive/shell_out_archiver.go +++ b/cache-cli/pkg/archive/shell_out_archiver.go @@ -76,6 +76,9 @@ func (a *ShellOutArchiver) compressionCommand(dst, src string) *exec.Cmd { return exec.Command("tar", "czf", dst, src) } +// decompressionCmd builds the tar extraction command. +// When ignoreCollisions is enabled, GNU tar uses --skip-old-files (silently skips, exit 0), +// while BSD tar uses -k (skips but may return non-zero on some systems). func (a *ShellOutArchiver) decompressionCmd(dst, tempFile string) *exec.Cmd { if filepath.IsAbs(dst) { if a.ignoreCollisions { @@ -104,11 +107,13 @@ var ( // isGNUTar returns true if the system tar is GNU tar. // GNU tar includes "GNU tar" in its --version output. // The result is cached to avoid repeated subprocess calls. +// If tar --version fails, it defaults to false (assumes BSD tar). func isGNUTar() bool { gnuTarOnce.Do(func() { cmd := exec.Command("tar", "--version") output, err := cmd.Output() if err != nil { + log.Warnf("Could not determine tar version, assuming BSD tar: %v", err) gnuTarCached = false return }