diff --git a/cache-cli/cmd/restore.go b/cache-cli/cmd/restore.go index 037505e5..06d25351 100644 --- a/cache-cli/cmd/restore.go +++ b/cache-cli/cmd/restore.go @@ -16,6 +16,8 @@ import ( "github.com/spf13/cobra" ) +var ignoreCollisions bool + var restoreCmd = &cobra.Command{ Use: "restore [keys]", Short: "Restore keys from the cache.", @@ -39,7 +41,9 @@ func RunRestore(cmd *cobra.Command, args []string) { metricsManager, err := metrics.InitMetricsManager(metrics.LocalBackend) utils.Check(err) - archiver := archive.NewArchiver(metricsManager) + archiver := archive.NewArchiverWithOptions(metricsManager, archive.ArchiverOptions{ + IgnoreCollisions: ignoreCollisions, + }) if len(args) == 0 { lookupResults := files.Lookup(files.LookupOptions{ @@ -165,5 +169,6 @@ func publishMetrics(metricsManager metrics.MetricsManager, fileInfo fs.FileInfo, } func init() { + restoreCmd.Flags().BoolVar(&ignoreCollisions, "ignore-collisions", false, "Silently ignore file collisions, keeping existing files") RootCmd.AddCommand(restoreCmd) } diff --git a/cache-cli/pkg/archive/archiver.go b/cache-cli/pkg/archive/archiver.go index 197cf316..eca1aa23 100644 --- a/cache-cli/pkg/archive/archiver.go +++ b/cache-cli/pkg/archive/archiver.go @@ -11,14 +11,24 @@ type Archiver interface { Decompress(src string) (string, error) } +// ArchiverOptions configures optional behaviors for archive decompression. +type ArchiverOptions struct { + // IgnoreCollisions skips extracting files that already exist on disk. + IgnoreCollisions bool +} + func NewArchiver(metricsManager metrics.MetricsManager) Archiver { + return NewArchiverWithOptions(metricsManager, ArchiverOptions{}) +} + +func NewArchiverWithOptions(metricsManager metrics.MetricsManager, opts ArchiverOptions) Archiver { method := os.Getenv("SEMAPHORE_CACHE_ARCHIVE_METHOD") switch method { case "native": - return NewNativeArchiver(metricsManager, false) + return NewNativeArchiverWithOptions(metricsManager, false, opts) case "native-parallel": - return NewNativeArchiver(metricsManager, true) + return NewNativeArchiverWithOptions(metricsManager, true, opts) default: - return NewShellOutArchiver(metricsManager) + return NewShellOutArchiverWithOptions(metricsManager, opts) } } diff --git a/cache-cli/pkg/archive/archiver_test.go b/cache-cli/pkg/archive/archiver_test.go index 8714de06..e23c4d44 100644 --- a/cache-cli/pkg/archive/archiver_test.go +++ b/cache-cli/pkg/archive/archiver_test.go @@ -273,6 +273,144 @@ func Test__Compress(t *testing.T) { assert.NoError(t, os.Remove(tempFile.Name())) assert.NoError(t, os.Remove(compressedFileName)) }) + + t.Run(archiverType+" overwrites existing files by default", func(t *testing.T) { + cwd, _ := os.Getwd() + tempDir, _ := ioutil.TempDir(cwd, "*") + tempFile1, _ := ioutil.TempFile(tempDir, "*") + + originalContent := []byte("original content") + cachedContent := []byte("cached content") + _, _ = tempFile1.Write(cachedContent) + _ = tempFile1.Close() + + tempDirBase := filepath.Base(tempDir) + + // Create archive with cached content + compressedFileName := tmpFileNameWithPrefix("abc0009") + err := archiver.Compress(compressedFileName, tempDirBase) + assert.NoError(t, err) + + // Overwrite with different content to simulate existing file + assert.NoError(t, ioutil.WriteFile(tempFile1.Name(), originalContent, 0600)) + + // Decompress with default archiver (no IgnoreCollisions) - should overwrite + _, err = archiver.Decompress(compressedFileName) + assert.NoError(t, err) + + // Verify file was overwritten with cached content + content, err := ioutil.ReadFile(tempFile1.Name()) + assert.NoError(t, err) + assert.Equal(t, cachedContent, content) + + assert.NoError(t, os.RemoveAll(tempDirBase)) + assert.NoError(t, os.Remove(compressedFileName)) + }) + + t.Run(archiverType+" skips existing symlinks with ignore collisions", func(t *testing.T) { + if archiverType == "shell-out" { + t.Skip("shell-out archiver delegates symlink handling to tar") + } + + cwd, _ := os.Getwd() + tempDir, _ := ioutil.TempDir(cwd, "*") + tempFile1, _ := ioutil.TempFile(tempDir, "*") + _ = tempFile1.Close() + + symlinkName := tempFile1.Name() + "-link" + assert.NoError(t, os.Symlink(tempFile1.Name(), symlinkName)) + + tempDirBase := filepath.Base(tempDir) + + // Create archive containing the symlink + compressedFileName := tmpFileNameWithPrefix("abc0010") + err := archiver.Compress(compressedFileName, tempDirBase) + assert.NoError(t, err) + + // Change the symlink target to a different file + altTarget := tempFile1.Name() + "-alt" + assert.NoError(t, ioutil.WriteFile(altTarget, []byte("alt"), 0600)) + assert.NoError(t, os.Remove(symlinkName)) + assert.NoError(t, os.Symlink(altTarget, symlinkName)) + + // Decompress with IgnoreCollisions - symlink should not be overwritten + metricsManager := metrics.NewNoOpMetricsManager() + opts := ArchiverOptions{IgnoreCollisions: true} + var skipArchiver Archiver + switch archiverType { + case "native": + skipArchiver = NewNativeArchiverWithOptions(metricsManager, false, opts) + case "native-parallel": + skipArchiver = NewNativeArchiverWithOptions(metricsManager, true, opts) + } + + _, err = skipArchiver.Decompress(compressedFileName) + assert.NoError(t, err) + + // Verify symlink still points to the alt target (was not overwritten) + target, err := os.Readlink(symlinkName) + assert.NoError(t, err) + assert.Equal(t, altTarget, target) + + assert.NoError(t, os.RemoveAll(tempDirBase)) + assert.NoError(t, os.Remove(compressedFileName)) + }) + + t.Run(archiverType+" skips existing files without error", func(t *testing.T) { + cwd, _ := os.Getwd() + tempDir, _ := ioutil.TempDir(cwd, "*") + tempFile1, _ := ioutil.TempFile(tempDir, "*") + tempFile2, _ := ioutil.TempFile(tempDir, "*") + + // Write content to both files + originalContent := []byte("original content") + cachedContent := []byte("cached content") + _, _ = tempFile1.Write(originalContent) + _, _ = tempFile2.Write(cachedContent) + _ = tempFile1.Close() + _ = tempFile2.Close() + + tempDirBase := filepath.Base(tempDir) + + // Create archive with both files + compressedFileName := tmpFileNameWithPrefix("abc0008") + err := archiver.Compress(compressedFileName, tempDirBase) + assert.NoError(t, err) + + // Delete only tempFile2, keep tempFile1 to simulate existing file + assert.NoError(t, os.Remove(tempFile2.Name())) + + // Create an archiver with IgnoreCollisions enabled for decompression + metricsManager := metrics.NewNoOpMetricsManager() + opts := ArchiverOptions{IgnoreCollisions: true} + var skipArchiver Archiver + switch archiverType { + case "shell-out": + skipArchiver = NewShellOutArchiverWithOptions(metricsManager, opts) + case "native": + skipArchiver = NewNativeArchiverWithOptions(metricsManager, false, opts) + case "native-parallel": + skipArchiver = NewNativeArchiverWithOptions(metricsManager, true, opts) + } + + // Decompress - should skip tempFile1 (already exists) and restore tempFile2 + unpackedAt, err := skipArchiver.Decompress(compressedFileName) + assert.NoError(t, err) + assert.Equal(t, tempDirBase+string(os.PathSeparator), unpackedAt) + + // Verify tempFile1 still has original content (was not overwritten) + content1, err := ioutil.ReadFile(tempFile1.Name()) + assert.NoError(t, err) + assert.Equal(t, originalContent, content1) + + // Verify tempFile2 was restored with correct content + content2, err := ioutil.ReadFile(tempFile2.Name()) + assert.NoError(t, err) + assert.Equal(t, cachedContent, content2) + + assert.NoError(t, os.RemoveAll(tempDirBase)) + assert.NoError(t, os.Remove(compressedFileName)) + }) }) } diff --git a/cache-cli/pkg/archive/native_archiver.go b/cache-cli/pkg/archive/native_archiver.go index c380dce8..7f331b1a 100644 --- a/cache-cli/pkg/archive/native_archiver.go +++ b/cache-cli/pkg/archive/native_archiver.go @@ -17,8 +17,9 @@ import ( ) type NativeArchiver struct { - MetricsManager metrics.MetricsManager - UseParallelism bool + MetricsManager metrics.MetricsManager + UseParallelism bool + IgnoreCollisions bool } func NewNativeArchiver(metricsManager metrics.MetricsManager, useParallelism bool) *NativeArchiver { @@ -28,6 +29,14 @@ func NewNativeArchiver(metricsManager metrics.MetricsManager, useParallelism boo } } +func NewNativeArchiverWithOptions(metricsManager metrics.MetricsManager, useParallelism bool, opts ArchiverOptions) *NativeArchiver { + return &NativeArchiver{ + MetricsManager: metricsManager, + UseParallelism: useParallelism, + IgnoreCollisions: opts.IgnoreCollisions, + } +} + func (a *NativeArchiver) Compress(dst, src string) error { if _, err := os.Stat(src); err != nil { return fmt.Errorf("error finding '%s': %v", src, err) @@ -186,9 +195,12 @@ func (a *NativeArchiver) Decompress(src string) (string, error) { } case tar.TypeSymlink: - // we have to remove the symlink first, if it exists. - // Otherwise os.Symlink will complain. + // If the symlink already exists, either skip it (IgnoreCollisions) + // or remove it before recreating (os.Symlink requires no existing file). if _, err := os.Lstat(header.Name); err == nil { + if a.IgnoreCollisions { + continue + } _ = os.Remove(header.Name) } @@ -206,6 +218,18 @@ func (a *NativeArchiver) Decompress(src string) (string, error) { continue } + // nil outFile means the file should be skipped (e.g., IgnoreCollisions is enabled) + if outFile == nil { + // The tar reader is sequential; we must consume this entry's bytes + // before advancing to the next header. + // #nosec + if _, err := io.Copy(io.Discard, tarReader); err != nil { + log.Errorf("Error draining tar entry for '%s' (skipped due to existing file): %v", header.Name, err) + hadError = true + } + continue + } + // #nosec _, err = io.Copy(outFile, tarReader) if err != nil { @@ -243,6 +267,9 @@ func (a *NativeArchiver) Decompress(src string) (string, error) { return restorationPath, nil } +// openFile attempts to open a file for writing during decompression, or signals +// that the file should be skipped by returning (nil, nil) when IgnoreCollisions +// is true and the file already exists. func (a *NativeArchiver) openFile(header *tar.Header, tarReader *tar.Reader) (*os.File, error) { outFile, err := os.OpenFile(header.Name, os.O_RDWR|os.O_CREATE|os.O_EXCL, header.FileInfo().Mode()) @@ -252,8 +279,12 @@ func (a *NativeArchiver) openFile(header *tar.Header, tarReader *tar.Reader) (*o } // Since we are using O_EXCL, this error could mean that the file already exists. - // If that is the case, we attempt to remove it before attempting to open it again. if errors.Is(err, os.ErrExist) { + // If IgnoreCollisions is enabled, skip this file silently. + if a.IgnoreCollisions { + return nil, nil + } + // Otherwise, attempt to remove it before opening again. if err := os.Remove(header.Name); err != nil { return nil, fmt.Errorf("file '%s' already exists and can't be removed: %v", header.Name, err) } diff --git a/cache-cli/pkg/archive/shell_out_archiver.go b/cache-cli/pkg/archive/shell_out_archiver.go index 00c0e2e4..efa440ff 100644 --- a/cache-cli/pkg/archive/shell_out_archiver.go +++ b/cache-cli/pkg/archive/shell_out_archiver.go @@ -8,19 +8,29 @@ import ( "os" "os/exec" "path/filepath" + "strings" + "sync" "github.com/semaphoreci/toolbox/cache-cli/pkg/metrics" log "github.com/sirupsen/logrus" ) type ShellOutArchiver struct { - metricsManager metrics.MetricsManager + metricsManager metrics.MetricsManager + ignoreCollisions bool } func NewShellOutArchiver(metricsManager metrics.MetricsManager) *ShellOutArchiver { return &ShellOutArchiver{metricsManager: metricsManager} } +func NewShellOutArchiverWithOptions(metricsManager metrics.MetricsManager, opts ArchiverOptions) *ShellOutArchiver { + return &ShellOutArchiver{ + metricsManager: metricsManager, + ignoreCollisions: opts.IgnoreCollisions, + } +} + func (a *ShellOutArchiver) Compress(dst, src string) error { if _, err := os.Stat(src); err != nil { return fmt.Errorf("error finding '%s': %v", src, err) @@ -66,14 +76,52 @@ func (a *ShellOutArchiver) compressionCommand(dst, src string) *exec.Cmd { return exec.Command("tar", "czf", dst, src) } +// decompressionCmd builds the tar extraction command. +// When ignoreCollisions is enabled, GNU tar uses --skip-old-files (silently skips, exit 0), +// while BSD tar uses -k (skips but may return non-zero on some systems). func (a *ShellOutArchiver) decompressionCmd(dst, tempFile string) *exec.Cmd { if filepath.IsAbs(dst) { + if a.ignoreCollisions { + if isGNUTar() { + return exec.Command("tar", "xzPf", tempFile, "-C", ".", "--skip-old-files") + } + return exec.Command("tar", "xzPf", tempFile, "-C", ".", "-k") + } return exec.Command("tar", "xzPf", tempFile, "-C", ".") } + if a.ignoreCollisions { + if isGNUTar() { + return exec.Command("tar", "xzf", tempFile, "-C", ".", "--skip-old-files") + } + return exec.Command("tar", "xzf", tempFile, "-C", ".", "-k") + } return exec.Command("tar", "xzf", tempFile, "-C", ".") } +var ( + gnuTarOnce sync.Once + gnuTarCached bool +) + +// isGNUTar returns true if the system tar is GNU tar. +// GNU tar includes "GNU tar" in its --version output. +// The result is cached to avoid repeated subprocess calls. +// If tar --version fails, it defaults to false (assumes BSD tar). +func isGNUTar() bool { + gnuTarOnce.Do(func() { + cmd := exec.Command("tar", "--version") + output, err := cmd.Output() + if err != nil { + log.Warnf("Could not determine tar version, assuming BSD tar: %v", err) + gnuTarCached = false + return + } + gnuTarCached = strings.Contains(string(output), "GNU tar") + }) + return gnuTarCached +} + func (a *ShellOutArchiver) findRestorationPath(src string) (string, error) { // #nosec file, err := os.Open(src)