Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 6 additions & 1 deletion cache-cli/cmd/restore.go
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,8 @@ import (
"github.com/spf13/cobra"
)

// ignoreCollisions is bound to the restore command's --ignore-collisions flag.
// When set, files that already exist on disk are kept instead of being
// replaced by the cached copies.
var ignoreCollisions bool

var restoreCmd = &cobra.Command{
Use: "restore [keys]",
Short: "Restore keys from the cache.",
Expand All @@ -39,7 +41,9 @@ func RunRestore(cmd *cobra.Command, args []string) {
metricsManager, err := metrics.InitMetricsManager(metrics.LocalBackend)
utils.Check(err)

archiver := archive.NewArchiver(metricsManager)
archiver := archive.NewArchiverWithOptions(metricsManager, archive.ArchiverOptions{
IgnoreCollisions: ignoreCollisions,
})

if len(args) == 0 {
lookupResults := files.Lookup(files.LookupOptions{
Expand Down Expand Up @@ -165,5 +169,6 @@ func publishMetrics(metricsManager metrics.MetricsManager, fileInfo fs.FileInfo,
}

// init declares the --ignore-collisions flag on the restore command and
// attaches the command to the root command.
func init() {
	flags := restoreCmd.Flags()
	flags.BoolVar(
		&ignoreCollisions,
		"ignore-collisions",
		false,
		"Silently ignore file collisions, keeping existing files",
	)

	RootCmd.AddCommand(restoreCmd)
}
16 changes: 13 additions & 3 deletions cache-cli/pkg/archive/archiver.go
Original file line number Diff line number Diff line change
Expand Up @@ -11,14 +11,24 @@ type Archiver interface {
Decompress(src string) (string, error)
}

// ArchiverOptions configures optional behaviors for archive decompression.
// The zero value leaves every option disabled.
type ArchiverOptions struct {
	// IgnoreCollisions, when true, makes decompression keep files that already
	// exist on disk instead of extracting the archived copy over them.
	IgnoreCollisions bool
}

// NewArchiver builds an Archiver with every optional behavior turned off.
// It is equivalent to calling NewArchiverWithOptions with a zero-value
// ArchiverOptions.
func NewArchiver(metricsManager metrics.MetricsManager) Archiver {
	var defaults ArchiverOptions
	return NewArchiverWithOptions(metricsManager, defaults)
}

// NewArchiverWithOptions builds the Archiver implementation selected by the
// SEMAPHORE_CACHE_ARCHIVE_METHOD environment variable and configures it with
// opts:
//
//   - "native": native archiver with parallelism disabled
//   - "native-parallel": native archiver with parallelism enabled
//   - any other value (including unset): shell-out archiver
//
// Every branch forwards opts to the option-aware constructor so that settings
// such as IgnoreCollisions are honored regardless of the selected method.
func NewArchiverWithOptions(metricsManager metrics.MetricsManager, opts ArchiverOptions) Archiver {
	method := os.Getenv("SEMAPHORE_CACHE_ARCHIVE_METHOD")
	switch method {
	case "native":
		return NewNativeArchiverWithOptions(metricsManager, false, opts)
	case "native-parallel":
		return NewNativeArchiverWithOptions(metricsManager, true, opts)
	default:
		return NewShellOutArchiverWithOptions(metricsManager, opts)
	}
}
138 changes: 138 additions & 0 deletions cache-cli/pkg/archive/archiver_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -273,6 +273,144 @@ func Test__Compress(t *testing.T) {
assert.NoError(t, os.Remove(tempFile.Name()))
assert.NoError(t, os.Remove(compressedFileName))
})

t.Run(archiverType+" overwrites existing files by default", func(t *testing.T) {
cwd, _ := os.Getwd()
tempDir, _ := ioutil.TempDir(cwd, "*")
tempFile1, _ := ioutil.TempFile(tempDir, "*")

originalContent := []byte("original content")
cachedContent := []byte("cached content")
_, _ = tempFile1.Write(cachedContent)
_ = tempFile1.Close()

tempDirBase := filepath.Base(tempDir)

// Create archive with cached content
compressedFileName := tmpFileNameWithPrefix("abc0009")
err := archiver.Compress(compressedFileName, tempDirBase)
assert.NoError(t, err)

// Overwrite with different content to simulate existing file
assert.NoError(t, ioutil.WriteFile(tempFile1.Name(), originalContent, 0600))

// Decompress with default archiver (no IgnoreCollisions) - should overwrite
_, err = archiver.Decompress(compressedFileName)
assert.NoError(t, err)

// Verify file was overwritten with cached content
content, err := ioutil.ReadFile(tempFile1.Name())
assert.NoError(t, err)
assert.Equal(t, cachedContent, content)

assert.NoError(t, os.RemoveAll(tempDirBase))
assert.NoError(t, os.Remove(compressedFileName))
})

t.Run(archiverType+" skips existing symlinks with ignore collisions", func(t *testing.T) {
if archiverType == "shell-out" {
t.Skip("shell-out archiver delegates symlink handling to tar")
}

cwd, _ := os.Getwd()
tempDir, _ := ioutil.TempDir(cwd, "*")
tempFile1, _ := ioutil.TempFile(tempDir, "*")
_ = tempFile1.Close()

symlinkName := tempFile1.Name() + "-link"
assert.NoError(t, os.Symlink(tempFile1.Name(), symlinkName))

tempDirBase := filepath.Base(tempDir)

// Create archive containing the symlink
compressedFileName := tmpFileNameWithPrefix("abc0010")
err := archiver.Compress(compressedFileName, tempDirBase)
assert.NoError(t, err)

// Change the symlink target to a different file
altTarget := tempFile1.Name() + "-alt"
assert.NoError(t, ioutil.WriteFile(altTarget, []byte("alt"), 0600))
assert.NoError(t, os.Remove(symlinkName))
assert.NoError(t, os.Symlink(altTarget, symlinkName))

// Decompress with IgnoreCollisions - symlink should not be overwritten
metricsManager := metrics.NewNoOpMetricsManager()
opts := ArchiverOptions{IgnoreCollisions: true}
var skipArchiver Archiver
switch archiverType {
case "native":
skipArchiver = NewNativeArchiverWithOptions(metricsManager, false, opts)
case "native-parallel":
skipArchiver = NewNativeArchiverWithOptions(metricsManager, true, opts)
}

_, err = skipArchiver.Decompress(compressedFileName)
assert.NoError(t, err)

// Verify symlink still points to the alt target (was not overwritten)
target, err := os.Readlink(symlinkName)
assert.NoError(t, err)
assert.Equal(t, altTarget, target)

assert.NoError(t, os.RemoveAll(tempDirBase))
assert.NoError(t, os.Remove(compressedFileName))
})

t.Run(archiverType+" skips existing files without error", func(t *testing.T) {
cwd, _ := os.Getwd()
tempDir, _ := ioutil.TempDir(cwd, "*")
tempFile1, _ := ioutil.TempFile(tempDir, "*")
tempFile2, _ := ioutil.TempFile(tempDir, "*")

// Write content to both files
originalContent := []byte("original content")
cachedContent := []byte("cached content")
_, _ = tempFile1.Write(originalContent)
_, _ = tempFile2.Write(cachedContent)
_ = tempFile1.Close()
_ = tempFile2.Close()

tempDirBase := filepath.Base(tempDir)

// Create archive with both files
compressedFileName := tmpFileNameWithPrefix("abc0008")
err := archiver.Compress(compressedFileName, tempDirBase)
assert.NoError(t, err)

// Delete only tempFile2, keep tempFile1 to simulate existing file
assert.NoError(t, os.Remove(tempFile2.Name()))

// Create an archiver with IgnoreCollisions enabled for decompression
metricsManager := metrics.NewNoOpMetricsManager()
opts := ArchiverOptions{IgnoreCollisions: true}
var skipArchiver Archiver
switch archiverType {
case "shell-out":
skipArchiver = NewShellOutArchiverWithOptions(metricsManager, opts)
case "native":
skipArchiver = NewNativeArchiverWithOptions(metricsManager, false, opts)
case "native-parallel":
skipArchiver = NewNativeArchiverWithOptions(metricsManager, true, opts)
}

// Decompress - should skip tempFile1 (already exists) and restore tempFile2
unpackedAt, err := skipArchiver.Decompress(compressedFileName)
assert.NoError(t, err)
assert.Equal(t, tempDirBase+string(os.PathSeparator), unpackedAt)

// Verify tempFile1 still has original content (was not overwritten)
content1, err := ioutil.ReadFile(tempFile1.Name())
assert.NoError(t, err)
assert.Equal(t, originalContent, content1)

// Verify tempFile2 was restored with correct content
content2, err := ioutil.ReadFile(tempFile2.Name())
assert.NoError(t, err)
assert.Equal(t, cachedContent, content2)

assert.NoError(t, os.RemoveAll(tempDirBase))
assert.NoError(t, os.Remove(compressedFileName))
})
})
}

Expand Down
41 changes: 36 additions & 5 deletions cache-cli/pkg/archive/native_archiver.go
Original file line number Diff line number Diff line change
Expand Up @@ -17,8 +17,9 @@ import (
)

// NativeArchiver is the Archiver returned for the "native" and
// "native-parallel" archive methods.
type NativeArchiver struct {
	// MetricsManager is the metrics manager supplied at construction time.
	MetricsManager metrics.MetricsManager
	// UseParallelism is true for the "native-parallel" method and false for
	// the "native" method.
	UseParallelism bool
	// IgnoreCollisions, when true, makes Decompress skip files and symlinks
	// that already exist on disk instead of removing and recreating them.
	IgnoreCollisions bool
}

func NewNativeArchiver(metricsManager metrics.MetricsManager, useParallelism bool) *NativeArchiver {
Expand All @@ -28,6 +29,14 @@ func NewNativeArchiver(metricsManager metrics.MetricsManager, useParallelism boo
}
}

// NewNativeArchiverWithOptions creates a NativeArchiver that reports to
// metricsManager, uses parallelism when useParallelism is true, and copies the
// IgnoreCollisions setting from opts.
func NewNativeArchiverWithOptions(metricsManager metrics.MetricsManager, useParallelism bool, opts ArchiverOptions) *NativeArchiver {
	archiver := new(NativeArchiver)
	archiver.MetricsManager = metricsManager
	archiver.UseParallelism = useParallelism
	archiver.IgnoreCollisions = opts.IgnoreCollisions
	return archiver
}

func (a *NativeArchiver) Compress(dst, src string) error {
if _, err := os.Stat(src); err != nil {
return fmt.Errorf("error finding '%s': %v", src, err)
Expand Down Expand Up @@ -186,9 +195,12 @@ func (a *NativeArchiver) Decompress(src string) (string, error) {
}

case tar.TypeSymlink:
// If something already exists at the symlink path, either leave it untouched
// (IgnoreCollisions) or remove it first: os.Symlink fails when the path exists.
if _, err := os.Lstat(header.Name); err == nil {
if a.IgnoreCollisions {
continue
}
_ = os.Remove(header.Name)
}

Expand All @@ -206,6 +218,18 @@ func (a *NativeArchiver) Decompress(src string) (string, error) {
continue
}

// nil outFile means the file should be skipped (e.g., IgnoreCollisions is enabled)
if outFile == nil {
// The tar reader is sequential; we must consume this entry's bytes
// before advancing to the next header.
// #nosec
if _, err := io.Copy(io.Discard, tarReader); err != nil {
log.Errorf("Error draining tar entry for '%s' (skipped due to existing file): %v", header.Name, err)
hadError = true
}
continue
}

// #nosec
_, err = io.Copy(outFile, tarReader)
if err != nil {
Expand Down Expand Up @@ -243,6 +267,9 @@ func (a *NativeArchiver) Decompress(src string) (string, error) {
return restorationPath, nil
}

// openFile attempts to open a file for writing during decompression, or signals
// that the file should be skipped by returning (nil, nil) when IgnoreCollisions
// is true and the file already exists.
func (a *NativeArchiver) openFile(header *tar.Header, tarReader *tar.Reader) (*os.File, error) {
outFile, err := os.OpenFile(header.Name, os.O_RDWR|os.O_CREATE|os.O_EXCL, header.FileInfo().Mode())

Expand All @@ -252,8 +279,12 @@ func (a *NativeArchiver) openFile(header *tar.Header, tarReader *tar.Reader) (*o
}

// Since we are using O_EXCL, this error could mean that the file already exists.
// If that is the case, we either skip the file (IgnoreCollisions) or remove it and open it again.
if errors.Is(err, os.ErrExist) {
// If IgnoreCollisions is enabled, skip this file silently.
if a.IgnoreCollisions {
return nil, nil
}
// Otherwise, attempt to remove it before opening again.
if err := os.Remove(header.Name); err != nil {
return nil, fmt.Errorf("file '%s' already exists and can't be removed: %v", header.Name, err)
}
Expand Down
50 changes: 49 additions & 1 deletion cache-cli/pkg/archive/shell_out_archiver.go
Original file line number Diff line number Diff line change
Expand Up @@ -8,19 +8,29 @@ import (
"os"
"os/exec"
"path/filepath"
"strings"
"sync"

"github.com/semaphoreci/toolbox/cache-cli/pkg/metrics"
log "github.com/sirupsen/logrus"
)

// ShellOutArchiver is an Archiver that compresses and decompresses by running
// the system tar binary.
type ShellOutArchiver struct {
	// metricsManager is the metrics manager supplied at construction time.
	metricsManager metrics.MetricsManager
	// ignoreCollisions, when true, adds a flag to the extraction command so
	// that tar keeps files that already exist on disk.
	ignoreCollisions bool
}

// NewShellOutArchiver creates a ShellOutArchiver that uses metricsManager and
// leaves collision handling at its default (existing files are not skipped).
func NewShellOutArchiver(metricsManager metrics.MetricsManager) *ShellOutArchiver {
	archiver := new(ShellOutArchiver)
	archiver.metricsManager = metricsManager
	return archiver
}

// NewShellOutArchiverWithOptions creates a ShellOutArchiver that uses
// metricsManager and takes its collision handling from opts.IgnoreCollisions.
func NewShellOutArchiverWithOptions(metricsManager metrics.MetricsManager, opts ArchiverOptions) *ShellOutArchiver {
	var archiver ShellOutArchiver
	archiver.metricsManager = metricsManager
	archiver.ignoreCollisions = opts.IgnoreCollisions
	return &archiver
}

func (a *ShellOutArchiver) Compress(dst, src string) error {
if _, err := os.Stat(src); err != nil {
return fmt.Errorf("error finding '%s': %v", src, err)
Expand Down Expand Up @@ -66,14 +76,52 @@ func (a *ShellOutArchiver) compressionCommand(dst, src string) *exec.Cmd {
return exec.Command("tar", "czf", dst, src)
}

// decompressionCmd assembles the tar command that extracts tempFile into the
// current directory.
//
// The extraction flags are "xzPf" when dst is an absolute path and "xzf"
// otherwise. When ignoreCollisions is enabled, one extra flag is appended:
// "--skip-old-files" on GNU tar (silently skips, exit 0), or "-k" on other
// tars such as BSD tar (skips but may return non-zero on some systems).
func (a *ShellOutArchiver) decompressionCmd(dst, tempFile string) *exec.Cmd {
	extractFlags := "xzf"
	if filepath.IsAbs(dst) {
		extractFlags = "xzPf"
	}

	args := []string{extractFlags, tempFile, "-C", "."}

	// The tar flavor is only probed when the extra flag is actually needed.
	if a.ignoreCollisions {
		skipFlag := "-k"
		if isGNUTar() {
			skipFlag = "--skip-old-files"
		}
		args = append(args, skipFlag)
	}

	return exec.Command("tar", args...)
}

// State backing isGNUTar's cached detection of the system tar flavor.
var (
	// gnuTarOnce guards the detection so it runs at most once.
	gnuTarOnce sync.Once
	// gnuTarCached holds the detection result; it stays false until the
	// detection has run and found GNU tar.
	gnuTarCached bool
)

// isGNUTar reports whether the tar binary found on the PATH is GNU tar, which
// is recognized by the string "GNU tar" in the output of `tar --version`.
//
// The probe runs once; later calls return the cached answer without spawning
// another subprocess. If the probe fails, a warning is logged and the result
// is false (BSD tar is assumed).
func isGNUTar() bool {
	gnuTarOnce.Do(func() {
		versionOutput, err := exec.Command("tar", "--version").Output()
		if err == nil {
			gnuTarCached = strings.Contains(string(versionOutput), "GNU tar")
			return
		}

		log.Warnf("Could not determine tar version, assuming BSD tar: %v", err)
		gnuTarCached = false
	})

	return gnuTarCached
}

func (a *ShellOutArchiver) findRestorationPath(src string) (string, error) {
// #nosec
file, err := os.Open(src)
Expand Down