From 5d32670b2e601ee5233a77f59cdd8e5229972bcc Mon Sep 17 00:00:00 2001 From: Victor Skvortsov Date: Mon, 27 Oct 2025 16:08:30 +0500 Subject: [PATCH 01/14] Drop gerrors in runner.go --- runner/internal/shim/runner.go | 21 +++++++++++---------- 1 file changed, 11 insertions(+), 10 deletions(-) diff --git a/runner/internal/shim/runner.go b/runner/internal/shim/runner.go index 209d4b47fc..0191e33edf 100644 --- a/runner/internal/shim/runner.go +++ b/runner/internal/shim/runner.go @@ -3,6 +3,7 @@ package shim import ( "context" "errors" + "fmt" "io" "net/http" "os" @@ -11,7 +12,6 @@ import ( "time" "github.com/dstackai/dstack/runner/consts" - "github.com/dstackai/dstack/runner/internal/gerrors" "github.com/dstackai/dstack/runner/internal/log" ) @@ -21,7 +21,7 @@ func (c *CLIArgs) DownloadRunner(ctx context.Context) error { } err := downloadRunner(ctx, c.Runner.DownloadURL, c.Runner.BinaryPath, false) if err != nil { - return gerrors.Wrap(err) + return fmt.Errorf("download runner from %s: %w", c.Runner.DownloadURL, err) } return nil } @@ -48,7 +48,7 @@ func downloadRunner(ctx context.Context, url string, path string, force bool) er } tempFile, err := os.CreateTemp(filepath.Dir(path), "dstack-runner") if err != nil { - return gerrors.Wrap(err) + return fmt.Errorf("create temp file for runner: %w", err) } defer func() { err := tempFile.Close() @@ -63,13 +63,14 @@ func downloadRunner(ctx context.Context, url string, path string, force bool) er req, err := http.NewRequestWithContext(ctx, http.MethodGet, url, nil) if err != nil { - return gerrors.Wrap(err) + return fmt.Errorf("create download request: %w", err) } resp, err := http.DefaultClient.Do(req) if err != nil { - return gerrors.Wrap(err) + return fmt.Errorf("execute download request: %w", err) } + defer func() { err := resp.Body.Close() if err != nil { @@ -78,12 +79,12 @@ func downloadRunner(ctx context.Context, url string, path string, force bool) er }() if resp.StatusCode != http.StatusOK { - return gerrors.Newf("unexpected status code: %s", resp.Status) + return fmt.Errorf("unexpected status code %s downloading runner from %s", resp.Status, url) } written, err := io.Copy(tempFile, resp.Body) if err != nil { - return gerrors.Wrap(err) + return fmt.Errorf("copy runner binary: %w", err) } select { @@ -91,18 +92,18 @@ func downloadRunner(ctx context.Context, url string, path string, force bool) er err := ctx.Err() if errors.Is(err, context.DeadlineExceeded) { log.Error(ctx, "downloadRunner error", "err", err, "bytes", written, "total", resp.ContentLength) - return gerrors.Newf("Cannot download runner %w", err) + return fmt.Errorf("download runner timeout after %d/%d bytes: %w", written, resp.ContentLength, err) } default: log.Info(ctx, "the runner was downloaded successfully", "bytes", written) } if err := tempFile.Chmod(0o755); err != nil { - return gerrors.Wrap(err) + return fmt.Errorf("chmod runner binary: %w", err) } if err := os.Rename(tempFile.Name(), path); err != nil { - return gerrors.Wrap(err) + return fmt.Errorf("move runner binary to %s: %w", path, err) } return nil From 8e3691bd0f0d3ed46793002053b7b8bb809419a1 Mon Sep 17 00:00:00 2001 From: Victor Skvortsov Date: Mon, 27 Oct 2025 16:15:44 +0500 Subject: [PATCH 02/14] Drop gerrors in http.go --- runner/internal/runner/api/http.go | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/runner/internal/runner/api/http.go b/runner/internal/runner/api/http.go index b4a0032e4c..1236b0af71 100644 --- a/runner/internal/runner/api/http.go +++ b/runner/internal/runner/api/http.go @@ -15,7 +15,6 @@ import ( "github.com/dstackai/dstack/runner/internal/api" "github.com/dstackai/dstack/runner/internal/executor" - "github.com/dstackai/dstack/runner/internal/gerrors" "github.com/dstackai/dstack/runner/internal/log" "github.com/dstackai/dstack/runner/internal/metrics" "github.com/dstackai/dstack/runner/internal/schemas" @@ -77,7 +76,7 @@ func (s *Server) uploadArchivePostHandler(w http.ResponseWriter, r *http.Request } mediaType, params, err := mime.ParseMediaType(contentType) if err != nil { - return nil, gerrors.Wrap(err) + return nil, fmt.Errorf("parse request content-type: %w", err) } if mediaType != "multipart/form-data" { return nil, &api.Error{Status: http.StatusBadRequest, Msg: fmt.Sprintf("multipart/form-data expected, got %s", mediaType)} @@ -93,7 +92,7 @@ func (s *Server) uploadArchivePostHandler(w http.ResponseWriter, r *http.Request return nil, &api.Error{Status: http.StatusBadRequest, Msg: "empty form"} } if err != nil { - return nil, gerrors.Wrap(err) + return nil, fmt.Errorf("read multipart form: %w", err) } fieldName := part.FormName() if fieldName == "" { @@ -107,7 +106,7 @@ func (s *Server) uploadArchivePostHandler(w http.ResponseWriter, r *http.Request return nil, &api.Error{Status: http.StatusBadRequest, Msg: "missing file name"} } if err := s.executor.AddFileArchive(archiveId, part); err != nil { - return nil, gerrors.Wrap(err) + return nil, fmt.Errorf("add file archive: %w", err) } if _, err := formReader.NextPart(); !errors.Is(err, io.EOF) { return nil, &api.Error{Status: http.StatusBadRequest, Msg: "extra form field(s)"} @@ -127,14 +126,14 @@ func (s *Server) uploadCodePostHandler(w http.ResponseWriter, r *http.Request) ( codePath := filepath.Join(s.tempDir, "code") // todo random name? file, err := os.Create(codePath) if err != nil { - return nil, gerrors.Wrap(err) + return nil, fmt.Errorf("create code file: %w", err) } defer func() { _ = file.Close() }() if _, err = io.Copy(file, r.Body); err != nil { if err.Error() == "http: request body too large" { return nil, &api.Error{Status: http.StatusRequestEntityTooLarge} } - return nil, gerrors.Wrap(err) + return nil, fmt.Errorf("copy request body: %w", err) } s.executor.SetCodePath(codePath) From d5242e67914da23a230cb946433a29955c60cee3 Mon Sep 17 00:00:00 2001 From: Victor Skvortsov Date: Mon, 27 Oct 2025 16:19:33 +0500 Subject: [PATCH 03/14] Drop gerrors in repo/manager.go --- runner/internal/repo/manager.go | 15 +++++++-------- 1 file changed, 7 insertions(+), 8 deletions(-) diff --git a/runner/internal/repo/manager.go b/runner/internal/repo/manager.go index 75afea5ec6..6339207c57 100644 --- a/runner/internal/repo/manager.go +++ b/runner/internal/repo/manager.go @@ -4,7 +4,6 @@ import ( "context" "fmt" - "github.com/dstackai/dstack/runner/internal/gerrors" "github.com/dstackai/dstack/runner/internal/log" "github.com/go-git/go-git/v5" "github.com/go-git/go-git/v5/plumbing" @@ -67,12 +66,12 @@ func (m *Manager) Checkout() error { log.Info(m.ctx, "git checkout", "auth", fmt.Sprintf("%T", (&m.clo).Auth)) ref, err := git.PlainClone(m.localPath, false, &m.clo) if err != nil { - return gerrors.Wrap(err) + return fmt.Errorf("clone repo: %w", err) } if ref != nil { branchRef, err := ref.Reference(m.clo.ReferenceName, true) if err != nil { - return gerrors.Wrap(err) + return fmt.Errorf("get branch reference: %w", err) } var cho git.CheckoutOptions if m.hash == "" || m.hash == branchRef.Hash().String() { @@ -82,11 +81,11 @@ func (m *Manager) Checkout() error { } workTree, err := ref.Worktree() if err != nil { - return gerrors.Wrap(err) + return fmt.Errorf("get worktree: %w", err) } err = workTree.Checkout(&cho) if err != nil { - return gerrors.Wrap(err) + return fmt.Errorf("checkout: %w", err) } } else { log.Warning(m.ctx, "git clone ref==nil") @@ -102,16 +101,16 @@ func (m *Manager) URL() string { func (m *Manager) SetConfig(name, email string) error { repo, err := git.PlainOpen(m.localPath) if err != nil { - return gerrors.Wrap(err) + return fmt.Errorf("open repo: %w", err) } config, err := repo.Config() if err != nil { - return gerrors.Wrap(err) + return fmt.Errorf("get repo config: %w", err) } config.User.Name = name config.User.Email = email if err := repo.SetConfig(config); err != nil { - return gerrors.Wrap(err) + return fmt.Errorf("set repo config: %w", err) } return nil } From 615326aade1254c8045969bf36aebca9095e5074 Mon Sep 17 00:00:00 2001 From: Victor Skvortsov Date: Mon, 27 Oct 2025 16:22:09 +0500 Subject: [PATCH 04/14] Drop gerrors in executor/files.go --- runner/internal/executor/files.go | 19 ++++++++----------- 1 file changed, 8 insertions(+), 11 deletions(-) diff --git a/runner/internal/executor/files.go b/runner/internal/executor/files.go index 3179e5290a..b866ce2d83 100644 --- a/runner/internal/executor/files.go +++ b/runner/internal/executor/files.go @@ -10,7 +10,6 @@ import ( "github.com/codeclysm/extract/v4" "github.com/dstackai/dstack/runner/internal/common" - "github.com/dstackai/dstack/runner/internal/gerrors" "github.com/dstackai/dstack/runner/internal/log" ) @@ -18,16 +17,16 @@ var renameRegex = regexp.MustCompile(`^([^/]*)(/|$)`) func (ex *RunExecutor) AddFileArchive(id string, src io.Reader) error { if err := os.MkdirAll(ex.archiveDir, 0o755); err != nil { - return gerrors.Wrap(err) + return fmt.Errorf("create archive directory: %w", err) } archivePath := path.Join(ex.archiveDir, id) archive, err := os.Create(archivePath) if err != nil { - return gerrors.Wrap(err) + return fmt.Errorf("create archive file: %w", err) } defer func() { _ = archive.Close() }() if _, err = io.Copy(archive, src); err != nil { - return gerrors.Wrap(err) + return fmt.Errorf("copy archive data: %w", err) } return nil } @@ -38,14 +37,12 @@ func (ex *RunExecutor) setupFiles(ctx context.Context) error { for _, fa := range ex.jobSpec.FileArchives { archivePath := path.Join(ex.archiveDir, fa.Id) if err := extractFileArchive(ctx, archivePath, fa.Path, ex.jobWorkingDir, ex.jobUid, ex.jobGid, ex.jobHomeDir); err != nil { - return gerrors.Wrap(err) + return fmt.Errorf("extract file archive %s: %w", fa.Id, err) } } - if err := os.RemoveAll(ex.archiveDir); err != nil { log.Warning(ctx, "Failed to remove file archives dir", "path", ex.archiveDir, "err", err) } - return nil } @@ -54,11 +51,11 @@ func extractFileArchive(ctx context.Context, archivePath string, destPath string destPath, err := common.ExpandPath(destPath, baseDir, homeDir) if err != nil { - return gerrors.Wrap(err) + return fmt.Errorf("expand destination path: %w", err) } destBase, destName := path.Split(destPath) if err := common.MkdirAll(ctx, destBase, uid, gid); err != nil { - return gerrors.Wrap(err) + return fmt.Errorf("create destination directory: %w", err) } if err := os.RemoveAll(destPath); err != nil { log.Warning(ctx, "Failed to remove", "path", destPath, "err", err) @@ -66,7 +63,7 @@ func extractFileArchive(ctx context.Context, archivePath string, destPath string archive, err := os.Open(archivePath) if err != nil { - return gerrors.Wrap(err) + return fmt.Errorf("open archive file: %w", err) } defer archive.Close() @@ -78,7 +75,7 @@ func extractFileArchive(ctx context.Context, archivePath string, destPath string return s } if err := extract.Tar(ctx, archive, destBase, renameAndRemember); err != nil { - return gerrors.Wrap(err) + return fmt.Errorf("extract tar archive: %w", err) } if uid != -1 || gid != -1 { From 686818fa6182cab8df62e6f1518cecd61624da9a Mon Sep 17 00:00:00 2001 From: Victor Skvortsov Date: Mon, 27 Oct 2025 16:24:21 +0500 Subject: [PATCH 05/14] Drop more gerrors usages --- runner/internal/common/interpolator.go | 3 +-- runner/internal/log/log.go | 3 +-- runner/internal/runner/api/server.go | 3 +-- 3 files changed, 3 insertions(+), 6 deletions(-) diff --git a/runner/internal/common/interpolator.go b/runner/internal/common/interpolator.go index 7331141810..84597df7fa 100644 --- a/runner/internal/common/interpolator.go +++ b/runner/internal/common/interpolator.go @@ -5,7 +5,6 @@ import ( "fmt" "strings" - "github.com/dstackai/dstack/runner/internal/gerrors" "github.com/dstackai/dstack/runner/internal/log" ) @@ -52,7 +51,7 @@ func (vi *VariablesInterpolator) Interpolate(ctx context.Context, s string) (str sb.WriteString(s[start:opening]) closing := IndexWithOffset(s, PatternClosing, opening) if closing == -1 { - return "", gerrors.Newf("no pattern closing: %s", s[opening:]) + return "", fmt.Errorf("no pattern closing: %s", s[opening:]) } name := strings.TrimSpace(s[opening+len(PatternOpening) : closing]) diff --git a/runner/internal/log/log.go b/runner/internal/log/log.go index 6ee1efa0a5..854ce9b883 100644 --- a/runner/internal/log/log.go +++ b/runner/internal/log/log.go @@ -6,7 +6,6 @@ import ( "io" "os" - "github.com/dstackai/dstack/runner/internal/gerrors" "github.com/sirupsen/logrus" ) @@ -107,7 +106,7 @@ func GetLogger(ctx context.Context) *logrus.Entry { func CreateAppendFile(path string) (*os.File, error) { f, err := os.OpenFile(path, os.O_RDWR|os.O_CREATE|os.O_APPEND, 0o644) if err != nil { - return nil, gerrors.Wrap(err) + return nil, fmt.Errorf("open file: %w", err) } return f, nil } diff --git a/runner/internal/runner/api/server.go b/runner/internal/runner/api/server.go index 968a037d39..be573cc663 100644 --- a/runner/internal/runner/api/server.go +++ b/runner/internal/runner/api/server.go @@ -11,7 +11,6 @@ import ( "github.com/dstackai/dstack/runner/internal/api" "github.com/dstackai/dstack/runner/internal/executor" - "github.com/dstackai/dstack/runner/internal/gerrors" "github.com/dstackai/dstack/runner/internal/log" ) @@ -85,7 +84,7 @@ func (s *Server) Run() error { case <-s.jobBarrierCh: // job started case <-time.After(s.submitWaitDuration): log.Error(context.TODO(), "Job didn't start in time, shutting down") - return gerrors.Newf("no job") + return errors.New("no job submitted") } // todo timeout on code and run From a7163c3ef3659978a72286fcdc3da5a60a511191 Mon Sep 17 00:00:00 2001 From: Victor Skvortsov Date: Mon, 27 Oct 2025 16:29:34 +0500 Subject: [PATCH 06/14] Drop gerrors in executor/repo.go --- runner/internal/executor/repo.go | 47 ++++++++++++++++---------------- 1 file changed, 23 insertions(+), 24 deletions(-) diff --git a/runner/internal/executor/repo.go b/runner/internal/executor/repo.go index ac60405ed3..f633f839f6 100644 --- a/runner/internal/executor/repo.go +++ b/runner/internal/executor/repo.go @@ -10,7 +10,6 @@ import ( "github.com/codeclysm/extract/v4" "github.com/dstackai/dstack/runner/internal/common" - "github.com/dstackai/dstack/runner/internal/gerrors" "github.com/dstackai/dstack/runner/internal/log" "github.com/dstackai/dstack/runner/internal/repo" ) @@ -25,13 +24,13 @@ func (ex *RunExecutor) setupRepo(ctx context.Context) error { var err error ex.repoDir, err = common.ExpandPath(*ex.jobSpec.RepoDir, ex.jobWorkingDir, ex.jobHomeDir) if err != nil { - return gerrors.Wrap(err) + return fmt.Errorf("expand repo dir path: %w", err) } log.Trace(ctx, "Job repo dir", "path", ex.repoDir) shouldCheckout, err := ex.shouldCheckout(ctx) if err != nil { - return gerrors.Wrap(err) + return fmt.Errorf("check if checkout needed: %w", err) } if !shouldCheckout { log.Info(ctx, "skipping repo checkout: repo dir is not empty") @@ -41,32 +40,32 @@ func (ex *RunExecutor) setupRepo(ctx context.Context) error { // Currently, only needed for volumes mounted inside repo with lost+found present. tmpRepoDir, err := os.MkdirTemp(ex.tempDir, "repo_dir_copy") if err != nil { - return gerrors.Wrap(err) + return fmt.Errorf("create temp repo dir: %w", err) } defer func() { _ = os.RemoveAll(tmpRepoDir) }() err = ex.moveRepoDir(tmpRepoDir) if err != nil { - return gerrors.Wrap(err) + return fmt.Errorf("move repo dir: %w", err) } defer func() { err_ := ex.restoreRepoDir(tmpRepoDir) if err == nil { - err = gerrors.Wrap(err_) + err = fmt.Errorf("restore repo dir: %w", err_) } }() switch ex.getRepoData().RepoType { case "remote": log.Trace(ctx, "Fetching git repository") if err := ex.prepareGit(ctx); err != nil { - return gerrors.Wrap(err) + return fmt.Errorf("prepare git repo: %w", err) } case "local", "virtual": log.Trace(ctx, "Extracting tar archive") if err := ex.prepareArchive(ctx); err != nil { - return gerrors.Wrap(err) + return fmt.Errorf("prepare archive: %w", err) } default: - return gerrors.Newf("unknown RepoType: %s", ex.getRepoData().RepoType) + return fmt.Errorf("unknown RepoType: %s", ex.getRepoData().RepoType) } return err } @@ -92,11 +91,11 @@ func (ex *RunExecutor) prepareGit(ctx context.Context) error { case "ssh": log.Trace(ctx, "Select SSH protocol") if ex.repoCredentials.PrivateKey == nil { - return gerrors.Newf("private key is empty") + return fmt.Errorf("private key is empty") } repoManager.WithSSHAuth(*ex.repoCredentials.PrivateKey, "") // we don't support passphrase default: - return gerrors.Newf("unsupported remote repo protocol: %s", ex.repoCredentials.GetProtocol()) + return fmt.Errorf("unsupported remote repo protocol: %s", ex.repoCredentials.GetProtocol()) } } else { log.Trace(ctx, "Credentials is empty") @@ -104,20 +103,20 @@ func (ex *RunExecutor) prepareGit(ctx context.Context) error { log.Trace(ctx, "Checking out remote repo", "GIT URL", repoManager.URL()) if err := repoManager.Checkout(); err != nil { - return gerrors.Wrap(err) + return fmt.Errorf("checkout repo: %w", err) } if err := repoManager.SetConfig(ex.getRepoData().RepoConfigName, ex.getRepoData().RepoConfigEmail); err != nil { - return gerrors.Wrap(err) + return fmt.Errorf("set repo config: %w", err) } log.Trace(ctx, "Applying diff") repoDiff, err := os.ReadFile(ex.codePath) if err != nil { - return gerrors.Wrap(err) + return fmt.Errorf("read repo diff: %w", err) } if len(repoDiff) > 0 { if err := repo.ApplyDiff(ctx, ex.repoDir, string(repoDiff)); err != nil { - return gerrors.Wrap(err) + return fmt.Errorf("apply diff: %w", err) } } return nil @@ -126,12 +125,12 @@ func (ex *RunExecutor) prepareGit(ctx context.Context) error { func (ex *RunExecutor) prepareArchive(ctx context.Context) error { file, err := os.Open(ex.codePath) if err != nil { - return gerrors.Wrap(err) + return fmt.Errorf("open code archive: %w", err) } defer func() { _ = file.Close() }() log.Trace(ctx, "Extracting code archive", "src", ex.codePath, "dst", ex.repoDir) if err := extract.Tar(ctx, file, ex.repoDir, nil); err != nil { - return gerrors.Wrap(err) + return fmt.Errorf("extract tar archive: %w", err) } return nil } @@ -142,19 +141,19 @@ func (ex *RunExecutor) shouldCheckout(ctx context.Context) (bool, error) { if err != nil { if os.IsNotExist(err) { if err = common.MkdirAll(ctx, ex.repoDir, ex.jobUid, ex.jobGid); err != nil { - return false, gerrors.Wrap(err) + return false, fmt.Errorf("create repo dir: %w", err) } // No repo dir - created a new one return true, nil } - return false, gerrors.Wrap(err) + return false, fmt.Errorf("stat repo dir: %w", err) } if !info.IsDir() { return false, fmt.Errorf("failed to set up repo dir: %s is not a dir", ex.repoDir) } entries, err := os.ReadDir(ex.repoDir) if err != nil { - return false, gerrors.Wrap(err) + return false, fmt.Errorf("read repo dir: %w", err) } if len(entries) == 0 { // Repo dir existed but was empty, e.g. a volume without repo @@ -173,14 +172,14 @@ func (ex *RunExecutor) shouldCheckout(ctx context.Context) (bool, error) { func (ex *RunExecutor) moveRepoDir(tmpDir string) error { if err := moveDir(ex.repoDir, tmpDir); err != nil { - return gerrors.Wrap(err) + return fmt.Errorf("move directory: %w", err) } return nil } func (ex *RunExecutor) restoreRepoDir(tmpDir string) error { if err := moveDir(tmpDir, ex.repoDir); err != nil { - return gerrors.Wrap(err) + return fmt.Errorf("move directory: %w", err) } return nil } @@ -193,12 +192,12 @@ func moveDir(srcDir, dstDir string) error { } entries, err := os.ReadDir(srcDir) if err != nil { - return gerrors.Wrap(err) + return fmt.Errorf("read source directory: %w", err) } for _, entry := range entries { err := os.RemoveAll(filepath.Join(srcDir, entry.Name())) if err != nil { - return gerrors.Wrap(err) + return fmt.Errorf("remove file from source: %w", err) } } return nil From af1cb37c08afb78dc1ec17e043068a12605d71be Mon Sep 17 00:00:00 2001 From: Victor Skvortsov Date: Mon, 27 Oct 2025 16:36:59 +0500 Subject: [PATCH 07/14] Drop gerrors in executor/executor.go --- runner/internal/executor/executor.go | 55 ++++++++++++++-------------- 1 file changed, 27 insertions(+), 28 deletions(-) diff --git a/runner/internal/executor/executor.go b/runner/internal/executor/executor.go index 35503683b3..872cf16213 100644 --- a/runner/internal/executor/executor.go +++ b/runner/internal/executor/executor.go @@ -24,7 +24,6 @@ import ( "github.com/dstackai/dstack/runner/consts" "github.com/dstackai/dstack/runner/internal/common" "github.com/dstackai/dstack/runner/internal/connections" - "github.com/dstackai/dstack/runner/internal/gerrors" "github.com/dstackai/dstack/runner/internal/log" "github.com/dstackai/dstack/runner/internal/schemas" "github.com/dstackai/dstack/runner/internal/types" @@ -146,14 +145,14 @@ func (ex *RunExecutor) Run(ctx context.Context) (err error) { runnerLogFile, err := log.CreateAppendFile(filepath.Join(ex.tempDir, consts.RunnerLogFileName)) if err != nil { ex.SetJobState(ctx, types.JobStateFailed) - return gerrors.Wrap(err) + return fmt.Errorf("create runner log file: %w", err) } defer func() { _ = runnerLogFile.Close() }() jobLogFile, err := log.CreateAppendFile(filepath.Join(ex.tempDir, consts.RunnerJobLogFileName)) if err != nil { ex.SetJobState(ctx, types.JobStateFailed) - return gerrors.Wrap(err) + return fmt.Errorf("create job log file: %w", err) } defer func() { _ = jobLogFile.Close() }() @@ -162,7 +161,7 @@ func (ex *RunExecutor) Run(ctx context.Context) (err error) { if r := recover(); r != nil { log.Error(ctx, "Executor PANIC", "err", r) ex.SetJobState(ctx, types.JobStateFailed) - err = gerrors.Newf("recovered: %v", r) + err = fmt.Errorf("recovered: %v", r) } // no more logs will be written after this ex.mu.Lock() @@ -192,7 +191,7 @@ func (ex *RunExecutor) Run(ctx context.Context) (err error) { types.TerminationReasonExecutorError, fmt.Sprintf("Failed to fill in the job user fields (%s)", err), ) - return gerrors.Wrap(err) + return fmt.Errorf("fill user: %w", err) } ex.setJobCredentials(ctx) @@ -204,7 +203,7 @@ func (ex *RunExecutor) Run(ctx context.Context) (err error) { types.TerminationReasonExecutorError, fmt.Sprintf("Failed to set up the working dir (%s)", err), ) - return gerrors.Wrap(err) + return fmt.Errorf("prepare job working dir: %w", err) } if err := ex.setupRepo(ctx); err != nil { @@ -214,7 +213,7 @@ func (ex *RunExecutor) Run(ctx context.Context) (err error) { types.TerminationReasonContainerExitedWithError, fmt.Sprintf("Failed to set up the repo (%s)", err), ) - return gerrors.Wrap(err) + return fmt.Errorf("setup repo: %w", err) } if err := ex.setupFiles(ctx); err != nil { @@ -224,13 +223,13 @@ func (ex *RunExecutor) Run(ctx context.Context) (err error) { types.TerminationReasonExecutorError, fmt.Sprintf("Failed to set up files (%s)", err), ) - return gerrors.Wrap(err) + return fmt.Errorf("setup files: %w", err) } cleanupCredentials, err := ex.setupCredentials(ctx) if err != nil { ex.SetJobState(ctx, types.JobStateFailed) - return gerrors.Wrap(err) + return fmt.Errorf("setup credentials: %w", err) } defer cleanupCredentials() @@ -250,7 +249,7 @@ func (ex *RunExecutor) Run(ctx context.Context) (err error) { case <-ctx.Done(): log.Error(ctx, "Job canceled") ex.SetJobState(ctx, types.JobStateTerminated) - return gerrors.Wrap(err) + return fmt.Errorf("job canceled: %w", err) default: } @@ -263,7 +262,7 @@ func (ex *RunExecutor) Run(ctx context.Context) (err error) { types.TerminationReasonMaxDurationExceeded, "Max duration exceeded", ) - return gerrors.Wrap(err) + return fmt.Errorf("max duration exceeded: %w", err) default: } @@ -275,7 +274,7 @@ func (ex *RunExecutor) Run(ctx context.Context) (err error) { } else { ex.SetJobState(ctx, types.JobStateFailed) } - return gerrors.Wrap(err) + return fmt.Errorf("exec job failed: %w", err) } ex.SetJobStateWithExitStatus(ctx, types.JobStateDone, 0) @@ -358,7 +357,7 @@ func (ex *RunExecutor) prepareJobWorkingDir(ctx context.Context) error { if ex.jobSpec.WorkingDir == nil { ex.jobWorkingDir, err = os.Getwd() if err != nil { - return gerrors.Wrap(err) + return fmt.Errorf("get working directory: %w", err) } } else { // We still support relative paths, as 0.19.27 server uses relative paths when possible @@ -366,7 +365,7 @@ func (ex *RunExecutor) prepareJobWorkingDir(ctx context.Context) error { // Replace consts.LegacyRepoDir with "" eventually. ex.jobWorkingDir, err = common.ExpandPath(*ex.jobSpec.WorkingDir, consts.LegacyRepoDir, ex.jobHomeDir) if err != nil { - return gerrors.Wrap(err) + return fmt.Errorf("expand working dir path: %w", err) } if !path.IsAbs(ex.jobWorkingDir) { return fmt.Errorf("working_dir must be absolute: %s", ex.jobWorkingDir) @@ -374,7 +373,7 @@ func (ex *RunExecutor) prepareJobWorkingDir(ctx context.Context) error { } log.Trace(ctx, "Job working dir", "path", ex.jobWorkingDir) if err := common.MkdirAll(ctx, ex.jobWorkingDir, ex.jobUid, ex.jobGid); err != nil { - return gerrors.Wrap(err) + return fmt.Errorf("create working directory: %w", err) } return nil } @@ -423,7 +422,7 @@ func (ex *RunExecutor) execJob(ctx context.Context, jobLogFile io.Writer) error cmd := exec.CommandContext(ctx, ex.jobSpec.Commands[0], ex.jobSpec.Commands[1:]...) cmd.Cancel = func() error { // returns error on Windows - return gerrors.Wrap(cmd.Process.Signal(os.Interrupt)) + return fmt.Errorf("send interrupt signal: %w", cmd.Process.Signal(os.Interrupt)) } cmd.WaitDelay = ex.killDelay // kills the process if it doesn't exit in time @@ -538,7 +537,7 @@ func (ex *RunExecutor) execJob(ctx context.Context, jobLogFile io.Writer) error ptm, err := startCommand(cmd) if err != nil { - return gerrors.Wrap(err) + return fmt.Errorf("start command: %w", err) } defer func() { _ = ptm.Close() }() defer func() { _ = cmd.Wait() }() // release resources if copy fails @@ -548,9 +547,9 @@ func (ex *RunExecutor) execJob(ctx context.Context, jobLogFile io.Writer) error logger := io.MultiWriter(jobLogFile, ex.jobWsLogs, stripper) _, err = io.Copy(logger, ptm) if err != nil && !isPtyError(err) { - return gerrors.Wrap(err) + return fmt.Errorf("copy command output: %w", err) } - return gerrors.Wrap(cmd.Wait()) + return fmt.Errorf("wait for command: %w", cmd.Wait()) } func (ex *RunExecutor) setupCredentials(ctx context.Context) (func(), error) { @@ -560,18 +559,18 @@ func (ex *RunExecutor) setupCredentials(ctx context.Context) (func(), error) { switch ex.repoCredentials.GetProtocol() { case "ssh": if ex.repoCredentials.PrivateKey == nil { - return nil, gerrors.New("private key is missing") + return nil, fmt.Errorf("private key is missing") } keyPath := filepath.Join(ex.homeDir, ".ssh/id_rsa") if _, err := os.Stat(keyPath); err == nil { - return nil, gerrors.New("private key already exists") + return nil, fmt.Errorf("private key already exists") } if err := os.MkdirAll(filepath.Dir(keyPath), 0o700); err != nil { - return nil, gerrors.Wrap(err) + return nil, fmt.Errorf("create ssh directory: %w", err) } log.Info(ctx, "Writing private key", "path", keyPath) if err := os.WriteFile(keyPath, []byte(*ex.repoCredentials.PrivateKey), 0o600); err != nil { - return nil, gerrors.Wrap(err) + return nil, fmt.Errorf("write private key: %w", err) } return func() { log.Info(ctx, "Removing private key", "path", keyPath) @@ -583,26 +582,26 @@ func (ex *RunExecutor) setupCredentials(ctx context.Context) (func(), error) { } hostsPath := filepath.Join(ex.homeDir, ".config/gh/hosts.yml") if _, err := os.Stat(hostsPath); err == nil { - return nil, gerrors.New("hosts.yml file already exists") + return nil, fmt.Errorf("hosts.yml file already exists") } if err := os.MkdirAll(filepath.Dir(hostsPath), 0o700); err != nil { - return nil, gerrors.Wrap(err) + return nil, fmt.Errorf("create gh config directory: %w", err) } log.Info(ctx, "Writing OAuth token", "path", hostsPath) cloneURL, err := url.Parse(ex.repoCredentials.CloneURL) if err != nil { - return nil, gerrors.Wrap(err) + return nil, fmt.Errorf("parse clone URL: %w", err) } ghHost := fmt.Sprintf("%s:\n oauth_token: \"%s\"\n", cloneURL.Hostname(), *ex.repoCredentials.OAuthToken) if err := os.WriteFile(hostsPath, []byte(ghHost), 0o600); err != nil { - return nil, gerrors.Wrap(err) + return nil, fmt.Errorf("write OAuth token: %w", err) } return func() { log.Info(ctx, "Removing OAuth token", "path", hostsPath) _ = os.Remove(hostsPath) }, nil } - return nil, gerrors.Newf("unknown protocol %s", ex.repoCredentials.GetProtocol()) + return nil, fmt.Errorf("unknown protocol %s", ex.repoCredentials.GetProtocol()) } func isPtyError(err error) bool { From 4b47d74b4ed613911b2a4d43769633ea9b7cdfae Mon Sep 17 00:00:00 2001 From: Victor Skvortsov Date: Mon, 27 Oct 2025 16:37:34 +0500 Subject: [PATCH 08/14] Drop gerrors package --- runner/internal/gerrors/stacktrace.go | 72 --------------------------- 1 file changed, 72 deletions(-) delete mode 100644 runner/internal/gerrors/stacktrace.go diff --git a/runner/internal/gerrors/stacktrace.go b/runner/internal/gerrors/stacktrace.go deleted file mode 100644 index 102c7221fa..0000000000 --- a/runner/internal/gerrors/stacktrace.go +++ /dev/null @@ -1,72 +0,0 @@ -package gerrors - -import ( - "errors" - "fmt" - "path/filepath" - "runtime" - "strings" -) - -type withStack struct { - err error - pointFrame uintptr -} - -func (ws withStack) Error() string { - if ws.pointFrame == 0 { - return ws.err.Error() - } - - f := getFrame(ws.pointFrame) - if f.File == "" { - return "[unknown] " + ws.err.Error() - } - - _, file := filepath.Split(f.File) - l := fmt.Sprintf("%s:%d", file, f.Line) - if f.Function != "" { - idx := strings.LastIndex(f.Function, "/") - l += " " + f.Function[idx+1:] - } - return fmt.Sprintf("[%s] %s", l, ws.err) -} - -func (ws withStack) Unwrap() error { - return ws.err -} - -func New(s string) error { - return withStack{ - err: errors.New(s), - pointFrame: pointFrame(), - } -} - -func Newf(format string, a ...interface{}) error { - return withStack{ - err: fmt.Errorf(format, a...), - pointFrame: pointFrame(), - } -} - -func Wrap(err error) error { - if err == nil { - return nil - } - return withStack{ - err: err, - pointFrame: pointFrame(), - } -} - -func pointFrame() uintptr { - pc := make([]uintptr, 1) - runtime.Callers(3, pc) - return pc[0] -} - -func getFrame(pc uintptr) *runtime.Frame { - f, _ := runtime.CallersFrames([]uintptr{pc}).Next() - return &f -} From 364eab2797bfa9249473768b03894bc9a307c358 Mon Sep 17 00:00:00 2001 From: Victor Skvortsov Date: Mon, 27 Oct 2025 16:52:23 +0500 Subject: [PATCH 09/14] Drop tracerr --- runner/cmd/runner/main.go | 11 ++-- runner/internal/shim/authorized_keys.go | 27 ++++----- runner/internal/shim/backends/gcp.go | 4 +- runner/internal/shim/docker.go | 79 ++++++++++++------------- 4 files changed, 58 insertions(+), 63 deletions(-) diff --git a/runner/cmd/runner/main.go b/runner/cmd/runner/main.go index 6ed8515687..4b37a086ad 100644 --- a/runner/cmd/runner/main.go +++ b/runner/cmd/runner/main.go @@ -12,7 +12,6 @@ import ( "github.com/dstackai/dstack/runner/internal/log" "github.com/dstackai/dstack/runner/internal/runner/api" "github.com/sirupsen/logrus" - "github.com/ztrue/tracerr" ) func main() { @@ -21,17 +20,17 @@ func main() { func start(tempDir string, homeDir string, httpPort int, sshPort int, logLevel int, version string) error { if err := os.MkdirAll(tempDir, 0o755); err != nil { - return tracerr.Errorf("Failed to create temp directory: %w", err) + return fmt.Errorf("create temp directory: %w", err) } defaultLogFile, err := log.CreateAppendFile(filepath.Join(tempDir, consts.RunnerDefaultLogFileName)) if err != nil { - return tracerr.Errorf("Failed to create default log file: %w", err) + return fmt.Errorf("create default log file: %w", err) } defer func() { err = defaultLogFile.Close() if err != nil { - tracerr.Print(err) + log.Error(context.TODO(), "Failed to close default log file", "err", err) } }() @@ -40,12 +39,12 @@ func start(tempDir string, homeDir string, httpPort int, sshPort int, logLevel i server, err := api.NewServer(tempDir, homeDir, fmt.Sprintf(":%d", httpPort), sshPort, version) if err != nil { - return tracerr.Errorf("Failed to create server: %w", err) + return fmt.Errorf("create server: %w", err) } log.Trace(context.TODO(), "Starting API server", "port", httpPort) if err := server.Run(); err != nil { - return tracerr.Errorf("Server failed: %w", err) + return fmt.Errorf("server failed: %w", err) } return nil diff --git a/runner/internal/shim/authorized_keys.go b/runner/internal/shim/authorized_keys.go index 1fdce75013..491172ce85 100644 --- a/runner/internal/shim/authorized_keys.go +++ b/runner/internal/shim/authorized_keys.go @@ -9,14 +9,13 @@ import ( "path/filepath" "slices" - "github.com/ztrue/tracerr" "golang.org/x/crypto/ssh" ) func PublicKeyFingerprint(key string) (string, error) { pk, _, _, _, err := ssh.ParseAuthorizedKey([]byte(key)) if err != nil { - return "", tracerr.Wrap(err) + return "", fmt.Errorf("parse authorized key: %w", err) } keyFingerprint := ssh.FingerprintSHA256(pk) return keyFingerprint, nil @@ -74,7 +73,7 @@ func (ak AuthorizedKeys) read(r io.Reader) ([]string, error) { lines = append(lines, text) } if err := scanner.Err(); err != nil { - return []string{}, tracerr.Wrap(err) + return []string{}, fmt.Errorf("scan authorized keys: %w", err) } return lines, nil } @@ -84,7 +83,7 @@ func (ak AuthorizedKeys) write(w io.Writer, lines []string) error { for _, line := range lines { _, err := fmt.Fprintln(wr, line) if err != nil { - return tracerr.Wrap(err) + return fmt.Errorf("write line: %w", err) } } return wr.Flush() @@ -109,40 +108,40 @@ func (ak AuthorizedKeys) GetAuthorizedKeysPath() (string, error) { func (ak AuthorizedKeys) transformAuthorizedKeys(transform func([]string, []string) []string, publicKeys []string) error { authorizedKeysPath, err := ak.GetAuthorizedKeysPath() if err != nil { - return tracerr.Wrap(err) + return fmt.Errorf("get authorized keys path: %w", err) } info, err := os.Stat(authorizedKeysPath) if err != nil { - return tracerr.Wrap(err) + return fmt.Errorf("stat authorized keys: %w", err) } fileMode := info.Mode().Perm() authorizedKeysFile, err := os.OpenFile(authorizedKeysPath, os.O_RDWR, fileMode) if err != nil { - return tracerr.Wrap(err) + return fmt.Errorf("open authorized keys: %w", err) } defer authorizedKeysFile.Close() lines, err := ak.read(authorizedKeysFile) if err != nil { - return tracerr.Wrap(err) + return fmt.Errorf("read authorized keys: %w", err) } // write backup authorizedKeysPath, err = ak.GetAuthorizedKeysPath() if err != nil { - return tracerr.Wrap(err) + return fmt.Errorf("get authorized keys path: %w", err) } authorizedKeysPathBackup := authorizedKeysPath + ".bak" authorizedKeysBackup, err := os.OpenFile(authorizedKeysPathBackup, os.O_RDWR|os.O_CREATE|os.O_TRUNC, fileMode) if err != nil { - return tracerr.Wrap(err) + return fmt.Errorf("open authorized keys backup: %w", err) } defer authorizedKeysBackup.Close() if err := ak.write(authorizedKeysBackup, lines); err != nil { - return tracerr.Wrap(err) + return fmt.Errorf("write authorized keys backup: %w", err) } // transform lines @@ -150,13 +149,13 @@ func (ak AuthorizedKeys) transformAuthorizedKeys(transform func([]string, []stri // write authorized_keys if err := authorizedKeysFile.Truncate(0); err != nil { - return tracerr.Wrap(err) + return fmt.Errorf("truncate authorized keys: %w", err) } if _, err := authorizedKeysFile.Seek(0, 0); err != nil { - return tracerr.Wrap(err) + return fmt.Errorf("seek authorized keys: %w", err) } if err := ak.write(authorizedKeysFile, newLines); err != nil { - return tracerr.Wrap(err) + return fmt.Errorf("write authorized keys: %w", err) } return nil diff --git a/runner/internal/shim/backends/gcp.go b/runner/internal/shim/backends/gcp.go index b0724bd5b7..65632b2371 100644 --- a/runner/internal/shim/backends/gcp.go +++ b/runner/internal/shim/backends/gcp.go @@ -4,8 +4,6 @@ import ( "fmt" "os" "path/filepath" - - "github.com/ztrue/tracerr" ) type GCPBackend struct{} @@ -26,7 +24,7 @@ func (e *GCPBackend) GetRealDeviceName(volumeID, deviceName string) (string, err } realDeviceName, err = filepath.Abs(filepath.Join("/dev/disk/by-id/", realDeviceName)) if err != nil { - return "", tracerr.Wrap(err) + return "", fmt.Errorf("get device absolute path: %w", err) } return realDeviceName, nil } diff --git a/runner/internal/shim/docker.go b/runner/internal/shim/docker.go index 37c9d9adae..d566453a29 100644 --- a/runner/internal/shim/docker.go +++ b/runner/internal/shim/docker.go @@ -36,7 +36,6 @@ import ( "github.com/dstackai/dstack/runner/internal/shim/host" "github.com/dstackai/dstack/runner/internal/types" bytesize "github.com/inhies/go-bytesize" - "github.com/ztrue/tracerr" ) // TODO: Allow for configuration via cli arguments or environment variables. @@ -63,11 +62,11 @@ type DockerRunner struct { func NewDockerRunner(ctx context.Context, dockerParams DockerParameters) (*DockerRunner, error) { client, err := docker.NewClientWithOpts(docker.FromEnv, docker.WithAPIVersionNegotiation()) if err != nil { - return nil, tracerr.Wrap(err) + return nil, fmt.Errorf("create docker client: %w", err) } dockerInfo, err := client.Info(ctx) if err != nil { - return nil, tracerr.Wrap(err) + return nil, fmt.Errorf("get docker info: %w", err) } var gpuVendor common.GpuVendor @@ -79,7 +78,7 @@ func NewDockerRunner(ctx context.Context, dockerParams DockerParameters) (*Docke } gpuLock, err := NewGpuLock(gpus) if err != nil { - return nil, tracerr.Wrap(err) + return nil, fmt.Errorf("create GPU lock: %w", err) } runner := &DockerRunner{ @@ -93,7 +92,7 @@ func NewDockerRunner(ctx context.Context, dockerParams DockerParameters) (*Docke } if err := runner.restoreStateFromContainers(ctx); err != nil { - return nil, tracerr.Errorf("failed to restore state from containers: %w", err) + return nil, fmt.Errorf("failed to restore state from containers: %w", err) } return runner, nil @@ -245,7 +244,7 @@ func (d *DockerRunner) TaskInfo(taskID string) TaskInfo { func (d *DockerRunner) Submit(ctx context.Context, cfg TaskConfig) error { task := NewTaskFromConfig(cfg) if ok := d.tasks.Add(task); !ok { - return tracerr.Errorf("%w: task %s is already submitted", ErrRequest, task.ID) + return fmt.Errorf("%w: task %s is already submitted", ErrRequest, task.ID) } log.Debug(ctx, "new task submitted", "task", task.ID) return nil @@ -273,7 +272,7 @@ func (d *DockerRunner) Run(ctx context.Context, taskID string) error { task.SetStatusPreparing() if err := d.tasks.Update(task); err != nil { - return tracerr.Errorf("%w: failed to update task %s: %w", ErrInternal, task.ID, err) + return fmt.Errorf("%w: failed to update task %s: %w", ErrInternal, task.ID, err) } cfg := task.config @@ -281,7 +280,7 @@ func (d *DockerRunner) Run(ctx context.Context, taskID string) error { runnerDir, err := d.dockerParams.MakeRunnerDir(task.containerName) if err != nil { - return tracerr.Wrap(err) + return fmt.Errorf("make runner dir: %w", err) } task.runnerDir = runnerDir log.Debug(ctx, "runner dir", "task", task.ID, "path", runnerDir) @@ -291,7 +290,7 @@ func (d *DockerRunner) Run(ctx context.Context, taskID string) error { if err != nil { log.Error(ctx, err.Error()) task.SetStatusTerminated(string(types.TerminationReasonExecutorError), err.Error()) - return tracerr.Wrap(err) + return fmt.Errorf("acquire GPU: %w", err) } task.gpuIDs = gpuIDs log.Debug(ctx, "acquired GPU(s)", "task", task.ID, "gpus", gpuIDs) @@ -310,7 +309,7 @@ func (d *DockerRunner) Run(ctx context.Context, taskID string) error { errMessage := fmt.Sprintf("ak.AppendPublicKeys error: %s", err.Error()) log.Error(ctx, errMessage) task.SetStatusTerminated(string(types.TerminationReasonExecutorError), errMessage) - return tracerr.Wrap(err) + return fmt.Errorf("append public keys: %w", err) } defer func(cfg TaskConfig) { err := ak.RemovePublicKeys(cfg.HostSshKeys) @@ -330,14 +329,14 @@ func (d *DockerRunner) Run(ctx context.Context, taskID string) error { errMessage := fmt.Sprintf("prepareVolumes error: %s", err.Error()) log.Error(ctx, errMessage) task.SetStatusTerminated(string(types.TerminationReasonExecutorError), errMessage) - return tracerr.Wrap(err) + return fmt.Errorf("prepare volumes: %w", err) } err = prepareInstanceMountPoints(cfg) if err != nil { errMessage := fmt.Sprintf("prepareInstanceMountPoints error: %s", err.Error()) log.Error(ctx, errMessage) task.SetStatusTerminated(string(types.TerminationReasonExecutorError), errMessage) - return tracerr.Wrap(err) + return fmt.Errorf("prepare instance mount points: %w", err) } log.Debug(ctx, "Pulling image") @@ -345,7 +344,7 @@ func (d *DockerRunner) Run(ctx context.Context, taskID string) error { defer cancelPull() task.SetStatusPulling(cancelPull) if err := d.tasks.Update(task); err != nil { - return tracerr.Errorf("%w: failed to update task %s: %w", ErrInternal, task.ID, err) + return fmt.Errorf("%w: failed to update task %s: %w", ErrInternal, task.ID, err) } // Although it's called "runner dir", we also use it for shim task-related data. // Maybe we should rename it to "task dir" (including the `/root/.dstack/runners` dir on the host). @@ -354,31 +353,31 @@ func (d *DockerRunner) Run(ctx context.Context, taskID string) error { errMessage := fmt.Sprintf("pullImage error: %s", err.Error()) log.Error(ctx, errMessage) task.SetStatusTerminated(string(types.TerminationReasonCreatingContainerError), errMessage) - return tracerr.Wrap(err) + return fmt.Errorf("pull image: %w", err) } log.Debug(ctx, "Creating container", "task", task.ID, "name", task.containerName) task.SetStatusCreating() if err := d.tasks.Update(task); err != nil { - return tracerr.Errorf("%w: failed to update task %s: %w", ErrInternal, task.ID, err) + return fmt.Errorf("%w: failed to update task %s: %w", ErrInternal, task.ID, err) } if err := d.createContainer(ctx, &task); err != nil { errMessage := fmt.Sprintf("createContainer error: %s", err.Error()) log.Error(ctx, errMessage) task.SetStatusTerminated(string(types.TerminationReasonCreatingContainerError), errMessage) - return tracerr.Wrap(err) + return fmt.Errorf("create container: %w", err) } log.Debug(ctx, "Running container", "task", task.ID, "name", task.containerName) task.SetStatusRunning() if err := d.tasks.Update(task); err != nil { - return tracerr.Errorf("%w: failed to update task %s: %w", ErrInternal, task.ID, err) + return fmt.Errorf("%w: failed to update task %s: %w", ErrInternal, task.ID, err) } err = d.startContainer(ctx, &task) if err == nil { // startContainer sets `ports` field, committing update if err := d.tasks.Update(task); err != nil { - return tracerr.Errorf("%w: failed to update task %s: %w", ErrInternal, task.ID, err) + return fmt.Errorf("%w: failed to update task %s: %w", ErrInternal, task.ID, err) } err = d.waitContainer(ctx, &task) } @@ -392,7 +391,7 @@ func (d *DockerRunner) Run(ctx context.Context, taskID string) error { errMessage = "" } task.SetStatusTerminated(string(types.TerminationReasonContainerExitedWithError), errMessage) - return tracerr.Wrap(err) + return fmt.Errorf("wait container: %w", err) } log.Debug(ctx, "Container finished successfully", "task", task.ID, "name", task.containerName) @@ -521,7 +520,7 @@ func prepareVolumes(ctx context.Context, taskConfig TaskConfig) error { for _, volume := range taskConfig.Volumes { err := formatAndMountVolume(ctx, volume) if err != nil { - return tracerr.Wrap(err) + return fmt.Errorf("format and mount volume: %w", err) } } return nil @@ -557,15 +556,15 @@ func unmountVolumes(ctx context.Context, taskConfig TaskConfig) error { func formatAndMountVolume(ctx context.Context, volume VolumeInfo) error { backend, err := getBackend(volume.Backend) if err != nil { - return tracerr.Wrap(err) + return fmt.Errorf("get backend: %w", err) } deviceName, err := backend.GetRealDeviceName(volume.VolumeId, volume.DeviceName) if err != nil { - return tracerr.Wrap(err) + return fmt.Errorf("get real device name: %w", err) } fsCreated, err := initFileSystem(ctx, deviceName, !volume.InitFs) if err != nil { - return tracerr.Wrap(err) + return fmt.Errorf("init file system: %w", err) } // Make FS root directory world-writable (0777) to give any job user // a permission to create new files @@ -582,7 +581,7 @@ func formatAndMountVolume(ctx context.Context, volume VolumeInfo) error { } err = mountDisk(ctx, deviceName, getVolumeMountPoint(volume.Name), fsRootPerms) if err != nil { - return tracerr.Wrap(err) + return fmt.Errorf("mount disk: %w", err) } return nil } @@ -602,13 +601,13 @@ func prepareInstanceMountPoints(taskConfig TaskConfig) error { if _, err := os.Stat(mountPoint.InstancePath); errors.Is(err, os.ErrNotExist) { // All missing parent dirs are created with 0755 permissions if err = os.MkdirAll(mountPoint.InstancePath, 0o755); err != nil { - return tracerr.Wrap(err) + return fmt.Errorf("create instance mount directory: %w", err) } if err = os.Chmod(mountPoint.InstancePath, 0o777); err != nil { - return tracerr.Wrap(err) + return fmt.Errorf("chmod instance mount directory: %w", err) } } else if err != nil { - return tracerr.Wrap(err) + return fmt.Errorf("stat instance mount directory: %w", err) } } return nil @@ -678,7 +677,7 @@ func pullImage(ctx context.Context, client docker.APIClient, taskConfig TaskConf Filters: filters.NewArgs(filters.Arg("reference", taskConfig.ImageName)), }) if err != nil { - return tracerr.Wrap(err) + return fmt.Errorf("list images: %w", err) } // TODO: force pull latset @@ -698,13 +697,13 @@ func pullImage(ctx context.Context, client docker.APIClient, taskConfig TaskConf startTime := time.Now() reader, err := client.ImagePull(ctx, taskConfig.ImageName, opts) if err != nil { - return tracerr.Wrap(err) + return fmt.Errorf("pull image: %w", err) } defer reader.Close() logFile, err := os.OpenFile(logPath, os.O_CREATE|os.O_TRUNC|os.O_WRONLY, 0o644) if err != nil { - return tracerr.Wrap(err) + return fmt.Errorf("open pull log file: %w", err) } defer logFile.Close() @@ -775,13 +774,13 @@ func pullImage(ctx context.Context, client docker.APIClient, taskConfig TaskConf speed := bytesize.New(float64(currentBytes) / duration.Seconds()) if err := ctx.Err(); err != nil { - return tracerr.Errorf("image pull interrupted: downloaded %d bytes out of %d (%s/s): %w", currentBytes, totalBytes, speed, err) + return fmt.Errorf("image pull interrupted: downloaded %d bytes out of %d (%s/s): %w", currentBytes, totalBytes, speed, err) } if pullCompleted { log.Debug(ctx, "image successfully pulled", "bytes", currentBytes, "bps", speed) } else { - return tracerr.Errorf( + return fmt.Errorf( "failed pulling %s: downloaded %d/%d bytes (%s/s), errors: %q", taskConfig.ImageName, currentBytes, totalBytes, speed, pullErrors, ) @@ -793,16 +792,16 @@ func pullImage(ctx context.Context, client docker.APIClient, taskConfig TaskConf func (d *DockerRunner) createContainer(ctx context.Context, task *Task) error { mounts, err := d.dockerParams.DockerMounts(task.runnerDir) if err != nil { - return tracerr.Wrap(err) + return fmt.Errorf("get docker mounts: %w", err) } volumeMounts, err := getVolumeMounts(task.config.VolumeMounts) if err != nil { - return tracerr.Wrap(err) + return fmt.Errorf("get volume mounts: %w", err) } mounts = append(mounts, volumeMounts...) instanceMounts, err := getInstanceMounts(task.config.InstanceMounts) if err != nil { - return tracerr.Wrap(err) + return fmt.Errorf("get instance mounts: %w", err) } mounts = append(mounts, instanceMounts...) @@ -862,7 +861,7 @@ func (d *DockerRunner) createContainer(ctx context.Context, task *Task) error { resp, err := d.client.ContainerCreate(ctx, containerConfig, hostConfig, nil, nil, task.containerName) if err != nil { - return tracerr.Wrap(err) + return fmt.Errorf("create container: %w", err) } task.containerID = resp.ID return nil @@ -870,7 +869,7 @@ func (d *DockerRunner) createContainer(ctx context.Context, task *Task) error { func (d *DockerRunner) startContainer(ctx context.Context, task *Task) error { if err := d.client.ContainerStart(ctx, task.containerID, container.StartOptions{}); err != nil { - return tracerr.Wrap(err) + return fmt.Errorf("start container: %w", err) } if getNetworkMode(task.config.NetworkMode).IsHost() { task.ports = []PortMapping{} @@ -878,7 +877,7 @@ func (d *DockerRunner) startContainer(ctx context.Context, task *Task) error { } container_, err := d.client.ContainerInspect(ctx, task.containerID) if err != nil { - return tracerr.Wrap(err) + return fmt.Errorf("inspect container: %w", err) } // FIXME: container_.NetworkSettings.Ports values (bindings) are not immediately available // on macOS, so ports can be empty with local backend. @@ -897,7 +896,7 @@ func (d *DockerRunner) waitContainer(ctx context.Context, task *Task) error { } } case err := <-errorCh: - return tracerr.Wrap(err) + return fmt.Errorf("wait for container: %w", err) } return nil } @@ -1244,7 +1243,7 @@ func (c *CLIArgs) DockerPorts() []int { func (c *CLIArgs) MakeRunnerDir(name string) (string, error) { runnerTemp := filepath.Join(c.Shim.HomeDir, "runners", name) if err := os.MkdirAll(runnerTemp, 0o755); err != nil { - return "", tracerr.Wrap(err) + return "", fmt.Errorf("create runner directory: %w", err) } return runnerTemp, nil } From bc5e0fa797142d55c77f85d02719c9938e5dd3cc Mon Sep 17 00:00:00 2001 From: Victor Skvortsov Date: Mon, 27 Oct 2025 16:54:19 +0500 Subject: [PATCH 10/14] Drop tracerr from go.mod --- runner/go.mod | 1 - runner/go.sum | 2 -- 2 files changed, 3 deletions(-) diff --git a/runner/go.mod b/runner/go.mod index 8af11d34dc..b72312768e 100644 --- a/runner/go.mod +++ b/runner/go.mod @@ -21,7 +21,6 @@ require ( github.com/sirupsen/logrus v1.9.3 github.com/stretchr/testify v1.10.0 github.com/urfave/cli/v2 v2.27.1 - github.com/ztrue/tracerr v0.4.0 golang.org/x/crypto v0.22.0 golang.org/x/sys v0.26.0 ) diff --git a/runner/go.sum b/runner/go.sum index adb3d96a07..f9e6c0e912 100644 --- a/runner/go.sum +++ b/runner/go.sum @@ -196,8 +196,6 @@ github.com/yuin/goldmark v1.2.1/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9dec github.com/yuin/goldmark v1.4.13/go.mod h1:6yULJ656Px+3vBD8DxQVa3kxgyrAnzto9xy5taEt/CY= github.com/yusufpapurcu/wmi v1.2.4 h1:zFUKzehAFReQwLys1b/iSMl+JQGSCSjtVqQn9bBrPo0= github.com/yusufpapurcu/wmi v1.2.4/go.mod h1:SBZ9tNy3G9/m5Oi98Zks0QjeHVDvuK0qfxQmPyzfmi0= -github.com/ztrue/tracerr v0.4.0 h1:vT5PFxwIGs7rCg9ZgJ/y0NmOpJkPCPFK8x0vVIYzd04= -github.com/ztrue/tracerr v0.4.0/go.mod h1:PaFfYlas0DfmXNpo7Eay4MFhZUONqvXM+T2HyGPpngk= go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.50.0 h1:cEPbyTSEHlQR89XVlyo78gqluF8Y3oMeBkXGWzQsfXY= go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.50.0/go.mod h1:DKdbWcT4GH1D0Y3Sqt/PFXt2naRKDWtU+eE6oLdFNA8= go.opentelemetry.io/otel v1.25.0 h1:gldB5FfhRl7OJQbUHt/8s0a7cE8fbsPAtdpRaApKy4k= From 291ccb5cff9f5f422cc6ec9390002a7c1417b894 Mon Sep 17 00:00:00 2001 From: Victor Skvortsov Date: Mon, 27 Oct 2025 17:13:43 +0500 Subject: [PATCH 11/14] Fix wrapping nil errors --- runner/internal/executor/executor.go | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/runner/internal/executor/executor.go b/runner/internal/executor/executor.go index 872cf16213..af310ff1d9 100644 --- a/runner/internal/executor/executor.go +++ b/runner/internal/executor/executor.go @@ -422,7 +422,10 @@ func (ex *RunExecutor) execJob(ctx context.Context, jobLogFile io.Writer) error cmd := exec.CommandContext(ctx, ex.jobSpec.Commands[0], ex.jobSpec.Commands[1:]...) cmd.Cancel = func() error { // returns error on Windows - return fmt.Errorf("send interrupt signal: %w", cmd.Process.Signal(os.Interrupt)) + if err = cmd.Process.Signal(os.Interrupt); err != nil { + return fmt.Errorf("send interrupt signal: %w", err) + } + return nil } cmd.WaitDelay = ex.killDelay // kills the process if it doesn't exit in time @@ -549,7 +552,10 @@ func (ex *RunExecutor) execJob(ctx context.Context, jobLogFile io.Writer) error if err != nil && !isPtyError(err) { return fmt.Errorf("copy command output: %w", err) } - return fmt.Errorf("wait for command: %w", cmd.Wait()) + if err = cmd.Wait(); err != nil { + return fmt.Errorf("wait for command: %w", err) + } + return nil } func (ex *RunExecutor) setupCredentials(ctx context.Context) (func(), error) { From 72049dfe57bbc9e65aefb04d20a6f795225969ed Mon Sep 17 00:00:00 2001 From: Victor Skvortsov Date: Tue, 28 Oct 2025 11:02:09 +0500 Subject: [PATCH 12/14] Wrap errors --- runner/cmd/shim/main.go | 4 ++-- runner/internal/repo/diff.go | 15 ++++++--------- runner/internal/shim/dcgm/exporter.go | 4 ++-- runner/internal/shim/host_info.go | 2 +- 4 files changed, 11 insertions(+), 14 deletions(-) diff --git a/runner/cmd/shim/main.go b/runner/cmd/shim/main.go index b88a02470e..63d459a8cd 100644 --- a/runner/cmd/shim/main.go +++ b/runner/cmd/shim/main.go @@ -252,9 +252,9 @@ func start(ctx context.Context, args shim.CLIArgs, serviceMode bool) (err error) if serviceMode { if err := shim.WriteHostInfo(shimHomeDir, dockerRunner.Resources(ctx)); err != nil { if errors.Is(err, os.ErrExist) { - log.Error(ctx, "cannot write host info: file already exists") + log.Error(ctx, "write host info: file already exists") } else { - return err + return fmt.Errorf("write host info: %w", err) } } } diff --git a/runner/internal/repo/diff.go b/runner/internal/repo/diff.go index 2868132672..ebb4580cbd 100644 --- a/runner/internal/repo/diff.go +++ b/runner/internal/repo/diff.go @@ -19,7 +19,7 @@ func ApplyDiff(ctx context.Context, dir, patch string) error { log.Info(ctx, "apply diff start", "dir", dir) files, _, err := gitdiff.Parse(strings.NewReader(patch + "\n")) if err != nil { - return err + return fmt.Errorf("parse git diff: %w", err) } output := &bytes.Buffer{} @@ -34,8 +34,7 @@ func ApplyDiff(ctx context.Context, dir, patch string) error { oldFile, err = os.Open(path.Join(dir, fileInfo.OldName)) input = oldFile if err != nil { - log.Error(ctx, "apply diff can not open file", "filename", fileInfo.OldName, "err", err) - return err + return fmt.Errorf("open file %s: %w", fileInfo.OldName, err) } } err = gitdiff.Apply(output, input, fileInfo) @@ -47,8 +46,7 @@ func ApplyDiff(ctx context.Context, dir, patch string) error { aes = fmt.Sprintf("ApplyError{Fragment: %d, FragmentLine: %d, Line: %d}", ae.Fragment, ae.FragmentLine, ae.Line) } - log.Error(ctx, "diff applier error", "filename", fileInfo.OldName, "err", err, "ae", aes) - return err + return fmt.Errorf("diff applier error for file %s (%s): %w", fileInfo.OldName, aes, ae) } if !fileInfo.IsDelete { @@ -64,20 +62,19 @@ func ApplyDiff(ctx context.Context, dir, patch string) error { mode := fileModeHeuristic(ctx, dir, fileInfo) err = os.WriteFile(path.Join(dir, fileInfo.NewName), output.Bytes(), mode) if err != nil { - log.Error(ctx, "diff apply write file", "filename", fileInfo.NewName, "err", err) - return err + return fmt.Errorf("write file %s: %w", fileInfo.NewName, err) } // WriteFile does not change perm for existing files err = os.Chmod(path.Join(dir, fileInfo.NewName), mode) if err != nil { - log.Warning(ctx, "diff apply can not chmod", "filename", fileInfo.NewName, "err", err) + log.Warning(ctx, "diff apply cannot chmod", "filename", fileInfo.NewName, "err", err) } } if fileInfo.IsDelete || fileInfo.IsRename { err = os.Remove(path.Join(dir, fileInfo.OldName)) if err != nil { - log.Warning(ctx, "diff apply can not delete", "filename", fileInfo.OldName, "err", err) + log.Warning(ctx, "diff apply cannot delete", "filename", fileInfo.OldName, "err", err) } } } diff --git a/runner/internal/shim/dcgm/exporter.go b/runner/internal/shim/dcgm/exporter.go index 6d6e8484e3..de4ac939ee 100644 --- a/runner/internal/shim/dcgm/exporter.go +++ b/runner/internal/shim/dcgm/exporter.go @@ -107,7 +107,7 @@ func (c *DCGMExporter) Start(ctx context.Context) error { configFile, err := os.CreateTemp("", "counters-*.csv") if err != nil { - return err + return fmt.Errorf("create config file: %w", err) } defer configFile.Close() c.configPath = configFile.Name() @@ -115,7 +115,7 @@ func (c *DCGMExporter) Start(ctx context.Context) error { for _, counter := range counters { err := configWriter.Write([]string{counter.Name, counter.Type, counter.Help}) if err != nil { - return err + return fmt.Errorf("write config file: %w", err) } } configWriter.Flush() diff --git a/runner/internal/shim/host_info.go b/runner/internal/shim/host_info.go index ea717e112c..f7df215082 100644 --- a/runner/internal/shim/host_info.go +++ b/runner/internal/shim/host_info.go @@ -24,7 +24,7 @@ type hostInfo struct { func WriteHostInfo(dir string, resources Resources) error { path := filepath.Join(dir, "host_info.json") // if host_info.json already exists, do nothing and return os.ErrExist - if _, err := os.Stat(path); !errors.Is(err, os.ErrNotExist) { + if _, err := os.Stat(path); errors.Is(err, os.ErrExist) { return err } From 6ab738c1a9f82e30ee67918cf57e9cecd7de7581 Mon Sep 17 00:00:00 2001 From: Victor Skvortsov Date: Tue, 28 Oct 2025 11:12:47 +0500 Subject: [PATCH 13/14] Propagate check dstack-runner exists errors --- runner/internal/shim/runner.go | 2 ++ 1 file changed, 2 insertions(+) diff --git a/runner/internal/shim/runner.go b/runner/internal/shim/runner.go index 0191e33edf..4ef8f5db6e 100644 --- a/runner/internal/shim/runner.go +++ b/runner/internal/shim/runner.go @@ -45,6 +45,8 @@ func downloadRunner(ctx context.Context, url string, path string, force bool) er log.Info(ctx, "dstack-runner binary exists, skipping download", "path", path) return nil } + } else if !os.IsNotExist(err) { + return fmt.Errorf("check dstack-runner exists: %w", err) } tempFile, err := os.CreateTemp(filepath.Dir(path), "dstack-runner") if err != nil { From 11857ca854b5f5ae4fcc92d8f12528093c3d6402 Mon Sep 17 00:00:00 2001 From: Victor Skvortsov Date: Tue, 28 Oct 2025 11:24:15 +0500 Subject: [PATCH 14/14] Wrap errors in executor.go --- runner/internal/executor/executor.go | 72 ++++++++++++++-------------- 1 file changed, 36 insertions(+), 36 deletions(-) diff --git a/runner/internal/executor/executor.go b/runner/internal/executor/executor.go index af310ff1d9..d893754c60 100644 --- a/runner/internal/executor/executor.go +++ b/runner/internal/executor/executor.go @@ -106,7 +106,7 @@ func NewRunExecutor(tempDir string, homeDir string, sshPort int) (*RunExecutor, if runtime.GOOS == "linux" { proc, err := procfs.NewDefaultFS() if err != nil { - return nil, fmt.Errorf("failed to initialize procfs: %w", err) + return nil, fmt.Errorf("initialize procfs: %w", err) } connectionTracker = connections.NewConnectionTracker(connections.ConnectionTrackerConfig{ Port: uint64(sshPort), @@ -516,7 +516,7 @@ func (ex *RunExecutor) execJob(ctx context.Context, jobLogFile io.Writer) error err = writeMpiHostfile(ctx, ex.clusterInfo.JobIPs, gpus_per_node_num, mpiHostfilePath) if err != nil { - return err + return fmt.Errorf("write MPI hostfile: %w", err) } cmd.Env = envMap.Render() @@ -743,7 +743,7 @@ func parseStringId(stringId string) (uint32, error) { return 0, err } if id < 0 { - return 0, fmt.Errorf("negative value: %d", id) + return 0, fmt.Errorf("negative id value: %d", id) } return uint32(id), nil } @@ -755,7 +755,7 @@ func parseStringId(stringId string) (uint32, error) { func startCommand(cmd *exec.Cmd) (*os.File, error) { ptm, pts, err := pty.Open() if err != nil { - return nil, err + return nil, fmt.Errorf("open pty: %w", err) } defer func() { _ = pts.Close() }() @@ -780,13 +780,13 @@ func startCommand(cmd *exec.Cmd) (*os.File, error) { uid := cmd.SysProcAttr.Credential.Uid if err := os.Chown(pts.Name(), int(uid), -1); err != nil { _ = ptm.Close() - return nil, err + return nil, fmt.Errorf("chown pty slave: %w", err) } } if err := cmd.Start(); err != nil { _ = ptm.Close() - return nil, err + return nil, fmt.Errorf("start command: %w", err) } return ptm, nil } @@ -830,28 +830,28 @@ func prepareSSHDir(uid int, gid int, homeDir string) (string, error) { return "", fmt.Errorf("not a directory: %s", sshDir) } if err = os.Chmod(sshDir, 0o700); err != nil { - return "", err + return "", fmt.Errorf("chmod ssh dir: %w", err) } } else if errors.Is(err, os.ErrNotExist) { if err = os.MkdirAll(sshDir, 0o700); err != nil { - return "", err + return "", fmt.Errorf("create ssh dir: %w", err) } } else { return "", err } if err = os.Chown(sshDir, uid, gid); err != nil { - return "", err + return "", fmt.Errorf("chown ssh dir: %w", err) } return sshDir, nil } func writeMpiHostfile(ctx context.Context, ips []string, gpus_per_node int, path string) error { if err := os.MkdirAll(filepath.Dir(path), 0o755); err != nil { - return err + return fmt.Errorf("create MPI hostfile directory: %w", err) } file, err := os.OpenFile(path, os.O_CREATE|os.O_TRUNC|os.O_WRONLY, 0o644) if err != nil { - return err + return fmt.Errorf("open MPI hostfile: %w", err) } defer file.Close() nonEmptyIps := []string{} @@ -864,7 +864,7 @@ func writeMpiHostfile(ctx context.Context, ips []string, gpus_per_node int, path for _, ip := range nonEmptyIps { line := fmt.Sprintf("%s slots=%d\n", ip, gpus_per_node) if _, err = file.WriteString(line); err != nil { - return err + return fmt.Errorf("write MPI hostfile line: %w", err) } } } else { @@ -875,11 +875,11 @@ func writeMpiHostfile(ctx context.Context, ips []string, gpus_per_node int, path func writeDstackProfile(env map[string]string, pth string) error { if err := os.MkdirAll(path.Dir(pth), 0o755); err != nil { - return err + return fmt.Errorf("create dstack profile directory: %w", err) } file, err := os.OpenFile(pth, os.O_CREATE|os.O_TRUNC|os.O_WRONLY, 0o644) if err != nil { - return err + return fmt.Errorf("open dstack profile: %w", err) } defer file.Close() for key, value := range env { @@ -889,14 +889,14 @@ func writeDstackProfile(env map[string]string, pth string) error { } line := fmt.Sprintf("export %s='%s'\n", key, strings.ReplaceAll(value, `'`, `'"'"'`)) if _, err = file.WriteString(line); err != nil { - return err + return fmt.Errorf("write dstack profile: %w", err) } } if _, err = file.WriteString("cd \"$DSTACK_WORKING_DIR\"\n"); err != nil { - return err + return fmt.Errorf("write dstack profile: %w", err) } if err = os.Chmod(pth, 0o644); err != nil { - return err + return fmt.Errorf("chmod dstack profile: %w", err) } return nil } @@ -904,14 +904,14 @@ func writeDstackProfile(env map[string]string, pth string) error { func includeDstackProfile(profilePath string, dstackProfilePath string) error { file, err := os.OpenFile(profilePath, os.O_CREATE|os.O_APPEND|os.O_WRONLY, 0o644) if err != nil { - return err + return fmt.Errorf("open profile file: %w", err) } defer file.Close() if _, err = file.WriteString(fmt.Sprintf("\n. '%s'\n", dstackProfilePath)); err != nil { - return err + return fmt.Errorf("write profile include: %w", err) } if err = os.Chmod(profilePath, 0o644); err != nil { - return err + return fmt.Errorf("chmod profile file: %w", err) } return nil } @@ -920,37 +920,37 @@ func configureSSH(private string, public string, ips []string, port int, uid int privatePath := filepath.Join(sshDir, "dstack_job") privateFile, err := os.OpenFile(privatePath, os.O_TRUNC|os.O_WRONLY|os.O_CREATE, 0o600) if err != nil { - return err + return fmt.Errorf("open private key file: %w", err) } defer privateFile.Close() if err := os.Chown(privatePath, uid, gid); err != nil { - return err + return fmt.Errorf("chown private key: %w", err) } if _, err := privateFile.WriteString(private); err != nil { - return err + return fmt.Errorf("write private key: %w", err) } akPath := filepath.Join(sshDir, "authorized_keys") akFile, err := os.OpenFile(akPath, os.O_APPEND|os.O_WRONLY|os.O_CREATE, 0o600) if err != nil { - return err + return fmt.Errorf("open authorized_keys: %w", err) } defer akFile.Close() if err := os.Chown(akPath, uid, gid); err != nil { - return err + return fmt.Errorf("chown authorized_keys: %w", err) } if _, err := akFile.WriteString(public); err != nil { - return err + return fmt.Errorf("write public key: %w", err) } configPath := filepath.Join(sshDir, "config") configFile, err := os.OpenFile(configPath, os.O_APPEND|os.O_WRONLY|os.O_CREATE, 0o600) if err != nil { - return err + return fmt.Errorf("open SSH config: %w", err) } defer configFile.Close() if err := os.Chown(configPath, uid, gid); err != nil { - return err + return fmt.Errorf("chown SSH config: %w", err) } var configBuffer bytes.Buffer for _, ip := range ips { @@ -961,7 +961,7 @@ func configureSSH(private string, public string, ips []string, port int, uid int configBuffer.WriteString(fmt.Sprintf(" IdentityFile %s\n", privatePath)) } if _, err := configFile.Write(configBuffer.Bytes()); err != nil { - return err + return fmt.Errorf("write SSH config: %w", err) } return nil } @@ -973,7 +973,7 @@ func configureSSH(private string, public string, ips []string, port int, uid int func copyAuthorizedKeys(srcPath string, uid int, gid int, dstPath string) error { srcFile, err := os.Open(srcPath) if err != nil { - return err + return fmt.Errorf("open source authorized_keys: %w", err) } defer srcFile.Close() @@ -985,29 +985,29 @@ func copyAuthorizedKeys(srcPath string, uid int, gid int, dstPath string) error return fmt.Errorf("is a directory: %s", dstPath) } if err = os.Chmod(dstPath, 0o600); err != nil { - return err + return fmt.Errorf("chmod destination authorized_keys: %w", err) } } else if !errors.Is(err, os.ErrNotExist) { - return err + return fmt.Errorf("stat destination authorized_keys: %w", err) } dstFile, err := os.OpenFile(dstPath, os.O_APPEND|os.O_WRONLY|os.O_CREATE, 0o600) if err != nil { - return err + return fmt.Errorf("open destination authorized_keys: %w", err) } defer dstFile.Close() if dstExists { // visually separate our keys from existing ones if _, err := dstFile.WriteString("\n\n"); err != nil { - return err + return fmt.Errorf("write separator to authorized_keys: %w", err) } } if _, err := io.Copy(dstFile, srcFile); err != nil { - return err + return fmt.Errorf("copy authorized_keys: %w", err) } if err = os.Chown(dstPath, uid, gid); err != nil { - return err + return fmt.Errorf("chown destination authorized_keys: %w", err) } return nil