From f864d9e03cb639e7dcab8d55fb7fa8ea6975da5d Mon Sep 17 00:00:00 2001 From: Cursor Agent Date: Sun, 21 Jun 2026 11:06:03 +0000 Subject: [PATCH] fix(runners): preserve stopped status when Run returns error Commit 2a156b30 moved the IsFinished() early-return into the err==nil branch only. When a user stops a task, checkNewJobs can move the runner job to stopped before Executor.Run returns an error from the killed process; the error path then overwrote stopped with failed. Restore the finished-status guard before error handling and add regression tests for handleJobRunComplete. Co-authored-by: Denis Gukov --- services/runners/job_pool.go | 90 +++++++++++++++++-------------- services/runners/job_pool_test.go | 54 +++++++++++++++++++ 2 files changed, 104 insertions(+), 40 deletions(-) diff --git a/services/runners/job_pool.go b/services/runners/job_pool.go index d1d7bbea7..b5f014548 100644 --- a/services/runners/job_pool.go +++ b/services/runners/job_pool.go @@ -321,46 +321,7 @@ func (p *JobPool) Run() { }).Debug("Running job") err := running.job.Run(t.username, t.incomingVersion, t.alias) - - if err != nil { - - log.WithFields(log.Fields{ - "context": "job_running", - "task_id": t.taskID, - "task_status": t.status, - }).WithError(err).Error("launch job failed") - - running.Log("Unable to launch the application. Please contact your system administrator for assistance.") - - if running.getStatus() == task_logger.TaskStoppingStatus { - running.SetStatus(task_logger.TaskStoppedStatus) - } else { - running.SetStatus(task_logger.TaskFailStatus) - } - } else { - - log.WithFields(log.Fields{ - "context": "job_running", - "task_id": running.taskID, - "status": string(running.getStatus()), - }).Debug("Job run returned") - - if running.getStatus().IsFinished() { - return - } - - if running.getStatus() == task_logger.TaskStoppingStatus { - running.SetStatus(task_logger.TaskStoppedStatus) - } else { - running.SetStatus(task_logger.TaskSuccessStatus) - } - } - - log.WithFields(log.Fields{ - "context": "job_running", - "task_id": running.taskID, - "status": string(running.getStatus()), - }).Info("Task finished") + running.handleJobRunComplete(err) }(rj) log.WithFields(log.Fields{ @@ -526,6 +487,55 @@ func (p *JobPool) sendProgress() (ok bool) { return } +// handleJobRunComplete applies the terminal status transition after Executor.Run +// returns. Run often returns a non-nil error when the job was stopped or killed +// mid-flight; in that case the executor (or a concurrent stop/kill) may already +// have moved the status to a finished state and it must not be overwritten. +func (running *runningJob) handleJobRunComplete(err error) { + if running.getStatus().IsFinished() { + log.WithFields(log.Fields{ + "context": "job_running", + "task_id": running.taskID, + "status": string(running.getStatus()), + }).Debug("Job run returned") + return + } + + if err != nil { + log.WithFields(log.Fields{ + "context": "job_running", + "task_id": running.taskID, + "task_status": running.getStatus(), + }).WithError(err).Error("launch job failed") + + running.Log("Unable to launch the application. Please contact your system administrator for assistance.") + + if running.getStatus() == task_logger.TaskStoppingStatus { + running.SetStatus(task_logger.TaskStoppedStatus) + } else { + running.SetStatus(task_logger.TaskFailStatus) + } + } else { + log.WithFields(log.Fields{ + "context": "job_running", + "task_id": running.taskID, + "status": string(running.getStatus()), + }).Debug("Job run returned") + + if running.getStatus() == task_logger.TaskStoppingStatus { + running.SetStatus(task_logger.TaskStoppedStatus) + } else { + running.SetStatus(task_logger.TaskSuccessStatus) + } + } + + log.WithFields(log.Fields{ + "context": "job_running", + "task_id": running.taskID, + "status": string(running.getStatus()), + }).Info("Task finished") +} + // applyTerminatedJobs emergency-stops jobs the server no longer accepts // results for — the task reached a terminal status on the server (e.g. force // stopped) while the runner was offline. The job's process is killed and the diff --git a/services/runners/job_pool_test.go b/services/runners/job_pool_test.go index f6914898a..c0d70a31b 100644 --- a/services/runners/job_pool_test.go +++ b/services/runners/job_pool_test.go @@ -2,6 +2,7 @@ package runners import ( "encoding/json" + "errors" "net/http" "net/http/httptest" "sync" @@ -85,6 +86,59 @@ func TestJobPool_RunningJobsLifecycle(t *testing.T) { assert.Nil(t, p.getRunningJob(1)) } +func TestHandleJobRunComplete_PreservesFinishedStatusOnError(t *testing.T) { + // When the server already moved the job to stopped (e.g. user stop confirmed + // via checkNewJobs) and Executor.Run returns an error from the killed process, + // the stopped status must not be overwritten with failed. + rj := &runningJob{ + taskID: 1, + job: &tasks.LocalExecutor{Task: db.Task{ID: 1}}, + status: task_logger.TaskStoppedStatus, + } + rj.handleJobRunComplete(errors.New("signal: killed")) + + assert.Equal(t, task_logger.TaskStoppedStatus, rj.getStatus()) +} + +func TestHandleJobRunComplete_StoppingBecomesStoppedOnError(t *testing.T) { + lj := &tasks.LocalExecutor{Task: db.Task{ID: 2}} + rj := &runningJob{ + taskID: 2, + job: lj, + status: task_logger.TaskStoppingStatus, + } + lj.Logger = rj + rj.handleJobRunComplete(errors.New("prepare failed")) + + assert.Equal(t, task_logger.TaskStoppedStatus, rj.getStatus()) +} + +func TestHandleJobRunComplete_RunningBecomesFailedOnError(t *testing.T) { + lj := &tasks.LocalExecutor{Task: db.Task{ID: 3}} + rj := &runningJob{ + taskID: 3, + job: lj, + status: task_logger.TaskRunningStatus, + } + lj.Logger = rj + rj.handleJobRunComplete(errors.New("prepare failed")) + + assert.Equal(t, task_logger.TaskFailStatus, rj.getStatus()) +} + +func TestHandleJobRunComplete_RunningBecomesSuccess(t *testing.T) { + lj := &tasks.LocalExecutor{Task: db.Task{ID: 4}} + rj := &runningJob{ + taskID: 4, + job: lj, + status: task_logger.TaskRunningStatus, + } + lj.Logger = rj + rj.handleJobRunComplete(nil) + + assert.Equal(t, task_logger.TaskSuccessStatus, rj.getStatus()) +} + func TestJobPool_HasRunningJobs(t *testing.T) { p := NewJobPool(nil) assert.False(t, p.hasRunningJobs())