@@ -325,56 +325,45 @@ def duration(self) -> timedelta:
325325 end_time = self .finished_at
326326 return end_time - self .submitted_at
327327
328- @root_validator
329- def _status_message (cls , values ) -> Dict :
330- try :
331- status = values ["status" ]
332- termination_reason = values ["termination_reason" ]
333- exit_code = values ["exit_status" ]
334- except KeyError :
335- return values
336- values ["status_message" ] = JobSubmission ._get_status_message (
337- status = status ,
338- termination_reason = termination_reason ,
339- exit_status = exit_code ,
340- )
341- return values
328+ def dict (self , * args , ** kwargs ) -> Dict :
329+ status_message = self ._get_status_message ()
330+ error = self ._get_error ()
331+ # super() does not work with pydantic-duality
332+ res = CoreModel .dict (self , * args , ** kwargs )
333+ res ["status_message" ] = status_message
334+ res ["error" ] = error
335+ return res
342336
343- @staticmethod
344- def _get_status_message (
345- status : JobStatus ,
346- termination_reason : Optional [JobTerminationReason ],
347- exit_status : Optional [int ],
348- ) -> str :
349- if status == JobStatus .DONE :
337+ def _get_status_message (self ) -> Optional [str ]:
338+ if self .status == JobStatus .DONE :
350339 return "exited (0)"
351- elif status == JobStatus .FAILED :
352- if termination_reason == JobTerminationReason .CONTAINER_EXITED_WITH_ERROR :
353- return f"exited ({ exit_status } )"
354- elif termination_reason == JobTerminationReason .FAILED_TO_START_DUE_TO_NO_CAPACITY :
340+ elif self .status == JobStatus .FAILED :
341+ if self .termination_reason == JobTerminationReason .CONTAINER_EXITED_WITH_ERROR :
342+ return f"exited ({ self .exit_status } )"
343+ elif (
344+ self .termination_reason == JobTerminationReason .FAILED_TO_START_DUE_TO_NO_CAPACITY
345+ ):
355346 return "no offers"
356- elif termination_reason == JobTerminationReason .INTERRUPTED_BY_NO_CAPACITY :
347+ elif self . termination_reason == JobTerminationReason .INTERRUPTED_BY_NO_CAPACITY :
357348 return "interrupted"
358349 else :
359350 return "error"
360- elif status == JobStatus .TERMINATED :
361- if termination_reason == JobTerminationReason .TERMINATED_BY_USER :
351+ elif self . status == JobStatus .TERMINATED :
352+ if self . termination_reason == JobTerminationReason .TERMINATED_BY_USER :
362353 return "stopped"
363- elif termination_reason == JobTerminationReason .ABORTED_BY_USER :
354+ elif self . termination_reason == JobTerminationReason .ABORTED_BY_USER :
364355 return "aborted"
365- return status .value
356+ return self . status .value
366357
367- @root_validator
368- def _error (cls , values ) -> Dict :
369- try :
370- termination_reason = values ["termination_reason" ]
371- except KeyError :
372- return values
373- values ["error" ] = JobSubmission ._get_error (termination_reason = termination_reason )
374- return values
358+ def _get_error (self ) -> Optional [str ]:
359+ return JobSubmission ._termination_reason_to_error (
360+ termination_reason = self .termination_reason
361+ )
375362
376363 @staticmethod
377- def _get_error (termination_reason : Optional [JobTerminationReason ]) -> Optional [str ]:
364+ def _termination_reason_to_error (
365+ termination_reason : Optional [JobTerminationReason ],
366+ ) -> Optional [str ]:
378367 error_mapping = {
379368 JobTerminationReason .INSTANCE_UNREACHABLE : "instance unreachable" ,
380369 JobTerminationReason .WAITING_INSTANCE_LIMIT_EXCEEDED : "waiting instance limit exceeded" ,
@@ -395,6 +384,12 @@ class Job(CoreModel):
395384 job_spec : JobSpec
396385 job_submissions : List [JobSubmission ]
397386
387+ def get_last_termination_reason (self ) -> Optional [JobTerminationReason ]:
388+ for submission in reversed (self .job_submissions ):
389+ if submission .termination_reason is not None :
390+ return submission .termination_reason
391+ return None
392+
398393
399394class RunSpec (CoreModel ):
400395 # TODO: run_name, working_dir are redundant here since they are already passed in configuration
@@ -525,87 +520,70 @@ class Run(CoreModel):
525520 last_processed_at : datetime
526521 status : RunStatus
527522 status_message : Optional [str ] = None
528- termination_reason : Optional [RunTerminationReason ]
523+ termination_reason : Optional [RunTerminationReason ] = None
529524 run_spec : RunSpec
530525 jobs : List [Job ]
531- latest_job_submission : Optional [JobSubmission ]
526+ latest_job_submission : Optional [JobSubmission ] = None
532527 cost : float = 0
533528 service : Optional [ServiceSpec ] = None
534529 deployment_num : int = 0 # default for compatibility with pre-0.19.14 servers
535530 # TODO: make error a computed field after migrating to pydanticV2
536531 error : Optional [str ] = None
537532 deleted : Optional [bool ] = None
538533
539- @root_validator
540- def _error (cls , values ) -> Dict :
541- try :
542- termination_reason = values ["termination_reason" ]
543- except KeyError :
544- return values
545- values ["error" ] = Run ._get_error (termination_reason = termination_reason )
546- return values
534+ def dict (self , * args , ** kwargs ) -> Dict :
535+ status_message = self ._get_status_message ()
536+ error = self ._get_error ()
537+ # super() does not work with pydantic-duality
538+ res = CoreModel .dict (self , * args , ** kwargs )
539+ res ["status_message" ] = status_message
540+ res ["error" ] = error
541+ return res
542+
543+ def _get_error (self ) -> Optional [str ]:
544+ return Run ._termination_reason_to_error (termination_reason = self .termination_reason )
547545
548546 @staticmethod
549- def _get_error (termination_reason : Optional [RunTerminationReason ]) -> Optional [str ]:
547+ def _termination_reason_to_error (
548+ termination_reason : Optional [RunTerminationReason ],
549+ ) -> Optional [str ]:
550550 if termination_reason == RunTerminationReason .RETRY_LIMIT_EXCEEDED :
551551 return "retry limit exceeded"
552552 elif termination_reason == RunTerminationReason .SERVER_ERROR :
553553 return "server error"
554554 else :
555555 return None
556556
557- @root_validator
558- def _status_message (cls , values ) -> Dict :
557+ def _get_status_message (self ) -> Optional [str ]:
558+ if len (self .jobs ) == 0 :
559+ return self .status .value
560+
561+ last_job = self .jobs [0 ]
559562 # FIXME: status_message should not require all job submissions for status calculation
560563 # since it's very expensive and is not required for anything else.
561564 # May return a different status if not all job submissions are requested.
562565 # TODO: Calculate status_message by looking at job models directly instead of job submissions.
563- try :
564- status = values ["status" ]
565- jobs : List [Job ] = values ["jobs" ]
566- retry_on_events = (
567- jobs [0 ].job_spec .retry .on_events if jobs and jobs [0 ].job_spec .retry else []
568- )
569- job_status = (
570- jobs [0 ].job_submissions [- 1 ].status
571- if len (jobs ) == 1 and jobs [0 ].job_submissions
572- else None
573- )
574- termination_reason = Run .get_last_termination_reason (jobs [0 ]) if jobs else None
575- except KeyError :
576- return values
577- values ["status_message" ] = Run ._get_status_message (
578- status = status ,
579- job_status = job_status ,
580- retry_on_events = retry_on_events ,
581- termination_reason = termination_reason ,
582- )
583- return values
566+ last_job_termination_reason = last_job .get_last_termination_reason ()
584567
585- @staticmethod
586- def get_last_termination_reason (job : "Job" ) -> Optional [JobTerminationReason ]:
587- for submission in reversed (job .job_submissions ):
588- if submission .termination_reason is not None :
589- return submission .termination_reason
590- return None
568+ if len (self .jobs ) == 1 :
569+ # FIXME: Clarify why "pulling" is shown only in the case of a single job
570+ if (
571+ last_job .job_submissions
572+ and last_job .job_submissions [- 1 ].status == JobStatus .PULLING
573+ ):
574+ return "pulling"
591575
592- @staticmethod
593- def _get_status_message (
594- status : RunStatus ,
595- job_status : Optional [JobStatus ],
596- retry_on_events : List [RetryEvent ],
597- termination_reason : Optional [JobTerminationReason ],
598- ) -> str :
599- if job_status == JobStatus .PULLING :
600- return "pulling"
576+ retry_on_events = last_job .job_spec .retry .on_events if last_job .job_spec .retry else []
601577 # Currently, `retrying` is shown only for `no-capacity` events
602578 if (
603- status in [RunStatus .SUBMITTED , RunStatus .PENDING ]
604- and termination_reason == JobTerminationReason .FAILED_TO_START_DUE_TO_NO_CAPACITY
579+ self .status in [RunStatus .SUBMITTED , RunStatus .PENDING ]
580+ and last_job_termination_reason
581+ == JobTerminationReason .FAILED_TO_START_DUE_TO_NO_CAPACITY
605582 and RetryEvent .NO_CAPACITY in retry_on_events
606583 ):
607584 return "retrying"
608- return status .value
585+
586+ return self .status .value
609587
610588 def is_deployment_in_progress (self ) -> bool :
611589 return any (
0 commit comments