From 3f2643c97cbf7c7b15a0d14c8d9435cf0a016d97 Mon Sep 17 00:00:00 2001
From: Harikrishnan Balagopal
Date: Mon, 19 Jan 2026 20:19:06 +0530
Subject: [PATCH] fix: only trigger on_save if the output checkpoint path actually contains files

Signed-off-by: Harikrishnan Balagopal
---
 tuning/sft_trainer.py | 32 +++++++++++++++++++++++++++++---
 1 file changed, 29 insertions(+), 3 deletions(-)

diff --git a/tuning/sft_trainer.py b/tuning/sft_trainer.py
index 7a6a11d46a..f89ec162ea 100644
--- a/tuning/sft_trainer.py
+++ b/tuning/sft_trainer.py
@@ -576,10 +576,36 @@ def save(path: str, trainer: SFTTrainer, tc_callback, log_level="WARNING", args=
 
     logger.info("Saving tuned model to path: %s", path)
     trainer.save_model(path)
+    actually_saved = False
     if tc_callback and args:
-        tc_callback.on_save(
-            args, trainer.state, trainer.control, path=path, is_final=True
-        )
+        if os.path.exists(path):
+            try:
+                saved_files = os.listdir(path)
+                logger.info(
+                    "sanity check, we found %d files at checkpoint path '%s'",
+                    len(saved_files),
+                    path,
+                )
+                actually_saved = len(saved_files) > 0
+            except Exception as e:  # pylint: disable=broad-exception-caught
+                logger.error(
+                    "sanity check, failed to list files in checkpoint path '%s' , error: %s",
+                    path,
+                    e,
+                )
+        else:
+            logger.warning(
+                "sanity check, failed because checkpoint path '%s' doesn't exist", path
+            )
+        if actually_saved:
+            tc_callback.on_save(
+                args, trainer.state, trainer.control, path=path, is_final=True
+            )
+        else:
+            logger.warning(
+                "skip triggering on_save event since checkpoint path is empty: '%s'",
+                path,
+            )
 
 
 def get_parser():