From 955cf07271de6095f8ee838a2ce8ab426cdec65c Mon Sep 17 00:00:00 2001 From: David <44320468+gusario@users.noreply.github.com> Date: Tue, 3 Mar 2026 18:04:21 +0300 Subject: [PATCH] Fix for load_dataset function to restore ability to use custom loading of dataset Since the 4.0.0 update, the load_dataset function always creates the base DatasetLoader and doesn't respect the loader field of DatasetMeta. In 3.x.x versions that field was honored via load_function, so this small change restores the ability to use a custom dataset loader. --- swift/dataset/loader.py | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/swift/dataset/loader.py b/swift/dataset/loader.py index ecd98a4dd2..ee803b16b2 100644 --- a/swift/dataset/loader.py +++ b/swift/dataset/loader.py @@ -297,16 +297,6 @@ def load_dataset( num_proc = None train_datasets = [] val_datasets = [] - loader = DatasetLoader( - num_proc=num_proc, - load_from_cache_file=load_from_cache_file, - streaming=streaming, - hub_token=hub_token, - strict=strict, - download_mode=download_mode, - columns=columns, # columns_mapping - remove_unused_columns=remove_unused_columns, - ) use_hf_default = use_hf if use_hf_default is None: @@ -324,6 +314,16 @@ def load_dataset( dataset_syntax.dataset = dataset_meta.hf_dataset_id if use_hf else dataset_meta.ms_dataset_id else: dataset_meta = dataset_syntax.get_dataset_meta(use_hf) + loader = dataset_meta.loader( + num_proc=num_proc, + load_from_cache_file=load_from_cache_file, + streaming=streaming, + hub_token=hub_token, + strict=strict, + download_mode=download_mode, + columns=columns, # columns_mapping + remove_unused_columns=remove_unused_columns, + ) train_dataset = loader.load(dataset_syntax, dataset_meta, use_hf=use_hf) train_dataset, val_dataset = loader.post_process( train_dataset,