diff --git a/dlio_benchmark/data_generator/parquet_generator.py b/dlio_benchmark/data_generator/parquet_generator.py
index 88867426..96358475 100755
--- a/dlio_benchmark/data_generator/parquet_generator.py
+++ b/dlio_benchmark/data_generator/parquet_generator.py
@@ -20,7 +20,7 @@
 import pyarrow as pa
 import pyarrow.parquet as pq
 
-from dlio_benchmark.common.enumerations import Compression
+from dlio_benchmark.common.enumerations import Compression, StorageType
 from dlio_benchmark.data_generator.data_generator import DataGenerator
 from dlio_benchmark.utils.utility import progress, gen_random_tensor, DLIOMPI
 import dgen_py as _dgen_py
@@ -314,7 +314,10 @@ def generate(self):
             # When enabled, hand off entirely to s3dlio.generate_and_write_parquet_schema_streaming().
             # Row groups are pipelined: generation and multipart upload run
             # concurrently — no full-file buffer, peak RAM ~2× one row group.
-            if self.use_s3dlio_gen and self.parquet_columns:
+            # Restricted to object storage (S3/AISTORE): s3dlio requires an
+            # s3:// URI and raises RuntimeError for local paths (issue #385).
+            _s3_storage = (StorageType.S3, StorageType.AISTORE)
+            if self.use_s3dlio_gen and self.parquet_columns and self._args.storage_type in _s3_storage:
                 import s3dlio as _s3dlio
                 _cols = [(str(c.get('name', 'data')), int(c.get('size', 1)))
                          for c in self.parquet_columns]
diff --git a/dlio_benchmark/data_loader/torch_data_loader.py b/dlio_benchmark/data_loader/torch_data_loader.py
index 7fe2e254..3ca9ccf2 100644
--- a/dlio_benchmark/data_loader/torch_data_loader.py
+++ b/dlio_benchmark/data_loader/torch_data_loader.py
@@ -465,16 +465,23 @@ def read(self):
                 StorageType.LOCAL_FS,
             )
         )
+        _s3_types = (StorageType.S3, StorageType.AISTORE)
+        # TorchIterableDatasetSimple uses DataLoader(num_workers>0) which forks
+        # worker processes via os.fork(). On LOCAL_FS, this fork-after-module-import
+        # pattern causes a ThreadPoolExecutor deadlock (the executor's background
+        # thread is not fork-safe). Restrict the iterable path to object storage
+        # (S3/AISTORE) only where the prefetch benefit is most significant and
+        # the fork issue does not apply. LOCAL_FS falls through to map-style TorchDataset.
         use_simple_iterable_dataset = (
             self.format_type in _simple_iterable_formats
             and not use_rg_iterable_dataset
+            and self._args.storage_type in _s3_types
         )
 
         # Determine concrete reader class name and access pattern for logging.
         _opts = getattr(self._args, "storage_options", {}) or {}
         _lib  = _opts.get("storage_library", "none")
         _st   = self._args.storage_type
-        _s3_types = (StorageType.S3, StorageType.AISTORE)
         _s3_libs  = ("s3dlio", "s3torchconnector", "minio")
         _nw = self._args.read_threads
 
diff --git a/uv.lock b/uv.lock
index 98fc2e45..1f35c215 100644
--- a/uv.lock
+++ b/uv.lock
@@ -226,7 +226,7 @@ wheels = [
 
 [[package]]
 name = "dlio-benchmark"
-version = "3.0.1"
+version = "3.0.2"
 source = { editable = "." }
 dependencies = [
     { name = "dgen-py", marker = "sys_platform == 'linux'" },