[rank0]: ValueError: Hidden size 4096 must be divisible by num_heads 48
[rank0]:[W122 17:04:15.573881898 ProcessGroupNCCL.cpp:1524] Warning: WARNING: destroy_process_group() was not called before program exit, which can leak resources. For more info, please see https://pytorch.org/docs/stable/distributed.html#shutdown (function operator())
[2026-01-22 17:04:16,571] [INFO] [launch.py:335:sigkill_handler] Killing subprocess 460250
[2026-01-22 17:04:16,572] [INFO] [launch.py:335:sigkill_handler] Killing subprocess 460251
[2026-01-22 17:04:16,627] [INFO] [launch.py:335:sigkill_handler] Killing subprocess 460252
[2026-01-22 17:04:16,660] [INFO] [launch.py:335:sigkill_handler] Killing subprocess 460253
[2026-01-22 17:04:16,681] [ERROR] [launch.py:341:sigkill_handler] ['/home/billus/diffusionpipe/bin/python3.12', '-u', 'train.py', '--local_rank=3', '--deepspeed', '--config', '/mnt/diffusion-pipe/nvme/Billus_image_edit.toml'] exits with return code = 1
[model]
type = 'flux2'
diffusion_model = '/media/billus/AI专用/model/flux-2-klein-base-9b.safetensors'
vae = '/media/billus/AI专用/model/flux2-vae.safetensors'
text_encoders = [
{path = '/media/billus/AI专用/model/qwen_3_8b.safetensors', type = 'flux2'}
]
dtype = 'bfloat16'
diffusion_model_dtype = 'float8'
timestep_sample_method = 'logit_normal'
shift = 3
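This issue has been raised repeatedly but still has no good solution: training the FLUX.2 klein 9B model with the config above fails with the `ValueError` shown, because the hidden size 4096 is not divisible by 48 attention heads.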
[rank0]: ValueError: Hidden size 4096 must be divisible by num_heads 48
[rank0]:[W122 17:04:15.573881898 ProcessGroupNCCL.cpp:1524] Warning: WARNING: destroy_process_group() was not called before program exit, which can leak resources. For more info, please see https://pytorch.org/docs/stable/distributed.html#shutdown (function operator())
[2026-01-22 17:04:16,571] [INFO] [launch.py:335:sigkill_handler] Killing subprocess 460250
[2026-01-22 17:04:16,572] [INFO] [launch.py:335:sigkill_handler] Killing subprocess 460251
[2026-01-22 17:04:16,627] [INFO] [launch.py:335:sigkill_handler] Killing subprocess 460252
[2026-01-22 17:04:16,660] [INFO] [launch.py:335:sigkill_handler] Killing subprocess 460253
[2026-01-22 17:04:16,681] [ERROR] [launch.py:341:sigkill_handler] ['/home/billus/diffusionpipe/bin/python3.12', '-u', 'train.py', '--local_rank=3', '--deepspeed', '--config', '/mnt/diffusion-pipe/nvme/Billus_image_edit.toml'] exits with return code = 1
[model]
type = 'flux2'
diffusion_model = '/media/billus/AI专用/model/flux-2-klein-base-9b.safetensors'
vae = '/media/billus/AI专用/model/flux2-vae.safetensors'
text_encoders = [
{path = '/media/billus/AI专用/model/qwen_3_8b.safetensors', type = 'flux2'}
]
dtype = 'bfloat16'
diffusion_model_dtype = 'float8'
timestep_sample_method = 'logit_normal'
shift = 3
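This issue has been raised repeatedly but still has no good solution: training the FLUX.2 klein 9B model with the config above fails with the `ValueError` shown, because the hidden size 4096 is not divisible by 48 attention heads.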