@@ -99,6 +99,16 @@ def test_intercept_paddle_loggers():
# -- get_worker ----------------------------------------------------------------


+def test_get_worker_logprob_unsupported():
+    from fastdeploy.worker.worker_process import get_worker
+
+    with patch(f"{WP}.current_platform") as plat:
+        for a in ("is_dcu", "is_cuda", "is_xpu", "is_iluvatar", "is_gcu", "is_maca", "is_intel_hpu"):
+            getattr(plat, a).return_value = False
+        with pytest.raises(NotImplementedError):
+            get_worker(_cfg(**{"model_config.enable_logprob": True}), local_rank=0, rank=0)
+
+
@pytest.mark.parametrize(
    "platform,module_path,class_name",
    [
@@ -254,7 +264,7 @@ def test_initialize_fd_config():
        initialize_fd_config(args, ranks=1, local_rank=0)
        m["FDConfig"].assert_called_once()
        m["update_fd_config_for_mm"].assert_called_once()
-    # EP + quant path
+    # EP + quant path (list moe_num_experts)
    args2, stack2, m2 = _fd_config_env()
    with stack2:
        m2["ParallelConfig"].return_value.data_parallel_size = 2
@@ -265,6 +275,67 @@ def test_initialize_fd_config():
        m2["ModelConfig"].return_value.is_quantized = True
        initialize_fd_config(args2, ranks=2, local_rank=0)
        m2["FDConfig"].assert_called_once()
+    # EP with int moe_num_experts + num_local_experts=None → else branch
+    args3, stack3, m3 = _fd_config_env()
+    with stack3:
+        m3["ParallelConfig"].return_value.expert_parallel_size = 2
+        m3["ModelConfig"].return_value.moe_num_experts = 8
+        m3["ModelConfig"].return_value.num_local_experts = None
+        m3["EPLBConfig"].return_value.redundant_experts_num = 0
+        initialize_fd_config(args3, ranks=1, local_rank=0)
+    # All platforms False → ENABLE_V1_KVCACHE_SCHEDULER set to 0
+    args4, stack4, m4 = _fd_config_env()
+    with stack4:
+        for a in ("is_cuda", "is_xpu", "is_maca", "is_iluvatar", "is_intel_hpu"):
+            getattr(m4["current_platform"], a).return_value = False
+        initialize_fd_config(args4, ranks=1, local_rank=0)
+    # v1_loader_support returns False → load_choices fallback
+    args5, stack5, m5 = _fd_config_env()
+    with stack5:
+        m5["v1_loader_support"].return_value = False
+        fd = m5["FDConfig"].return_value
+        fd.load_config.load_choices = "default_v1"
+        fd.model_config.architectures = ["LlamaForCausalLM"]
+        initialize_fd_config(args5, ranks=1, local_rank=0)
+    # PaddleOCR architecture
+    args6, stack6, m6 = _fd_config_env()
+    with stack6:
+        fd6 = m6["FDConfig"].return_value
+        fd6.model_config.architectures = ["PaddleOCRForCausalLM"]
+        fd6.load_config.load_choices = "default"
+        initialize_fd_config(args6, ranks=1, local_rank=0)
+    # EP with num_local_experts int (not list, not None) → elif branch (L1089)
+    args_ep, stack_ep, m_ep = _fd_config_env()
+    with stack_ep:
+        m_ep["ParallelConfig"].return_value.expert_parallel_size = 2
+        m_ep["ModelConfig"].return_value.moe_num_experts = 8
+        m_ep["ModelConfig"].return_value.num_local_experts = 4
+        m_ep["EPLBConfig"].return_value.redundant_experts_num = 0
+        initialize_fd_config(args_ep, ranks=1, local_rank=0)
+    # Quant config present but not pre-quantized → online quant log (L1138)
+    args_q, stack_q, m_q = _fd_config_env()
+    with stack_q:
+        m_q["parse_quant_config"].return_value = MagicMock()
+        m_q["ModelConfig"].return_value.is_quantized = False
+        initialize_fd_config(args_q, ranks=1, local_rank=0)
+    # Splitwise prefill with V1 scheduler → PREFILL_NODE_ONE_STEP_STOP_V1="1" (L1159)
+    args_sp, stack_sp, m_sp = _fd_config_env()
+    with stack_sp, patch(f"{WP}.envs") as env_sp, patch.dict("os.environ", {}, clear=False):
+        env_sp.ENABLE_V1_KVCACHE_SCHEDULER = True
+        args_sp.splitwise_role = "prefill"
+        initialize_fd_config(args_sp, ranks=1, local_rank=0)
+    # Splitwise decode → PREFILL_NODE_ONE_STEP_STOP_V1="0" (L1161)
+    args_sd, stack_sd, m_sd = _fd_config_env()
+    with stack_sd, patch(f"{WP}.envs") as env_sd, patch.dict("os.environ", {}, clear=False):
+        env_sd.ENABLE_V1_KVCACHE_SCHEDULER = True
+        args_sd.splitwise_role = "decode"
+        initialize_fd_config(args_sd, ranks=1, local_rank=0)
+    # num_hidden_layers is None → ValueError
+    args7, stack7, m7 = _fd_config_env()
+    with stack7:
+        m7["ModelConfig"].return_value.num_hidden_layers = None
+        with pytest.raises(ValueError):
+            initialize_fd_config(args7, ranks=1, local_rank=0)


# -- PaddleDisWorkerProc -------------------------------------------------------
@@ -372,6 +443,20 @@ def test_kv_cache(pw):
        p2.worker.cal_theortical_kvcache.return_value = 1024**2
        p2.initialize_kv_cache()
        p2.worker.initialize_cache.assert_called_once_with(num_gpu_blocks=1024)
+    # Multi-rank profile → dist.all_reduce path (L626-628)
+    _, gw2 = pw
+    gw2.return_value = MagicMock()
+    with patch(f"{WP}.IPCSignal"), patch(f"{WP}.dist") as d2:
+        p2r = _make(pw, ranks=2, **{"parallel_config.do_profile": True})
+        p2r.worker.determine_available_memory.return_value = 1024**3
+        p2r.worker.cal_theortical_kvcache.return_value = 1024**2
+        mock_t2 = MagicMock()
+        mock_t2.item.return_value = 512
+        d2.all_reduce.return_value = None
+        with patch(f"{WP}.paddle") as pdl2:
+            pdl2.full.return_value = mock_t2
+            p2r.initialize_kv_cache()
+        d2.all_reduce.assert_called_once()
    # Zero memory → ValueError
    with patch(f"{WP}.IPCSignal"), patch(f"{WP}.dist"):
        p3 = _make(pw, **{"parallel_config.do_profile": True})