Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -45,9 +45,11 @@
"server_script": "gptoss_fp4_mi300x_docker.sh",
"bench_serv_script": "benchmark_serving.py",
"result_dict": {
"output_throughput_per_sec": "4200",
"mean_ttft_ms": "500",
"mean_tpot_ms": "15"
"ISL=8192,OSL=1024,TP=8,CONC=64": {
"total_throughput_per_sec": "4200",
"mean_ttft_ms": "500",
"mean_tpot_ms": "15"
}
}
}
}
Expand Down
205 changes: 192 additions & 13 deletions cvs/input/config_file/inference/vllm/mi355x_singlenode_vllm.json
Original file line number Diff line number Diff line change
Expand Up @@ -69,10 +69,57 @@
"metric_percentiles": "99",
"server_script": "gpt-oss-120b_fp4_mi355x_vllm_docker.sh",
"bench_serv_script": "benchmark_serving.py",
"vllm_env_vars": {
"VLLM_USE_AITER_UNIFIED_ATTENTION": "1",
"VLLM_ROCM_USE_AITER_MHA": "0",
"VLLM_ROCM_USE_AITER_FUSED_MOE_A16W4": "1"
},
"result_dict": {
"output_throughput_per_sec": "10600",
"mean_ttft_ms": "200",
"mean_tpot_ms": "10"
"ISL=1024,OSL=1024,TP=1,CONC=16": {
"total_throughput_per_sec": "4651",
"mean_ttft_ms": "70",
"mean_tpot_ms": "8"
},
"ISL=1024,OSL=1024,TP=1,CONC=32": {
"total_throughput_per_sec": "7043",
"mean_ttft_ms": "180",
"mean_tpot_ms": "9"
},
"ISL=1024,OSL=1024,TP=1,CONC=64": {
"total_throughput_per_sec": "10677",
"mean_ttft_ms": "76",
"mean_tpot_ms": "13"
},
"ISL=1024,OSL=8192,TP=1,CONC=16": {
"total_throughput_per_sec": "2735",
"mean_ttft_ms": "57",
"mean_tpot_ms": "7"
},
"ISL=1024,OSL=8192,TP=1,CONC=32": {
"total_throughput_per_sec": "4038",
"mean_ttft_ms": "67",
"mean_tpot_ms": "10"
},
"ISL=1024,OSL=8192,TP=1,CONC=64": {
"total_throughput_per_sec": "6140",
"mean_ttft_ms": "93",
"mean_tpot_ms": "13"
},
"ISL=8192,OSL=1024,TP=1,CONC=16": {
"total_throughput_per_sec": "16509",
"mean_ttft_ms": "335",
"mean_tpot_ms": "24"
},
"ISL=8192,OSL=1024,TP=1,CONC=32": {
"total_throughput_per_sec": "22072",
"mean_ttft_ms": "320",
"mean_tpot_ms": "19"
},
"ISL=8192,OSL=1024,TP=1,CONC=64": {
"total_throughput_per_sec": "28863",
"mean_ttft_ms": "280",
"mean_tpot_ms": "22"
}
}
},
"qwen3-235b": {
Expand Down Expand Up @@ -117,10 +164,59 @@
"metric_percentiles": "99",
"server_script": "qwen3-235b-bf16_mi355x_vllm_docker.sh",
"bench_serv_script": "benchmark_serving.py",
"vllm_env_vars": {
"VLLM_V1_USE_PREFILL_DECODE_ATTENTION": "1",
"VLLM_ROCM_USE_AITER": "1",
"VLLM_ROCM_USE_AITER_MHA": "1",
"VLLM_ROCM_QUICK_REDUCE_QUANTIZATION": "INT4",
"SAFETENSORS_FAST_GPU": "1"
},
"result_dict": {
"output_throughput_per_sec": "8000",
"mean_ttft_ms": "300",
"mean_tpot_ms": "12"
"ISL=1024,OSL=1024,TP=8,CONC=16": {
"total_throughput_per_sec": "2000",
"mean_ttft_ms": "850",
"mean_tpot_ms": "18"
},
"ISL=1024,OSL=1024,TP=8,CONC=32": {
"total_throughput_per_sec": "3435",
"mean_ttft_ms": "80",
"mean_tpot_ms": "10"
},
"ISL=1024,OSL=1024,TP=8,CONC=64": {
"total_throughput_per_sec": "5840",
"mean_ttft_ms": "260",
"mean_tpot_ms": "10"
},
"ISL=1024,OSL=8192,TP=8,CONC=16": {
"total_throughput_per_sec": "1119",
"mean_ttft_ms": "415",
"mean_tpot_ms": "25"
},
"ISL=1024,OSL=8192,TP=8,CONC=32": {
"total_throughput_per_sec": "1876",
"mean_ttft_ms": "70",
"mean_tpot_ms": "10"
},
"ISL=1024,OSL=8192,TP=8,CONC=64": {
"total_throughput_per_sec": "3139",
"mean_ttft_ms": "310",
"mean_tpot_ms": "14"
},
"ISL=8192,OSL=1024,TP=8,CONC=16": {
"total_throughput_per_sec": "7476",
"mean_ttft_ms": "300",
"mean_tpot_ms": "21"
},
"ISL=8192,OSL=1024,TP=8,CONC=32": {
"total_throughput_per_sec": "11312",
"mean_ttft_ms": "355",
"mean_tpot_ms": "27"
},
"ISL=8192,OSL=1024,TP=8,CONC=64": {
"total_throughput_per_sec": "16082",
"mean_ttft_ms": "450",
"mean_tpot_ms": "39"
}
}
},
"qwen3-80b": {
Expand Down Expand Up @@ -165,10 +261,59 @@
"metric_percentiles": "99",
"server_script": "qwen3-80b-bf16_mi355x_vllm_docker.sh",
"bench_serv_script": "benchmark_serving.py",
"vllm_env_vars": {
"AITER_ONLINE_TUNE": "0",
"VLLM_ROCM_USE_AITER": "1",
"VLLM_ROCM_USE_AITER_MOE": "1",
"VLLM_ROCM_USE_AITER_UNIFIED_ATTENTION": "0",
"VLLM_ROCM_USE_AITER_MHA": "0"
},
"result_dict": {
"output_throughput_per_sec": "12000",
"mean_ttft_ms": "150",
"mean_tpot_ms": "8"
"ISL=1024,OSL=1024,TP=1,CONC=16": {
"total_throughput_per_sec": "2003",
"mean_ttft_ms": "180",
"mean_tpot_ms": "9"
},
"ISL=1024,OSL=1024,TP=1,CONC=32": {
"total_throughput_per_sec": "3155",
"mean_ttft_ms": "160",
"mean_tpot_ms": "8"
},
"ISL=1024,OSL=1024,TP=1,CONC=64": {
"total_throughput_per_sec": "4570",
"mean_ttft_ms": "150",
"mean_tpot_ms": "8"
},
"ISL=1024,OSL=8192,TP=1,CONC=16": {
"total_throughput_per_sec": "1200",
"mean_ttft_ms": "220",
"mean_tpot_ms": "14"
},
"ISL=1024,OSL=8192,TP=1,CONC=32": {
"total_throughput_per_sec": "1800",
"mean_ttft_ms": "200",
"mean_tpot_ms": "12"
},
"ISL=1024,OSL=8192,TP=1,CONC=64": {
"total_throughput_per_sec": "2600",
"mean_ttft_ms": "180",
"mean_tpot_ms": "10"
},
"ISL=8192,OSL=1024,TP=1,CONC=16": {
"total_throughput_per_sec": "7500",
"mean_ttft_ms": "300",
"mean_tpot_ms": "18"
},
"ISL=8192,OSL=1024,TP=1,CONC=32": {
"total_throughput_per_sec": "11300",
"mean_ttft_ms": "280",
"mean_tpot_ms": "17"
},
"ISL=8192,OSL=1024,TP=1,CONC=64": {
"total_throughput_per_sec": "16000",
"mean_ttft_ms": "260",
"mean_tpot_ms": "16"
}
}
},
"deepseek-v31": {
Expand All @@ -182,7 +327,7 @@
32,
64
],
"model": "/models/dsv31-fp8",
"model": "deepseek-ai/DeepSeek-V3.1",
"num_prompts": "3200",
"sequence_combinations": [
{
Expand All @@ -208,10 +353,44 @@
"metric_percentiles": "99",
"server_script": "dsr1_fp8_mi355x_vllm_docker.sh",
"bench_serv_script": "benchmark_serving.py",
"vllm_env_vars": {
"VLLM_ROCM_USE_AITER": "1",
"VLLM_USE_AITER_TRITON_ROPE": "1",
"VLLM_ROCM_USE_AITER_RMSNORM": "1",
"VLLM_ROCM_USE_AITER_TRITON_LINEAR": "1",
"VLLM_ROCM_QUICK_REDUCE_QUANTIZATION": "INT4"
},
"result_dict": {
"output_throughput_per_sec": "9000",
"mean_ttft_ms": "250",
"mean_tpot_ms": "11"
"ISL=1024,OSL=1024,TP=8,CONC=16": {
"total_throughput_per_sec": "1944",
"mean_ttft_ms": "280",
"mean_tpot_ms": "12"
},
"ISL=1024,OSL=1024,TP=8,CONC=32": {
"total_throughput_per_sec": "2939",
"mean_ttft_ms": "260",
"mean_tpot_ms": "11"
},
"ISL=1024,OSL=1024,TP=8,CONC=64": {
"total_throughput_per_sec": "4834",
"mean_ttft_ms": "250",
"mean_tpot_ms": "11"
},
"ISL=1024,OSL=8192,TP=8,CONC=16": {
"total_throughput_per_sec": "1109",
"mean_ttft_ms": "320",
"mean_tpot_ms": "16"
},
"ISL=1024,OSL=8192,TP=8,CONC=32": {
"total_throughput_per_sec": "1676",
"mean_ttft_ms": "300",
"mean_tpot_ms": "14"
},
"ISL=1024,OSL=8192,TP=8,CONC=64": {
"total_throughput_per_sec": "2716",
"mean_ttft_ms": "280",
"mean_tpot_ms": "13"
}
}
}
}
Expand Down
Loading
Loading