diff --git a/speed-bench/m3_ultra_60core_q2.csv b/speed-bench/m3_ultra_60core_q2.csv new file mode 100644 index 000000000..67560cda2 --- /dev/null +++ b/speed-bench/m3_ultra_60core_q2.csv @@ -0,0 +1,33 @@ +ctx_tokens,prefill_tokens,prefill_tps,gen_tokens,gen_tps,kvcache_bytes +2048,2048,413.64,128,33.57,52184460 +4096,2048,383.76,128,26.78,80373132 +6144,2048,379.42,128,26.28,108561804 +8192,2048,375.63,128,26.38,136750476 +10240,2048,368.84,128,26.08,164939148 +12288,2048,365.40,128,26.26,193127820 +14336,2048,361.71,128,25.95,221316492 +16384,2048,357.90,128,25.92,249505164 +18432,2048,352.19,128,25.59,277693836 +20480,2048,348.88,128,25.74,305882508 +22528,2048,345.06,128,25.44,334071180 +24576,2048,342.00,128,25.64,362259852 +26624,2048,336.52,128,25.34,390448524 +28672,2048,333.69,128,25.41,418637196 +30720,2048,329.87,128,25.05,446825868 +32768,2048,326.81,128,24.94,475014540 +34816,2048,321.33,128,24.52,503203212 +36864,2048,319.32,128,24.73,531391884 +38912,2048,316.06,128,24.50,559580556 +40960,2048,313.10,128,24.82,587769228 +43008,2048,308.49,128,24.48,615957900 +45056,2048,305.89,128,24.57,644146572 +47104,2048,302.89,128,24.31,672335244 +49152,2048,299.90,128,24.45,700523916 +51200,2048,296.60,128,24.18,728712588 +53248,2048,294.48,128,24.21,756901260 +55296,2048,291.46,128,23.95,785089932 +57344,2048,288.75,128,24.03,813278604 +59392,2048,285.18,128,23.78,841467276 +61440,2048,283.31,128,23.85,869655948 +63488,2048,280.65,128,23.60,897844620 +65536,2048,278.45,128,23.53,926033292 diff --git a/speed-bench/m3_ultra_60core_q2_ts.svg b/speed-bench/m3_ultra_60core_q2_ts.svg new file mode 100644 index 000000000..d32962ed4 --- /dev/null +++ b/speed-bench/m3_ultra_60core_q2_ts.svg @@ -0,0 +1,50 @@ + + + + +M3 Ultra 60-core Q2 t/s + +0 + +100 + +200 + +300 + +400 + +500 +0 +10 +20 +30 +40 + +0 + +20k + +40k + +60k + + + +ctx size +prefill t/s +generation t/s + + + + +prefill + +generation + diff --git a/speed-bench/m3_ultra_60core_q4.csv b/speed-bench/m3_ultra_60core_q4.csv new file mode 100644 index 000000000..3ca265fac --- /dev/null +++ b/speed-bench/m3_ultra_60core_q4.csv @@ -0,0 +1,33 @@ +ctx_tokens,prefill_tokens,prefill_tps,gen_tokens,gen_tps,kvcache_bytes +2048,2048,401.45,128,32.17,52184460 +4096,2048,381.38,128,25.80,80373132 +6144,2048,376.98,128,25.29,108561804 +8192,2048,373.25,128,25.38,136750476 +10240,2048,366.26,128,25.03,164939148 +12288,2048,363.12,128,25.22,193127820 +14336,2048,359.36,128,24.90,221316492 +16384,2048,355.48,128,24.94,249505164 +18432,2048,350.15,128,24.58,277693836 +20480,2048,346.82,128,24.75,305882508 +22528,2048,342.78,128,24.49,334071180 +24576,2048,339.98,128,24.61,362259852 +26624,2048,334.38,128,24.31,390448524 +28672,2048,331.37,128,24.40,418637196 +30720,2048,327.71,128,24.09,446825868 +32768,2048,325.09,128,24.03,475014540 +34816,2048,319.52,128,23.58,503203212 +36864,2048,317.46,128,23.80,531391884 +38912,2048,314.37,128,23.51,559580556 +40960,2048,311.05,128,23.86,587769228 +43008,2048,307.26,128,23.57,615957900 +45056,2048,304.03,128,23.65,644146572 +47104,2048,301.09,128,23.38,672335244 +49152,2048,298.60,128,23.52,700523916 +51200,2048,295.22,128,23.16,728712588 +53248,2048,292.70,128,23.37,756901260 +55296,2048,289.84,128,23.02,785089932 +57344,2048,287.28,128,23.22,813278604 +59392,2048,283.67,128,22.86,841467276 +61440,2048,281.84,128,22.98,869655948 +63488,2048,279.39,128,22.70,897844620 +65536,2048,276.97,128,22.65,926033292 diff --git a/speed-bench/m3_ultra_60core_q4_ts.svg b/speed-bench/m3_ultra_60core_q4_ts.svg new file mode 100644 index 000000000..0938aabf3 --- /dev/null +++ b/speed-bench/m3_ultra_60core_q4_ts.svg @@ -0,0 +1,50 @@ + + + + +M3 Ultra 60-core Q4 t/s + +0 + +100 + +200 + +300 + +400 + +500 +0 +10 +20 +30 +40 + +0 + +20k + +40k + +60k + + + +ctx size +prefill t/s +generation t/s + + + + +prefill + +generation +