@@ -181,11 +181,11 @@ void ggml_vec_dot_q4_0_q8_0(int n, float * GGML_RESTRICT s, size_t bs, const voi
181181 const int8x16_t v_yh = vec_xl (QK8_0 /2 , y [ib ].qs );
182182
183183 const int16x8_t v_xylso = vec_mulo (v_xls , v_yl );
184- const int16x8_t v_xylse = vec_mule (v_xls , v_yl );
184+ const int16x8_t v_xyl = vec_meadd (v_xls , v_yl , v_xylso );
185185 const int16x8_t v_xyhso = vec_mulo (v_xhs , v_yh );
186- const int16x8_t v_xyhse = vec_mule (v_xhs , v_yh );
186+ const int16x8_t v_xyh = vec_meadd (v_xhs , v_yh , v_xyhso );
187187
188- int16x8_t v_xy_ = v_xylso + v_xylse + v_xyhso + v_xyhse ; v_xy_ += vec_reve (v_xy_ );
188+ int16x8_t v_xy_ = v_xyl + v_xyh ; v_xy_ += vec_reve (v_xy_ );
189189
190190 const float32x4_t v_xy = vec_float (vec_unpackh (v_xy_ ));
191191 const float32x4_t v_d = vec_splats (GGML_CPU_FP16_TO_FP32 (x [ib ].d ) * GGML_CPU_FP16_TO_FP32 (y [ib ].d ));
@@ -890,8 +890,7 @@ void ggml_vec_dot_q4_K_q8_K(int n, float * GGML_RESTRICT s, size_t bs, const voi
890890 const int16x8_t v_minsh = (int16x8_t )vec_unpackh ((uint8x16_t )v_mins8 );
891891
892892 const int32x4_t v_minso = vec_mulo (v_ysums , v_minsh );
893- const int32x4_t v_minse = vec_mule (v_ysums , v_minsh );
894- const int32x4_t v_mins = v_minso + v_minse ;
893+ const int32x4_t v_mins = vec_meadd (v_ysums , v_minsh , v_minso );
895894 sumf -= dmin * (v_mins [0 ] + v_mins [1 ] + v_mins [2 ] + v_mins [3 ]);
896895
897896 const uint8_t * scales = (const uint8_t * )utmp ;
@@ -1004,8 +1003,7 @@ void ggml_vec_dot_q5_K_q8_K(int n, float * GGML_RESTRICT s, size_t bs, const voi
10041003 const int16x8_t v_minsh = (int16x8_t )vec_unpackh (v_mins8 );
10051004
10061005 const int32x4_t v_minsho = vec_mulo (v_ysums , v_minsh );
1007- const int32x4_t v_minshe = vec_mule (v_ysums , v_minsh );
1008- const int32x4_t v_mins = vec_add (v_minsho , v_minshe );
1006+ const int32x4_t v_mins = vec_meadd (v_ysums , v_minsh , v_minsho );
10091007 const int32_t mins = vec_hsum_i32x4 (v_mins );
10101008
10111009 const uint8_t * scales = (const uint8_t * )utmp ;
@@ -1110,10 +1108,10 @@ void ggml_vec_dot_q6_K_q8_K(int n, float * GGML_RESTRICT s, size_t bs, const voi
11101108 const int16x8_t v_scaleh = vec_unpackl (v_scale );
11111109
11121110 const int32x4_t v_minslo = vec_mulo (v_ysumsl , v_scalel );
1113- const int32x4_t v_minsle = vec_mule (v_ysumsl , v_scalel );
1111+ const int32x4_t v_minsl = vec_meadd (v_ysumsl , v_scalel , v_minslo );
11141112 const int32x4_t v_minsho = vec_mulo (v_ysumsh , v_scaleh );
1115- const int32x4_t v_minshe = vec_mule (v_ysumsh , v_scaleh );
1116- const int32x4_t v_mins = v_minslo + v_minsle + v_minsho + v_minshe ;
1113+ const int32x4_t v_minsh = vec_meadd (v_ysumsh , v_scaleh , v_minsho );
1114+ const int32x4_t v_mins = vec_add ( v_minsl , v_minsh ) ;
11171115
11181116 const int32_t mins = vec_hsum_i32x4 (v_mins );
11191117
0 commit comments