Skip to content

Commit 3b9ec79

Browse files
committed
Merge pull request #9727 from GregoryComer:f32-bf16-vcvt-scalar
PiperOrigin-RevId: 892897848
2 parents e0fe33a + 856538e commit 3b9ec79

26 files changed

Lines changed: 723 additions & 2 deletions

build_srcs.bzl

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -94,6 +94,7 @@ LOGGING_SRCS = [
9494
]
9595

9696
MICROKERNEL_DEFS = [
97+
"src/bf16-f32-vcvt/bf16-f32-vcvt.inc",
9798
"src/f16-avgpool/f16-avgpool-minmax.inc",
9899
"src/f16-dwconv/f16-dwconv-minmax.inc",
99100
"src/f16-f32-vcvt/f16-f32-vcvt.inc",
@@ -156,6 +157,7 @@ MICROKERNEL_DEFS = [
156157
"src/f32-conv-hwc/f32-conv-hwc.inc",
157158
"src/f32-dwconv/f32-dwconv-minmax.inc",
158159
"src/f32-dwconv/f32-dwconv.inc",
160+
"src/f32-bf16-vcvt/f32-bf16-vcvt.inc",
159161
"src/f32-f16-vcvt/f32-f16-vcvt.inc",
160162
"src/f32-maxpool/f32-maxpool-minmax.inc",
161163
"src/f32-qs8-vcvt/f32-qs8-vcvt.inc",

cmake/gen/scalar_microkernels.cmake

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@
1010

1111

1212
SET(PROD_SCALAR_MICROKERNEL_SRCS
13+
src/bf16-f32-vcvt/gen/bf16-f32-vcvt-scalar-u2.c
1314
src/f16-f32-vcvt/gen/f16-f32-vcvt-scalar-u4.c
1415
src/f16-qs8-vcvt/gen/f16-qs8-vcvt-scalar-imagic-u4.c
1516
src/f16-qu8-vcvt/gen/f16-qu8-vcvt-scalar-imagic-u4.c
@@ -25,6 +26,7 @@ SET(PROD_SCALAR_MICROKERNEL_SRCS
2526
src/f16-vsin/gen/f16-vsin-scalar-rational-3-2-div.c
2627
src/f32-argmaxpool/f32-argmaxpool-9p8x-scalar-c1.c
2728
src/f32-avgpool/gen/f32-avgpool-9p-minmax-scalar-u1.c
29+
src/f32-bf16-vcvt/gen/f32-bf16-vcvt-scalar-u2.c
2830
src/f32-conv-hwc2chw/f32-conv-hwc2chw-3x3s2p1c3x4-scalar-1x1.c
2931
src/f32-dwconv/gen/f32-dwconv-3p1c-minmax-scalar-acc2.c
3032
src/f32-dwconv/gen/f32-dwconv-3p1c-scalar-acc2.c
@@ -248,6 +250,9 @@ SET(PROD_SCALAR_MICROKERNEL_SRCS
248250

249251
SET(NON_PROD_SCALAR_MICROKERNEL_SRCS
250252
src/bf16-f32-gemm/bf16-f32-gemm-1x4c2-minmax-scalar.c
253+
src/bf16-f32-vcvt/gen/bf16-f32-vcvt-scalar-u1.c
254+
src/bf16-f32-vcvt/gen/bf16-f32-vcvt-scalar-u3.c
255+
src/bf16-f32-vcvt/gen/bf16-f32-vcvt-scalar-u4.c
251256
src/f16-f32-vcvt/gen/f16-f32-vcvt-scalar-u1.c
252257
src/f16-f32-vcvt/gen/f16-f32-vcvt-scalar-u2.c
253258
src/f16-f32-vcvt/gen/f16-f32-vcvt-scalar-u3.c
@@ -273,6 +278,9 @@ SET(NON_PROD_SCALAR_MICROKERNEL_SRCS
273278
src/f16-rminmax/gen/f16-rminmax-scalar-u3-acc3.c
274279
src/f16-rminmax/gen/f16-rminmax-scalar-u4-acc2.c
275280
src/f16-rminmax/gen/f16-rminmax-scalar-u4-acc4.c
281+
src/f32-bf16-vcvt/gen/f32-bf16-vcvt-scalar-u1.c
282+
src/f32-bf16-vcvt/gen/f32-bf16-vcvt-scalar-u3.c
283+
src/f32-bf16-vcvt/gen/f32-bf16-vcvt-scalar-u4.c
276284
src/f32-conv-hwc/f32-conv-hwc-3x3s2p0p1c3x4-scalar-1x1.c
277285
src/f32-conv-hwc/f32-conv-hwc-3x3s2p1c3x4-scalar-1x1.c
278286
src/f32-dwconv/gen/f32-dwconv-3p1c-minmax-scalar.c

gen/scalar_microkernels.bzl

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@
66
#
77

88
PROD_SCALAR_MICROKERNEL_SRCS = [
9+
"src/bf16-f32-vcvt/gen/bf16-f32-vcvt-scalar-u2.c",
910
"src/f16-f32-vcvt/gen/f16-f32-vcvt-scalar-u4.c",
1011
"src/f16-qs8-vcvt/gen/f16-qs8-vcvt-scalar-imagic-u4.c",
1112
"src/f16-qu8-vcvt/gen/f16-qu8-vcvt-scalar-imagic-u4.c",
@@ -21,6 +22,7 @@ PROD_SCALAR_MICROKERNEL_SRCS = [
2122
"src/f16-vsin/gen/f16-vsin-scalar-rational-3-2-div.c",
2223
"src/f32-argmaxpool/f32-argmaxpool-9p8x-scalar-c1.c",
2324
"src/f32-avgpool/gen/f32-avgpool-9p-minmax-scalar-u1.c",
25+
"src/f32-bf16-vcvt/gen/f32-bf16-vcvt-scalar-u2.c",
2426
"src/f32-conv-hwc2chw/f32-conv-hwc2chw-3x3s2p1c3x4-scalar-1x1.c",
2527
"src/f32-dwconv/gen/f32-dwconv-3p1c-minmax-scalar-acc2.c",
2628
"src/f32-dwconv/gen/f32-dwconv-3p1c-scalar-acc2.c",
@@ -245,6 +247,9 @@ PROD_SCALAR_MICROKERNEL_SRCS = [
245247

246248
NON_PROD_SCALAR_MICROKERNEL_SRCS = [
247249
"src/bf16-f32-gemm/bf16-f32-gemm-1x4c2-minmax-scalar.c",
250+
"src/bf16-f32-vcvt/gen/bf16-f32-vcvt-scalar-u1.c",
251+
"src/bf16-f32-vcvt/gen/bf16-f32-vcvt-scalar-u3.c",
252+
"src/bf16-f32-vcvt/gen/bf16-f32-vcvt-scalar-u4.c",
248253
"src/f16-f32-vcvt/gen/f16-f32-vcvt-scalar-u1.c",
249254
"src/f16-f32-vcvt/gen/f16-f32-vcvt-scalar-u2.c",
250255
"src/f16-f32-vcvt/gen/f16-f32-vcvt-scalar-u3.c",
@@ -270,6 +275,9 @@ NON_PROD_SCALAR_MICROKERNEL_SRCS = [
270275
"src/f16-rminmax/gen/f16-rminmax-scalar-u3-acc3.c",
271276
"src/f16-rminmax/gen/f16-rminmax-scalar-u4-acc2.c",
272277
"src/f16-rminmax/gen/f16-rminmax-scalar-u4-acc4.c",
278+
"src/f32-bf16-vcvt/gen/f32-bf16-vcvt-scalar-u1.c",
279+
"src/f32-bf16-vcvt/gen/f32-bf16-vcvt-scalar-u3.c",
280+
"src/f32-bf16-vcvt/gen/f32-bf16-vcvt-scalar-u4.c",
273281
"src/f32-conv-hwc/f32-conv-hwc-3x3s2p0p1c3x4-scalar-1x1.c",
274282
"src/f32-conv-hwc/f32-conv-hwc-3x3s2p1c3x4-scalar-1x1.c",
275283
"src/f32-dwconv/gen/f32-dwconv-3p1c-minmax-scalar.c",

scripts/generate-bf16-f32-vcvt.sh

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,13 @@
1+
#!/bin/sh
2+
# Copyright 2021 Google LLC
3+
#
4+
# This source code is licensed under the BSD-style license found in the
5+
# LICENSE file in the root directory of this source tree.
6+
7+
#################################### Scalar ###################################
8+
tools/xngen src/bf16-f32-vcvt/scalar.c.in -D BATCH_TILE=1 -o src/bf16-f32-vcvt/gen/bf16-f32-vcvt-scalar-u1.c &
9+
tools/xngen src/bf16-f32-vcvt/scalar.c.in -D BATCH_TILE=2 -o src/bf16-f32-vcvt/gen/bf16-f32-vcvt-scalar-u2.c &
10+
tools/xngen src/bf16-f32-vcvt/scalar.c.in -D BATCH_TILE=3 -o src/bf16-f32-vcvt/gen/bf16-f32-vcvt-scalar-u3.c &
11+
tools/xngen src/bf16-f32-vcvt/scalar.c.in -D BATCH_TILE=4 -o src/bf16-f32-vcvt/gen/bf16-f32-vcvt-scalar-u4.c &
12+
13+
wait

scripts/generate-f32-bf16-vcvt.sh

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,13 @@
1+
#!/bin/sh
2+
# Copyright 2021 Google LLC
3+
#
4+
# This source code is licensed under the BSD-style license found in the
5+
# LICENSE file in the root directory of this source tree.
6+
7+
#################################### Scalar ###################################
8+
tools/xngen src/f32-bf16-vcvt/scalar.c.in -D BATCH_TILE=1 -o src/f32-bf16-vcvt/gen/f32-bf16-vcvt-scalar-u1.c &
9+
tools/xngen src/f32-bf16-vcvt/scalar.c.in -D BATCH_TILE=2 -o src/f32-bf16-vcvt/gen/f32-bf16-vcvt-scalar-u2.c &
10+
tools/xngen src/f32-bf16-vcvt/scalar.c.in -D BATCH_TILE=3 -o src/f32-bf16-vcvt/gen/f32-bf16-vcvt-scalar-u3.c &
11+
tools/xngen src/f32-bf16-vcvt/scalar.c.in -D BATCH_TILE=4 -o src/f32-bf16-vcvt/gen/f32-bf16-vcvt-scalar-u4.c &
12+
13+
wait
Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,10 @@
1+
// clang-format off
2+
// Copyright 2026 Google LLC
3+
//
4+
// This source code is licensed under the BSD-style license found in the
5+
// LICENSE file in the root directory of this source tree.
6+
7+
// Table of the scalar bf16 -> f32 conversion kernels, one entry per generated
// variant (u1..u4). The third argument matches the numeric suffix of the
// kernel name (presumably the batch tile; confirm against the XNN_UKERNEL
// definition supplied by the including file).
XNN_UKERNEL(xnn_arch_none, xnn_bf16_f32_vcvt_ukernel__scalar_u1, 1, false, xnn_bfloat16, float, void, NULL)
8+
XNN_UKERNEL(xnn_arch_none, xnn_bf16_f32_vcvt_ukernel__scalar_u2, 2, false, xnn_bfloat16, float, void, NULL)
9+
XNN_UKERNEL(xnn_arch_none, xnn_bf16_f32_vcvt_ukernel__scalar_u3, 3, false, xnn_bfloat16, float, void, NULL)
10+
XNN_UKERNEL(xnn_arch_none, xnn_bf16_f32_vcvt_ukernel__scalar_u4, 4, false, xnn_bfloat16, float, void, NULL)
Lines changed: 40 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,40 @@
1+
// clang-format off
2+
// Auto-generated file. Do not edit!
3+
// Template: src/bf16-f32-vcvt/scalar.c.in
4+
// Generator: tools/xngen
5+
//
6+
// Copyright 2021 Google LLC
7+
//
8+
// This source code is licensed under the BSD-style license found in the
9+
// LICENSE file in the root directory of this source tree.
10+
11+
#include <assert.h>
12+
#include <stddef.h>
13+
#include <stdint.h>
14+
15+
#include "src/xnnpack/common.h"
16+
#include "src/xnnpack/math.h"
17+
#include "src/xnnpack/vcvt.h"
18+
19+
20+
void xnn_bf16_f32_vcvt_ukernel__scalar_u1(
21+
size_t batch,
22+
const xnn_bfloat16* input,
23+
float* output,
24+
const void* params)
25+
{
26+
assert(batch != 0);
27+
assert(batch % sizeof(xnn_bfloat16) == 0);
28+
assert(input != NULL);
29+
assert(output != NULL);
30+
31+
const xnn_bfloat16* i = input;
32+
float* o = output;
33+
do {
34+
const xnn_bfloat16 vh = *i++;
35+
36+
*o++ = xnn_bfloat16_to_float(vh);
37+
38+
batch -= sizeof(xnn_bfloat16);
39+
} while (batch != 0);
40+
}
Lines changed: 47 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,47 @@
1+
// clang-format off
2+
// Auto-generated file. Do not edit!
3+
// Template: src/bf16-f32-vcvt/scalar.c.in
4+
// Generator: tools/xngen
5+
//
6+
// Copyright 2021 Google LLC
7+
//
8+
// This source code is licensed under the BSD-style license found in the
9+
// LICENSE file in the root directory of this source tree.
10+
11+
#include <assert.h>
12+
#include <stddef.h>
13+
#include <stdint.h>
14+
15+
#include "src/xnnpack/common.h"
16+
#include "src/xnnpack/math.h"
17+
#include "src/xnnpack/vcvt.h"
18+
19+
20+
void xnn_bf16_f32_vcvt_ukernel__scalar_u2(
21+
size_t batch,
22+
const xnn_bfloat16* input,
23+
float* output,
24+
const void* params)
25+
{
26+
assert(batch != 0);
27+
assert(batch % sizeof(xnn_bfloat16) == 0);
28+
assert(input != NULL);
29+
assert(output != NULL);
30+
31+
const xnn_bfloat16* i = input;
32+
float* o = output;
33+
for (; batch >= 2 * sizeof(xnn_bfloat16); batch -= 2 * sizeof(xnn_bfloat16)) {
34+
const xnn_bfloat16 vh0 = i[0];
35+
const xnn_bfloat16 vh1 = i[1];
36+
i += 2;
37+
38+
o[0] = xnn_bfloat16_to_float(vh0);
39+
o[1] = xnn_bfloat16_to_float(vh1);
40+
o += 2;
41+
}
42+
if XNN_UNLIKELY(batch != 0) {
43+
const xnn_bfloat16 vh = *i;
44+
45+
*o = xnn_bfloat16_to_float(vh);
46+
}
47+
}
Lines changed: 53 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,53 @@
1+
// clang-format off
2+
// Auto-generated file. Do not edit!
3+
// Template: src/bf16-f32-vcvt/scalar.c.in
4+
// Generator: tools/xngen
5+
//
6+
// Copyright 2021 Google LLC
7+
//
8+
// This source code is licensed under the BSD-style license found in the
9+
// LICENSE file in the root directory of this source tree.
10+
11+
#include <assert.h>
12+
#include <stddef.h>
13+
#include <stdint.h>
14+
15+
#include "src/xnnpack/common.h"
16+
#include "src/xnnpack/math.h"
17+
#include "src/xnnpack/vcvt.h"
18+
19+
20+
void xnn_bf16_f32_vcvt_ukernel__scalar_u3(
21+
size_t batch,
22+
const xnn_bfloat16* input,
23+
float* output,
24+
const void* params)
25+
{
26+
assert(batch != 0);
27+
assert(batch % sizeof(xnn_bfloat16) == 0);
28+
assert(input != NULL);
29+
assert(output != NULL);
30+
31+
const xnn_bfloat16* i = input;
32+
float* o = output;
33+
for (; batch >= 3 * sizeof(xnn_bfloat16); batch -= 3 * sizeof(xnn_bfloat16)) {
34+
const xnn_bfloat16 vh0 = i[0];
35+
const xnn_bfloat16 vh1 = i[1];
36+
const xnn_bfloat16 vh2 = i[2];
37+
i += 3;
38+
39+
o[0] = xnn_bfloat16_to_float(vh0);
40+
o[1] = xnn_bfloat16_to_float(vh1);
41+
o[2] = xnn_bfloat16_to_float(vh2);
42+
o += 3;
43+
}
44+
if XNN_UNLIKELY(batch != 0) {
45+
do {
46+
const xnn_bfloat16 vh = *i++;
47+
48+
*o++ = xnn_bfloat16_to_float(vh);
49+
50+
batch -= sizeof(xnn_bfloat16);
51+
} while (batch != 0);
52+
}
53+
}
Lines changed: 55 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,55 @@
1+
// clang-format off
2+
// Auto-generated file. Do not edit!
3+
// Template: src/bf16-f32-vcvt/scalar.c.in
4+
// Generator: tools/xngen
5+
//
6+
// Copyright 2021 Google LLC
7+
//
8+
// This source code is licensed under the BSD-style license found in the
9+
// LICENSE file in the root directory of this source tree.
10+
11+
#include <assert.h>
12+
#include <stddef.h>
13+
#include <stdint.h>
14+
15+
#include "src/xnnpack/common.h"
16+
#include "src/xnnpack/math.h"
17+
#include "src/xnnpack/vcvt.h"
18+
19+
20+
void xnn_bf16_f32_vcvt_ukernel__scalar_u4(
21+
size_t batch,
22+
const xnn_bfloat16* input,
23+
float* output,
24+
const void* params)
25+
{
26+
assert(batch != 0);
27+
assert(batch % sizeof(xnn_bfloat16) == 0);
28+
assert(input != NULL);
29+
assert(output != NULL);
30+
31+
const xnn_bfloat16* i = input;
32+
float* o = output;
33+
for (; batch >= 4 * sizeof(xnn_bfloat16); batch -= 4 * sizeof(xnn_bfloat16)) {
34+
const xnn_bfloat16 vh0 = i[0];
35+
const xnn_bfloat16 vh1 = i[1];
36+
const xnn_bfloat16 vh2 = i[2];
37+
const xnn_bfloat16 vh3 = i[3];
38+
i += 4;
39+
40+
o[0] = xnn_bfloat16_to_float(vh0);
41+
o[1] = xnn_bfloat16_to_float(vh1);
42+
o[2] = xnn_bfloat16_to_float(vh2);
43+
o[3] = xnn_bfloat16_to_float(vh3);
44+
o += 4;
45+
}
46+
if XNN_UNLIKELY(batch != 0) {
47+
do {
48+
const xnn_bfloat16 vh = *i++;
49+
50+
*o++ = xnn_bfloat16_to_float(vh);
51+
52+
batch -= sizeof(xnn_bfloat16);
53+
} while (batch != 0);
54+
}
55+
}

0 commit comments

Comments
 (0)