Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions build_srcs.bzl
Original file line number Diff line number Diff line change
Expand Up @@ -94,6 +94,7 @@ LOGGING_SRCS = [
]

MICROKERNEL_DEFS = [
"src/bf16-f32-vcvt/bf16-f32-vcvt.inc",
"src/f16-avgpool/f16-avgpool-minmax.inc",
"src/f16-dwconv/f16-dwconv-minmax.inc",
"src/f16-f32-vcvt/f16-f32-vcvt.inc",
Expand Down Expand Up @@ -156,6 +157,7 @@ MICROKERNEL_DEFS = [
"src/f32-conv-hwc/f32-conv-hwc.inc",
"src/f32-dwconv/f32-dwconv-minmax.inc",
"src/f32-dwconv/f32-dwconv.inc",
"src/f32-bf16-vcvt/f32-bf16-vcvt.inc",
"src/f32-f16-vcvt/f32-f16-vcvt.inc",
"src/f32-maxpool/f32-maxpool-minmax.inc",
"src/f32-qs8-vcvt/f32-qs8-vcvt.inc",
Expand Down
8 changes: 8 additions & 0 deletions cmake/gen/scalar_microkernels.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@


SET(PROD_SCALAR_MICROKERNEL_SRCS
src/bf16-f32-vcvt/gen/bf16-f32-vcvt-scalar-u2.c
src/f16-f32-vcvt/gen/f16-f32-vcvt-scalar-u4.c
src/f16-qs8-vcvt/gen/f16-qs8-vcvt-scalar-imagic-u4.c
src/f16-qu8-vcvt/gen/f16-qu8-vcvt-scalar-imagic-u4.c
Expand All @@ -25,6 +26,7 @@ SET(PROD_SCALAR_MICROKERNEL_SRCS
src/f16-vsin/gen/f16-vsin-scalar-rational-3-2-div.c
src/f32-argmaxpool/f32-argmaxpool-9p8x-scalar-c1.c
src/f32-avgpool/gen/f32-avgpool-9p-minmax-scalar-u1.c
src/f32-bf16-vcvt/gen/f32-bf16-vcvt-scalar-u2.c
src/f32-conv-hwc2chw/f32-conv-hwc2chw-3x3s2p1c3x4-scalar-1x1.c
src/f32-dwconv/gen/f32-dwconv-3p1c-minmax-scalar-acc2.c
src/f32-dwconv/gen/f32-dwconv-3p1c-scalar-acc2.c
Expand Down Expand Up @@ -248,6 +250,9 @@ SET(PROD_SCALAR_MICROKERNEL_SRCS

SET(NON_PROD_SCALAR_MICROKERNEL_SRCS
src/bf16-f32-gemm/bf16-f32-gemm-1x4c2-minmax-scalar.c
src/bf16-f32-vcvt/gen/bf16-f32-vcvt-scalar-u1.c
src/bf16-f32-vcvt/gen/bf16-f32-vcvt-scalar-u3.c
src/bf16-f32-vcvt/gen/bf16-f32-vcvt-scalar-u4.c
src/f16-f32-vcvt/gen/f16-f32-vcvt-scalar-u1.c
src/f16-f32-vcvt/gen/f16-f32-vcvt-scalar-u2.c
src/f16-f32-vcvt/gen/f16-f32-vcvt-scalar-u3.c
Expand All @@ -273,6 +278,9 @@ SET(NON_PROD_SCALAR_MICROKERNEL_SRCS
src/f16-rminmax/gen/f16-rminmax-scalar-u3-acc3.c
src/f16-rminmax/gen/f16-rminmax-scalar-u4-acc2.c
src/f16-rminmax/gen/f16-rminmax-scalar-u4-acc4.c
src/f32-bf16-vcvt/gen/f32-bf16-vcvt-scalar-u1.c
src/f32-bf16-vcvt/gen/f32-bf16-vcvt-scalar-u3.c
src/f32-bf16-vcvt/gen/f32-bf16-vcvt-scalar-u4.c
src/f32-conv-hwc/f32-conv-hwc-3x3s2p0p1c3x4-scalar-1x1.c
src/f32-conv-hwc/f32-conv-hwc-3x3s2p1c3x4-scalar-1x1.c
src/f32-dwconv/gen/f32-dwconv-3p1c-minmax-scalar.c
Expand Down
8 changes: 8 additions & 0 deletions gen/scalar_microkernels.bzl
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
#

PROD_SCALAR_MICROKERNEL_SRCS = [
"src/bf16-f32-vcvt/gen/bf16-f32-vcvt-scalar-u2.c",
"src/f16-f32-vcvt/gen/f16-f32-vcvt-scalar-u4.c",
"src/f16-qs8-vcvt/gen/f16-qs8-vcvt-scalar-imagic-u4.c",
"src/f16-qu8-vcvt/gen/f16-qu8-vcvt-scalar-imagic-u4.c",
Expand All @@ -21,6 +22,7 @@ PROD_SCALAR_MICROKERNEL_SRCS = [
"src/f16-vsin/gen/f16-vsin-scalar-rational-3-2-div.c",
"src/f32-argmaxpool/f32-argmaxpool-9p8x-scalar-c1.c",
"src/f32-avgpool/gen/f32-avgpool-9p-minmax-scalar-u1.c",
"src/f32-bf16-vcvt/gen/f32-bf16-vcvt-scalar-u2.c",
"src/f32-conv-hwc2chw/f32-conv-hwc2chw-3x3s2p1c3x4-scalar-1x1.c",
"src/f32-dwconv/gen/f32-dwconv-3p1c-minmax-scalar-acc2.c",
"src/f32-dwconv/gen/f32-dwconv-3p1c-scalar-acc2.c",
Expand Down Expand Up @@ -245,6 +247,9 @@ PROD_SCALAR_MICROKERNEL_SRCS = [

NON_PROD_SCALAR_MICROKERNEL_SRCS = [
"src/bf16-f32-gemm/bf16-f32-gemm-1x4c2-minmax-scalar.c",
"src/bf16-f32-vcvt/gen/bf16-f32-vcvt-scalar-u1.c",
"src/bf16-f32-vcvt/gen/bf16-f32-vcvt-scalar-u3.c",
"src/bf16-f32-vcvt/gen/bf16-f32-vcvt-scalar-u4.c",
"src/f16-f32-vcvt/gen/f16-f32-vcvt-scalar-u1.c",
"src/f16-f32-vcvt/gen/f16-f32-vcvt-scalar-u2.c",
"src/f16-f32-vcvt/gen/f16-f32-vcvt-scalar-u3.c",
Expand All @@ -270,6 +275,9 @@ NON_PROD_SCALAR_MICROKERNEL_SRCS = [
"src/f16-rminmax/gen/f16-rminmax-scalar-u3-acc3.c",
"src/f16-rminmax/gen/f16-rminmax-scalar-u4-acc2.c",
"src/f16-rminmax/gen/f16-rminmax-scalar-u4-acc4.c",
"src/f32-bf16-vcvt/gen/f32-bf16-vcvt-scalar-u1.c",
"src/f32-bf16-vcvt/gen/f32-bf16-vcvt-scalar-u3.c",
"src/f32-bf16-vcvt/gen/f32-bf16-vcvt-scalar-u4.c",
"src/f32-conv-hwc/f32-conv-hwc-3x3s2p0p1c3x4-scalar-1x1.c",
"src/f32-conv-hwc/f32-conv-hwc-3x3s2p1c3x4-scalar-1x1.c",
"src/f32-dwconv/gen/f32-dwconv-3p1c-minmax-scalar.c",
Expand Down
13 changes: 13 additions & 0 deletions scripts/generate-bf16-f32-vcvt.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
#!/bin/sh
# Copyright 2021 Google LLC
#
# This source code is licensed under the BSD-style license found in the
# LICENSE file in the root directory of this source tree.

#################################### Scalar ###################################
# Runs tools/xngen on src/bf16-f32-vcvt/scalar.c.in once per BATCH_TILE value
# (1..4), writing each result to src/bf16-f32-vcvt/gen/. Every invocation is a
# background job so the four files are produced in parallel; the PID of each
# job is recorded so that its exit status can be inspected afterwards.
pids=""
tools/xngen src/bf16-f32-vcvt/scalar.c.in -D BATCH_TILE=1 -o src/bf16-f32-vcvt/gen/bf16-f32-vcvt-scalar-u1.c &
pids="${pids} $!"
tools/xngen src/bf16-f32-vcvt/scalar.c.in -D BATCH_TILE=2 -o src/bf16-f32-vcvt/gen/bf16-f32-vcvt-scalar-u2.c &
pids="${pids} $!"
tools/xngen src/bf16-f32-vcvt/scalar.c.in -D BATCH_TILE=3 -o src/bf16-f32-vcvt/gen/bf16-f32-vcvt-scalar-u3.c &
pids="${pids} $!"
tools/xngen src/bf16-f32-vcvt/scalar.c.in -D BATCH_TILE=4 -o src/bf16-f32-vcvt/gen/bf16-f32-vcvt-scalar-u4.c &
pids="${pids} $!"

# `wait` without operands exits 0 no matter how the jobs ended, which would
# hide a failed generator run. Waiting on each PID returns that job's own
# status, so any failure is turned into a non-zero exit status of this script.
status=0
for pid in ${pids}; do
  wait "${pid}" || status=1
done
exit "${status}"
13 changes: 13 additions & 0 deletions scripts/generate-f32-bf16-vcvt.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
#!/bin/sh
# Copyright 2021 Google LLC
#
# This source code is licensed under the BSD-style license found in the
# LICENSE file in the root directory of this source tree.

#################################### Scalar ###################################
# Runs tools/xngen on src/f32-bf16-vcvt/scalar.c.in once per BATCH_TILE value
# (1..4), writing each result to src/f32-bf16-vcvt/gen/. Every invocation is a
# background job so the four files are produced in parallel; the PID of each
# job is recorded so that its exit status can be inspected afterwards.
pids=""
tools/xngen src/f32-bf16-vcvt/scalar.c.in -D BATCH_TILE=1 -o src/f32-bf16-vcvt/gen/f32-bf16-vcvt-scalar-u1.c &
pids="${pids} $!"
tools/xngen src/f32-bf16-vcvt/scalar.c.in -D BATCH_TILE=2 -o src/f32-bf16-vcvt/gen/f32-bf16-vcvt-scalar-u2.c &
pids="${pids} $!"
tools/xngen src/f32-bf16-vcvt/scalar.c.in -D BATCH_TILE=3 -o src/f32-bf16-vcvt/gen/f32-bf16-vcvt-scalar-u3.c &
pids="${pids} $!"
tools/xngen src/f32-bf16-vcvt/scalar.c.in -D BATCH_TILE=4 -o src/f32-bf16-vcvt/gen/f32-bf16-vcvt-scalar-u4.c &
pids="${pids} $!"

# `wait` without operands exits 0 no matter how the jobs ended, which would
# hide a failed generator run. Waiting on each PID returns that job's own
# status, so any failure is turned into a non-zero exit status of this script.
status=0
for pid in ${pids}; do
  wait "${pid}" || status=1
done
exit "${status}"
10 changes: 10 additions & 0 deletions src/bf16-f32-vcvt/bf16-f32-vcvt.inc
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
// clang-format off
// Copyright 2026 Google LLC
//
// This source code is licensed under the BSD-style license found in the
// LICENSE file in the root directory of this source tree.

// Table of the scalar bfloat16 -> float conversion microkernels, one
// XNN_UKERNEL entry per unroll variant (u1 through u4).
//
// NOTE(review): XNN_UKERNEL is not defined in this file; presumably the
// including translation unit defines it before including this list. In every
// entry the third argument equals the `_uN` suffix of the kernel name, and the
// fifth and sixth arguments match the input/output element types in the kernel
// signatures (xnn_bfloat16 in, float out). The meaning of the remaining
// arguments (xnn_arch_none, false, void, NULL) should be confirmed against the
// macro definition.
XNN_UKERNEL(xnn_arch_none, xnn_bf16_f32_vcvt_ukernel__scalar_u1, 1, false, xnn_bfloat16, float, void, NULL)
XNN_UKERNEL(xnn_arch_none, xnn_bf16_f32_vcvt_ukernel__scalar_u2, 2, false, xnn_bfloat16, float, void, NULL)
XNN_UKERNEL(xnn_arch_none, xnn_bf16_f32_vcvt_ukernel__scalar_u3, 3, false, xnn_bfloat16, float, void, NULL)
XNN_UKERNEL(xnn_arch_none, xnn_bf16_f32_vcvt_ukernel__scalar_u4, 4, false, xnn_bfloat16, float, void, NULL)
40 changes: 40 additions & 0 deletions src/bf16-f32-vcvt/gen/bf16-f32-vcvt-scalar-u1.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
// clang-format off
// Auto-generated file. Do not edit!
// Template: src/bf16-f32-vcvt/scalar.c.in
// Generator: tools/xngen
//
// Copyright 2021 Google LLC
//
// This source code is licensed under the BSD-style license found in the
// LICENSE file in the root directory of this source tree.

#include <assert.h>
#include <stddef.h>
#include <stdint.h>

#include "src/xnnpack/common.h"
#include "src/xnnpack/math.h"
#include "src/xnnpack/vcvt.h"


void xnn_bf16_f32_vcvt_ukernel__scalar_u1(
size_t batch,
const xnn_bfloat16* input,
float* output,
const void* params)
{
assert(batch != 0);
assert(batch % sizeof(xnn_bfloat16) == 0);
assert(input != NULL);
assert(output != NULL);

const xnn_bfloat16* i = input;
float* o = output;
do {
const xnn_bfloat16 vh = *i++;

*o++ = xnn_bfloat16_to_float(vh);

batch -= sizeof(xnn_bfloat16);
} while (batch != 0);
}
47 changes: 47 additions & 0 deletions src/bf16-f32-vcvt/gen/bf16-f32-vcvt-scalar-u2.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,47 @@
// clang-format off
// Auto-generated file. Do not edit!
// Template: src/bf16-f32-vcvt/scalar.c.in
// Generator: tools/xngen
//
// Copyright 2021 Google LLC
//
// This source code is licensed under the BSD-style license found in the
// LICENSE file in the root directory of this source tree.

#include <assert.h>
#include <stddef.h>
#include <stdint.h>

#include "src/xnnpack/common.h"
#include "src/xnnpack/math.h"
#include "src/xnnpack/vcvt.h"


void xnn_bf16_f32_vcvt_ukernel__scalar_u2(
size_t batch,
const xnn_bfloat16* input,
float* output,
const void* params)
{
assert(batch != 0);
assert(batch % sizeof(xnn_bfloat16) == 0);
assert(input != NULL);
assert(output != NULL);

const xnn_bfloat16* i = input;
float* o = output;
for (; batch >= 2 * sizeof(xnn_bfloat16); batch -= 2 * sizeof(xnn_bfloat16)) {
const xnn_bfloat16 vh0 = i[0];
const xnn_bfloat16 vh1 = i[1];
i += 2;

o[0] = xnn_bfloat16_to_float(vh0);
o[1] = xnn_bfloat16_to_float(vh1);
o += 2;
}
if XNN_UNLIKELY(batch != 0) {
const xnn_bfloat16 vh = *i;

*o = xnn_bfloat16_to_float(vh);
}
}
53 changes: 53 additions & 0 deletions src/bf16-f32-vcvt/gen/bf16-f32-vcvt-scalar-u3.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,53 @@
// clang-format off
// Auto-generated file. Do not edit!
// Template: src/bf16-f32-vcvt/scalar.c.in
// Generator: tools/xngen
//
// Copyright 2021 Google LLC
//
// This source code is licensed under the BSD-style license found in the
// LICENSE file in the root directory of this source tree.

#include <assert.h>
#include <stddef.h>
#include <stdint.h>

#include "src/xnnpack/common.h"
#include "src/xnnpack/math.h"
#include "src/xnnpack/vcvt.h"


void xnn_bf16_f32_vcvt_ukernel__scalar_u3(
size_t batch,
const xnn_bfloat16* input,
float* output,
const void* params)
{
assert(batch != 0);
assert(batch % sizeof(xnn_bfloat16) == 0);
assert(input != NULL);
assert(output != NULL);

const xnn_bfloat16* i = input;
float* o = output;
for (; batch >= 3 * sizeof(xnn_bfloat16); batch -= 3 * sizeof(xnn_bfloat16)) {
const xnn_bfloat16 vh0 = i[0];
const xnn_bfloat16 vh1 = i[1];
const xnn_bfloat16 vh2 = i[2];
i += 3;

o[0] = xnn_bfloat16_to_float(vh0);
o[1] = xnn_bfloat16_to_float(vh1);
o[2] = xnn_bfloat16_to_float(vh2);
o += 3;
}
if XNN_UNLIKELY(batch != 0) {
do {
const xnn_bfloat16 vh = *i++;

*o++ = xnn_bfloat16_to_float(vh);

batch -= sizeof(xnn_bfloat16);
} while (batch != 0);
}
}
55 changes: 55 additions & 0 deletions src/bf16-f32-vcvt/gen/bf16-f32-vcvt-scalar-u4.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,55 @@
// clang-format off
// Auto-generated file. Do not edit!
// Template: src/bf16-f32-vcvt/scalar.c.in
// Generator: tools/xngen
//
// Copyright 2021 Google LLC
//
// This source code is licensed under the BSD-style license found in the
// LICENSE file in the root directory of this source tree.

#include <assert.h>
#include <stddef.h>
#include <stdint.h>

#include "src/xnnpack/common.h"
#include "src/xnnpack/math.h"
#include "src/xnnpack/vcvt.h"


void xnn_bf16_f32_vcvt_ukernel__scalar_u4(
size_t batch,
const xnn_bfloat16* input,
float* output,
const void* params)
{
assert(batch != 0);
assert(batch % sizeof(xnn_bfloat16) == 0);
assert(input != NULL);
assert(output != NULL);

const xnn_bfloat16* i = input;
float* o = output;
for (; batch >= 4 * sizeof(xnn_bfloat16); batch -= 4 * sizeof(xnn_bfloat16)) {
const xnn_bfloat16 vh0 = i[0];
const xnn_bfloat16 vh1 = i[1];
const xnn_bfloat16 vh2 = i[2];
const xnn_bfloat16 vh3 = i[3];
i += 4;

o[0] = xnn_bfloat16_to_float(vh0);
o[1] = xnn_bfloat16_to_float(vh1);
o[2] = xnn_bfloat16_to_float(vh2);
o[3] = xnn_bfloat16_to_float(vh3);
o += 4;
}
if XNN_UNLIKELY(batch != 0) {
do {
const xnn_bfloat16 vh = *i++;

*o++ = xnn_bfloat16_to_float(vh);

batch -= sizeof(xnn_bfloat16);
} while (batch != 0);
}
}
63 changes: 63 additions & 0 deletions src/bf16-f32-vcvt/scalar.c.in
Original file line number Diff line number Diff line change
@@ -0,0 +1,63 @@
// Copyright 2021 Google LLC
//
// This source code is licensed under the BSD-style license found in the
// LICENSE file in the root directory of this source tree.

$assert BATCH_TILE >= 1
#include <assert.h>
#include <stddef.h>
#include <stdint.h>

#include "src/xnnpack/common.h"
#include "src/xnnpack/math.h"
#include "src/xnnpack/vcvt.h"


// Scalar bfloat16 -> float conversion kernel. The BATCH_TILE template
// parameter selects how many elements the main loop converts per iteration
// and is also used as the `_uN` suffix of the generated function name.
//
//   batch  - size of the input in bytes; must be a non-zero multiple of
//            sizeof(xnn_bfloat16).
//   input  - source elements; must not be NULL.
//   output - destination; receives one float per input element; must not be
//            NULL.
//   params - not read by this kernel.
void xnn_bf16_f32_vcvt_ukernel__scalar_u${BATCH_TILE}(
size_t batch,
const xnn_bfloat16* input,
float* output,
const void* params)
{
assert(batch != 0);
assert(batch % sizeof(xnn_bfloat16) == 0);
assert(input != NULL);
assert(output != NULL);

const xnn_bfloat16* i = input;
float* o = output;
// Unrolled main loop, emitted only for tiles larger than one: all loads of a
// tile are issued before its conversions and stores.
$if BATCH_TILE > 1:
for (; batch >= ${BATCH_TILE} * sizeof(xnn_bfloat16); batch -= ${BATCH_TILE} * sizeof(xnn_bfloat16)) {
$for N in range(BATCH_TILE):
const xnn_bfloat16 vh${N} = i[${N}];
i += ${BATCH_TILE};

$for N in range(BATCH_TILE):
o[${N}] = xnn_bfloat16_to_float(vh${N});
o += ${BATCH_TILE};
}
// Tail handling depends on the tile size: a tile of 1 has no main loop, so the
// element-wise loop handles the whole batch; a tile of 2 can leave at most one
// element, so a single conversion suffices; larger tiles can leave several
// elements, which are converted one at a time.
$if BATCH_TILE == 1:
do {
const xnn_bfloat16 vh = *i++;

*o++ = xnn_bfloat16_to_float(vh);

batch -= sizeof(xnn_bfloat16);
} while (batch != 0);
$elif BATCH_TILE == 2:
if XNN_UNLIKELY(batch != 0) {
const xnn_bfloat16 vh = *i;

*o = xnn_bfloat16_to_float(vh);
}
$else:
if XNN_UNLIKELY(batch != 0) {
do {
const xnn_bfloat16 vh = *i++;

*o++ = xnn_bfloat16_to_float(vh);

batch -= sizeof(xnn_bfloat16);
} while (batch != 0);
}
}
Loading
Loading