From 600150d12dc1dbb905f1411565dbe03a4ec9e7df Mon Sep 17 00:00:00 2001 From: Dillon Sharlet Date: Thu, 26 Mar 2026 22:29:27 -0700 Subject: [PATCH] Add CMake build for YNNPACK PiperOrigin-RevId: 890235712 --- CMakeLists.txt | 6 + ynnpack/CMakeLists.txt | 175 ++++++++++++++++++ ynnpack/base/CMakeLists.txt | 91 ++++++++++ ynnpack/base/arch.cc | 1 + ynnpack/base/test/CMakeLists.txt | 35 ++++ ynnpack/cmake/DownloadSlinky.cmake | 26 +++ ynnpack/kernels/CMakeLists.txt | 12 ++ ynnpack/kernels/arch_copts.cmake | 180 +++++++++++++++++++ ynnpack/kernels/binary/CMakeLists.txt | 64 +++++++ ynnpack/kernels/dot/CMakeLists.txt | 217 +++++++++++++++++++++++ ynnpack/kernels/lut/CMakeLists.txt | 7 + ynnpack/kernels/reduce/CMakeLists.txt | 44 +++++ ynnpack/kernels/ternary/CMakeLists.txt | 65 +++++++ ynnpack/kernels/transpose/CMakeLists.txt | 46 +++++ ynnpack/kernels/unary/CMakeLists.txt | 73 ++++++++ ynnpack/subgraph/CMakeLists.txt | 69 +++++++ ynnpack/subgraph/test/CMakeLists.txt | 42 +++++ ynnpack/xnnpack/CMakeLists.txt | 47 +++++ 18 files changed, 1200 insertions(+) create mode 100644 ynnpack/CMakeLists.txt create mode 100644 ynnpack/base/CMakeLists.txt create mode 100644 ynnpack/base/test/CMakeLists.txt create mode 100644 ynnpack/cmake/DownloadSlinky.cmake create mode 100644 ynnpack/kernels/CMakeLists.txt create mode 100644 ynnpack/kernels/arch_copts.cmake create mode 100644 ynnpack/kernels/binary/CMakeLists.txt create mode 100644 ynnpack/kernels/dot/CMakeLists.txt create mode 100644 ynnpack/kernels/lut/CMakeLists.txt create mode 100644 ynnpack/kernels/reduce/CMakeLists.txt create mode 100644 ynnpack/kernels/ternary/CMakeLists.txt create mode 100644 ynnpack/kernels/transpose/CMakeLists.txt create mode 100644 ynnpack/kernels/unary/CMakeLists.txt create mode 100644 ynnpack/subgraph/CMakeLists.txt create mode 100644 ynnpack/subgraph/test/CMakeLists.txt create mode 100644 ynnpack/xnnpack/CMakeLists.txt diff --git a/CMakeLists.txt b/CMakeLists.txt index 47c6ff59f5a..55698168c86 100644 
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -62,6 +62,7 @@ OPTION(XNNPACK_BUILD_TESTS "Build XNNPACK unit tests" ON)
 OPTION(XNNPACK_BUILD_ALL_MICROKERNELS "Builds all XNNPACK Microkernels" ON)
 OPTION(XNNPACK_BUILD_BENCHMARKS "Build XNNPACK benchmarks" ON)
 OPTION(XNNPACK_BUILD_WITH_LIBM "Build XNNPACK with libm, can turn off on Windows to avoid mutiple math functions issue." ON)
+OPTION(XNNPACK_BUILD_YNNPACK "Build YNNPACK library" ON)
 OPTION(XNNPACK_USE_SYSTEM_LIBS "Use system-provided dependency libraries" OFF)
 OPTION(USE_GNU_SOURCE "Use _GNU_SOURCE macro" OFF)
 IF(XNNPACK_BUILD_BENCHMARKS OR XNNPACK_BUILD_TESTS)
@@ -1352,3 +1353,8 @@ ENDIF()
 IF(XNNPACK_BUILD_BENCHMARKS)
   ADD_SUBDIRECTORY(bench)
 ENDIF()
+
+# ---[ YNNPACK
+IF(XNNPACK_BUILD_YNNPACK)
+  ADD_SUBDIRECTORY(ynnpack)
+ENDIF()
diff --git a/ynnpack/CMakeLists.txt b/ynnpack/CMakeLists.txt
new file mode 100644
index 00000000000..727e53c6647
--- /dev/null
+++ b/ynnpack/CMakeLists.txt
@@ -0,0 +1,175 @@
+# Copyright 2026 Google LLC
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+CMAKE_MINIMUM_REQUIRED(VERSION 3.15 FATAL_ERROR)
+
+# Declaring a project here makes PROJECT_SOURCE_DIR refer to this ynnpack/
+# directory for everything configured below it.
+PROJECT(ynnpack C CXX)
+
+# Put the parent of ynnpack/ on the include path of every target defined in
+# this directory and its subdirectories (directory-scoped, not per-target).
+INCLUDE_DIRECTORIES("${PROJECT_SOURCE_DIR}/..")
+
+# Require C++17 with compiler extensions turned off.
+SET(CMAKE_CXX_STANDARD 17)
+SET(CMAKE_CXX_STANDARD_REQUIRED YES)
+SET(CMAKE_CXX_EXTENSIONS NO)
+
+# Append -fno-rtti to the C++ flags for every compiler except MSVC.
+IF(NOT MSVC)
+  SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fno-rtti")
+ENDIF()
+
+# ---[ Options.
+# Both options default to the value of the matching XNNPACK_BUILD_* option.
+OPTION(YNNPACK_BUILD_TESTS "Build YNNPACK unit tests" ${XNNPACK_BUILD_TESTS})
+OPTION(YNNPACK_BUILD_BENCHMARKS "Build YNNPACK benchmarks" ${XNNPACK_BUILD_BENCHMARKS})
+
+IF(YNNPACK_BUILD_TESTS)
+  ENABLE_TESTING()
+ENDIF()
+
+# ---[ Architecture detection.
+# Derive YNNPACK_TARGET_PROCESSOR from CMAKE_SYSTEM_PROCESSOR unless a value
+# was already supplied. Anything unrecognized becomes "generic".
+# NOTE(review): no branch here produces "hexagon" or "wasm", although both
+# values are tested further down; they apparently have to be passed in
+# explicitly via YNNPACK_TARGET_PROCESSOR - confirm this is intended.
+IF(NOT YNNPACK_TARGET_PROCESSOR)
+  IF(CMAKE_SYSTEM_PROCESSOR MATCHES "^i[3-7]86$")
+    SET(YNNPACK_TARGET_PROCESSOR "x86")
+  ELSEIF(CMAKE_SYSTEM_PROCESSOR STREQUAL "x86_64" OR CMAKE_SYSTEM_PROCESSOR STREQUAL "AMD64")
+    SET(YNNPACK_TARGET_PROCESSOR "x86_64")
+  ELSEIF(CMAKE_SYSTEM_PROCESSOR MATCHES "^armv[5-8]")
+    SET(YNNPACK_TARGET_PROCESSOR "arm")
+  ELSEIF(CMAKE_SYSTEM_PROCESSOR STREQUAL "aarch64" OR CMAKE_SYSTEM_PROCESSOR STREQUAL "arm64")
+    SET(YNNPACK_TARGET_PROCESSOR "arm64")
+  ELSE()
+    SET(YNNPACK_TARGET_PROCESSOR "generic")
+  ENDIF()
+ENDIF()
+
+MESSAGE(STATUS "YNNPACK target processor: ${YNNPACK_TARGET_PROCESSOR}")
+
+# ---[ ISA Options.
+# Per-family defaults. Every OPTION below takes its default from one of these
+# or from an option declared before it, so the declaration order matters.
+# "^x86" matches both "x86" and "x86_64".
+SET(YNN_X86_DEFAULT OFF)
+IF(YNNPACK_TARGET_PROCESSOR MATCHES "^x86")
+  SET(YNN_X86_DEFAULT ON)
+ENDIF()
+
+# "^arm" matches both "arm" and "arm64".
+SET(YNN_ARM_DEFAULT OFF)
+IF(YNNPACK_TARGET_PROCESSOR MATCHES "^arm")
+  SET(YNN_ARM_DEFAULT ON)
+ENDIF()
+
+SET(YNN_ARM64_DEFAULT OFF)
+IF(YNNPACK_TARGET_PROCESSOR STREQUAL "arm64")
+  SET(YNN_ARM64_DEFAULT ON)
+ENDIF()
+
+# x86
+# The SSE-level options and AVX default to YNN_ENABLE_X86_SSE; F16C, AVX2,
+# FMA3 and AVX512 default to YNN_ENABLE_X86_AVX.
+OPTION(YNN_ENABLE_X86_SSE "Enable x86 SSE" ${YNN_X86_DEFAULT})
+OPTION(YNN_ENABLE_X86_SSE2 "Enable x86 SSE2" ${YNN_ENABLE_X86_SSE})
+OPTION(YNN_ENABLE_X86_SSSE3 "Enable x86 SSSE3" ${YNN_ENABLE_X86_SSE})
+OPTION(YNN_ENABLE_X86_SSE41 "Enable x86 SSE4.1" ${YNN_ENABLE_X86_SSE})
+OPTION(YNN_ENABLE_X86_AVX "Enable x86 AVX" ${YNN_ENABLE_X86_SSE})
+OPTION(YNN_ENABLE_X86_F16C "Enable x86 F16C" ${YNN_ENABLE_X86_AVX})
+OPTION(YNN_ENABLE_X86_AVX2 "Enable x86 AVX2" ${YNN_ENABLE_X86_AVX})
+OPTION(YNN_ENABLE_X86_FMA3 "Enable x86 FMA3" ${YNN_ENABLE_X86_AVX})
+OPTION(YNN_ENABLE_X86_AVX512 "Enable x86 AVX512" ${YNN_ENABLE_X86_AVX})
+
+# AVX512BF16 defaults to ON only when targeting Linux with AVX512 enabled.
+IF(CMAKE_SYSTEM_NAME STREQUAL "Linux" AND YNN_ENABLE_X86_AVX512)
+  SET(YNN_X86_AVX512BF16_DEFAULT ON)
+ELSE()
+  SET(YNN_X86_AVX512BF16_DEFAULT OFF)
+ENDIF()
+OPTION(YNN_ENABLE_X86_AVX512BF16 "Enable x86 AVX512BF16" ${YNN_X86_AVX512BF16_DEFAULT})
+OPTION(YNN_ENABLE_X86_AVX512FP16 "Enable x86 AVX512FP16" ${YNN_ENABLE_X86_AVX512})
+OPTION(YNN_ENABLE_X86_AVX512VNNI "Enable x86 AVX512VNNI" ${YNN_ENABLE_X86_AVX512})
+
+# AMX defaults to ON only for the 64-bit x86 target.
+SET(YNN_X86_AMX_DEFAULT OFF)
+IF(YNNPACK_TARGET_PROCESSOR STREQUAL "x86_64")
+  SET(YNN_X86_AMX_DEFAULT ON)
+ENDIF()
+OPTION(YNN_ENABLE_X86_AMX "Enable x86 AMX" ${YNN_X86_AMX_DEFAULT})
+OPTION(YNN_ENABLE_X86_AMXBF16 "Enable x86 AMXBF16" ${YNN_ENABLE_X86_AMX})
+OPTION(YNN_ENABLE_X86_AMXFP16 "Enable x86 AMXFP16" ${YNN_ENABLE_X86_AMX})
+OPTION(YNN_ENABLE_X86_AMXINT8 "Enable x86 AMXINT8" ${YNN_ENABLE_X86_AMX})
+
+# Derived x86
+# Combined feature sets default to ON only when both constituents are enabled.
+IF(YNN_ENABLE_X86_F16C AND YNN_ENABLE_X86_FMA3)
+  SET(YNN_X86_F16C_FMA3_DEFAULT ON)
+ELSE()
+  SET(YNN_X86_F16C_FMA3_DEFAULT OFF)
+ENDIF()
+OPTION(YNN_ENABLE_X86_F16C_FMA3 "Enable x86 F16C+FMA3" ${YNN_X86_F16C_FMA3_DEFAULT})
+
+IF(YNN_ENABLE_X86_AVX2 AND YNN_ENABLE_X86_FMA3)
+  SET(YNN_X86_AVX2_FMA3_DEFAULT ON)
+ELSE()
+  SET(YNN_X86_AVX2_FMA3_DEFAULT OFF)
+ENDIF()
+OPTION(YNN_ENABLE_X86_AVX2_FMA3 "Enable x86 AVX2+FMA3" ${YNN_X86_AVX2_FMA3_DEFAULT})
+
+# ARM
+# The NEON extensions all default to the value of YNN_ENABLE_ARM_NEON.
+OPTION(YNN_ENABLE_ARM_NEON "Enable ARM NEON" ${YNN_ARM_DEFAULT})
+OPTION(YNN_ENABLE_ARM_NEONDOT "Enable ARM NEONDOT" ${YNN_ENABLE_ARM_NEON})
+OPTION(YNN_ENABLE_ARM_NEONFMA "Enable ARM NEONFMA" ${YNN_ENABLE_ARM_NEON})
+OPTION(YNN_ENABLE_ARM_NEONFP16 "Enable ARM NEONFP16" ${YNN_ENABLE_ARM_NEON})
+OPTION(YNN_ENABLE_ARM_NEONFP16ARITH "Enable ARM NEONFP16ARITH" ${YNN_ENABLE_ARM_NEON})
+OPTION(YNN_ENABLE_ARM_NEONBF16 "Enable ARM NEONBF16" ${YNN_ENABLE_ARM_NEON})
+
+# ARM64-only features default to YNN_ENABLE_ARM64; SME2 follows SME.
+OPTION(YNN_ENABLE_ARM64 "Enable ARM64" ${YNN_ARM64_DEFAULT})
+OPTION(YNN_ENABLE_ARM64_NEONI8MM "Enable ARM64 NEONI8MM" ${YNN_ENABLE_ARM64})
+OPTION(YNN_ENABLE_ARM64_NEON "Enable ARM64 NEON" ${YNN_ENABLE_ARM64})
+OPTION(YNN_ENABLE_ARM64_SME "Enable ARM64 SME" ${YNN_ENABLE_ARM64})
+OPTION(YNN_ENABLE_ARM64_SME2 "Enable ARM64 SME2" ${YNN_ENABLE_ARM64_SME})
+
+# SVE defaults to ON for ARM64 builds except on Apple platforms.
+IF(YNN_ENABLE_ARM64 AND NOT APPLE)
+  SET(YNN_ARM64_SVE_DEFAULT ON)
+ELSE()
+  SET(YNN_ARM64_SVE_DEFAULT OFF)
+ENDIF()
+OPTION(YNN_ENABLE_ARM64_SVE "Enable ARM64 SVE" ${YNN_ARM64_SVE_DEFAULT})
+
+# Others
+SET(YNN_HEXAGON_DEFAULT OFF)
+IF(YNNPACK_TARGET_PROCESSOR STREQUAL "hexagon")
+  SET(YNN_HEXAGON_DEFAULT ON)
+ENDIF()
+OPTION(YNN_ENABLE_HVX "Enable Hexagon HVX" ${YNN_HEXAGON_DEFAULT})
+
+SET(YNN_WASM_DEFAULT OFF)
+IF(YNNPACK_TARGET_PROCESSOR STREQUAL "wasm")
+  SET(YNN_WASM_DEFAULT ON)
+ENDIF()
+OPTION(YNN_ENABLE_WASM_SIMD128 "Enable Wasm SIMD128" ${YNN_WASM_DEFAULT})
+
+OPTION(YNN_ENABLE_CPUINFO "Enable cpuinfo" ON)
+
+# ---[ Dependencies.
+# slinky is only set up here when no slinky_base target exists yet. With
+# XNNPACK_USE_SYSTEM_LIBS it must be an installed package; otherwise it is built
+# from SLINKY_SOURCE_DIR, which is downloaded at configure time when undefined.
+IF(NOT TARGET slinky_base)
+  IF(XNNPACK_USE_SYSTEM_LIBS)
+    FIND_PACKAGE(slinky REQUIRED)
+  ELSE()
+    # SLINKY_ENABLE_TESTS is forced OFF before slinky is added (presumably so
+    # that slinky's own tests are not built as part of this tree).
+    SET(SLINKY_ENABLE_TESTS OFF CACHE BOOL "" FORCE)
+    IF(NOT DEFINED SLINKY_SOURCE_DIR)
+      MESSAGE(STATUS "Downloading slinky to ${CMAKE_BINARY_DIR}/slinky-source (define SLINKY_SOURCE_DIR to avoid it)")
+      CONFIGURE_FILE(cmake/DownloadSlinky.cmake "${CMAKE_BINARY_DIR}/slinky-download/CMakeLists.txt")
+      # Configure and build the helper project. Stop on failure: otherwise the
+      # problem only surfaces later as a missing source directory.
+      EXECUTE_PROCESS(COMMAND "${CMAKE_COMMAND}" -G "${CMAKE_GENERATOR}" .
+        RESULT_VARIABLE SLINKY_DOWNLOAD_RESULT
+        WORKING_DIRECTORY "${CMAKE_BINARY_DIR}/slinky-download")
+      IF(NOT SLINKY_DOWNLOAD_RESULT EQUAL 0)
+        MESSAGE(FATAL_ERROR "Configuring the slinky download project failed: ${SLINKY_DOWNLOAD_RESULT}")
+      ENDIF()
+      EXECUTE_PROCESS(COMMAND "${CMAKE_COMMAND}" --build .
+        RESULT_VARIABLE SLINKY_DOWNLOAD_RESULT
+        WORKING_DIRECTORY "${CMAKE_BINARY_DIR}/slinky-download")
+      IF(NOT SLINKY_DOWNLOAD_RESULT EQUAL 0)
+        MESSAGE(FATAL_ERROR "Downloading slinky failed: ${SLINKY_DOWNLOAD_RESULT}")
+      ENDIF()
+      SET(SLINKY_SOURCE_DIR "${CMAKE_BINARY_DIR}/slinky-source" CACHE STRING "slinky source directory")
+    ENDIF()
+    # Make slinky's headers visible to the targets below and build it in-tree.
+    INCLUDE_DIRECTORIES("${SLINKY_SOURCE_DIR}")
+    ADD_SUBDIRECTORY("${SLINKY_SOURCE_DIR}" "${CMAKE_BINARY_DIR}/slinky")
+  ENDIF()
+ENDIF()
+
+# ---[ Includes.
+INCLUDE_DIRECTORIES(include)
+INCLUDE_DIRECTORIES(.)
+
+# ---[ Subdirectories.
+ADD_SUBDIRECTORY(base)
+ADD_SUBDIRECTORY(kernels)
+ADD_SUBDIRECTORY(subgraph)
+ADD_SUBDIRECTORY(xnnpack)
+
+# ---[ Main library.
+# Header-only facade target: it exposes include/ and forwards to
+# ynnpack_subgraph.
+ADD_LIBRARY(ynnpack INTERFACE)
+TARGET_INCLUDE_DIRECTORIES(ynnpack INTERFACE include)
+TARGET_LINK_LIBRARIES(ynnpack INTERFACE ynnpack_subgraph)
diff --git a/ynnpack/base/CMakeLists.txt b/ynnpack/base/CMakeLists.txt
new file mode 100644
index 00000000000..3d2fbaac34d
--- /dev/null
+++ b/ynnpack/base/CMakeLists.txt
@@ -0,0 +1,91 @@
+# Copyright 2026 Google LLC
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+# ---[ Options.
+# NOTE(review): the value is pasted into a preprocessor token below without
+# being checked against the list of allowed strings - confirm that an
+# unsupported value fails in an understandable way.
+SET(YNN_LOG_LEVEL "warning" CACHE STRING "Log level (error, warning, info, debug, verbose)")
+SET_PROPERTY(CACHE YNN_LOG_LEVEL PROPERTY STRINGS error warning info debug verbose)
+
+# ---[ Log library.
+# Interface-only target; consumers get YNN_LOG_LEVEL defined as
+# YNN_LOG_LEVEL_<value of the cache variable>.
+ADD_LIBRARY(ynnpack_log INTERFACE)
+TARGET_COMPILE_DEFINITIONS(ynnpack_log INTERFACE YNN_LOG_LEVEL=YNN_LOG_LEVEL_${YNN_LOG_LEVEL})
+TARGET_LINK_LIBRARIES(ynnpack_log INTERFACE slinky_base)
+
+# ---[ Base library.
+ADD_LIBRARY(ynnpack_base STATIC
+  arch.cc
+  to_string.cc
+  type.cc
+)
+
+TARGET_INCLUDE_DIRECTORIES(ynnpack_base PUBLIC .)
+TARGET_LINK_LIBRARIES(ynnpack_base PUBLIC ynnpack_log slinky_base)
+
+# cpuinfo is linked, and YNN_ENABLE_CPUINFO defined, only when the option is ON
+# and a cpuinfo target exists. If the target is missing nothing is reported and
+# the library is built without the define.
+IF(YNN_ENABLE_CPUINFO)
+  # Check if cpuinfo target exists (it should if XNNPACK is set up correctly)
+  IF(TARGET cpuinfo)
+    TARGET_LINK_LIBRARIES(ynnpack_base PUBLIC cpuinfo)
+    TARGET_COMPILE_DEFINITIONS(ynnpack_base PUBLIC YNN_ENABLE_CPUINFO)
+  ENDIF()
+ENDIF()
+
+# Architecture-specific defines.
+# Each target processor publishes a family macro plus a width-specific macro.
+# "generic" (or any other value) publishes nothing.
+IF(YNNPACK_TARGET_PROCESSOR STREQUAL "x86")
+  TARGET_COMPILE_DEFINITIONS(ynnpack_base PUBLIC YNN_ARCH_X86 YNN_ARCH_X86_32)
+ELSEIF(YNNPACK_TARGET_PROCESSOR STREQUAL "x86_64")
+  TARGET_COMPILE_DEFINITIONS(ynnpack_base PUBLIC YNN_ARCH_X86 YNN_ARCH_X86_64)
+ELSEIF(YNNPACK_TARGET_PROCESSOR STREQUAL "arm")
+  TARGET_COMPILE_DEFINITIONS(ynnpack_base PUBLIC YNN_ARCH_ARM YNN_ARCH_ARM32)
+ELSEIF(YNNPACK_TARGET_PROCESSOR STREQUAL "arm64")
+  TARGET_COMPILE_DEFINITIONS(ynnpack_base PUBLIC YNN_ARCH_ARM YNN_ARCH_ARM64)
+ELSEIF(YNNPACK_TARGET_PROCESSOR STREQUAL "hexagon")
+  TARGET_COMPILE_DEFINITIONS(ynnpack_base PUBLIC YNN_ARCH_HEXAGON)
+ELSEIF(YNNPACK_TARGET_PROCESSOR STREQUAL "wasm")
+  TARGET_COMPILE_DEFINITIONS(ynnpack_base PUBLIC YNN_ARCH_WASM)
+ENDIF()
+
+# YNN_MAP_OPTION(<option> <define>): when the build option <option> is true,
+# add <define> as a PUBLIC compile definition of ynnpack_base, so it reaches
+# every target that links ynnpack_base.
+MACRO(YNN_MAP_OPTION OPTION DEFINE)
+  IF(${OPTION})
+    TARGET_COMPILE_DEFINITIONS(ynnpack_base PUBLIC ${DEFINE})
+  ENDIF()
+ENDMACRO()
+
+# x86 mapping
+YNN_MAP_OPTION(YNN_ENABLE_X86_SSE2 YNN_ARCH_X86_SSE2)
+YNN_MAP_OPTION(YNN_ENABLE_X86_SSSE3 YNN_ARCH_X86_SSSE3)
+YNN_MAP_OPTION(YNN_ENABLE_X86_SSE41 YNN_ARCH_X86_SSE41)
+YNN_MAP_OPTION(YNN_ENABLE_X86_AVX YNN_ARCH_X86_AVX)
+YNN_MAP_OPTION(YNN_ENABLE_X86_F16C YNN_ARCH_X86_F16C)
+YNN_MAP_OPTION(YNN_ENABLE_X86_F16C_FMA3 YNN_ARCH_X86_F16C_FMA3)
+YNN_MAP_OPTION(YNN_ENABLE_X86_AVX2 YNN_ARCH_X86_AVX2)
+YNN_MAP_OPTION(YNN_ENABLE_X86_FMA3 YNN_ARCH_X86_FMA3)
+YNN_MAP_OPTION(YNN_ENABLE_X86_AVX2_FMA3 YNN_ARCH_X86_AVX2_FMA3)
+YNN_MAP_OPTION(YNN_ENABLE_X86_AVX512 YNN_ARCH_X86_AVX512)
+YNN_MAP_OPTION(YNN_ENABLE_X86_AVX512BF16 YNN_ARCH_X86_AVX512BF16)
+YNN_MAP_OPTION(YNN_ENABLE_X86_AVX512FP16 YNN_ARCH_X86_AVX512FP16)
+YNN_MAP_OPTION(YNN_ENABLE_X86_AVX512VNNI YNN_ARCH_X86_AVX512VNNI)
+YNN_MAP_OPTION(YNN_ENABLE_X86_AMXBF16 YNN_ARCH_X86_AMXBF16)
+YNN_MAP_OPTION(YNN_ENABLE_X86_AMXFP16 YNN_ARCH_X86_AMXFP16)
+YNN_MAP_OPTION(YNN_ENABLE_X86_AMXINT8 YNN_ARCH_X86_AMXINT8)
+
+# ARM mapping
+YNN_MAP_OPTION(YNN_ENABLE_ARM_NEON YNN_ARCH_ARM_NEON)
+YNN_MAP_OPTION(YNN_ENABLE_ARM_NEONDOT YNN_ARCH_ARM_NEONDOT)
+YNN_MAP_OPTION(YNN_ENABLE_ARM_NEONFMA YNN_ARCH_ARM_NEONFMA)
+YNN_MAP_OPTION(YNN_ENABLE_ARM_NEONFP16 YNN_ARCH_ARM_NEONFP16)
+YNN_MAP_OPTION(YNN_ENABLE_ARM_NEONFP16ARITH YNN_ARCH_ARM_NEONFP16ARITH)
+YNN_MAP_OPTION(YNN_ENABLE_ARM_NEONBF16 YNN_ARCH_ARM_NEONBF16)
+YNN_MAP_OPTION(YNN_ENABLE_ARM64_NEONI8MM YNN_ARCH_ARM64_NEONI8MM)
+YNN_MAP_OPTION(YNN_ENABLE_ARM64_NEON YNN_ARCH_ARM64_NEON)
+YNN_MAP_OPTION(YNN_ENABLE_ARM64_SME YNN_ARCH_ARM64_SME)
+YNN_MAP_OPTION(YNN_ENABLE_ARM64_SME2 YNN_ARCH_ARM64_SME2)
+YNN_MAP_OPTION(YNN_ENABLE_ARM64_SVE YNN_ARCH_ARM64_SVE)
+
+# Others
+YNN_MAP_OPTION(YNN_ENABLE_HVX YNN_ARCH_HVX)
+YNN_MAP_OPTION(YNN_ENABLE_WASM_SIMD128 YNN_ARCH_WASM_SIMD128)
+
+IF(YNNPACK_BUILD_TESTS)
+  ADD_SUBDIRECTORY(test)
+ENDIF()
diff --git a/ynnpack/base/arch.cc b/ynnpack/base/arch.cc
index 131d939323b..f03a019449b 100644
--- a/ynnpack/base/arch.cc
+++ b/ynnpack/base/arch.cc
@@ -19,6 +19,7 @@ namespace ynn {
 
 #if defined(YNN_ARCH_X86_64) && defined(__linux__) && !defined(CHROMIUM)
 #include
+#include
 
 #define XFEATURE_XTILEDATA 18
 #define ARCH_REQ_XCOMP_PERM 0x1023
diff --git a/ynnpack/base/test/CMakeLists.txt b/ynnpack/base/test/CMakeLists.txt
new file mode 100644
index 00000000000..9dcb837f4bd
--- /dev/null
+++ b/ynnpack/base/test/CMakeLists.txt
@@ -0,0 +1,35 @@
+# Copyright 2026 Google LLC
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+# The *_buffer, *_tensor and *_random helpers are INTERFACE targets: they
+# compile nothing themselves and only add this directory to the include path
+# of whatever links them.
+ADD_LIBRARY(ynnpack_base_test_buffer INTERFACE)
+TARGET_INCLUDE_DIRECTORIES(ynnpack_base_test_buffer INTERFACE .)
+
+ADD_LIBRARY(ynnpack_base_test_tensor INTERFACE)
+TARGET_INCLUDE_DIRECTORIES(ynnpack_base_test_tensor INTERFACE .)
+TARGET_LINK_LIBRARIES(ynnpack_base_test_tensor INTERFACE ynnpack_base_test_buffer ynnpack_base)
+
+ADD_LIBRARY(ynnpack_base_test_random INTERFACE)
+TARGET_INCLUDE_DIRECTORIES(ynnpack_base_test_random INTERFACE .)
+TARGET_LINK_LIBRARIES(ynnpack_base_test_random INTERFACE ynnpack ynnpack_base)
+
+# Compiled test utilities. The gtest/gmock targets used from here on are not
+# created in this file; presumably the enclosing XNNPACK build provides them -
+# confirm.
+ADD_LIBRARY(ynnpack_base_test_util STATIC util.cc)
+TARGET_INCLUDE_DIRECTORIES(ynnpack_base_test_util PUBLIC .)
+TARGET_LINK_LIBRARIES(ynnpack_base_test_util PUBLIC ynnpack_base gtest)
+
+ADD_LIBRARY(ynnpack_base_test_fuzz INTERFACE)
+TARGET_INCLUDE_DIRECTORIES(ynnpack_base_test_fuzz INTERFACE .)
+TARGET_LINK_LIBRARIES(ynnpack_base_test_fuzz INTERFACE ynnpack_base gtest)
+
+# Each test below is one executable registered with CTest under its own name.
+ADD_EXECUTABLE(ynnpack_random_test random_test.cc)
+TARGET_LINK_LIBRARIES(ynnpack_random_test ynnpack_base ynnpack_base_test_fuzz ynnpack_base_test_random gtest gtest_main)
+ADD_TEST(NAME ynnpack_random_test COMMAND ynnpack_random_test)
+
+ADD_EXECUTABLE(ynnpack_tensor_test tensor_test.cc)
+TARGET_LINK_LIBRARIES(ynnpack_tensor_test ynnpack_base_test_fuzz ynnpack_base_test_tensor gmock gtest gtest_main)
+ADD_TEST(NAME ynnpack_tensor_test COMMAND ynnpack_tensor_test)
+
+ADD_EXECUTABLE(ynnpack_float_test float_test.cc)
+TARGET_LINK_LIBRARIES(ynnpack_float_test ynnpack ynnpack_base ynnpack_base_test_fuzz ynnpack_base_test_random gtest gtest_main)
+ADD_TEST(NAME ynnpack_float_test COMMAND ynnpack_float_test)
diff --git a/ynnpack/cmake/DownloadSlinky.cmake b/ynnpack/cmake/DownloadSlinky.cmake
new file mode 100644
index 00000000000..f75331a3fd6
--- /dev/null
+++ b/ynnpack/cmake/DownloadSlinky.cmake
@@ -0,0 +1,26 @@
+# Copyright 2026 Google LLC
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+# Stand-alone helper project. ynnpack/CMakeLists.txt copies this file with
+# CONFIGURE_FILE into the slinky-download directory of the build tree and then
+# configures and builds it there, so the CMAKE_BINARY_DIR references below are
+# filled in during that copy.
+CMAKE_MINIMUM_REQUIRED(VERSION 3.5 FATAL_ERROR)
+
+PROJECT(slinky-download NONE)
+
+# Set file timestamps to the time of extraction.
+IF(POLICY CMP0135)
+  CMAKE_POLICY(SET CMP0135 NEW)
+ENDIF()
+
+# Fetch a pinned slinky archive, checked against the SHA256 below. Every step
+# command is empty, so the only effect is that the archive is downloaded and
+# unpacked into the slinky-source directory.
+INCLUDE(ExternalProject)
+ExternalProject_Add(slinky
+  URL https://github.com/dsharlet/slinky/archive/b6b9cdfba51ad17959a8314f338e2931c55f74d6.zip
+  URL_HASH SHA256=71584e401c60a576cc934353d5d437b49b032dd315820fd810bb44629acff2bf
+  SOURCE_DIR "${CMAKE_BINARY_DIR}/slinky-source"
+  BINARY_DIR "${CMAKE_BINARY_DIR}/slinky"
+  CONFIGURE_COMMAND ""
+  PATCH_COMMAND ""
+  BUILD_COMMAND ""
+  INSTALL_COMMAND ""
+  TEST_COMMAND ""
+)
diff --git a/ynnpack/kernels/CMakeLists.txt b/ynnpack/kernels/CMakeLists.txt
new file mode 100644
index 00000000000..b650a1c0e49
--- /dev/null
+++ b/ynnpack/kernels/CMakeLists.txt
@@ -0,0 +1,12 @@
+# Copyright 2026 Google LLC
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+# One subdirectory per kernel family.
+ADD_SUBDIRECTORY(binary)
+ADD_SUBDIRECTORY(dot)
+ADD_SUBDIRECTORY(lut)
+ADD_SUBDIRECTORY(reduce)
+ADD_SUBDIRECTORY(ternary)
+ADD_SUBDIRECTORY(transpose)
+ADD_SUBDIRECTORY(unary)
diff --git a/ynnpack/kernels/arch_copts.cmake b/ynnpack/kernels/arch_copts.cmake
new file mode 100644
index 00000000000..8c029317188
--- /dev/null
+++ b/ynnpack/kernels/arch_copts.cmake
@@ -0,0 +1,180 @@
+# Copyright 2026 Google LLC
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+# Mapping from architecture name to the enablement variable.
+FUNCTION(YNN_CHECK_ARCH_ENABLED ARCH VAR)
+  # Sets VAR in the caller's scope to the value of the YNN_ENABLE_* option that
+  # gates the kernel architecture ARCH (for example "x86_avx2" reads
+  # YNN_ENABLE_X86_AVX2). An ARCH that is not listed here yields OFF.
+  IF(ARCH STREQUAL "x86_sse2")
+    SET(${VAR} ${YNN_ENABLE_X86_SSE2} PARENT_SCOPE)
+  ELSEIF(ARCH STREQUAL "x86_ssse3")
+    SET(${VAR} ${YNN_ENABLE_X86_SSSE3} PARENT_SCOPE)
+  ELSEIF(ARCH STREQUAL "x86_sse41")
+    SET(${VAR} ${YNN_ENABLE_X86_SSE41} PARENT_SCOPE)
+  ELSEIF(ARCH STREQUAL "x86_avx")
+    SET(${VAR} ${YNN_ENABLE_X86_AVX} PARENT_SCOPE)
+  ELSEIF(ARCH STREQUAL "x86_f16c")
+    SET(${VAR} ${YNN_ENABLE_X86_F16C} PARENT_SCOPE)
+  ELSEIF(ARCH STREQUAL "x86_f16c_fma3")
+    SET(${VAR} ${YNN_ENABLE_X86_F16C_FMA3} PARENT_SCOPE)
+  ELSEIF(ARCH STREQUAL "x86_avx2")
+    SET(${VAR} ${YNN_ENABLE_X86_AVX2} PARENT_SCOPE)
+  ELSEIF(ARCH STREQUAL "x86_fma3")
+    SET(${VAR} ${YNN_ENABLE_X86_FMA3} PARENT_SCOPE)
+  ELSEIF(ARCH STREQUAL "x86_avx2_fma3")
+    SET(${VAR} ${YNN_ENABLE_X86_AVX2_FMA3} PARENT_SCOPE)
+  ELSEIF(ARCH STREQUAL "x86_avx512")
+    SET(${VAR} ${YNN_ENABLE_X86_AVX512} PARENT_SCOPE)
+  ELSEIF(ARCH STREQUAL "x86_avx512bf16")
+    SET(${VAR} ${YNN_ENABLE_X86_AVX512BF16} PARENT_SCOPE)
+  ELSEIF(ARCH STREQUAL "x86_avx512fp16")
+    SET(${VAR} ${YNN_ENABLE_X86_AVX512FP16} PARENT_SCOPE)
+  ELSEIF(ARCH STREQUAL "x86_avx512vnni")
+    SET(${VAR} ${YNN_ENABLE_X86_AVX512VNNI} PARENT_SCOPE)
+  ELSEIF(ARCH STREQUAL "x86_amxbf16")
+    SET(${VAR} ${YNN_ENABLE_X86_AMXBF16} PARENT_SCOPE)
+  ELSEIF(ARCH STREQUAL "x86_amxfp16")
+    SET(${VAR} ${YNN_ENABLE_X86_AMXFP16} PARENT_SCOPE)
+  ELSEIF(ARCH STREQUAL "x86_amxint8")
+    SET(${VAR} ${YNN_ENABLE_X86_AMXINT8} PARENT_SCOPE)
+  ELSEIF(ARCH STREQUAL "arm_neon")
+    SET(${VAR} ${YNN_ENABLE_ARM_NEON} PARENT_SCOPE)
+  ELSEIF(ARCH STREQUAL "arm_neondot")
+    SET(${VAR} ${YNN_ENABLE_ARM_NEONDOT} PARENT_SCOPE)
+  ELSEIF(ARCH STREQUAL "arm_neonfma")
+    SET(${VAR} ${YNN_ENABLE_ARM_NEONFMA} PARENT_SCOPE)
+  ELSEIF(ARCH STREQUAL "arm_neonfp16")
+    SET(${VAR} ${YNN_ENABLE_ARM_NEONFP16} PARENT_SCOPE)
+  ELSEIF(ARCH STREQUAL "arm_neonfp16arith")
+    SET(${VAR} ${YNN_ENABLE_ARM_NEONFP16ARITH} PARENT_SCOPE)
+  ELSEIF(ARCH STREQUAL "arm_neonbf16")
+    SET(${VAR} ${YNN_ENABLE_ARM_NEONBF16} PARENT_SCOPE)
+  ELSEIF(ARCH STREQUAL "arm64_neon")
+    SET(${VAR} ${YNN_ENABLE_ARM64_NEON} PARENT_SCOPE)
+  ELSEIF(ARCH STREQUAL "arm64_neoni8mm")
+    SET(${VAR} ${YNN_ENABLE_ARM64_NEONI8MM} PARENT_SCOPE)
+  ELSEIF(ARCH STREQUAL "arm64_sme")
+    SET(${VAR} ${YNN_ENABLE_ARM64_SME} PARENT_SCOPE)
+  ELSEIF(ARCH STREQUAL "arm64_sme2")
+    SET(${VAR} ${YNN_ENABLE_ARM64_SME2} PARENT_SCOPE)
+  ELSEIF(ARCH STREQUAL "arm64_sve")
+    SET(${VAR} ${YNN_ENABLE_ARM64_SVE} PARENT_SCOPE)
+  ELSEIF(ARCH STREQUAL "hexagon_hvx")
+    SET(${VAR} ${YNN_ENABLE_HVX} PARENT_SCOPE)
+  ELSEIF(ARCH STREQUAL "wasm_simd128")
+    SET(${VAR} ${YNN_ENABLE_WASM_SIMD128} PARENT_SCOPE)
+  ELSE()
+    SET(${VAR} OFF PARENT_SCOPE)
+  ENDIF()
+ENDFUNCTION()
+
+# Function to get compiler flags for a specific architecture.
+#
+# Sets VAR in the caller's scope to the list of compile options for sources
+# built for ARCH (the same names YNN_CHECK_ARCH_ENABLED accepts). The result is
+# empty when ARCH needs no extra flags or is unknown.
+#
+# MSVC only has coarse /arch levels, so several architectures share one flag
+# and the non-x86 and AMX architectures get none. GCC-style compilers get one
+# -m flag per feature, often with -mno-* flags to exclude the next level up.
+FUNCTION(YNN_GET_ARCH_COPTS ARCH VAR)
+  IF(MSVC)
+    IF(ARCH MATCHES "^x86_avx512")
+      SET(${VAR} "/arch:AVX512" PARENT_SCOPE)
+    ELSEIF(ARCH STREQUAL "x86_avx2" OR ARCH STREQUAL "x86_avx2_fma3")
+      SET(${VAR} "/arch:AVX2" PARENT_SCOPE)
+    # x86_f16c_fma3 is grouped with x86_f16c and x86_fma3: the non-MSVC branch
+    # below also builds it at the AVX level (-mavx ... -mno-avx2).
+    ELSEIF(ARCH STREQUAL "x86_avx" OR ARCH STREQUAL "x86_fma3" OR ARCH STREQUAL "x86_f16c" OR ARCH STREQUAL "x86_f16c_fma3")
+      SET(${VAR} "/arch:AVX" PARENT_SCOPE)
+    ELSEIF(ARCH STREQUAL "x86_sse2" OR ARCH STREQUAL "x86_ssse3" OR ARCH STREQUAL "x86_sse41")
+      SET(${VAR} "/arch:SSE2" PARENT_SCOPE)
+    ELSE()
+      SET(${VAR} "" PARENT_SCOPE)
+    ENDIF()
+  ELSE()
+    IF(ARCH STREQUAL "x86_sse2")
+      SET(${VAR} "-msse2" "-mno-ssse3" PARENT_SCOPE)
+    ELSEIF(ARCH STREQUAL "x86_ssse3")
+      SET(${VAR} "-mssse3" "-mno-sse4.1" PARENT_SCOPE)
+    ELSEIF(ARCH STREQUAL "x86_sse41")
+      SET(${VAR} "-msse4.1" "-mno-sse4.2" PARENT_SCOPE)
+    ELSEIF(ARCH STREQUAL "x86_avx")
+      SET(${VAR} "-mavx" "-mno-avx2" "-mno-f16c" "-mno-fma" PARENT_SCOPE)
+    ELSEIF(ARCH STREQUAL "x86_f16c")
+      SET(${VAR} "-mf16c" PARENT_SCOPE)
+    ELSEIF(ARCH STREQUAL "x86_f16c_fma3")
+      SET(${VAR} "-mf16c" "-mavx" "-mfma" "-mno-avx2" PARENT_SCOPE)
+    ELSEIF(ARCH STREQUAL "x86_avx2")
+      SET(${VAR} "-mavx2" PARENT_SCOPE)
+    ELSEIF(ARCH STREQUAL "x86_fma3")
+      SET(${VAR} "-mavx" "-mfma" "-mno-avx2" PARENT_SCOPE)
+    ELSEIF(ARCH STREQUAL "x86_avx2_fma3")
+      SET(${VAR} "-mavx2" "-mfma" PARENT_SCOPE)
+    ELSEIF(ARCH STREQUAL "x86_avx512")
+      SET(${VAR} "-mavx512f" "-mavx512bw" "-mavx512vl" "-mavx512dq" PARENT_SCOPE)
+    ELSEIF(ARCH STREQUAL "x86_avx512bf16")
+      SET(${VAR} "-mavx512f" "-mavx512bw" "-mavx512vl" "-mavx512dq" "-mavx512bf16" PARENT_SCOPE)
+    ELSEIF(ARCH STREQUAL "x86_avx512fp16")
+      SET(${VAR} "-mavx512f" "-mavx512bw" "-mavx512vl" "-mavx512dq" "-mavx512fp16" PARENT_SCOPE)
+    ELSEIF(ARCH STREQUAL "x86_avx512vnni")
+      SET(${VAR} "-mavx512f" "-mavx512bw" "-mavx512vl" "-mavx512dq" "-mavx512vnni" PARENT_SCOPE)
+    ELSEIF(ARCH STREQUAL "x86_amxbf16")
+      SET(${VAR} "-mamx-tile" "-mamx-bf16" PARENT_SCOPE)
+    ELSEIF(ARCH STREQUAL "x86_amxfp16")
+      SET(${VAR} "-mamx-tile" "-mamx-fp16" PARENT_SCOPE)
+    ELSEIF(ARCH STREQUAL "x86_amxint8")
+      SET(${VAR} "-mamx-tile" "-mamx-int8" PARENT_SCOPE)
+    # The arm_* architectures get -marm/-mfpu flags only for the 32-bit "arm"
+    # target; for other targets only the -march extension (if any) is passed.
+    ELSEIF(ARCH STREQUAL "arm_neon")
+      IF(YNNPACK_TARGET_PROCESSOR STREQUAL "arm")
+        SET(${VAR} "-marm" "-march=armv7-a" "-mfpu=neon" PARENT_SCOPE)
+      ELSE()
+        SET(${VAR} "" PARENT_SCOPE)
+      ENDIF()
+    ELSEIF(ARCH STREQUAL "arm_neondot")
+      IF(YNNPACK_TARGET_PROCESSOR STREQUAL "arm")
+        SET(${VAR} "-marm" "-march=armv8.2-a+dotprod" "-mfpu=neon-fp-armv8" PARENT_SCOPE)
+      ELSE()
+        SET(${VAR} "-march=armv8.2-a+dotprod" PARENT_SCOPE)
+      ENDIF()
+    ELSEIF(ARCH STREQUAL "arm_neonfma")
+      IF(YNNPACK_TARGET_PROCESSOR STREQUAL "arm")
+        SET(${VAR} "-marm" "-march=armv7-a" "-mfpu=neon-vfpv4" PARENT_SCOPE)
+      ELSE()
+        SET(${VAR} "" PARENT_SCOPE)
+      ENDIF()
+    ELSEIF(ARCH STREQUAL "arm_neonfp16")
+      IF(YNNPACK_TARGET_PROCESSOR STREQUAL "arm")
+        SET(${VAR} "-marm" "-march=armv7-a" "-mfpu=neon-fp16" PARENT_SCOPE)
+      ELSE()
+        SET(${VAR} "" PARENT_SCOPE)
+      ENDIF()
+    ELSEIF(ARCH STREQUAL "arm_neonfp16arith")
+      IF(YNNPACK_TARGET_PROCESSOR STREQUAL "arm")
+        SET(${VAR} "-marm" "-march=armv8.2-a+fp16" "-mfpu=neon-fp-armv8" PARENT_SCOPE)
+      ELSE()
+        SET(${VAR} "-march=armv8.2-a+fp16" PARENT_SCOPE)
+      ENDIF()
+    ELSEIF(ARCH STREQUAL "arm_neonbf16")
+      IF(YNNPACK_TARGET_PROCESSOR STREQUAL "arm")
+        SET(${VAR} "-marm" "-march=armv8.2-a+bf16" "-mfpu=neon-fp-armv8" PARENT_SCOPE)
+      ELSE()
+        SET(${VAR} "-march=armv8.2-a+bf16" PARENT_SCOPE)
+      ENDIF()
+    ELSEIF(ARCH STREQUAL "arm64_neon")
+      SET(${VAR} "" PARENT_SCOPE)
+    ELSEIF(ARCH STREQUAL "arm64_neoni8mm")
+      SET(${VAR} "-march=armv8.2-a+i8mm" PARENT_SCOPE)
+    # On Apple platforms the SME flags omit +sve.
+    ELSEIF(ARCH STREQUAL "arm64_sme")
+      IF(APPLE)
+        SET(${VAR} "-march=armv8.2-a+sme" PARENT_SCOPE)
+      ELSE()
+        SET(${VAR} "-march=armv8.2-a+sve+sme" PARENT_SCOPE)
+      ENDIF()
+    ELSEIF(ARCH STREQUAL "arm64_sme2")
+      IF(APPLE)
+        SET(${VAR} "-march=armv8.2-a+sme2" PARENT_SCOPE)
+      ELSE()
+        SET(${VAR} "-march=armv8.2-a+sve+sme2" PARENT_SCOPE)
+      ENDIF()
+    ELSEIF(ARCH STREQUAL "arm64_sve")
+      SET(${VAR} "-march=armv8.2-a+sve" PARENT_SCOPE)
+    ELSEIF(ARCH STREQUAL "hexagon_hvx")
+      SET(${VAR} "-mhvx" PARENT_SCOPE)
+    ELSEIF(ARCH STREQUAL "wasm_simd128")
+      SET(${VAR} "-msimd128" PARENT_SCOPE)
+    ELSE()
+      SET(${VAR} "" PARENT_SCOPE)
+    ENDIF()
+  ENDIF()
+ENDFUNCTION()
diff --git a/ynnpack/kernels/binary/CMakeLists.txt b/ynnpack/kernels/binary/CMakeLists.txt
new file mode 100644
index 00000000000..aec5e3033e1
--- /dev/null
+++ b/ynnpack/kernels/binary/CMakeLists.txt
@@ -0,0 +1,64 @@
+# Copyright 2026 Google LLC
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+FIND_PACKAGE(Python3 REQUIRED)
+
+INCLUDE(../arch_copts.cmake)
+
+# Generated kernel sources; YNN_GENERATE_BINARY_KERNEL appends to this list.
+SET(BINARY_KERNEL_SRCS)
+
+# YNN_GENERATE_BINARY_KERNEL(<name> <arch> [<generator args>...])
+#
+# If <arch> is enabled (see YNN_CHECK_ARCH_ENABLED), registers a build rule
+# that runs generator.py to produce <name>.cc and <name>.inc under
+# <build>/ynnpack/kernels/binary. Extra arguments are forwarded to the
+# generator. The .cc gets the compile options of <arch>, is appended to
+# BINARY_KERNEL_SRCS in the caller's scope, and its path is also published as
+# <name>_SRC. Nothing happens when <arch> is disabled.
+FUNCTION(YNN_GENERATE_BINARY_KERNEL NAME ARCH)
+  YNN_CHECK_ARCH_ENABLED(${ARCH} ENABLED)
+  IF(ENABLED)
+    SET(OUTPUT_DIR "${CMAKE_BINARY_DIR}/ynnpack/kernels/binary")
+    SET(OUTPUT_SRC "${OUTPUT_DIR}/${NAME}.cc")
+    SET(OUTPUT_INC "${OUTPUT_DIR}/${NAME}.inc")
+
+    # Directory that contains ynnpack/. It is derived from this project's own
+    # source directory instead of CMAKE_SOURCE_DIR, which names the top of the
+    # whole build and points elsewhere when this tree is added as a subproject.
+    GET_FILENAME_COMPONENT(YNN_REPO_ROOT "${PROJECT_SOURCE_DIR}/.." ABSOLUTE)
+
+    ADD_CUSTOM_COMMAND(
+      OUTPUT "${OUTPUT_SRC}" "${OUTPUT_INC}"
+      COMMAND ${CMAKE_COMMAND} -E make_directory "${OUTPUT_DIR}"
+      COMMAND ${CMAKE_COMMAND} -E env "PYTHONPATH=${YNN_REPO_ROOT}"
+              ${Python3_EXECUTABLE} "${CMAKE_CURRENT_SOURCE_DIR}/generator.py"
+              "${OUTPUT_SRC}" "${OUTPUT_INC}" "${ARCH}" ${ARGN}
+      DEPENDS "${CMAKE_CURRENT_SOURCE_DIR}/generator.py"
+              "${CMAKE_CURRENT_SOURCE_DIR}/kernels.py"
+              "${PROJECT_SOURCE_DIR}/kernels/elementwise/generator.py"
+      WORKING_DIRECTORY "${YNN_REPO_ROOT}"
+      VERBATIM
+    )
+
+    YNN_GET_ARCH_COPTS(${ARCH} ARCH_COPTS)
+    SET_SOURCE_FILES_PROPERTIES("${OUTPUT_SRC}" PROPERTIES COMPILE_OPTIONS "${ARCH_COPTS}")
+
+    SET(${NAME}_SRC "${OUTPUT_SRC}" PARENT_SCOPE)
+    LIST(APPEND BINARY_KERNEL_SRCS "${OUTPUT_SRC}")
+    SET(BINARY_KERNEL_SRCS ${BINARY_KERNEL_SRCS} PARENT_SCOPE)
+  ENDIF()
+ENDFUNCTION()
+
+# x86
+YNN_GENERATE_BINARY_KERNEL(x86_sse2 x86_sse2)
+YNN_GENERATE_BINARY_KERNEL(x86_avx x86_avx)
+YNN_GENERATE_BINARY_KERNEL(x86_avx2 x86_avx2)
+YNN_GENERATE_BINARY_KERNEL(x86_avx512 x86_avx512)
+YNN_GENERATE_BINARY_KERNEL(x86_avx512bf16 x86_avx512bf16)
+
+# ARM
+YNN_GENERATE_BINARY_KERNEL(arm_neon arm_neon)
+
+ADD_LIBRARY(ynnpack_kernels_binary STATIC
+  binary.cc
+  ${BINARY_KERNEL_SRCS}
+)
+
+# The top of the build tree is on the include path because the generated files
+# live under <build>/ynnpack/kernels/binary.
+TARGET_INCLUDE_DIRECTORIES(ynnpack_kernels_binary PUBLIC . ${CMAKE_BINARY_DIR})
+TARGET_LINK_LIBRARIES(ynnpack_kernels_binary PUBLIC ynnpack_base)
+
+IF(YNNPACK_BUILD_TESTS)
+  ADD_LIBRARY(ynnpack_kernels_binary_reference STATIC reference.cc)
+  TARGET_LINK_LIBRARIES(ynnpack_kernels_binary_reference PUBLIC ynnpack ynnpack_base ynnpack_base_test_tensor gtest)
+
+  ADD_EXECUTABLE(ynnpack_kernels_binary_test test.cc)
+  TARGET_LINK_LIBRARIES(ynnpack_kernels_binary_test ynnpack_kernels_binary ynnpack_kernels_binary_reference ynnpack ynnpack_base ynnpack_base_test_fuzz ynnpack_base_test_random ynnpack_base_test_tensor ynnpack_base_test_util gtest gtest_main)
+  ADD_TEST(NAME ynnpack_kernels_binary_test COMMAND ynnpack_kernels_binary_test)
+ENDIF()
diff --git a/ynnpack/kernels/dot/CMakeLists.txt b/ynnpack/kernels/dot/CMakeLists.txt
new file mode 100644
index 00000000000..059460680d5
--- /dev/null
+++ b/ynnpack/kernels/dot/CMakeLists.txt
@@ -0,0 +1,217 @@
+# Copyright 2026 Google LLC
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+FIND_PACKAGE(Python3 REQUIRED)
+
+INCLUDE(../arch_copts.cmake)
+
+# Generates dot kernels from a script.
+FUNCTION(YNN_GENERATE_DOT_KERNELS SCRIPT)
+  # Registers one build rule that runs generator/SCRIPT.py to produce a .cc and
+  # a .inc under <build>/ynnpack/kernels/dot for every kernel name passed after
+  # SCRIPT. The complete list of output paths is given to the script as its
+  # command-line arguments.
+  SET(OUTPUT_DIR "${CMAKE_BINARY_DIR}/ynnpack/kernels/dot")
+  SET(OUTPUTS)
+  FOREACH(NAME ${ARGN})
+    LIST(APPEND OUTPUTS "${OUTPUT_DIR}/${NAME}.cc" "${OUTPUT_DIR}/${NAME}.inc")
+  ENDFOREACH()
+
+  # Directory that contains ynnpack/. It is derived from this project's own
+  # source directory instead of CMAKE_SOURCE_DIR, which names the top of the
+  # whole build and points elsewhere when this tree is added as a subproject.
+  GET_FILENAME_COMPONENT(YNN_REPO_ROOT "${PROJECT_SOURCE_DIR}/.." ABSOLUTE)
+
+  ADD_CUSTOM_COMMAND(
+    OUTPUT ${OUTPUTS}
+    COMMAND ${CMAKE_COMMAND} -E make_directory "${OUTPUT_DIR}"
+    COMMAND ${CMAKE_COMMAND} -E env "PYTHONPATH=${YNN_REPO_ROOT}"
+            ${Python3_EXECUTABLE} "${CMAKE_CURRENT_SOURCE_DIR}/generator/${SCRIPT}.py" ${OUTPUTS}
+    DEPENDS "${CMAKE_CURRENT_SOURCE_DIR}/generator/${SCRIPT}.py"
+            "${CMAKE_CURRENT_SOURCE_DIR}/generator/dot_base.py"
+            "${CMAKE_CURRENT_SOURCE_DIR}/generator/x86.py"
+            "${CMAKE_CURRENT_SOURCE_DIR}/generator/arm.py"
+    WORKING_DIRECTORY "${YNN_REPO_ROOT}"
+    VERBATIM
+  )
+ENDFUNCTION()
+
+# Sources of all enabled dot kernels; the helpers in this file append to it.
+SET(DOT_KERNEL_SRCS)
+
+# Helper to collect variants and generate them if any are enabled.
+#
+# YNN_ADD_DOT_KERNELS(<script> <arch> <variant> [<arch> <variant>]...)
+#
+# Keeps the variants whose <arch> is enabled, generates them with one call to
+# YNN_GENERATE_DOT_KERNELS, gives each generated .cc the compile options of its
+# <arch>, and appends those sources to DOT_KERNEL_SRCS in the caller's scope.
+# Does nothing when no listed architecture is enabled.
+FUNCTION(YNN_ADD_DOT_KERNELS SCRIPT)
+  SET(ACTIVE_VARIANTS)
+  SET(ACTIVE_ARCHS)
+  # Arguments after SCRIPT are pairs of (ARCH, VARIANT_NAME)
+  LIST(LENGTH ARGN NUM_ARGS)
+  MATH(EXPR UNPAIRED_ARGS "${NUM_ARGS} % 2")
+  IF(UNPAIRED_ARGS)
+    MESSAGE(FATAL_ERROR "YNN_ADD_DOT_KERNELS(${SCRIPT}): expected (ARCH VARIANT_NAME) pairs but got ${NUM_ARGS} arguments")
+  ENDIF()
+
+  SET(INDEX 0)
+  WHILE(INDEX LESS NUM_ARGS)
+    LIST(GET ARGN ${INDEX} ARCH_NAME)
+    MATH(EXPR INDEX "${INDEX} + 1")
+    LIST(GET ARGN ${INDEX} VARIANT_NAME)
+    MATH(EXPR INDEX "${INDEX} + 1")
+
+    YNN_CHECK_ARCH_ENABLED(${ARCH_NAME} ENABLED)
+    IF(ENABLED)
+      LIST(APPEND ACTIVE_VARIANTS ${VARIANT_NAME})
+      LIST(APPEND ACTIVE_ARCHS ${ARCH_NAME})
+    ENDIF()
+  ENDWHILE()
+
+  IF(ACTIVE_VARIANTS)
+    YNN_GENERATE_DOT_KERNELS(${SCRIPT} ${ACTIVE_VARIANTS})
+
+    # ACTIVE_VARIANTS and ACTIVE_ARCHS are parallel lists; walk them by index.
+    LIST(LENGTH ACTIVE_VARIANTS LEN)
+    MATH(EXPR LIMIT "${LEN} - 1")
+    FOREACH(I RANGE ${LIMIT})
+      LIST(GET ACTIVE_VARIANTS ${I} V_NAME)
+      LIST(GET ACTIVE_ARCHS ${I} A_NAME)
+      SET(OUT_CC "${CMAKE_BINARY_DIR}/ynnpack/kernels/dot/${V_NAME}.cc")
+      YNN_GET_ARCH_COPTS(${A_NAME} ARCH_COPTS)
+      SET_SOURCE_FILES_PROPERTIES("${OUT_CC}" PROPERTIES COMPILE_OPTIONS "${ARCH_COPTS}")
+      LIST(APPEND DOT_KERNEL_SRCS "${OUT_CC}")
+    ENDFOREACH()
+    SET(DOT_KERNEL_SRCS ${DOT_KERNEL_SRCS} PARENT_SCOPE)
+  ENDIF()
+ENDFUNCTION()
+
+# Helper for hand-written sources.
+#
+#   ARCH  Architecture tag passed to YNN_CHECK_ARCH_ENABLED / YNN_GET_ARCH_COPTS.
+#   SRC   Source file, relative to this directory.
+#
+# When ARCH is reported as enabled, SRC gets that architecture's compile
+# options and is appended to DOT_KERNEL_SRCS in the caller's scope; otherwise
+# the call is a no-op.
+FUNCTION(YNN_ADD_DOT_SRC ARCH SRC)
+  YNN_CHECK_ARCH_ENABLED(${ARCH} ENABLED)
+  IF(ENABLED)
+    YNN_GET_ARCH_COPTS(${ARCH} ARCH_COPTS)
+    SET_SOURCE_FILES_PROPERTIES(${SRC} PROPERTIES COMPILE_OPTIONS "${ARCH_COPTS}")
+    LIST(APPEND DOT_KERNEL_SRCS ${SRC})
+    SET(DOT_KERNEL_SRCS ${DOT_KERNEL_SRCS} PARENT_SCOPE)
+  ENDIF()
+ENDFUNCTION()
+
+# --- x86 kernels ---
+# Each call names a generator script followed by (ARCH, VARIANT_NAME) pairs.
+
+YNN_ADD_DOT_KERNELS(x86_fp32
+  x86_sse2 x86_sse2_fp32
+  x86_avx x86_avx_fp32
+  x86_fma3 x86_fma3_fp32
+  x86_avx512 x86_avx512_fp32
+)
+
+YNN_ADD_DOT_KERNELS(x86_fp32_k2
+  x86_avx2 x86_avx2_fp32_k2
+  x86_avx2_fma3 x86_avx2_fma3_fp32_k2
+  x86_avx512 x86_avx512_fp32_k2
+)
+
+YNN_ADD_DOT_KERNELS(x86_fp32_k4
+  x86_avx512 x86_avx512_fp32_k4
+)
+
+YNN_ADD_DOT_KERNELS(x86_fp64
+  x86_avx x86_avx_fp64
+  x86_fma3 x86_fma3_fp64
+  x86_avx512 x86_avx512_fp64
+)
+
+# NOTE(review): the last pair uses arch tag x86_avx512fp16 but the variant is
+# named x86_avx512_*; every other pair in this file repeats its arch tag in the
+# variant name. Confirm the arch tag (and hence the compile options) is
+# intended.
+YNN_ADD_DOT_KERNELS(x86_fp16_fp16_fp32
+  x86_f16c x86_f16c_fp16_fp16_fp32
+  x86_f16c_fma3 x86_f16c_fma3_fp16_fp16_fp32
+  x86_avx512fp16 x86_avx512_fp16_fp16_fp32
+)
+
+YNN_ADD_DOT_KERNELS(x86_bf16_bf16_fp32
+  x86_avx2_fma3 x86_avx2_fma3_bf16_bf16_fp32
+  x86_avx512 x86_avx512_bf16_bf16_fp32
+  x86_avx512bf16 x86_avx512bf16_bf16_bf16_fp32
+)
+
+YNN_ADD_DOT_KERNELS(x86_bf16_bf16_fp32_k1
+  x86_avx2_fma3 x86_avx2_fma3_bf16_bf16_fp32_k1
+  x86_avx512 x86_avx512_bf16_bf16_fp32_k1
+)
+
+YNN_ADD_DOT_KERNELS(x86_int8_int8_int32
+  x86_avx2 x86_avx2_int8_int8_int32
+  x86_avx512 x86_avx512_int8_int8_int32
+)
+
+YNN_ADD_DOT_KERNELS(x86_int8_int8_int32_k1
+  x86_avx2 x86_avx2_int8_int8_int32_k1
+  x86_avx512 x86_avx512_int8_int8_int32_k1
+)
+
+YNN_ADD_DOT_KERNELS(x86_int8_int8_int32_k16
+  x86_avx512 x86_avx512_int8_int8_int32_k16
+)
+
+YNN_ADD_DOT_KERNELS(x86_uint8_int8_int32
+  x86_avx512vnni x86_avx512vnni_uint8_int8_int32
+)
+
+YNN_ADD_DOT_KERNELS(x86_uint8_int8_int32_k16
+  x86_avx512vnni x86_avx512vnni_uint8_int8_int32_k16
+)
+
+YNN_ADD_DOT_SRC(x86_amxbf16 x86_amxbf16.cc)
+YNN_ADD_DOT_SRC(x86_amxfp16 x86_amxfp16.cc)
+YNN_ADD_DOT_SRC(x86_amxint8 x86_amxint8.cc)
+
+# --- ARM kernels ---
+
+YNN_ADD_DOT_KERNELS(arm_fp32
+  arm64_neon arm64_neon_fp32
+)
+
+YNN_ADD_DOT_KERNELS(arm_fp64
+  arm64_neon arm64_neon_fp64
+)
+
+YNN_ADD_DOT_KERNELS(arm_bf16_bf16_fp32
+  arm64_neon arm64_neon_bf16_bf16_fp32
+)
+
+YNN_ADD_DOT_KERNELS(arm_int8_int8_int32
+  arm64_neoni8mm arm64_neoni8mm_int8_int8_int32
+  arm_neon arm_neon_int8_int8_int32
+  arm_neondot arm_neondot_int8_int8_int32
+)
+
+YNN_ADD_DOT_KERNELS(arm_bf16_bf16_fp32_k2
+  arm_neonbf16 arm_neonbf16_bf16_bf16_fp32_k2
+)
+
+YNN_ADD_DOT_KERNELS(arm_bf16_bf16_fp32_k4
+  arm_neonbf16 arm_neonbf16_bf16_bf16_fp32_k4
+)
+
+YNN_ADD_DOT_SRC(arm64_sme arm64_sme.cc)
+YNN_ADD_DOT_SRC(arm64_sme2 arm64_sme2.cc)
+
+# The dot library: fixed sources plus whatever the calls above collected.
+ADD_LIBRARY(ynnpack_kernels_dot STATIC
+  dot.cc
+  pack.cc
+  schedule.cc
+  ${DOT_KERNEL_SRCS}
+)
+
+# ${CMAKE_BINARY_DIR} is on the include path because the generated files are
+# written below it (see YNN_GENERATE_DOT_KERNELS).
+TARGET_INCLUDE_DIRECTORIES(ynnpack_kernels_dot PUBLIC . ${CMAKE_BINARY_DIR})
+TARGET_LINK_LIBRARIES(ynnpack_kernels_dot PUBLIC ynnpack_base ynnpack_kernels_transpose)
+
+# NOTE(review): the stubs are added on every arm64 build, including builds where
+# arm64_sme.cc / arm64_sme2.cc were added above; presumably the stubs file
+# guards against duplicate definitions itself. Confirm.
+IF(YNNPACK_TARGET_PROCESSOR STREQUAL "arm64")
+  TARGET_SOURCES(ynnpack_kernels_dot PRIVATE arm64_sme_stubs.cc)
+ENDIF()
+
+IF(YNNPACK_BUILD_TESTS)
+  # Header-only helper target shared by the dot tests.
+  ADD_LIBRARY(ynnpack_kernels_dot_test_tensor INTERFACE)
+  TARGET_INCLUDE_DIRECTORIES(ynnpack_kernels_dot_test_tensor INTERFACE .)
+  TARGET_LINK_LIBRARIES(ynnpack_kernels_dot_test_tensor INTERFACE
+    ynnpack_kernels_dot
+    ynnpack_base
+    ynnpack_base_test_buffer
+    ynnpack_base_test_tensor
+  )
+
+  # Test executables link PRIVATE: nothing consumes them, and the explicit
+  # keyword avoids the legacy keyword-less signature.
+  ADD_EXECUTABLE(ynnpack_kernels_dot_test test.cc)
+  TARGET_LINK_LIBRARIES(ynnpack_kernels_dot_test PRIVATE
+    ynnpack_kernels_dot
+    ynnpack_kernels_dot_test_tensor
+    ynnpack
+    ynnpack_base
+    ynnpack_base_test_buffer
+    ynnpack_base_test_fuzz
+    ynnpack_base_test_random
+    ynnpack_base_test_tensor
+    ynnpack_base_test_util
+    gtest
+    gtest_main
+  )
+  ADD_TEST(NAME ynnpack_kernels_dot_test COMMAND ynnpack_kernels_dot_test)
+
+  ADD_EXECUTABLE(ynnpack_kernels_dot_consistent_arithmetic_test consistent_arithmetic_test.cc)
+  TARGET_LINK_LIBRARIES(ynnpack_kernels_dot_consistent_arithmetic_test PRIVATE
+    ynnpack_kernels_dot
+    ynnpack_kernels_dot_test_tensor
+    ynnpack_base
+    ynnpack_base_test_buffer
+    ynnpack_base_test_fuzz
+    ynnpack_base_test_random
+    ynnpack_base_test_tensor
+    ynnpack_base_test_util
+    gtest
+    gtest_main
+  )
+  ADD_TEST(NAME ynnpack_kernels_dot_consistent_arithmetic_test COMMAND ynnpack_kernels_dot_consistent_arithmetic_test)
+
+  ADD_EXECUTABLE(ynnpack_kernels_dot_schedule_test schedule_test.cc)
+  TARGET_LINK_LIBRARIES(ynnpack_kernels_dot_schedule_test PRIVATE
+    ynnpack_kernels_dot
+    gmock
+    gtest
+    gtest_main
+  )
+  ADD_TEST(NAME ynnpack_kernels_dot_schedule_test COMMAND ynnpack_kernels_dot_schedule_test)
+
+  ADD_EXECUTABLE(ynnpack_kernels_dot_pack_test pack_test.cc)
+  TARGET_LINK_LIBRARIES(ynnpack_kernels_dot_pack_test PRIVATE
+    ynnpack_kernels_dot
+    ynnpack_base
+    gmock
+    gtest
+    gtest_main
+  )
+  ADD_TEST(NAME ynnpack_kernels_dot_pack_test COMMAND ynnpack_kernels_dot_pack_test)
+
+  ADD_EXECUTABLE(ynnpack_kernels_dot_get_dot_kernel_test get_dot_kernel_test.cc)
+  TARGET_LINK_LIBRARIES(ynnpack_kernels_dot_get_dot_kernel_test PRIVATE
+    ynnpack_kernels_dot
+    ynnpack
+    ynnpack_base
+    gtest
+    gtest_main
+  )
+  ADD_TEST(NAME ynnpack_kernels_dot_get_dot_kernel_test COMMAND ynnpack_kernels_dot_get_dot_kernel_test)
+ENDIF()
diff --git a/ynnpack/kernels/lut/CMakeLists.txt b/ynnpack/kernels/lut/CMakeLists.txt
new file mode 100644
index
00000000000..4d282f847cd
--- /dev/null
+++ b/ynnpack/kernels/lut/CMakeLists.txt
@@ -0,0 +1,7 @@
+# Copyright 2026 Google LLC
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+ADD_LIBRARY(ynnpack_kernels_lut STATIC lut.cc)
+TARGET_LINK_LIBRARIES(ynnpack_kernels_lut PUBLIC ynnpack_base)
diff --git a/ynnpack/kernels/reduce/CMakeLists.txt b/ynnpack/kernels/reduce/CMakeLists.txt
new file mode 100644
index 00000000000..93067833c6e
--- /dev/null
+++ b/ynnpack/kernels/reduce/CMakeLists.txt
@@ -0,0 +1,44 @@
+# Copyright 2026 Google LLC
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+ADD_LIBRARY(ynnpack_kernels_reduce STATIC reduce.cc)
+TARGET_LINK_LIBRARIES(ynnpack_kernels_reduce PUBLIC ynnpack_base)
+
+INCLUDE(../arch_copts.cmake)
+
+SET(REDUCE_KERNEL_SRCS)
+
+# Registers one architecture-specific reduce source.
+#
+#   ARCH  Architecture tag passed to YNN_CHECK_ARCH_ENABLED / YNN_GET_ARCH_COPTS.
+#   SRC   Source file, relative to this directory.
+#
+# When ARCH is reported as enabled, SRC gets that architecture's compile
+# options and is appended to REDUCE_KERNEL_SRCS in the caller's scope;
+# otherwise the call is a no-op.
+FUNCTION(YNN_ADD_REDUCE_SRC ARCH SRC)
+  YNN_CHECK_ARCH_ENABLED(${ARCH} ENABLED)
+  IF(ENABLED)
+    YNN_GET_ARCH_COPTS(${ARCH} ARCH_COPTS)
+    SET_SOURCE_FILES_PROPERTIES(${SRC} PROPERTIES COMPILE_OPTIONS "${ARCH_COPTS}")
+    LIST(APPEND REDUCE_KERNEL_SRCS ${SRC})
+    SET(REDUCE_KERNEL_SRCS ${REDUCE_KERNEL_SRCS} PARENT_SCOPE)
+  ENDIF()
+ENDFUNCTION()
+
+# ARM
+YNN_ADD_REDUCE_SRC(arm_neon arm_neon.cc)
+YNN_ADD_REDUCE_SRC(arm_neonbf16 arm_neonbf16.cc)
+YNN_ADD_REDUCE_SRC(arm_neondot arm_neondot.cc)
+YNN_ADD_REDUCE_SRC(arm_neonfma arm_neonfma.cc)
+YNN_ADD_REDUCE_SRC(arm_neonfp16 arm_neonfp16.cc)
+
+# x86
+YNN_ADD_REDUCE_SRC(x86_sse2 x86_sse2.cc)
+YNN_ADD_REDUCE_SRC(x86_sse41 x86_sse41.cc)
+YNN_ADD_REDUCE_SRC(x86_ssse3 x86_ssse3.cc)
+YNN_ADD_REDUCE_SRC(x86_avx x86_avx.cc)
+YNN_ADD_REDUCE_SRC(x86_avx2 x86_avx2.cc)
+YNN_ADD_REDUCE_SRC(x86_avx2_fma3 x86_avx2_fma3.cc)
+YNN_ADD_REDUCE_SRC(x86_avx512 x86_avx512.cc)
+YNN_ADD_REDUCE_SRC(x86_avx512bf16 x86_avx512bf16.cc)
+YNN_ADD_REDUCE_SRC(x86_f16c x86_f16c.cc)
+
+# Others
+YNN_ADD_REDUCE_SRC(hexagon_hvx hexagon_hvx.cc)
+
+# The library is declared before the list is known, so attach the collected
+# sources afterwards.
+TARGET_SOURCES(ynnpack_kernels_reduce PRIVATE ${REDUCE_KERNEL_SRCS})
diff --git a/ynnpack/kernels/ternary/CMakeLists.txt b/ynnpack/kernels/ternary/CMakeLists.txt
new file mode 100644
index 00000000000..a1579b42d44
--- /dev/null
+++ b/ynnpack/kernels/ternary/CMakeLists.txt
@@ -0,0 +1,65 @@
+# Copyright 2026 Google LLC
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+FIND_PACKAGE(Python3 REQUIRED)
+
+INCLUDE(../arch_copts.cmake)
+
+SET(TERNARY_KERNEL_SRCS)
+
+# Generates the ternary kernel sources for one architecture, if it is enabled.
+#
+#   NAME  Base name of the generated NAME.cc / NAME.inc pair, written to
+#         ${CMAKE_BINARY_DIR}/ynnpack/kernels/ternary.
+#   ARCH  Architecture tag; checked with YNN_CHECK_ARCH_ENABLED and passed to
+#         generator.py as its third argument.
+#   ARGN  Extra arguments appended to the generator.py command line.
+#
+# When ARCH is enabled, NAME.cc gets that architecture's compile options, is
+# appended to TERNARY_KERNEL_SRCS, and its path is also stored in NAME_SRC;
+# both variables are set in the caller's scope. Otherwise the call is a no-op.
+FUNCTION(YNN_GENERATE_TERNARY_KERNEL NAME ARCH)
+  YNN_CHECK_ARCH_ENABLED(${ARCH} ENABLED)
+  IF(ENABLED)
+    SET(OUTPUT_SRC "${CMAKE_BINARY_DIR}/ynnpack/kernels/ternary/${NAME}.cc")
+    SET(OUTPUT_INC "${CMAKE_BINARY_DIR}/ynnpack/kernels/ternary/${NAME}.inc")
+
+    ADD_CUSTOM_COMMAND(
+      OUTPUT "${OUTPUT_SRC}" "${OUTPUT_INC}"
+      COMMAND ${CMAKE_COMMAND} -E make_directory "${CMAKE_BINARY_DIR}/ynnpack/kernels/ternary"
+      COMMAND ${CMAKE_COMMAND} -E env "PYTHONPATH=${CMAKE_SOURCE_DIR}"
+              ${Python3_EXECUTABLE} "${CMAKE_CURRENT_SOURCE_DIR}/generator.py"
+              "${OUTPUT_SRC}" "${OUTPUT_INC}" "${ARCH}" ${ARGN}
+      DEPENDS "${CMAKE_CURRENT_SOURCE_DIR}/generator.py"
+              "${CMAKE_CURRENT_SOURCE_DIR}/kernels.py"
+              "${CMAKE_CURRENT_SOURCE_DIR}/convert.py"
+              "${CMAKE_SOURCE_DIR}/ynnpack/kernels/elementwise/generator.py"
+      WORKING_DIRECTORY "${CMAKE_SOURCE_DIR}"
+      # Without VERBATIM, argument escaping depends on the platform's build tool.
+      VERBATIM
+    )
+
+    YNN_GET_ARCH_COPTS(${ARCH} ARCH_COPTS)
+    SET_SOURCE_FILES_PROPERTIES("${OUTPUT_SRC}" PROPERTIES COMPILE_OPTIONS "${ARCH_COPTS}")
+
+    SET(${NAME}_SRC "${OUTPUT_SRC}" PARENT_SCOPE)
+    LIST(APPEND TERNARY_KERNEL_SRCS "${OUTPUT_SRC}")
+    SET(TERNARY_KERNEL_SRCS ${TERNARY_KERNEL_SRCS} PARENT_SCOPE)
+  ENDIF()
+ENDFUNCTION()
+
+# x86
+YNN_GENERATE_TERNARY_KERNEL(x86_sse2 x86_sse2)
+YNN_GENERATE_TERNARY_KERNEL(x86_sse41 x86_sse41)
+YNN_GENERATE_TERNARY_KERNEL(x86_avx x86_avx)
+YNN_GENERATE_TERNARY_KERNEL(x86_avx2 x86_avx2)
+YNN_GENERATE_TERNARY_KERNEL(x86_avx512 x86_avx512)
+
+# ARM
+YNN_GENERATE_TERNARY_KERNEL(arm_neon arm_neon)
+
+ADD_LIBRARY(ynnpack_kernels_ternary STATIC
+  ternary.cc
+  ${TERNARY_KERNEL_SRCS}
+)
+
+# ${CMAKE_BINARY_DIR} is on the include path because the generated files are
+# written below it.
+TARGET_INCLUDE_DIRECTORIES(ynnpack_kernels_ternary PUBLIC . ${CMAKE_BINARY_DIR})
+TARGET_LINK_LIBRARIES(ynnpack_kernels_ternary PUBLIC ynnpack_base)
+
+IF(YNNPACK_BUILD_TESTS)
+  ADD_LIBRARY(ynnpack_kernels_ternary_reference STATIC reference.cc)
+  TARGET_LINK_LIBRARIES(ynnpack_kernels_ternary_reference PUBLIC
+    ynnpack
+    ynnpack_base
+    ynnpack_base_test_tensor
+    gtest
+  )
+
+  # The test executable links PRIVATE: nothing consumes it, and the explicit
+  # keyword avoids the legacy keyword-less signature.
+  ADD_EXECUTABLE(ynnpack_kernels_ternary_test test.cc)
+  TARGET_LINK_LIBRARIES(ynnpack_kernels_ternary_test PRIVATE
+    ynnpack_kernels_ternary
+    ynnpack_kernels_ternary_reference
+    ynnpack
+    ynnpack_base
+    ynnpack_base_test_fuzz
+    ynnpack_base_test_random
+    ynnpack_base_test_tensor
+    ynnpack_base_test_util
+    gtest
+    gtest_main
+  )
+  ADD_TEST(NAME ynnpack_kernels_ternary_test COMMAND ynnpack_kernels_ternary_test)
+ENDIF()
diff --git a/ynnpack/kernels/transpose/CMakeLists.txt b/ynnpack/kernels/transpose/CMakeLists.txt
new file mode 100644
index 00000000000..7f2ccb5cbcc
--- /dev/null
+++ b/ynnpack/kernels/transpose/CMakeLists.txt
@@ -0,0 +1,46 @@
+# Copyright 2026 Google LLC
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+ADD_LIBRARY(ynnpack_kernels_transpose STATIC
+  interleave.cc
+  transpose.cc
+)
+
+TARGET_LINK_LIBRARIES(ynnpack_kernels_transpose PUBLIC ynnpack_base)
+
+INCLUDE(../arch_copts.cmake)
+
+SET(TRANSPOSE_KERNEL_SRCS)
+
+# Registers one architecture-specific transpose source.
+#
+#   ARCH  Architecture tag passed to YNN_CHECK_ARCH_ENABLED / YNN_GET_ARCH_COPTS.
+#   SRC   Source file, relative to this directory.
+#
+# When ARCH is reported as enabled, SRC gets that architecture's compile
+# options and is appended to TRANSPOSE_KERNEL_SRCS in the caller's scope;
+# otherwise the call is a no-op.
+FUNCTION(YNN_ADD_TRANSPOSE_SRC ARCH SRC)
+  YNN_CHECK_ARCH_ENABLED(${ARCH} ENABLED)
+  IF(ENABLED)
+    YNN_GET_ARCH_COPTS(${ARCH} ARCH_COPTS)
+    SET_SOURCE_FILES_PROPERTIES(${SRC} PROPERTIES COMPILE_OPTIONS "${ARCH_COPTS}")
+    LIST(APPEND TRANSPOSE_KERNEL_SRCS ${SRC})
+    SET(TRANSPOSE_KERNEL_SRCS ${TRANSPOSE_KERNEL_SRCS} PARENT_SCOPE)
+  ENDIF()
+ENDFUNCTION()
+
+# ARM
+YNN_ADD_TRANSPOSE_SRC(arm_neon arm_neon.cc)
+YNN_ADD_TRANSPOSE_SRC(arm64_sve arm64_sve.cc)
+
+# x86
+YNN_ADD_TRANSPOSE_SRC(x86_sse2 x86_sse2.cc)
+YNN_ADD_TRANSPOSE_SRC(x86_avx x86_avx.cc)
+YNN_ADD_TRANSPOSE_SRC(x86_avx2 x86_avx2.cc)
+YNN_ADD_TRANSPOSE_SRC(x86_avx512 x86_avx512.cc)
+
+# Others
+YNN_ADD_TRANSPOSE_SRC(hexagon_hvx hexagon_hvx.cc)
+
+# The library is declared before the list is known, so attach the collected
+# sources afterwards.
+TARGET_SOURCES(ynnpack_kernels_transpose PRIVATE ${TRANSPOSE_KERNEL_SRCS})
+
+IF(YNNPACK_BUILD_TESTS)
+  # The test executable links PRIVATE: nothing consumes it, and the explicit
+  # keyword avoids the legacy keyword-less signature.
+  ADD_EXECUTABLE(ynnpack_kernels_transpose_test test.cc)
+  TARGET_LINK_LIBRARIES(ynnpack_kernels_transpose_test PRIVATE
+    ynnpack_kernels_transpose
+    ynnpack_base
+    ynnpack_base_test_tensor
+    ynnpack_base_test_util
+    gtest
+    gtest_main
+  )
+  ADD_TEST(NAME ynnpack_kernels_transpose_test COMMAND ynnpack_kernels_transpose_test)
+ENDIF()
diff --git a/ynnpack/kernels/unary/CMakeLists.txt b/ynnpack/kernels/unary/CMakeLists.txt
new file mode 100644
index 00000000000..327bee695c7
--- /dev/null
+++ b/ynnpack/kernels/unary/CMakeLists.txt
@@ -0,0 +1,73 @@
+# Copyright 2026 Google LLC
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+FIND_PACKAGE(Python3 REQUIRED)
+
+INCLUDE(../arch_copts.cmake)
+
+SET(UNARY_KERNEL_SRCS)
+
+# Generates the unary kernel sources for one architecture, if it is enabled.
+#
+#   NAME  Base name of the generated NAME.cc / NAME.inc pair, written to
+#         ${CMAKE_BINARY_DIR}/ynnpack/kernels/unary.
+#   ARCH  Architecture tag; checked with YNN_CHECK_ARCH_ENABLED and passed to
+#         generator.py as its third argument.
+#   ARGN  Extra arguments appended to the generator.py command line.
+#
+# When ARCH is enabled, NAME.cc gets that architecture's compile options, is
+# appended to UNARY_KERNEL_SRCS, and its path is also stored in NAME_SRC; both
+# variables are set in the caller's scope. Otherwise the call is a no-op.
+FUNCTION(YNN_GENERATE_UNARY_KERNEL NAME ARCH)
+  YNN_CHECK_ARCH_ENABLED(${ARCH} ENABLED)
+  IF(ENABLED)
+    SET(OUTPUT_SRC "${CMAKE_BINARY_DIR}/ynnpack/kernels/unary/${NAME}.cc")
+    SET(OUTPUT_INC "${CMAKE_BINARY_DIR}/ynnpack/kernels/unary/${NAME}.inc")
+
+    ADD_CUSTOM_COMMAND(
+      OUTPUT "${OUTPUT_SRC}" "${OUTPUT_INC}"
+      COMMAND ${CMAKE_COMMAND} -E make_directory "${CMAKE_BINARY_DIR}/ynnpack/kernels/unary"
+      COMMAND ${CMAKE_COMMAND} -E env "PYTHONPATH=${CMAKE_SOURCE_DIR}"
+              ${Python3_EXECUTABLE} "${CMAKE_CURRENT_SOURCE_DIR}/generator.py"
+              "${OUTPUT_SRC}" "${OUTPUT_INC}" "${ARCH}" ${ARGN}
+      DEPENDS "${CMAKE_CURRENT_SOURCE_DIR}/generator.py"
+              "${CMAKE_CURRENT_SOURCE_DIR}/kernels.py"
+              "${CMAKE_CURRENT_SOURCE_DIR}/convert.py"
+              "${CMAKE_CURRENT_SOURCE_DIR}/exp.py"
+              "${CMAKE_CURRENT_SOURCE_DIR}/sigmoid.py"
+              "${CMAKE_CURRENT_SOURCE_DIR}/sine_cosine.py"
+              "${CMAKE_CURRENT_SOURCE_DIR}/tanh.py"
+              "${CMAKE_SOURCE_DIR}/ynnpack/kernels/elementwise/generator.py"
+      WORKING_DIRECTORY "${CMAKE_SOURCE_DIR}"
+      # Without VERBATIM, argument escaping depends on the platform's build tool.
+      VERBATIM
+    )
+
+    YNN_GET_ARCH_COPTS(${ARCH} ARCH_COPTS)
+    SET_SOURCE_FILES_PROPERTIES("${OUTPUT_SRC}" PROPERTIES COMPILE_OPTIONS "${ARCH_COPTS}")
+
+    SET(${NAME}_SRC "${OUTPUT_SRC}" PARENT_SCOPE)
+    LIST(APPEND UNARY_KERNEL_SRCS "${OUTPUT_SRC}")
+    SET(UNARY_KERNEL_SRCS ${UNARY_KERNEL_SRCS} PARENT_SCOPE)
+  ENDIF()
+ENDFUNCTION()
+
+# x86
+YNN_GENERATE_UNARY_KERNEL(x86_sse2 x86_sse2)
+YNN_GENERATE_UNARY_KERNEL(x86_sse41 x86_sse41)
+YNN_GENERATE_UNARY_KERNEL(x86_avx x86_avx)
+YNN_GENERATE_UNARY_KERNEL(x86_avx2 x86_avx2)
+YNN_GENERATE_UNARY_KERNEL(x86_fma3 x86_fma3)
+YNN_GENERATE_UNARY_KERNEL(x86_f16c x86_f16c)
+YNN_GENERATE_UNARY_KERNEL(x86_avx512 x86_avx512)
+YNN_GENERATE_UNARY_KERNEL(x86_avx512bf16 x86_avx512bf16)
+
+# ARM
+YNN_GENERATE_UNARY_KERNEL(arm_neon arm_neon)
+YNN_GENERATE_UNARY_KERNEL(arm_neonfp16 arm_neonfp16)
+
+ADD_LIBRARY(ynnpack_kernels_unary STATIC
+  unary.cc
+  ${UNARY_KERNEL_SRCS}
+)
+
+# ${CMAKE_BINARY_DIR} is on the include path because the generated files are
+# written below it.
+TARGET_INCLUDE_DIRECTORIES(ynnpack_kernels_unary PUBLIC . ${CMAKE_BINARY_DIR})
+TARGET_LINK_LIBRARIES(ynnpack_kernels_unary PUBLIC ynnpack_base)
+
+IF(YNNPACK_BUILD_TESTS)
+  ADD_LIBRARY(ynnpack_kernels_unary_reference STATIC reference.cc)
+  TARGET_LINK_LIBRARIES(ynnpack_kernels_unary_reference PUBLIC
+    ynnpack
+    ynnpack_base
+    ynnpack_base_test_tensor
+    gtest
+  )
+
+  # The test executable links PRIVATE: nothing consumes it, and the explicit
+  # keyword avoids the legacy keyword-less signature.
+  ADD_EXECUTABLE(ynnpack_kernels_unary_test test.cc)
+  TARGET_LINK_LIBRARIES(ynnpack_kernels_unary_test PRIVATE
+    ynnpack_kernels_unary
+    ynnpack_kernels_unary_reference
+    ynnpack
+    ynnpack_base
+    ynnpack_base_test_fuzz
+    ynnpack_base_test_random
+    ynnpack_base_test_tensor
+    ynnpack_base_test_util
+    gtest
+    gtest_main
+  )
+  ADD_TEST(NAME ynnpack_kernels_unary_test COMMAND ynnpack_kernels_unary_test)
+ENDIF()
diff --git a/ynnpack/subgraph/CMakeLists.txt b/ynnpack/subgraph/CMakeLists.txt
new file mode 100644
index 00000000000..bd8edb58d51
--- /dev/null
+++ b/ynnpack/subgraph/CMakeLists.txt
@@ -0,0 +1,69 @@
+# Copyright 2026 Google LLC
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+# ---[ Slinky thread pool.
+ADD_LIBRARY(ynnpack_slinky_thread_pool STATIC
+  slinky_thread_pool.cc
+)
+
+TARGET_LINK_LIBRARIES(ynnpack_slinky_thread_pool PUBLIC
+  ynnpack_base
+  slinky_base
+  slinky_thread_pool_impl
+)
+
+# ---[ Subgraph library.
+ADD_LIBRARY(ynnpack_subgraph STATIC
+  broadcast.cc
+  broadcast_like.cc
+  concatenate.cc
+  copy.cc
+  dot.cc
+  elementwise.cc
+  even_split.cc
+  fusion.cc
+  fusion_lut.cc
+  fusion_types.cc
+  get_tensor_shape.cc
+  reduce.cc
+  runtime.cc
+  slinky.cc
+  stack.cc
+  static_pad.cc
+  static_slice.cc
+  static_transpose.cc
+  stencil_copy.cc
+  subgraph.cc
+  tensor.cc
+  threadpool.cc
+  utils.cc
+)
+
+# Linked PUBLIC, so anything linking ynnpack_subgraph also gets the kernel
+# libraries and slinky.
+TARGET_LINK_LIBRARIES(ynnpack_subgraph PUBLIC
+  ynnpack_base
+  ynnpack_slinky_thread_pool
+  ynnpack_kernels_binary
+  ynnpack_kernels_dot
+  ynnpack_kernels_lut
+  ynnpack_kernels_reduce
+  ynnpack_kernels_ternary
+  ynnpack_kernels_transpose
+  ynnpack_kernels_unary
+  slinky_builder
+  slinky_runtime
+)
+
+# Handle optional Perfetto.
+SET(YNN_ENABLE_PERFETTO OFF CACHE BOOL "Enable Perfetto tracing for YNNPACK")
+IF(YNN_ENABLE_PERFETTO)
+  # Perfetto is not wired into this build yet, so warn rather than silently
+  # produce a build without tracing. Supporting it would require
+  # finding/adding perfetto and then, roughly:
+  #   ADD_LIBRARY(ynnpack_perfetto STATIC perfetto.cc)
+  #   TARGET_COMPILE_DEFINITIONS(ynnpack_subgraph PRIVATE YNN_ENABLE_PERFETTO)
+  #   TARGET_LINK_LIBRARIES(ynnpack_subgraph PRIVATE ynnpack_perfetto)
+  MESSAGE(WARNING
+    "YNN_ENABLE_PERFETTO is ON, but Perfetto tracing is not supported by the "
+    "YNNPACK CMake build yet; the option is ignored.")
+ENDIF()
+
+IF(YNNPACK_BUILD_TESTS)
+  ADD_SUBDIRECTORY(test)
+ENDIF()
diff --git a/ynnpack/subgraph/test/CMakeLists.txt b/ynnpack/subgraph/test/CMakeLists.txt
new file mode 100644
index 00000000000..05184f3af4a
--- /dev/null
+++ b/ynnpack/subgraph/test/CMakeLists.txt
@@ -0,0 +1,42 @@
+# Copyright 2026 Google LLC
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+# Support libraries shared by the subgraph tests.
+ADD_LIBRARY(ynnpack_subgraph_test_scheduler STATIC scheduler.cc)
+TARGET_LINK_LIBRARIES(ynnpack_subgraph_test_scheduler PUBLIC
+  ynnpack
+  ynnpack_base
+  slinky_thread_pool_impl
+  gtest
+)
+
+ADD_LIBRARY(ynnpack_subgraph_test_builder STATIC subgraph_builder.cc)
+TARGET_LINK_LIBRARIES(ynnpack_subgraph_test_builder PUBLIC
+  ynnpack_subgraph_test_scheduler
+  ynnpack
+  ynnpack_base
+  ynnpack_subgraph
+  gtest
+)
+
+# Declares and registers one subgraph test.
+#
+#   NAME  Test base name: NAME.cc is compiled into the executable
+#         ynnpack_subgraph_NAME_test, which is registered with CTest under the
+#         same name.
+FUNCTION(YNN_ADD_SUBGRAPH_TEST NAME)
+  SET(TEST_TARGET ynnpack_subgraph_${NAME}_test)
+  ADD_EXECUTABLE(${TEST_TARGET} ${NAME}.cc)
+  # PRIVATE: nothing consumes a test executable, and the explicit keyword
+  # avoids the legacy keyword-less signature.
+  TARGET_LINK_LIBRARIES(${TEST_TARGET} PRIVATE
+    ynnpack
+    ynnpack_base
+    ynnpack_subgraph
+    ynnpack_subgraph_test_builder
+    ynnpack_base_test_tensor
+    gmock
+    gtest
+    gtest_main
+  )
+  ADD_TEST(NAME ${TEST_TARGET} COMMAND ${TEST_TARGET})
+ENDFUNCTION()
+
+YNN_ADD_SUBGRAPH_TEST(broadcast_like)
+YNN_ADD_SUBGRAPH_TEST(concatenate)
+YNN_ADD_SUBGRAPH_TEST(copy)
+YNN_ADD_SUBGRAPH_TEST(errors)
+YNN_ADD_SUBGRAPH_TEST(even_split)
+YNN_ADD_SUBGRAPH_TEST(fuse_dim)
+YNN_ADD_SUBGRAPH_TEST(fuse_dims)
+YNN_ADD_SUBGRAPH_TEST(get_tensor_shape)
+YNN_ADD_SUBGRAPH_TEST(lut)
+YNN_ADD_SUBGRAPH_TEST(reduce)
diff --git a/ynnpack/xnnpack/CMakeLists.txt b/ynnpack/xnnpack/CMakeLists.txt
new file mode 100644
index 00000000000..96e660d63e8
--- /dev/null
+++ b/ynnpack/xnnpack/CMakeLists.txt
@@ -0,0 +1,47 @@
+# Copyright 2026 Google LLC
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+# Utilities shared by the libraries below.
+ADD_LIBRARY(ynnpack_xnnpack_utils STATIC utils.cc)
+TARGET_LINK_LIBRARIES(ynnpack_xnnpack_utils PUBLIC
+  ynnpack
+  ynnpack_base
+  ynnpack_subgraph
+  pthreadpool
+)
+
+ADD_LIBRARY(ynnpack_dynamic_quantization STATIC dynamic_quantization.cc)
+TARGET_LINK_LIBRARIES(ynnpack_dynamic_quantization PUBLIC
+  ynnpack_xnnpack_utils
+  ynnpack
+  ynnpack_base
+  ynnpack_kernels_dot
+  ynnpack_subgraph
+  pthreadpool
+  slinky_builder
+  slinky_runtime
+)
+
+ADD_LIBRARY(ynnpack_as_xnnpack STATIC
+  deprecated.cc
+  experimental.cc
+  operator.cc
+  runtime.cc
+  subgraph.cc
+  tensor.cc
+  weights_cache.cc
+  workspace.cc
+)
+
+# PUBLIC, so every consumer of this library is also compiled with
+# XNNPACK_USING_YNNPACK=1.
+TARGET_COMPILE_DEFINITIONS(ynnpack_as_xnnpack PUBLIC XNNPACK_USING_YNNPACK=1)
+TARGET_LINK_LIBRARIES(ynnpack_as_xnnpack PUBLIC
+  ynnpack_dynamic_quantization
+  ynnpack_xnnpack_utils
+  ynnpack
+  ynnpack_base
+  ynnpack_subgraph
+  pthreadpool
+  slinky_thread_pool_impl
+)
+
+IF(YNNPACK_BUILD_TESTS)
+  # The test executable links PRIVATE: nothing consumes it, and the explicit
+  # keyword avoids the legacy keyword-less signature.
+  ADD_EXECUTABLE(ynnpack_dynamic_quantization_test dynamic_quantization_test.cc)
+  TARGET_LINK_LIBRARIES(ynnpack_dynamic_quantization_test PRIVATE
+    ynnpack_dynamic_quantization
+    ynnpack
+    ynnpack_base
+    ynnpack_base_test_fuzz
+    ynnpack_base_test_random
+    ynnpack_base_test_tensor
+    ynnpack_base_test_util
+    ynnpack_subgraph_test_builder
+    gtest
+    gtest_main
+  )
+  ADD_TEST(NAME ynnpack_dynamic_quantization_test COMMAND ynnpack_dynamic_quantization_test)
+ENDIF()