From 9548e52266e03d4465a32e77b8a1835bfef430c9 Mon Sep 17 00:00:00 2001 From: jysh1214 Date: Mon, 1 Dec 2025 22:30:06 +0800 Subject: [PATCH 1/7] Refactor arithmetic ops to use std::variant + std::visit Remove Arithmetic_internal.hpp/cpp and Ari_ii from linalg_internal_interface --- .../Arithmetic_internal.cpp | 3042 ----------------- .../Arithmetic_internal.hpp | 805 ----- .../linalg_internal_cpu/CMakeLists.txt | 2 - src/backend/linalg_internal_interface.cpp | 134 - src/backend/linalg_internal_interface.hpp | 8 +- src/linalg/Add.cpp | 171 +- src/linalg/Cpr.cpp | 171 +- src/linalg/Div.cpp | 308 +- src/linalg/Mod.cpp | 308 +- src/linalg/Mul.cpp | 171 +- src/linalg/Sub.cpp | 308 +- 11 files changed, 1077 insertions(+), 4351 deletions(-) delete mode 100644 src/backend/linalg_internal_cpu/Arithmetic_internal.cpp delete mode 100644 src/backend/linalg_internal_cpu/Arithmetic_internal.hpp diff --git a/src/backend/linalg_internal_cpu/Arithmetic_internal.cpp b/src/backend/linalg_internal_cpu/Arithmetic_internal.cpp deleted file mode 100644 index fa83b7e9c..000000000 --- a/src/backend/linalg_internal_cpu/Arithmetic_internal.cpp +++ /dev/null @@ -1,3042 +0,0 @@ -#include "Arithmetic_internal.hpp" - -namespace cytnx { - namespace linalg_internal { - - void Arithmetic_internal_cdtcd(boost::intrusive_ptr &out, - boost::intrusive_ptr &Lin, - boost::intrusive_ptr &Rin, - const unsigned long long &len, - const std::vector &shape, - const std::vector &invmapper_L, - const std::vector &invmapper_R, const char &type) { - if (type == 0) - cytnx::linalg_internal::AddInternalImpl( - out, Lin, Rin, len, shape, invmapper_L, invmapper_R); - else if (type == 1) - cytnx::linalg_internal::MulInternalImpl( - out, Lin, Rin, len, shape, invmapper_L, invmapper_R); - else if (type == 2) - cytnx::linalg_internal::SubInternalImpl( - out, Lin, Rin, len, shape, invmapper_L, invmapper_R); - else if (type == 3) - cytnx::linalg_internal::DivInternalImpl( - out, Lin, Rin, len, shape, invmapper_L, 
invmapper_R); - else if (type == 4) - cytnx::linalg_internal::CprInternalImpl( - out, Lin, Rin, len, shape, invmapper_L, invmapper_R); - else - cytnx::linalg_internal::ModInternalImpl( - out, Lin, Rin, len, shape, invmapper_L, invmapper_R); - } - void Arithmetic_internal_cdtcf(boost::intrusive_ptr &out, - boost::intrusive_ptr &Lin, - boost::intrusive_ptr &Rin, - const unsigned long long &len, - const std::vector &shape, - const std::vector &invmapper_L, - const std::vector &invmapper_R, const char &type) { - if (type == 0) - cytnx::linalg_internal::AddInternalImpl( - out, Lin, Rin, len, shape, invmapper_L, invmapper_R); - else if (type == 1) - cytnx::linalg_internal::MulInternalImpl( - out, Lin, Rin, len, shape, invmapper_L, invmapper_R); - else if (type == 2) - cytnx::linalg_internal::SubInternalImpl( - out, Lin, Rin, len, shape, invmapper_L, invmapper_R); - else if (type == 3) - cytnx::linalg_internal::DivInternalImpl( - out, Lin, Rin, len, shape, invmapper_L, invmapper_R); - else if (type == 4) - cytnx::linalg_internal::CprInternalImpl( - out, Lin, Rin, len, shape, invmapper_L, invmapper_R); - else - cytnx::linalg_internal::ModInternalImpl( - out, Lin, Rin, len, shape, invmapper_L, invmapper_R); - } - void Arithmetic_internal_cdtd(boost::intrusive_ptr &out, - boost::intrusive_ptr &Lin, - boost::intrusive_ptr &Rin, - const unsigned long long &len, - const std::vector &shape, - const std::vector &invmapper_L, - const std::vector &invmapper_R, const char &type) { - if (type == 0) - cytnx::linalg_internal::AddInternalImpl( - out, Lin, Rin, len, shape, invmapper_L, invmapper_R); - else if (type == 1) - cytnx::linalg_internal::MulInternalImpl( - out, Lin, Rin, len, shape, invmapper_L, invmapper_R); - else if (type == 2) - cytnx::linalg_internal::SubInternalImpl( - out, Lin, Rin, len, shape, invmapper_L, invmapper_R); - else if (type == 3) - cytnx::linalg_internal::DivInternalImpl( - out, Lin, Rin, len, shape, invmapper_L, invmapper_R); - else if (type == 4) - 
cytnx::linalg_internal::CprInternalImpl( - out, Lin, Rin, len, shape, invmapper_L, invmapper_R); - else - cytnx::linalg_internal::ModInternalImpl( - out, Lin, Rin, len, shape, invmapper_L, invmapper_R); - } - void Arithmetic_internal_cdtf(boost::intrusive_ptr &out, - boost::intrusive_ptr &Lin, - boost::intrusive_ptr &Rin, - const unsigned long long &len, - const std::vector &shape, - const std::vector &invmapper_L, - const std::vector &invmapper_R, const char &type) { - if (type == 0) - cytnx::linalg_internal::AddInternalImpl( - out, Lin, Rin, len, shape, invmapper_L, invmapper_R); - else if (type == 1) - cytnx::linalg_internal::MulInternalImpl( - out, Lin, Rin, len, shape, invmapper_L, invmapper_R); - else if (type == 2) - cytnx::linalg_internal::SubInternalImpl( - out, Lin, Rin, len, shape, invmapper_L, invmapper_R); - else if (type == 3) - cytnx::linalg_internal::DivInternalImpl( - out, Lin, Rin, len, shape, invmapper_L, invmapper_R); - else if (type == 4) - cytnx::linalg_internal::CprInternalImpl( - out, Lin, Rin, len, shape, invmapper_L, invmapper_R); - else - cytnx::linalg_internal::ModInternalImpl( - out, Lin, Rin, len, shape, invmapper_L, invmapper_R); - } - void Arithmetic_internal_cdtu64( - boost::intrusive_ptr &out, boost::intrusive_ptr &Lin, - boost::intrusive_ptr &Rin, const unsigned long long &len, - const std::vector &shape, const std::vector &invmapper_L, - const std::vector &invmapper_R, const char &type) { - if (type == 0) - cytnx::linalg_internal::AddInternalImpl( - out, Lin, Rin, len, shape, invmapper_L, invmapper_R); - else if (type == 1) - cytnx::linalg_internal::MulInternalImpl( - out, Lin, Rin, len, shape, invmapper_L, invmapper_R); - else if (type == 2) - cytnx::linalg_internal::SubInternalImpl( - out, Lin, Rin, len, shape, invmapper_L, invmapper_R); - else if (type == 3) - cytnx::linalg_internal::DivInternalImpl( - out, Lin, Rin, len, shape, invmapper_L, invmapper_R); - else if (type == 4) - cytnx::linalg_internal::CprInternalImpl( - out, 
Lin, Rin, len, shape, invmapper_L, invmapper_R); - else - cytnx::linalg_internal::ModInternalImpl( - out, Lin, Rin, len, shape, invmapper_L, invmapper_R); - } - void Arithmetic_internal_cdti64( - boost::intrusive_ptr &out, boost::intrusive_ptr &Lin, - boost::intrusive_ptr &Rin, const unsigned long long &len, - const std::vector &shape, const std::vector &invmapper_L, - const std::vector &invmapper_R, const char &type) { - if (type == 0) - cytnx::linalg_internal::AddInternalImpl( - out, Lin, Rin, len, shape, invmapper_L, invmapper_R); - else if (type == 1) - cytnx::linalg_internal::MulInternalImpl( - out, Lin, Rin, len, shape, invmapper_L, invmapper_R); - else if (type == 2) - cytnx::linalg_internal::SubInternalImpl( - out, Lin, Rin, len, shape, invmapper_L, invmapper_R); - else if (type == 3) - cytnx::linalg_internal::DivInternalImpl( - out, Lin, Rin, len, shape, invmapper_L, invmapper_R); - else if (type == 4) - cytnx::linalg_internal::CprInternalImpl( - out, Lin, Rin, len, shape, invmapper_L, invmapper_R); - else - cytnx::linalg_internal::ModInternalImpl( - out, Lin, Rin, len, shape, invmapper_L, invmapper_R); - } - void Arithmetic_internal_cdtu32( - boost::intrusive_ptr &out, boost::intrusive_ptr &Lin, - boost::intrusive_ptr &Rin, const unsigned long long &len, - const std::vector &shape, const std::vector &invmapper_L, - const std::vector &invmapper_R, const char &type) { - if (type == 0) - cytnx::linalg_internal::AddInternalImpl( - out, Lin, Rin, len, shape, invmapper_L, invmapper_R); - else if (type == 1) - cytnx::linalg_internal::MulInternalImpl( - out, Lin, Rin, len, shape, invmapper_L, invmapper_R); - else if (type == 2) - cytnx::linalg_internal::SubInternalImpl( - out, Lin, Rin, len, shape, invmapper_L, invmapper_R); - else if (type == 3) - cytnx::linalg_internal::DivInternalImpl( - out, Lin, Rin, len, shape, invmapper_L, invmapper_R); - else if (type == 4) - cytnx::linalg_internal::CprInternalImpl( - out, Lin, Rin, len, shape, invmapper_L, invmapper_R); 
- else - cytnx::linalg_internal::ModInternalImpl( - out, Lin, Rin, len, shape, invmapper_L, invmapper_R); - } - void Arithmetic_internal_cdti32( - boost::intrusive_ptr &out, boost::intrusive_ptr &Lin, - boost::intrusive_ptr &Rin, const unsigned long long &len, - const std::vector &shape, const std::vector &invmapper_L, - const std::vector &invmapper_R, const char &type) { - if (type == 0) - cytnx::linalg_internal::AddInternalImpl( - out, Lin, Rin, len, shape, invmapper_L, invmapper_R); - else if (type == 1) - cytnx::linalg_internal::MulInternalImpl( - out, Lin, Rin, len, shape, invmapper_L, invmapper_R); - else if (type == 2) - cytnx::linalg_internal::SubInternalImpl( - out, Lin, Rin, len, shape, invmapper_L, invmapper_R); - else if (type == 3) - cytnx::linalg_internal::DivInternalImpl( - out, Lin, Rin, len, shape, invmapper_L, invmapper_R); - else if (type == 4) - cytnx::linalg_internal::CprInternalImpl( - out, Lin, Rin, len, shape, invmapper_L, invmapper_R); - else - cytnx::linalg_internal::ModInternalImpl( - out, Lin, Rin, len, shape, invmapper_L, invmapper_R); - } - void Arithmetic_internal_cdti16( - boost::intrusive_ptr &out, boost::intrusive_ptr &Lin, - boost::intrusive_ptr &Rin, const unsigned long long &len, - const std::vector &shape, const std::vector &invmapper_L, - const std::vector &invmapper_R, const char &type) { - if (type == 0) - cytnx::linalg_internal::AddInternalImpl( - out, Lin, Rin, len, shape, invmapper_L, invmapper_R); - else if (type == 1) - cytnx::linalg_internal::MulInternalImpl( - out, Lin, Rin, len, shape, invmapper_L, invmapper_R); - else if (type == 2) - cytnx::linalg_internal::SubInternalImpl( - out, Lin, Rin, len, shape, invmapper_L, invmapper_R); - else if (type == 3) - cytnx::linalg_internal::DivInternalImpl( - out, Lin, Rin, len, shape, invmapper_L, invmapper_R); - else if (type == 4) - cytnx::linalg_internal::CprInternalImpl( - out, Lin, Rin, len, shape, invmapper_L, invmapper_R); - else - cytnx::linalg_internal::ModInternalImpl( 
- out, Lin, Rin, len, shape, invmapper_L, invmapper_R); - } - void Arithmetic_internal_cdtu16( - boost::intrusive_ptr &out, boost::intrusive_ptr &Lin, - boost::intrusive_ptr &Rin, const unsigned long long &len, - const std::vector &shape, const std::vector &invmapper_L, - const std::vector &invmapper_R, const char &type) { - if (type == 0) - cytnx::linalg_internal::AddInternalImpl( - out, Lin, Rin, len, shape, invmapper_L, invmapper_R); - else if (type == 1) - cytnx::linalg_internal::MulInternalImpl( - out, Lin, Rin, len, shape, invmapper_L, invmapper_R); - else if (type == 2) - cytnx::linalg_internal::SubInternalImpl( - out, Lin, Rin, len, shape, invmapper_L, invmapper_R); - else if (type == 3) - cytnx::linalg_internal::DivInternalImpl( - out, Lin, Rin, len, shape, invmapper_L, invmapper_R); - else if (type == 4) - cytnx::linalg_internal::CprInternalImpl( - out, Lin, Rin, len, shape, invmapper_L, invmapper_R); - else - cytnx::linalg_internal::ModInternalImpl( - out, Lin, Rin, len, shape, invmapper_L, invmapper_R); - } - void Arithmetic_internal_cdtb(boost::intrusive_ptr &out, - boost::intrusive_ptr &Lin, - boost::intrusive_ptr &Rin, - const unsigned long long &len, - const std::vector &shape, - const std::vector &invmapper_L, - const std::vector &invmapper_R, const char &type) { - if (type == 0) - cytnx::linalg_internal::AddInternalImpl( - out, Lin, Rin, len, shape, invmapper_L, invmapper_R); - else if (type == 1) - cytnx::linalg_internal::MulInternalImpl( - out, Lin, Rin, len, shape, invmapper_L, invmapper_R); - else if (type == 2) - cytnx::linalg_internal::SubInternalImpl( - out, Lin, Rin, len, shape, invmapper_L, invmapper_R); - else if (type == 3) - cytnx::linalg_internal::DivInternalImpl( - out, Lin, Rin, len, shape, invmapper_L, invmapper_R); - else if (type == 4) - cytnx::linalg_internal::CprInternalImpl( - out, Lin, Rin, len, shape, invmapper_L, invmapper_R); - else - cytnx::linalg_internal::ModInternalImpl( - out, Lin, Rin, len, shape, invmapper_L, 
invmapper_R); - } - //----------------------- - void Arithmetic_internal_cftcd(boost::intrusive_ptr &out, - boost::intrusive_ptr &Lin, - boost::intrusive_ptr &Rin, - const unsigned long long &len, - const std::vector &shape, - const std::vector &invmapper_L, - const std::vector &invmapper_R, const char &type) { - if (type == 0) - cytnx::linalg_internal::AddInternalImpl( - out, Lin, Rin, len, shape, invmapper_L, invmapper_R); - else if (type == 1) - cytnx::linalg_internal::MulInternalImpl( - out, Lin, Rin, len, shape, invmapper_L, invmapper_R); - else if (type == 2) - cytnx::linalg_internal::SubInternalImpl( - out, Lin, Rin, len, shape, invmapper_L, invmapper_R); - else if (type == 3) - cytnx::linalg_internal::DivInternalImpl( - out, Lin, Rin, len, shape, invmapper_L, invmapper_R); - else if (type == 4) - cytnx::linalg_internal::CprInternalImpl( - out, Lin, Rin, len, shape, invmapper_L, invmapper_R); - else - cytnx::linalg_internal::ModInternalImpl( - out, Lin, Rin, len, shape, invmapper_L, invmapper_R); - } - void Arithmetic_internal_cftcf(boost::intrusive_ptr &out, - boost::intrusive_ptr &Lin, - boost::intrusive_ptr &Rin, - const unsigned long long &len, - const std::vector &shape, - const std::vector &invmapper_L, - const std::vector &invmapper_R, const char &type) { - if (type == 0) - cytnx::linalg_internal::AddInternalImpl( - out, Lin, Rin, len, shape, invmapper_L, invmapper_R); - else if (type == 1) - cytnx::linalg_internal::MulInternalImpl( - out, Lin, Rin, len, shape, invmapper_L, invmapper_R); - else if (type == 2) - cytnx::linalg_internal::SubInternalImpl( - out, Lin, Rin, len, shape, invmapper_L, invmapper_R); - else if (type == 3) - cytnx::linalg_internal::DivInternalImpl( - out, Lin, Rin, len, shape, invmapper_L, invmapper_R); - else if (type == 4) - cytnx::linalg_internal::CprInternalImpl( - out, Lin, Rin, len, shape, invmapper_L, invmapper_R); - else - cytnx::linalg_internal::ModInternalImpl( - out, Lin, Rin, len, shape, invmapper_L, invmapper_R); - } 
- void Arithmetic_internal_cftd(boost::intrusive_ptr &out, - boost::intrusive_ptr &Lin, - boost::intrusive_ptr &Rin, - const unsigned long long &len, - const std::vector &shape, - const std::vector &invmapper_L, - const std::vector &invmapper_R, const char &type) { - if (type == 0) - cytnx::linalg_internal::AddInternalImpl( - out, Lin, Rin, len, shape, invmapper_L, invmapper_R); - else if (type == 1) - cytnx::linalg_internal::MulInternalImpl( - out, Lin, Rin, len, shape, invmapper_L, invmapper_R); - else if (type == 2) - cytnx::linalg_internal::SubInternalImpl( - out, Lin, Rin, len, shape, invmapper_L, invmapper_R); - else if (type == 3) - cytnx::linalg_internal::DivInternalImpl( - out, Lin, Rin, len, shape, invmapper_L, invmapper_R); - else if (type == 4) - cytnx::linalg_internal::CprInternalImpl( - out, Lin, Rin, len, shape, invmapper_L, invmapper_R); - else - cytnx::linalg_internal::ModInternalImpl( - out, Lin, Rin, len, shape, invmapper_L, invmapper_R); - } - void Arithmetic_internal_cftf(boost::intrusive_ptr &out, - boost::intrusive_ptr &Lin, - boost::intrusive_ptr &Rin, - const unsigned long long &len, - const std::vector &shape, - const std::vector &invmapper_L, - const std::vector &invmapper_R, const char &type) { - if (type == 0) - cytnx::linalg_internal::AddInternalImpl( - out, Lin, Rin, len, shape, invmapper_L, invmapper_R); - else if (type == 1) - cytnx::linalg_internal::MulInternalImpl( - out, Lin, Rin, len, shape, invmapper_L, invmapper_R); - else if (type == 2) - cytnx::linalg_internal::SubInternalImpl( - out, Lin, Rin, len, shape, invmapper_L, invmapper_R); - else if (type == 3) - cytnx::linalg_internal::DivInternalImpl( - out, Lin, Rin, len, shape, invmapper_L, invmapper_R); - else if (type == 4) - cytnx::linalg_internal::CprInternalImpl( - out, Lin, Rin, len, shape, invmapper_L, invmapper_R); - else - cytnx::linalg_internal::ModInternalImpl( - out, Lin, Rin, len, shape, invmapper_L, invmapper_R); - } - void Arithmetic_internal_cftu64( - 
boost::intrusive_ptr &out, boost::intrusive_ptr &Lin, - boost::intrusive_ptr &Rin, const unsigned long long &len, - const std::vector &shape, const std::vector &invmapper_L, - const std::vector &invmapper_R, const char &type) { - if (type == 0) - cytnx::linalg_internal::AddInternalImpl( - out, Lin, Rin, len, shape, invmapper_L, invmapper_R); - else if (type == 1) - cytnx::linalg_internal::MulInternalImpl( - out, Lin, Rin, len, shape, invmapper_L, invmapper_R); - else if (type == 2) - cytnx::linalg_internal::SubInternalImpl( - out, Lin, Rin, len, shape, invmapper_L, invmapper_R); - else if (type == 3) - cytnx::linalg_internal::DivInternalImpl( - out, Lin, Rin, len, shape, invmapper_L, invmapper_R); - else if (type == 4) - cytnx::linalg_internal::CprInternalImpl( - out, Lin, Rin, len, shape, invmapper_L, invmapper_R); - else - cytnx::linalg_internal::ModInternalImpl( - out, Lin, Rin, len, shape, invmapper_L, invmapper_R); - } - void Arithmetic_internal_cfti64( - boost::intrusive_ptr &out, boost::intrusive_ptr &Lin, - boost::intrusive_ptr &Rin, const unsigned long long &len, - const std::vector &shape, const std::vector &invmapper_L, - const std::vector &invmapper_R, const char &type) { - if (type == 0) - cytnx::linalg_internal::AddInternalImpl( - out, Lin, Rin, len, shape, invmapper_L, invmapper_R); - else if (type == 1) - cytnx::linalg_internal::MulInternalImpl( - out, Lin, Rin, len, shape, invmapper_L, invmapper_R); - else if (type == 2) - cytnx::linalg_internal::SubInternalImpl( - out, Lin, Rin, len, shape, invmapper_L, invmapper_R); - else if (type == 3) - cytnx::linalg_internal::DivInternalImpl( - out, Lin, Rin, len, shape, invmapper_L, invmapper_R); - else if (type == 4) - cytnx::linalg_internal::CprInternalImpl( - out, Lin, Rin, len, shape, invmapper_L, invmapper_R); - else - cytnx::linalg_internal::ModInternalImpl( - out, Lin, Rin, len, shape, invmapper_L, invmapper_R); - } - void Arithmetic_internal_cftu32( - boost::intrusive_ptr &out, boost::intrusive_ptr 
&Lin, - boost::intrusive_ptr &Rin, const unsigned long long &len, - const std::vector &shape, const std::vector &invmapper_L, - const std::vector &invmapper_R, const char &type) { - if (type == 0) - cytnx::linalg_internal::AddInternalImpl( - out, Lin, Rin, len, shape, invmapper_L, invmapper_R); - else if (type == 1) - cytnx::linalg_internal::MulInternalImpl( - out, Lin, Rin, len, shape, invmapper_L, invmapper_R); - else if (type == 2) - cytnx::linalg_internal::SubInternalImpl( - out, Lin, Rin, len, shape, invmapper_L, invmapper_R); - else if (type == 3) - cytnx::linalg_internal::DivInternalImpl( - out, Lin, Rin, len, shape, invmapper_L, invmapper_R); - else if (type == 4) - cytnx::linalg_internal::CprInternalImpl( - out, Lin, Rin, len, shape, invmapper_L, invmapper_R); - else - cytnx::linalg_internal::ModInternalImpl( - out, Lin, Rin, len, shape, invmapper_L, invmapper_R); - } - void Arithmetic_internal_cfti32( - boost::intrusive_ptr &out, boost::intrusive_ptr &Lin, - boost::intrusive_ptr &Rin, const unsigned long long &len, - const std::vector &shape, const std::vector &invmapper_L, - const std::vector &invmapper_R, const char &type) { - if (type == 0) - cytnx::linalg_internal::AddInternalImpl( - out, Lin, Rin, len, shape, invmapper_L, invmapper_R); - else if (type == 1) - cytnx::linalg_internal::MulInternalImpl( - out, Lin, Rin, len, shape, invmapper_L, invmapper_R); - else if (type == 2) - cytnx::linalg_internal::SubInternalImpl( - out, Lin, Rin, len, shape, invmapper_L, invmapper_R); - else if (type == 3) - cytnx::linalg_internal::DivInternalImpl( - out, Lin, Rin, len, shape, invmapper_L, invmapper_R); - else if (type == 4) - cytnx::linalg_internal::CprInternalImpl( - out, Lin, Rin, len, shape, invmapper_L, invmapper_R); - else - cytnx::linalg_internal::ModInternalImpl( - out, Lin, Rin, len, shape, invmapper_L, invmapper_R); - } - void Arithmetic_internal_cfti16( - boost::intrusive_ptr &out, boost::intrusive_ptr &Lin, - boost::intrusive_ptr &Rin, const unsigned 
long long &len, - const std::vector &shape, const std::vector &invmapper_L, - const std::vector &invmapper_R, const char &type) { - if (type == 0) - cytnx::linalg_internal::AddInternalImpl( - out, Lin, Rin, len, shape, invmapper_L, invmapper_R); - else if (type == 1) - cytnx::linalg_internal::MulInternalImpl( - out, Lin, Rin, len, shape, invmapper_L, invmapper_R); - else if (type == 2) - cytnx::linalg_internal::SubInternalImpl( - out, Lin, Rin, len, shape, invmapper_L, invmapper_R); - else if (type == 3) - cytnx::linalg_internal::DivInternalImpl( - out, Lin, Rin, len, shape, invmapper_L, invmapper_R); - else if (type == 4) - cytnx::linalg_internal::CprInternalImpl( - out, Lin, Rin, len, shape, invmapper_L, invmapper_R); - else - cytnx::linalg_internal::ModInternalImpl( - out, Lin, Rin, len, shape, invmapper_L, invmapper_R); - } - void Arithmetic_internal_cftu16( - boost::intrusive_ptr &out, boost::intrusive_ptr &Lin, - boost::intrusive_ptr &Rin, const unsigned long long &len, - const std::vector &shape, const std::vector &invmapper_L, - const std::vector &invmapper_R, const char &type) { - if (type == 0) - cytnx::linalg_internal::AddInternalImpl( - out, Lin, Rin, len, shape, invmapper_L, invmapper_R); - else if (type == 1) - cytnx::linalg_internal::MulInternalImpl( - out, Lin, Rin, len, shape, invmapper_L, invmapper_R); - else if (type == 2) - cytnx::linalg_internal::SubInternalImpl( - out, Lin, Rin, len, shape, invmapper_L, invmapper_R); - else if (type == 3) - cytnx::linalg_internal::DivInternalImpl( - out, Lin, Rin, len, shape, invmapper_L, invmapper_R); - else if (type == 4) - cytnx::linalg_internal::CprInternalImpl( - out, Lin, Rin, len, shape, invmapper_L, invmapper_R); - else - cytnx::linalg_internal::ModInternalImpl( - out, Lin, Rin, len, shape, invmapper_L, invmapper_R); - } - void Arithmetic_internal_cftb(boost::intrusive_ptr &out, - boost::intrusive_ptr &Lin, - boost::intrusive_ptr &Rin, - const unsigned long long &len, - const std::vector &shape, - 
const std::vector &invmapper_L, - const std::vector &invmapper_R, const char &type) { - if (type == 0) - cytnx::linalg_internal::AddInternalImpl( - out, Lin, Rin, len, shape, invmapper_L, invmapper_R); - else if (type == 1) - cytnx::linalg_internal::MulInternalImpl( - out, Lin, Rin, len, shape, invmapper_L, invmapper_R); - else if (type == 2) - cytnx::linalg_internal::SubInternalImpl( - out, Lin, Rin, len, shape, invmapper_L, invmapper_R); - else if (type == 3) - cytnx::linalg_internal::DivInternalImpl( - out, Lin, Rin, len, shape, invmapper_L, invmapper_R); - else if (type == 4) - cytnx::linalg_internal::CprInternalImpl( - out, Lin, Rin, len, shape, invmapper_L, invmapper_R); - else - cytnx::linalg_internal::ModInternalImpl( - out, Lin, Rin, len, shape, invmapper_L, invmapper_R); - } - //--------------------------- - void Arithmetic_internal_dtcd(boost::intrusive_ptr &out, - boost::intrusive_ptr &Lin, - boost::intrusive_ptr &Rin, - const unsigned long long &len, - const std::vector &shape, - const std::vector &invmapper_L, - const std::vector &invmapper_R, const char &type) { - if (type == 0) - cytnx::linalg_internal::AddInternalImpl( - out, Lin, Rin, len, shape, invmapper_L, invmapper_R); - else if (type == 1) - cytnx::linalg_internal::MulInternalImpl( - out, Lin, Rin, len, shape, invmapper_L, invmapper_R); - else if (type == 2) - cytnx::linalg_internal::SubInternalImpl( - out, Lin, Rin, len, shape, invmapper_L, invmapper_R); - else if (type == 3) - cytnx::linalg_internal::DivInternalImpl( - out, Lin, Rin, len, shape, invmapper_L, invmapper_R); - else if (type == 4) - cytnx::linalg_internal::CprInternalImpl( - out, Lin, Rin, len, shape, invmapper_L, invmapper_R); - else - cytnx::linalg_internal::ModInternalImpl( - out, Lin, Rin, len, shape, invmapper_L, invmapper_R); - } - void Arithmetic_internal_dtcf(boost::intrusive_ptr &out, - boost::intrusive_ptr &Lin, - boost::intrusive_ptr &Rin, - const unsigned long long &len, - const std::vector &shape, - const 
std::vector &invmapper_L, - const std::vector &invmapper_R, const char &type) { - if (type == 0) - cytnx::linalg_internal::AddInternalImpl( - out, Lin, Rin, len, shape, invmapper_L, invmapper_R); - else if (type == 1) - cytnx::linalg_internal::MulInternalImpl( - out, Lin, Rin, len, shape, invmapper_L, invmapper_R); - else if (type == 2) - cytnx::linalg_internal::SubInternalImpl( - out, Lin, Rin, len, shape, invmapper_L, invmapper_R); - else if (type == 3) - cytnx::linalg_internal::DivInternalImpl( - out, Lin, Rin, len, shape, invmapper_L, invmapper_R); - else if (type == 4) - cytnx::linalg_internal::CprInternalImpl( - out, Lin, Rin, len, shape, invmapper_L, invmapper_R); - else - cytnx::linalg_internal::ModInternalImpl( - out, Lin, Rin, len, shape, invmapper_L, invmapper_R); - } - void Arithmetic_internal_dtd(boost::intrusive_ptr &out, - boost::intrusive_ptr &Lin, - boost::intrusive_ptr &Rin, - const unsigned long long &len, - const std::vector &shape, - const std::vector &invmapper_L, - const std::vector &invmapper_R, const char &type) { - if (type == 0) - cytnx::linalg_internal::AddInternalImpl( - out, Lin, Rin, len, shape, invmapper_L, invmapper_R); - else if (type == 1) - cytnx::linalg_internal::MulInternalImpl( - out, Lin, Rin, len, shape, invmapper_L, invmapper_R); - else if (type == 2) - cytnx::linalg_internal::SubInternalImpl( - out, Lin, Rin, len, shape, invmapper_L, invmapper_R); - else if (type == 3) - cytnx::linalg_internal::DivInternalImpl( - out, Lin, Rin, len, shape, invmapper_L, invmapper_R); - else if (type == 4) - cytnx::linalg_internal::CprInternalImpl( - out, Lin, Rin, len, shape, invmapper_L, invmapper_R); - else - cytnx::linalg_internal::ModInternalImpl( - out, Lin, Rin, len, shape, invmapper_L, invmapper_R); - } - void Arithmetic_internal_dtf(boost::intrusive_ptr &out, - boost::intrusive_ptr &Lin, - boost::intrusive_ptr &Rin, - const unsigned long long &len, - const std::vector &shape, - const std::vector &invmapper_L, - const std::vector 
&invmapper_R, const char &type) { - if (type == 0) - cytnx::linalg_internal::AddInternalImpl( - out, Lin, Rin, len, shape, invmapper_L, invmapper_R); - else if (type == 1) - cytnx::linalg_internal::MulInternalImpl( - out, Lin, Rin, len, shape, invmapper_L, invmapper_R); - else if (type == 2) - cytnx::linalg_internal::SubInternalImpl( - out, Lin, Rin, len, shape, invmapper_L, invmapper_R); - else if (type == 3) - cytnx::linalg_internal::DivInternalImpl( - out, Lin, Rin, len, shape, invmapper_L, invmapper_R); - else if (type == 4) - cytnx::linalg_internal::CprInternalImpl( - out, Lin, Rin, len, shape, invmapper_L, invmapper_R); - else - cytnx::linalg_internal::ModInternalImpl( - out, Lin, Rin, len, shape, invmapper_L, invmapper_R); - } - void Arithmetic_internal_dtu64(boost::intrusive_ptr &out, - boost::intrusive_ptr &Lin, - boost::intrusive_ptr &Rin, - const unsigned long long &len, - const std::vector &shape, - const std::vector &invmapper_L, - const std::vector &invmapper_R, const char &type) { - if (type == 0) - cytnx::linalg_internal::AddInternalImpl( - out, Lin, Rin, len, shape, invmapper_L, invmapper_R); - else if (type == 1) - cytnx::linalg_internal::MulInternalImpl( - out, Lin, Rin, len, shape, invmapper_L, invmapper_R); - else if (type == 2) - cytnx::linalg_internal::SubInternalImpl( - out, Lin, Rin, len, shape, invmapper_L, invmapper_R); - else if (type == 3) - cytnx::linalg_internal::DivInternalImpl( - out, Lin, Rin, len, shape, invmapper_L, invmapper_R); - else if (type == 4) - cytnx::linalg_internal::CprInternalImpl( - out, Lin, Rin, len, shape, invmapper_L, invmapper_R); - else - cytnx::linalg_internal::ModInternalImpl( - out, Lin, Rin, len, shape, invmapper_L, invmapper_R); - } - void Arithmetic_internal_dti64(boost::intrusive_ptr &out, - boost::intrusive_ptr &Lin, - boost::intrusive_ptr &Rin, - const unsigned long long &len, - const std::vector &shape, - const std::vector &invmapper_L, - const std::vector &invmapper_R, const char &type) { - if (type 
== 0) - cytnx::linalg_internal::AddInternalImpl( - out, Lin, Rin, len, shape, invmapper_L, invmapper_R); - else if (type == 1) - cytnx::linalg_internal::MulInternalImpl( - out, Lin, Rin, len, shape, invmapper_L, invmapper_R); - else if (type == 2) - cytnx::linalg_internal::SubInternalImpl( - out, Lin, Rin, len, shape, invmapper_L, invmapper_R); - else if (type == 3) - cytnx::linalg_internal::DivInternalImpl( - out, Lin, Rin, len, shape, invmapper_L, invmapper_R); - else if (type == 4) - cytnx::linalg_internal::CprInternalImpl( - out, Lin, Rin, len, shape, invmapper_L, invmapper_R); - else - cytnx::linalg_internal::ModInternalImpl( - out, Lin, Rin, len, shape, invmapper_L, invmapper_R); - } - void Arithmetic_internal_dtu32(boost::intrusive_ptr &out, - boost::intrusive_ptr &Lin, - boost::intrusive_ptr &Rin, - const unsigned long long &len, - const std::vector &shape, - const std::vector &invmapper_L, - const std::vector &invmapper_R, const char &type) { - if (type == 0) - cytnx::linalg_internal::AddInternalImpl( - out, Lin, Rin, len, shape, invmapper_L, invmapper_R); - else if (type == 1) - cytnx::linalg_internal::MulInternalImpl( - out, Lin, Rin, len, shape, invmapper_L, invmapper_R); - else if (type == 2) - cytnx::linalg_internal::SubInternalImpl( - out, Lin, Rin, len, shape, invmapper_L, invmapper_R); - else if (type == 3) - cytnx::linalg_internal::DivInternalImpl( - out, Lin, Rin, len, shape, invmapper_L, invmapper_R); - else if (type == 4) - cytnx::linalg_internal::CprInternalImpl( - out, Lin, Rin, len, shape, invmapper_L, invmapper_R); - else - cytnx::linalg_internal::ModInternalImpl( - out, Lin, Rin, len, shape, invmapper_L, invmapper_R); - } - void Arithmetic_internal_dti32(boost::intrusive_ptr &out, - boost::intrusive_ptr &Lin, - boost::intrusive_ptr &Rin, - const unsigned long long &len, - const std::vector &shape, - const std::vector &invmapper_L, - const std::vector &invmapper_R, const char &type) { - if (type == 0) - 
cytnx::linalg_internal::AddInternalImpl( - out, Lin, Rin, len, shape, invmapper_L, invmapper_R); - else if (type == 1) - cytnx::linalg_internal::MulInternalImpl( - out, Lin, Rin, len, shape, invmapper_L, invmapper_R); - else if (type == 2) - cytnx::linalg_internal::SubInternalImpl( - out, Lin, Rin, len, shape, invmapper_L, invmapper_R); - else if (type == 3) - cytnx::linalg_internal::DivInternalImpl( - out, Lin, Rin, len, shape, invmapper_L, invmapper_R); - else if (type == 4) - cytnx::linalg_internal::CprInternalImpl( - out, Lin, Rin, len, shape, invmapper_L, invmapper_R); - else - cytnx::linalg_internal::ModInternalImpl( - out, Lin, Rin, len, shape, invmapper_L, invmapper_R); - } - void Arithmetic_internal_dti16(boost::intrusive_ptr &out, - boost::intrusive_ptr &Lin, - boost::intrusive_ptr &Rin, - const unsigned long long &len, - const std::vector &shape, - const std::vector &invmapper_L, - const std::vector &invmapper_R, const char &type) { - if (type == 0) - cytnx::linalg_internal::AddInternalImpl( - out, Lin, Rin, len, shape, invmapper_L, invmapper_R); - else if (type == 1) - cytnx::linalg_internal::MulInternalImpl( - out, Lin, Rin, len, shape, invmapper_L, invmapper_R); - else if (type == 2) - cytnx::linalg_internal::SubInternalImpl( - out, Lin, Rin, len, shape, invmapper_L, invmapper_R); - else if (type == 3) - cytnx::linalg_internal::DivInternalImpl( - out, Lin, Rin, len, shape, invmapper_L, invmapper_R); - else if (type == 4) - cytnx::linalg_internal::CprInternalImpl( - out, Lin, Rin, len, shape, invmapper_L, invmapper_R); - else - cytnx::linalg_internal::ModInternalImpl( - out, Lin, Rin, len, shape, invmapper_L, invmapper_R); - } - void Arithmetic_internal_dtu16(boost::intrusive_ptr &out, - boost::intrusive_ptr &Lin, - boost::intrusive_ptr &Rin, - const unsigned long long &len, - const std::vector &shape, - const std::vector &invmapper_L, - const std::vector &invmapper_R, const char &type) { - if (type == 0) - cytnx::linalg_internal::AddInternalImpl( - 
out, Lin, Rin, len, shape, invmapper_L, invmapper_R); - else if (type == 1) - cytnx::linalg_internal::MulInternalImpl( - out, Lin, Rin, len, shape, invmapper_L, invmapper_R); - else if (type == 2) - cytnx::linalg_internal::SubInternalImpl( - out, Lin, Rin, len, shape, invmapper_L, invmapper_R); - else if (type == 3) - cytnx::linalg_internal::DivInternalImpl( - out, Lin, Rin, len, shape, invmapper_L, invmapper_R); - else if (type == 4) - cytnx::linalg_internal::CprInternalImpl( - out, Lin, Rin, len, shape, invmapper_L, invmapper_R); - else - cytnx::linalg_internal::ModInternalImpl( - out, Lin, Rin, len, shape, invmapper_L, invmapper_R); - } - void Arithmetic_internal_dtb(boost::intrusive_ptr &out, - boost::intrusive_ptr &Lin, - boost::intrusive_ptr &Rin, - const unsigned long long &len, - const std::vector &shape, - const std::vector &invmapper_L, - const std::vector &invmapper_R, const char &type) { - if (type == 0) - cytnx::linalg_internal::AddInternalImpl(out, Lin, Rin, len, shape, - invmapper_L, invmapper_R); - else if (type == 1) - cytnx::linalg_internal::MulInternalImpl(out, Lin, Rin, len, shape, - invmapper_L, invmapper_R); - else if (type == 2) - cytnx::linalg_internal::SubInternalImpl(out, Lin, Rin, len, shape, - invmapper_L, invmapper_R); - else if (type == 3) - cytnx::linalg_internal::DivInternalImpl(out, Lin, Rin, len, shape, - invmapper_L, invmapper_R); - else if (type == 4) - cytnx::linalg_internal::CprInternalImpl(out, Lin, Rin, len, shape, - invmapper_L, invmapper_R); - else - cytnx::linalg_internal::ModInternalImpl(out, Lin, Rin, len, shape, - invmapper_L, invmapper_R); - } - //---------------------- - void Arithmetic_internal_ftcd(boost::intrusive_ptr &out, - boost::intrusive_ptr &Lin, - boost::intrusive_ptr &Rin, - const unsigned long long &len, - const std::vector &shape, - const std::vector &invmapper_L, - const std::vector &invmapper_R, const char &type) { - if (type == 0) - cytnx::linalg_internal::AddInternalImpl( - out, Lin, Rin, len, shape, 
invmapper_L, invmapper_R); - else if (type == 1) - cytnx::linalg_internal::MulInternalImpl( - out, Lin, Rin, len, shape, invmapper_L, invmapper_R); - else if (type == 2) - cytnx::linalg_internal::SubInternalImpl( - out, Lin, Rin, len, shape, invmapper_L, invmapper_R); - else if (type == 3) - cytnx::linalg_internal::DivInternalImpl( - out, Lin, Rin, len, shape, invmapper_L, invmapper_R); - else if (type == 4) - cytnx::linalg_internal::CprInternalImpl( - out, Lin, Rin, len, shape, invmapper_L, invmapper_R); - else - cytnx::linalg_internal::ModInternalImpl( - out, Lin, Rin, len, shape, invmapper_L, invmapper_R); - } - void Arithmetic_internal_ftcf(boost::intrusive_ptr &out, - boost::intrusive_ptr &Lin, - boost::intrusive_ptr &Rin, - const unsigned long long &len, - const std::vector &shape, - const std::vector &invmapper_L, - const std::vector &invmapper_R, const char &type) { - if (type == 0) - cytnx::linalg_internal::AddInternalImpl( - out, Lin, Rin, len, shape, invmapper_L, invmapper_R); - else if (type == 1) - cytnx::linalg_internal::MulInternalImpl( - out, Lin, Rin, len, shape, invmapper_L, invmapper_R); - else if (type == 2) - cytnx::linalg_internal::SubInternalImpl( - out, Lin, Rin, len, shape, invmapper_L, invmapper_R); - else if (type == 3) - cytnx::linalg_internal::DivInternalImpl( - out, Lin, Rin, len, shape, invmapper_L, invmapper_R); - else if (type == 4) - cytnx::linalg_internal::CprInternalImpl( - out, Lin, Rin, len, shape, invmapper_L, invmapper_R); - else - cytnx::linalg_internal::ModInternalImpl( - out, Lin, Rin, len, shape, invmapper_L, invmapper_R); - } - void Arithmetic_internal_ftd(boost::intrusive_ptr &out, - boost::intrusive_ptr &Lin, - boost::intrusive_ptr &Rin, - const unsigned long long &len, - const std::vector &shape, - const std::vector &invmapper_L, - const std::vector &invmapper_R, const char &type) { - if (type == 0) - cytnx::linalg_internal::AddInternalImpl( - out, Lin, Rin, len, shape, invmapper_L, invmapper_R); - else if (type == 1) 
- cytnx::linalg_internal::MulInternalImpl( - out, Lin, Rin, len, shape, invmapper_L, invmapper_R); - else if (type == 2) - cytnx::linalg_internal::SubInternalImpl( - out, Lin, Rin, len, shape, invmapper_L, invmapper_R); - else if (type == 3) - cytnx::linalg_internal::DivInternalImpl( - out, Lin, Rin, len, shape, invmapper_L, invmapper_R); - else if (type == 4) - cytnx::linalg_internal::CprInternalImpl( - out, Lin, Rin, len, shape, invmapper_L, invmapper_R); - else - cytnx::linalg_internal::ModInternalImpl( - out, Lin, Rin, len, shape, invmapper_L, invmapper_R); - } - void Arithmetic_internal_ftf(boost::intrusive_ptr &out, - boost::intrusive_ptr &Lin, - boost::intrusive_ptr &Rin, - const unsigned long long &len, - const std::vector &shape, - const std::vector &invmapper_L, - const std::vector &invmapper_R, const char &type) { - if (type == 0) - cytnx::linalg_internal::AddInternalImpl(out, Lin, Rin, len, shape, - invmapper_L, invmapper_R); - else if (type == 1) - cytnx::linalg_internal::MulInternalImpl(out, Lin, Rin, len, shape, - invmapper_L, invmapper_R); - else if (type == 2) - cytnx::linalg_internal::SubInternalImpl(out, Lin, Rin, len, shape, - invmapper_L, invmapper_R); - else if (type == 3) - cytnx::linalg_internal::DivInternalImpl(out, Lin, Rin, len, shape, - invmapper_L, invmapper_R); - else if (type == 4) - cytnx::linalg_internal::CprInternalImpl(out, Lin, Rin, len, shape, - invmapper_L, invmapper_R); - else - cytnx::linalg_internal::ModInternalImpl(out, Lin, Rin, len, shape, - invmapper_L, invmapper_R); - } - void Arithmetic_internal_ftu64(boost::intrusive_ptr &out, - boost::intrusive_ptr &Lin, - boost::intrusive_ptr &Rin, - const unsigned long long &len, - const std::vector &shape, - const std::vector &invmapper_L, - const std::vector &invmapper_R, const char &type) { - if (type == 0) - cytnx::linalg_internal::AddInternalImpl( - out, Lin, Rin, len, shape, invmapper_L, invmapper_R); - else if (type == 1) - cytnx::linalg_internal::MulInternalImpl( - out, 
Lin, Rin, len, shape, invmapper_L, invmapper_R); - else if (type == 2) - cytnx::linalg_internal::SubInternalImpl( - out, Lin, Rin, len, shape, invmapper_L, invmapper_R); - else if (type == 3) - cytnx::linalg_internal::DivInternalImpl( - out, Lin, Rin, len, shape, invmapper_L, invmapper_R); - else if (type == 4) - cytnx::linalg_internal::CprInternalImpl( - out, Lin, Rin, len, shape, invmapper_L, invmapper_R); - else - cytnx::linalg_internal::ModInternalImpl( - out, Lin, Rin, len, shape, invmapper_L, invmapper_R); - } - void Arithmetic_internal_fti64(boost::intrusive_ptr &out, - boost::intrusive_ptr &Lin, - boost::intrusive_ptr &Rin, - const unsigned long long &len, - const std::vector &shape, - const std::vector &invmapper_L, - const std::vector &invmapper_R, const char &type) { - if (type == 0) - cytnx::linalg_internal::AddInternalImpl(out, Lin, Rin, len, shape, - invmapper_L, invmapper_R); - else if (type == 1) - cytnx::linalg_internal::MulInternalImpl(out, Lin, Rin, len, shape, - invmapper_L, invmapper_R); - else if (type == 2) - cytnx::linalg_internal::SubInternalImpl(out, Lin, Rin, len, shape, - invmapper_L, invmapper_R); - else if (type == 3) - cytnx::linalg_internal::DivInternalImpl(out, Lin, Rin, len, shape, - invmapper_L, invmapper_R); - else if (type == 4) - cytnx::linalg_internal::CprInternalImpl(out, Lin, Rin, len, shape, - invmapper_L, invmapper_R); - else - cytnx::linalg_internal::ModInternalImpl(out, Lin, Rin, len, shape, - invmapper_L, invmapper_R); - } - void Arithmetic_internal_ftu32(boost::intrusive_ptr &out, - boost::intrusive_ptr &Lin, - boost::intrusive_ptr &Rin, - const unsigned long long &len, - const std::vector &shape, - const std::vector &invmapper_L, - const std::vector &invmapper_R, const char &type) { - if (type == 0) - cytnx::linalg_internal::AddInternalImpl( - out, Lin, Rin, len, shape, invmapper_L, invmapper_R); - else if (type == 1) - cytnx::linalg_internal::MulInternalImpl( - out, Lin, Rin, len, shape, invmapper_L, invmapper_R); - 
else if (type == 2) - cytnx::linalg_internal::SubInternalImpl( - out, Lin, Rin, len, shape, invmapper_L, invmapper_R); - else if (type == 3) - cytnx::linalg_internal::DivInternalImpl( - out, Lin, Rin, len, shape, invmapper_L, invmapper_R); - else if (type == 4) - cytnx::linalg_internal::CprInternalImpl( - out, Lin, Rin, len, shape, invmapper_L, invmapper_R); - else - cytnx::linalg_internal::ModInternalImpl( - out, Lin, Rin, len, shape, invmapper_L, invmapper_R); - } - void Arithmetic_internal_fti32(boost::intrusive_ptr &out, - boost::intrusive_ptr &Lin, - boost::intrusive_ptr &Rin, - const unsigned long long &len, - const std::vector &shape, - const std::vector &invmapper_L, - const std::vector &invmapper_R, const char &type) { - if (type == 0) - cytnx::linalg_internal::AddInternalImpl(out, Lin, Rin, len, shape, - invmapper_L, invmapper_R); - else if (type == 1) - cytnx::linalg_internal::MulInternalImpl(out, Lin, Rin, len, shape, - invmapper_L, invmapper_R); - else if (type == 2) - cytnx::linalg_internal::SubInternalImpl(out, Lin, Rin, len, shape, - invmapper_L, invmapper_R); - else if (type == 3) - cytnx::linalg_internal::DivInternalImpl(out, Lin, Rin, len, shape, - invmapper_L, invmapper_R); - else if (type == 4) - cytnx::linalg_internal::CprInternalImpl(out, Lin, Rin, len, shape, - invmapper_L, invmapper_R); - else - cytnx::linalg_internal::ModInternalImpl(out, Lin, Rin, len, shape, - invmapper_L, invmapper_R); - } - void Arithmetic_internal_fti16(boost::intrusive_ptr &out, - boost::intrusive_ptr &Lin, - boost::intrusive_ptr &Rin, - const unsigned long long &len, - const std::vector &shape, - const std::vector &invmapper_L, - const std::vector &invmapper_R, const char &type) { - if (type == 0) - cytnx::linalg_internal::AddInternalImpl(out, Lin, Rin, len, shape, - invmapper_L, invmapper_R); - else if (type == 1) - cytnx::linalg_internal::MulInternalImpl(out, Lin, Rin, len, shape, - invmapper_L, invmapper_R); - else if (type == 2) - 
cytnx::linalg_internal::SubInternalImpl(out, Lin, Rin, len, shape, - invmapper_L, invmapper_R); - else if (type == 3) - cytnx::linalg_internal::DivInternalImpl(out, Lin, Rin, len, shape, - invmapper_L, invmapper_R); - else if (type == 4) - cytnx::linalg_internal::CprInternalImpl(out, Lin, Rin, len, shape, - invmapper_L, invmapper_R); - else - cytnx::linalg_internal::ModInternalImpl(out, Lin, Rin, len, shape, - invmapper_L, invmapper_R); - } - void Arithmetic_internal_ftu16(boost::intrusive_ptr &out, - boost::intrusive_ptr &Lin, - boost::intrusive_ptr &Rin, - const unsigned long long &len, - const std::vector &shape, - const std::vector &invmapper_L, - const std::vector &invmapper_R, const char &type) { - if (type == 0) - cytnx::linalg_internal::AddInternalImpl( - out, Lin, Rin, len, shape, invmapper_L, invmapper_R); - else if (type == 1) - cytnx::linalg_internal::MulInternalImpl( - out, Lin, Rin, len, shape, invmapper_L, invmapper_R); - else if (type == 2) - cytnx::linalg_internal::SubInternalImpl( - out, Lin, Rin, len, shape, invmapper_L, invmapper_R); - else if (type == 3) - cytnx::linalg_internal::DivInternalImpl( - out, Lin, Rin, len, shape, invmapper_L, invmapper_R); - else if (type == 4) - cytnx::linalg_internal::CprInternalImpl( - out, Lin, Rin, len, shape, invmapper_L, invmapper_R); - else - cytnx::linalg_internal::ModInternalImpl( - out, Lin, Rin, len, shape, invmapper_L, invmapper_R); - } - void Arithmetic_internal_ftb(boost::intrusive_ptr &out, - boost::intrusive_ptr &Lin, - boost::intrusive_ptr &Rin, - const unsigned long long &len, - const std::vector &shape, - const std::vector &invmapper_L, - const std::vector &invmapper_R, const char &type) { - if (type == 0) - cytnx::linalg_internal::AddInternalImpl(out, Lin, Rin, len, shape, - invmapper_L, invmapper_R); - else if (type == 1) - cytnx::linalg_internal::MulInternalImpl(out, Lin, Rin, len, shape, - invmapper_L, invmapper_R); - else if (type == 2) - cytnx::linalg_internal::SubInternalImpl(out, Lin, 
Rin, len, shape, - invmapper_L, invmapper_R); - else if (type == 3) - cytnx::linalg_internal::DivInternalImpl(out, Lin, Rin, len, shape, - invmapper_L, invmapper_R); - else if (type == 4) - cytnx::linalg_internal::CprInternalImpl(out, Lin, Rin, len, shape, - invmapper_L, invmapper_R); - else - cytnx::linalg_internal::ModInternalImpl(out, Lin, Rin, len, shape, - invmapper_L, invmapper_R); - } - - //----------------------- - void Arithmetic_internal_u64tcd( - boost::intrusive_ptr &out, boost::intrusive_ptr &Lin, - boost::intrusive_ptr &Rin, const unsigned long long &len, - const std::vector &shape, const std::vector &invmapper_L, - const std::vector &invmapper_R, const char &type) { - if (type == 0) - cytnx::linalg_internal::AddInternalImpl( - out, Lin, Rin, len, shape, invmapper_L, invmapper_R); - else if (type == 1) - cytnx::linalg_internal::MulInternalImpl( - out, Lin, Rin, len, shape, invmapper_L, invmapper_R); - else if (type == 2) - cytnx::linalg_internal::SubInternalImpl( - out, Lin, Rin, len, shape, invmapper_L, invmapper_R); - else if (type == 3) - cytnx::linalg_internal::DivInternalImpl( - out, Lin, Rin, len, shape, invmapper_L, invmapper_R); - else if (type == 4) - cytnx::linalg_internal::CprInternalImpl( - out, Lin, Rin, len, shape, invmapper_L, invmapper_R); - else - cytnx::linalg_internal::ModInternalImpl( - out, Lin, Rin, len, shape, invmapper_L, invmapper_R); - } - void Arithmetic_internal_u64tcf( - boost::intrusive_ptr &out, boost::intrusive_ptr &Lin, - boost::intrusive_ptr &Rin, const unsigned long long &len, - const std::vector &shape, const std::vector &invmapper_L, - const std::vector &invmapper_R, const char &type) { - if (type == 0) - cytnx::linalg_internal::AddInternalImpl( - out, Lin, Rin, len, shape, invmapper_L, invmapper_R); - else if (type == 1) - cytnx::linalg_internal::MulInternalImpl( - out, Lin, Rin, len, shape, invmapper_L, invmapper_R); - else if (type == 2) - cytnx::linalg_internal::SubInternalImpl( - out, Lin, Rin, len, shape, 
invmapper_L, invmapper_R); - else if (type == 3) - cytnx::linalg_internal::DivInternalImpl( - out, Lin, Rin, len, shape, invmapper_L, invmapper_R); - else if (type == 4) - cytnx::linalg_internal::CprInternalImpl( - out, Lin, Rin, len, shape, invmapper_L, invmapper_R); - else - cytnx::linalg_internal::ModInternalImpl( - out, Lin, Rin, len, shape, invmapper_L, invmapper_R); - } - void Arithmetic_internal_u64td(boost::intrusive_ptr &out, - boost::intrusive_ptr &Lin, - boost::intrusive_ptr &Rin, - const unsigned long long &len, - const std::vector &shape, - const std::vector &invmapper_L, - const std::vector &invmapper_R, const char &type) { - if (type == 0) - cytnx::linalg_internal::AddInternalImpl( - out, Lin, Rin, len, shape, invmapper_L, invmapper_R); - else if (type == 1) - cytnx::linalg_internal::MulInternalImpl( - out, Lin, Rin, len, shape, invmapper_L, invmapper_R); - else if (type == 2) - cytnx::linalg_internal::SubInternalImpl( - out, Lin, Rin, len, shape, invmapper_L, invmapper_R); - else if (type == 3) - cytnx::linalg_internal::DivInternalImpl( - out, Lin, Rin, len, shape, invmapper_L, invmapper_R); - else if (type == 4) - cytnx::linalg_internal::CprInternalImpl( - out, Lin, Rin, len, shape, invmapper_L, invmapper_R); - else - cytnx::linalg_internal::ModInternalImpl( - out, Lin, Rin, len, shape, invmapper_L, invmapper_R); - } - void Arithmetic_internal_u64tf(boost::intrusive_ptr &out, - boost::intrusive_ptr &Lin, - boost::intrusive_ptr &Rin, - const unsigned long long &len, - const std::vector &shape, - const std::vector &invmapper_L, - const std::vector &invmapper_R, const char &type) { - if (type == 0) - cytnx::linalg_internal::AddInternalImpl( - out, Lin, Rin, len, shape, invmapper_L, invmapper_R); - else if (type == 1) - cytnx::linalg_internal::MulInternalImpl( - out, Lin, Rin, len, shape, invmapper_L, invmapper_R); - else if (type == 2) - cytnx::linalg_internal::SubInternalImpl( - out, Lin, Rin, len, shape, invmapper_L, invmapper_R); - else if (type == 
3) - cytnx::linalg_internal::DivInternalImpl( - out, Lin, Rin, len, shape, invmapper_L, invmapper_R); - else if (type == 4) - cytnx::linalg_internal::CprInternalImpl( - out, Lin, Rin, len, shape, invmapper_L, invmapper_R); - else - cytnx::linalg_internal::ModInternalImpl( - out, Lin, Rin, len, shape, invmapper_L, invmapper_R); - } - void Arithmetic_internal_u64tu64( - boost::intrusive_ptr &out, boost::intrusive_ptr &Lin, - boost::intrusive_ptr &Rin, const unsigned long long &len, - const std::vector &shape, const std::vector &invmapper_L, - const std::vector &invmapper_R, const char &type) { - if (type == 0) - cytnx::linalg_internal::AddInternalImpl( - out, Lin, Rin, len, shape, invmapper_L, invmapper_R); - else if (type == 1) - cytnx::linalg_internal::MulInternalImpl( - out, Lin, Rin, len, shape, invmapper_L, invmapper_R); - else if (type == 2) - cytnx::linalg_internal::SubInternalImpl( - out, Lin, Rin, len, shape, invmapper_L, invmapper_R); - else if (type == 3) - cytnx::linalg_internal::DivInternalImpl( - out, Lin, Rin, len, shape, invmapper_L, invmapper_R); - else if (type == 4) - cytnx::linalg_internal::CprInternalImpl( - out, Lin, Rin, len, shape, invmapper_L, invmapper_R); - else - cytnx::linalg_internal::ModInternalImpl( - out, Lin, Rin, len, shape, invmapper_L, invmapper_R); - } - void Arithmetic_internal_u64ti64( - boost::intrusive_ptr &out, boost::intrusive_ptr &Lin, - boost::intrusive_ptr &Rin, const unsigned long long &len, - const std::vector &shape, const std::vector &invmapper_L, - const std::vector &invmapper_R, const char &type) { - if (type == 0) - cytnx::linalg_internal::AddInternalImpl( - out, Lin, Rin, len, shape, invmapper_L, invmapper_R); - else if (type == 1) - cytnx::linalg_internal::MulInternalImpl( - out, Lin, Rin, len, shape, invmapper_L, invmapper_R); - else if (type == 2) - cytnx::linalg_internal::SubInternalImpl( - out, Lin, Rin, len, shape, invmapper_L, invmapper_R); - else if (type == 3) - cytnx::linalg_internal::DivInternalImpl( - 
out, Lin, Rin, len, shape, invmapper_L, invmapper_R); - else if (type == 4) - cytnx::linalg_internal::CprInternalImpl( - out, Lin, Rin, len, shape, invmapper_L, invmapper_R); - else - cytnx::linalg_internal::ModInternalImpl( - out, Lin, Rin, len, shape, invmapper_L, invmapper_R); - } - void Arithmetic_internal_u64tu32( - boost::intrusive_ptr &out, boost::intrusive_ptr &Lin, - boost::intrusive_ptr &Rin, const unsigned long long &len, - const std::vector &shape, const std::vector &invmapper_L, - const std::vector &invmapper_R, const char &type) { - if (type == 0) - cytnx::linalg_internal::AddInternalImpl( - out, Lin, Rin, len, shape, invmapper_L, invmapper_R); - else if (type == 1) - cytnx::linalg_internal::MulInternalImpl( - out, Lin, Rin, len, shape, invmapper_L, invmapper_R); - else if (type == 2) - cytnx::linalg_internal::SubInternalImpl( - out, Lin, Rin, len, shape, invmapper_L, invmapper_R); - else if (type == 3) - cytnx::linalg_internal::DivInternalImpl( - out, Lin, Rin, len, shape, invmapper_L, invmapper_R); - else if (type == 4) - cytnx::linalg_internal::CprInternalImpl( - out, Lin, Rin, len, shape, invmapper_L, invmapper_R); - else - cytnx::linalg_internal::ModInternalImpl( - out, Lin, Rin, len, shape, invmapper_L, invmapper_R); - } - void Arithmetic_internal_u64ti32( - boost::intrusive_ptr &out, boost::intrusive_ptr &Lin, - boost::intrusive_ptr &Rin, const unsigned long long &len, - const std::vector &shape, const std::vector &invmapper_L, - const std::vector &invmapper_R, const char &type) { - if (type == 0) - cytnx::linalg_internal::AddInternalImpl( - out, Lin, Rin, len, shape, invmapper_L, invmapper_R); - else if (type == 1) - cytnx::linalg_internal::MulInternalImpl( - out, Lin, Rin, len, shape, invmapper_L, invmapper_R); - else if (type == 2) - cytnx::linalg_internal::SubInternalImpl( - out, Lin, Rin, len, shape, invmapper_L, invmapper_R); - else if (type == 3) - cytnx::linalg_internal::DivInternalImpl( - out, Lin, Rin, len, shape, invmapper_L, 
invmapper_R); - else if (type == 4) - cytnx::linalg_internal::CprInternalImpl( - out, Lin, Rin, len, shape, invmapper_L, invmapper_R); - else - cytnx::linalg_internal::ModInternalImpl( - out, Lin, Rin, len, shape, invmapper_L, invmapper_R); - } - void Arithmetic_internal_u64ti16( - boost::intrusive_ptr &out, boost::intrusive_ptr &Lin, - boost::intrusive_ptr &Rin, const unsigned long long &len, - const std::vector &shape, const std::vector &invmapper_L, - const std::vector &invmapper_R, const char &type) { - if (type == 0) - cytnx::linalg_internal::AddInternalImpl( - out, Lin, Rin, len, shape, invmapper_L, invmapper_R); - else if (type == 1) - cytnx::linalg_internal::MulInternalImpl( - out, Lin, Rin, len, shape, invmapper_L, invmapper_R); - else if (type == 2) - cytnx::linalg_internal::SubInternalImpl( - out, Lin, Rin, len, shape, invmapper_L, invmapper_R); - else if (type == 3) - cytnx::linalg_internal::DivInternalImpl( - out, Lin, Rin, len, shape, invmapper_L, invmapper_R); - else if (type == 4) - cytnx::linalg_internal::CprInternalImpl( - out, Lin, Rin, len, shape, invmapper_L, invmapper_R); - else - cytnx::linalg_internal::ModInternalImpl( - out, Lin, Rin, len, shape, invmapper_L, invmapper_R); - } - void Arithmetic_internal_u64tu16( - boost::intrusive_ptr &out, boost::intrusive_ptr &Lin, - boost::intrusive_ptr &Rin, const unsigned long long &len, - const std::vector &shape, const std::vector &invmapper_L, - const std::vector &invmapper_R, const char &type) { - if (type == 0) - cytnx::linalg_internal::AddInternalImpl( - out, Lin, Rin, len, shape, invmapper_L, invmapper_R); - else if (type == 1) - cytnx::linalg_internal::MulInternalImpl( - out, Lin, Rin, len, shape, invmapper_L, invmapper_R); - else if (type == 2) - cytnx::linalg_internal::SubInternalImpl( - out, Lin, Rin, len, shape, invmapper_L, invmapper_R); - else if (type == 3) - cytnx::linalg_internal::DivInternalImpl( - out, Lin, Rin, len, shape, invmapper_L, invmapper_R); - else if (type == 4) - 
cytnx::linalg_internal::CprInternalImpl( - out, Lin, Rin, len, shape, invmapper_L, invmapper_R); - else - cytnx::linalg_internal::ModInternalImpl( - out, Lin, Rin, len, shape, invmapper_L, invmapper_R); - } - void Arithmetic_internal_u64tb(boost::intrusive_ptr &out, - boost::intrusive_ptr &Lin, - boost::intrusive_ptr &Rin, - const unsigned long long &len, - const std::vector &shape, - const std::vector &invmapper_L, - const std::vector &invmapper_R, const char &type) { - if (type == 0) - cytnx::linalg_internal::AddInternalImpl(out, Lin, Rin, len, shape, - invmapper_L, invmapper_R); - else if (type == 1) - cytnx::linalg_internal::MulInternalImpl(out, Lin, Rin, len, shape, - invmapper_L, invmapper_R); - else if (type == 2) - cytnx::linalg_internal::SubInternalImpl(out, Lin, Rin, len, shape, - invmapper_L, invmapper_R); - else if (type == 3) - cytnx::linalg_internal::DivInternalImpl(out, Lin, Rin, len, shape, - invmapper_L, invmapper_R); - else if (type == 4) - cytnx::linalg_internal::CprInternalImpl(out, Lin, Rin, len, shape, - invmapper_L, invmapper_R); - else - cytnx::linalg_internal::ModInternalImpl(out, Lin, Rin, len, shape, - invmapper_L, invmapper_R); - } - //---------------------- - void Arithmetic_internal_i64tcd( - boost::intrusive_ptr &out, boost::intrusive_ptr &Lin, - boost::intrusive_ptr &Rin, const unsigned long long &len, - const std::vector &shape, const std::vector &invmapper_L, - const std::vector &invmapper_R, const char &type) { - if (type == 0) - cytnx::linalg_internal::AddInternalImpl( - out, Lin, Rin, len, shape, invmapper_L, invmapper_R); - else if (type == 1) - cytnx::linalg_internal::MulInternalImpl( - out, Lin, Rin, len, shape, invmapper_L, invmapper_R); - else if (type == 2) - cytnx::linalg_internal::SubInternalImpl( - out, Lin, Rin, len, shape, invmapper_L, invmapper_R); - else if (type == 3) - cytnx::linalg_internal::DivInternalImpl( - out, Lin, Rin, len, shape, invmapper_L, invmapper_R); - else if (type == 4) - 
cytnx::linalg_internal::CprInternalImpl( - out, Lin, Rin, len, shape, invmapper_L, invmapper_R); - else - cytnx::linalg_internal::ModInternalImpl( - out, Lin, Rin, len, shape, invmapper_L, invmapper_R); - } - void Arithmetic_internal_i64tcf( - boost::intrusive_ptr &out, boost::intrusive_ptr &Lin, - boost::intrusive_ptr &Rin, const unsigned long long &len, - const std::vector &shape, const std::vector &invmapper_L, - const std::vector &invmapper_R, const char &type) { - if (type == 0) - cytnx::linalg_internal::AddInternalImpl( - out, Lin, Rin, len, shape, invmapper_L, invmapper_R); - else if (type == 1) - cytnx::linalg_internal::MulInternalImpl( - out, Lin, Rin, len, shape, invmapper_L, invmapper_R); - else if (type == 2) - cytnx::linalg_internal::SubInternalImpl( - out, Lin, Rin, len, shape, invmapper_L, invmapper_R); - else if (type == 3) - cytnx::linalg_internal::DivInternalImpl( - out, Lin, Rin, len, shape, invmapper_L, invmapper_R); - else if (type == 4) - cytnx::linalg_internal::CprInternalImpl( - out, Lin, Rin, len, shape, invmapper_L, invmapper_R); - else - cytnx::linalg_internal::ModInternalImpl( - out, Lin, Rin, len, shape, invmapper_L, invmapper_R); - } - void Arithmetic_internal_i64td(boost::intrusive_ptr &out, - boost::intrusive_ptr &Lin, - boost::intrusive_ptr &Rin, - const unsigned long long &len, - const std::vector &shape, - const std::vector &invmapper_L, - const std::vector &invmapper_R, const char &type) { - if (type == 0) - cytnx::linalg_internal::AddInternalImpl( - out, Lin, Rin, len, shape, invmapper_L, invmapper_R); - else if (type == 1) - cytnx::linalg_internal::MulInternalImpl( - out, Lin, Rin, len, shape, invmapper_L, invmapper_R); - else if (type == 2) - cytnx::linalg_internal::SubInternalImpl( - out, Lin, Rin, len, shape, invmapper_L, invmapper_R); - else if (type == 3) - cytnx::linalg_internal::DivInternalImpl( - out, Lin, Rin, len, shape, invmapper_L, invmapper_R); - else if (type == 4) - cytnx::linalg_internal::CprInternalImpl( - out, 
Lin, Rin, len, shape, invmapper_L, invmapper_R); - else - cytnx::linalg_internal::ModInternalImpl( - out, Lin, Rin, len, shape, invmapper_L, invmapper_R); - } - void Arithmetic_internal_i64tf(boost::intrusive_ptr &out, - boost::intrusive_ptr &Lin, - boost::intrusive_ptr &Rin, - const unsigned long long &len, - const std::vector &shape, - const std::vector &invmapper_L, - const std::vector &invmapper_R, const char &type) { - if (type == 0) - cytnx::linalg_internal::AddInternalImpl(out, Lin, Rin, len, shape, - invmapper_L, invmapper_R); - else if (type == 1) - cytnx::linalg_internal::MulInternalImpl(out, Lin, Rin, len, shape, - invmapper_L, invmapper_R); - else if (type == 2) - cytnx::linalg_internal::SubInternalImpl(out, Lin, Rin, len, shape, - invmapper_L, invmapper_R); - else if (type == 3) - cytnx::linalg_internal::DivInternalImpl(out, Lin, Rin, len, shape, - invmapper_L, invmapper_R); - else if (type == 4) - cytnx::linalg_internal::CprInternalImpl(out, Lin, Rin, len, shape, - invmapper_L, invmapper_R); - else - cytnx::linalg_internal::ModInternalImpl(out, Lin, Rin, len, shape, - invmapper_L, invmapper_R); - } - void Arithmetic_internal_i64tu64( - boost::intrusive_ptr &out, boost::intrusive_ptr &Lin, - boost::intrusive_ptr &Rin, const unsigned long long &len, - const std::vector &shape, const std::vector &invmapper_L, - const std::vector &invmapper_R, const char &type) { - if (type == 0) - cytnx::linalg_internal::AddInternalImpl( - out, Lin, Rin, len, shape, invmapper_L, invmapper_R); - else if (type == 1) - cytnx::linalg_internal::MulInternalImpl( - out, Lin, Rin, len, shape, invmapper_L, invmapper_R); - else if (type == 2) - cytnx::linalg_internal::SubInternalImpl( - out, Lin, Rin, len, shape, invmapper_L, invmapper_R); - else if (type == 3) - cytnx::linalg_internal::DivInternalImpl( - out, Lin, Rin, len, shape, invmapper_L, invmapper_R); - else if (type == 4) - cytnx::linalg_internal::CprInternalImpl( - out, Lin, Rin, len, shape, invmapper_L, invmapper_R); - 
else - cytnx::linalg_internal::ModInternalImpl( - out, Lin, Rin, len, shape, invmapper_L, invmapper_R); - } - void Arithmetic_internal_i64ti64( - boost::intrusive_ptr &out, boost::intrusive_ptr &Lin, - boost::intrusive_ptr &Rin, const unsigned long long &len, - const std::vector &shape, const std::vector &invmapper_L, - const std::vector &invmapper_R, const char &type) { - if (type == 0) - cytnx::linalg_internal::AddInternalImpl(out, Lin, Rin, len, shape, - invmapper_L, invmapper_R); - else if (type == 1) - cytnx::linalg_internal::MulInternalImpl(out, Lin, Rin, len, shape, - invmapper_L, invmapper_R); - else if (type == 2) - cytnx::linalg_internal::SubInternalImpl(out, Lin, Rin, len, shape, - invmapper_L, invmapper_R); - else if (type == 3) - cytnx::linalg_internal::DivInternalImpl(out, Lin, Rin, len, shape, - invmapper_L, invmapper_R); - else if (type == 4) - cytnx::linalg_internal::CprInternalImpl(out, Lin, Rin, len, shape, - invmapper_L, invmapper_R); - else - cytnx::linalg_internal::ModInternalImpl(out, Lin, Rin, len, shape, - invmapper_L, invmapper_R); - } - void Arithmetic_internal_i64tu32( - boost::intrusive_ptr &out, boost::intrusive_ptr &Lin, - boost::intrusive_ptr &Rin, const unsigned long long &len, - const std::vector &shape, const std::vector &invmapper_L, - const std::vector &invmapper_R, const char &type) { - if (type == 0) - cytnx::linalg_internal::AddInternalImpl( - out, Lin, Rin, len, shape, invmapper_L, invmapper_R); - else if (type == 1) - cytnx::linalg_internal::MulInternalImpl( - out, Lin, Rin, len, shape, invmapper_L, invmapper_R); - else if (type == 2) - cytnx::linalg_internal::SubInternalImpl( - out, Lin, Rin, len, shape, invmapper_L, invmapper_R); - else if (type == 3) - cytnx::linalg_internal::DivInternalImpl( - out, Lin, Rin, len, shape, invmapper_L, invmapper_R); - else if (type == 4) - cytnx::linalg_internal::CprInternalImpl( - out, Lin, Rin, len, shape, invmapper_L, invmapper_R); - else - cytnx::linalg_internal::ModInternalImpl( - 
out, Lin, Rin, len, shape, invmapper_L, invmapper_R); - } - void Arithmetic_internal_i64ti32( - boost::intrusive_ptr &out, boost::intrusive_ptr &Lin, - boost::intrusive_ptr &Rin, const unsigned long long &len, - const std::vector &shape, const std::vector &invmapper_L, - const std::vector &invmapper_R, const char &type) { - if (type == 0) - cytnx::linalg_internal::AddInternalImpl(out, Lin, Rin, len, shape, - invmapper_L, invmapper_R); - else if (type == 1) - cytnx::linalg_internal::MulInternalImpl(out, Lin, Rin, len, shape, - invmapper_L, invmapper_R); - else if (type == 2) - cytnx::linalg_internal::SubInternalImpl(out, Lin, Rin, len, shape, - invmapper_L, invmapper_R); - else if (type == 3) - cytnx::linalg_internal::DivInternalImpl(out, Lin, Rin, len, shape, - invmapper_L, invmapper_R); - else if (type == 4) - cytnx::linalg_internal::CprInternalImpl(out, Lin, Rin, len, shape, - invmapper_L, invmapper_R); - else - cytnx::linalg_internal::ModInternalImpl(out, Lin, Rin, len, shape, - invmapper_L, invmapper_R); - } - void Arithmetic_internal_i64ti16( - boost::intrusive_ptr &out, boost::intrusive_ptr &Lin, - boost::intrusive_ptr &Rin, const unsigned long long &len, - const std::vector &shape, const std::vector &invmapper_L, - const std::vector &invmapper_R, const char &type) { - if (type == 0) - cytnx::linalg_internal::AddInternalImpl(out, Lin, Rin, len, shape, - invmapper_L, invmapper_R); - else if (type == 1) - cytnx::linalg_internal::MulInternalImpl(out, Lin, Rin, len, shape, - invmapper_L, invmapper_R); - else if (type == 2) - cytnx::linalg_internal::SubInternalImpl(out, Lin, Rin, len, shape, - invmapper_L, invmapper_R); - else if (type == 3) - cytnx::linalg_internal::DivInternalImpl(out, Lin, Rin, len, shape, - invmapper_L, invmapper_R); - else - cytnx::linalg_internal::ModInternalImpl(out, Lin, Rin, len, shape, - invmapper_L, invmapper_R); - } - void Arithmetic_internal_i64tu16( - boost::intrusive_ptr &out, boost::intrusive_ptr &Lin, - boost::intrusive_ptr &Rin, 
const unsigned long long &len, - const std::vector &shape, const std::vector &invmapper_L, - const std::vector &invmapper_R, const char &type) { - if (type == 0) - cytnx::linalg_internal::AddInternalImpl( - out, Lin, Rin, len, shape, invmapper_L, invmapper_R); - else if (type == 1) - cytnx::linalg_internal::MulInternalImpl( - out, Lin, Rin, len, shape, invmapper_L, invmapper_R); - else if (type == 2) - cytnx::linalg_internal::SubInternalImpl( - out, Lin, Rin, len, shape, invmapper_L, invmapper_R); - else if (type == 3) - cytnx::linalg_internal::DivInternalImpl( - out, Lin, Rin, len, shape, invmapper_L, invmapper_R); - else if (type == 4) - cytnx::linalg_internal::CprInternalImpl( - out, Lin, Rin, len, shape, invmapper_L, invmapper_R); - else - cytnx::linalg_internal::ModInternalImpl( - out, Lin, Rin, len, shape, invmapper_L, invmapper_R); - } - void Arithmetic_internal_i64tb(boost::intrusive_ptr &out, - boost::intrusive_ptr &Lin, - boost::intrusive_ptr &Rin, - const unsigned long long &len, - const std::vector &shape, - const std::vector &invmapper_L, - const std::vector &invmapper_R, const char &type) { - if (type == 0) - cytnx::linalg_internal::AddInternalImpl(out, Lin, Rin, len, shape, - invmapper_L, invmapper_R); - else if (type == 1) - cytnx::linalg_internal::MulInternalImpl(out, Lin, Rin, len, shape, - invmapper_L, invmapper_R); - else if (type == 2) - cytnx::linalg_internal::SubInternalImpl(out, Lin, Rin, len, shape, - invmapper_L, invmapper_R); - else if (type == 3) - cytnx::linalg_internal::DivInternalImpl(out, Lin, Rin, len, shape, - invmapper_L, invmapper_R); - else if (type == 4) - cytnx::linalg_internal::CprInternalImpl(out, Lin, Rin, len, shape, - invmapper_L, invmapper_R); - else - cytnx::linalg_internal::ModInternalImpl(out, Lin, Rin, len, shape, - invmapper_L, invmapper_R); - } - - //------------------- - void Arithmetic_internal_u32tcd( - boost::intrusive_ptr &out, boost::intrusive_ptr &Lin, - boost::intrusive_ptr &Rin, const unsigned long long 
&len, - const std::vector &shape, const std::vector &invmapper_L, - const std::vector &invmapper_R, const char &type) { - if (type == 0) - cytnx::linalg_internal::AddInternalImpl( - out, Lin, Rin, len, shape, invmapper_L, invmapper_R); - else if (type == 1) - cytnx::linalg_internal::MulInternalImpl( - out, Lin, Rin, len, shape, invmapper_L, invmapper_R); - else if (type == 2) - cytnx::linalg_internal::SubInternalImpl( - out, Lin, Rin, len, shape, invmapper_L, invmapper_R); - else if (type == 3) - cytnx::linalg_internal::DivInternalImpl( - out, Lin, Rin, len, shape, invmapper_L, invmapper_R); - else if (type == 4) - cytnx::linalg_internal::CprInternalImpl( - out, Lin, Rin, len, shape, invmapper_L, invmapper_R); - else - cytnx::linalg_internal::ModInternalImpl( - out, Lin, Rin, len, shape, invmapper_L, invmapper_R); - } - void Arithmetic_internal_u32tcf( - boost::intrusive_ptr &out, boost::intrusive_ptr &Lin, - boost::intrusive_ptr &Rin, const unsigned long long &len, - const std::vector &shape, const std::vector &invmapper_L, - const std::vector &invmapper_R, const char &type) { - if (type == 0) - cytnx::linalg_internal::AddInternalImpl( - out, Lin, Rin, len, shape, invmapper_L, invmapper_R); - else if (type == 1) - cytnx::linalg_internal::MulInternalImpl( - out, Lin, Rin, len, shape, invmapper_L, invmapper_R); - else if (type == 2) - cytnx::linalg_internal::SubInternalImpl( - out, Lin, Rin, len, shape, invmapper_L, invmapper_R); - else if (type == 3) - cytnx::linalg_internal::DivInternalImpl( - out, Lin, Rin, len, shape, invmapper_L, invmapper_R); - else if (type == 4) - cytnx::linalg_internal::CprInternalImpl( - out, Lin, Rin, len, shape, invmapper_L, invmapper_R); - else - cytnx::linalg_internal::ModInternalImpl( - out, Lin, Rin, len, shape, invmapper_L, invmapper_R); - } - void Arithmetic_internal_u32td(boost::intrusive_ptr &out, - boost::intrusive_ptr &Lin, - boost::intrusive_ptr &Rin, - const unsigned long long &len, - const std::vector &shape, - const 
std::vector &invmapper_L, - const std::vector &invmapper_R, const char &type) { - if (type == 0) - cytnx::linalg_internal::AddInternalImpl( - out, Lin, Rin, len, shape, invmapper_L, invmapper_R); - else if (type == 1) - cytnx::linalg_internal::MulInternalImpl( - out, Lin, Rin, len, shape, invmapper_L, invmapper_R); - else if (type == 2) - cytnx::linalg_internal::SubInternalImpl( - out, Lin, Rin, len, shape, invmapper_L, invmapper_R); - else if (type == 3) - cytnx::linalg_internal::DivInternalImpl( - out, Lin, Rin, len, shape, invmapper_L, invmapper_R); - else if (type == 4) - cytnx::linalg_internal::CprInternalImpl( - out, Lin, Rin, len, shape, invmapper_L, invmapper_R); - else - cytnx::linalg_internal::ModInternalImpl( - out, Lin, Rin, len, shape, invmapper_L, invmapper_R); - } - void Arithmetic_internal_u32tf(boost::intrusive_ptr &out, - boost::intrusive_ptr &Lin, - boost::intrusive_ptr &Rin, - const unsigned long long &len, - const std::vector &shape, - const std::vector &invmapper_L, - const std::vector &invmapper_R, const char &type) { - if (type == 0) - cytnx::linalg_internal::AddInternalImpl( - out, Lin, Rin, len, shape, invmapper_L, invmapper_R); - else if (type == 1) - cytnx::linalg_internal::MulInternalImpl( - out, Lin, Rin, len, shape, invmapper_L, invmapper_R); - else if (type == 2) - cytnx::linalg_internal::SubInternalImpl( - out, Lin, Rin, len, shape, invmapper_L, invmapper_R); - else if (type == 3) - cytnx::linalg_internal::DivInternalImpl( - out, Lin, Rin, len, shape, invmapper_L, invmapper_R); - else if (type == 4) - cytnx::linalg_internal::CprInternalImpl( - out, Lin, Rin, len, shape, invmapper_L, invmapper_R); - else - cytnx::linalg_internal::ModInternalImpl( - out, Lin, Rin, len, shape, invmapper_L, invmapper_R); - } - void Arithmetic_internal_u32tu64( - boost::intrusive_ptr &out, boost::intrusive_ptr &Lin, - boost::intrusive_ptr &Rin, const unsigned long long &len, - const std::vector &shape, const std::vector &invmapper_L, - const std::vector 
&invmapper_R, const char &type) { - if (type == 0) - cytnx::linalg_internal::AddInternalImpl( - out, Lin, Rin, len, shape, invmapper_L, invmapper_R); - else if (type == 1) - cytnx::linalg_internal::MulInternalImpl( - out, Lin, Rin, len, shape, invmapper_L, invmapper_R); - else if (type == 2) - cytnx::linalg_internal::SubInternalImpl( - out, Lin, Rin, len, shape, invmapper_L, invmapper_R); - else if (type == 3) - cytnx::linalg_internal::DivInternalImpl( - out, Lin, Rin, len, shape, invmapper_L, invmapper_R); - else if (type == 4) - cytnx::linalg_internal::CprInternalImpl( - out, Lin, Rin, len, shape, invmapper_L, invmapper_R); - else - cytnx::linalg_internal::ModInternalImpl( - out, Lin, Rin, len, shape, invmapper_L, invmapper_R); - } - void Arithmetic_internal_u32ti64( - boost::intrusive_ptr &out, boost::intrusive_ptr &Lin, - boost::intrusive_ptr &Rin, const unsigned long long &len, - const std::vector &shape, const std::vector &invmapper_L, - const std::vector &invmapper_R, const char &type) { - if (type == 0) - cytnx::linalg_internal::AddInternalImpl( - out, Lin, Rin, len, shape, invmapper_L, invmapper_R); - else if (type == 1) - cytnx::linalg_internal::MulInternalImpl( - out, Lin, Rin, len, shape, invmapper_L, invmapper_R); - else if (type == 2) - cytnx::linalg_internal::SubInternalImpl( - out, Lin, Rin, len, shape, invmapper_L, invmapper_R); - else if (type == 3) - cytnx::linalg_internal::DivInternalImpl( - out, Lin, Rin, len, shape, invmapper_L, invmapper_R); - else if (type == 4) - cytnx::linalg_internal::CprInternalImpl( - out, Lin, Rin, len, shape, invmapper_L, invmapper_R); - else - cytnx::linalg_internal::ModInternalImpl( - out, Lin, Rin, len, shape, invmapper_L, invmapper_R); - } - void Arithmetic_internal_u32tu32( - boost::intrusive_ptr &out, boost::intrusive_ptr &Lin, - boost::intrusive_ptr &Rin, const unsigned long long &len, - const std::vector &shape, const std::vector &invmapper_L, - const std::vector &invmapper_R, const char &type) { - if (type == 
0) - cytnx::linalg_internal::AddInternalImpl( - out, Lin, Rin, len, shape, invmapper_L, invmapper_R); - else if (type == 1) - cytnx::linalg_internal::MulInternalImpl( - out, Lin, Rin, len, shape, invmapper_L, invmapper_R); - else if (type == 2) - cytnx::linalg_internal::SubInternalImpl( - out, Lin, Rin, len, shape, invmapper_L, invmapper_R); - else if (type == 3) - cytnx::linalg_internal::DivInternalImpl( - out, Lin, Rin, len, shape, invmapper_L, invmapper_R); - else if (type == 4) - cytnx::linalg_internal::CprInternalImpl( - out, Lin, Rin, len, shape, invmapper_L, invmapper_R); - else - cytnx::linalg_internal::ModInternalImpl( - out, Lin, Rin, len, shape, invmapper_L, invmapper_R); - } - void Arithmetic_internal_u32ti32( - boost::intrusive_ptr &out, boost::intrusive_ptr &Lin, - boost::intrusive_ptr &Rin, const unsigned long long &len, - const std::vector &shape, const std::vector &invmapper_L, - const std::vector &invmapper_R, const char &type) { - if (type == 0) - cytnx::linalg_internal::AddInternalImpl( - out, Lin, Rin, len, shape, invmapper_L, invmapper_R); - else if (type == 1) - cytnx::linalg_internal::MulInternalImpl( - out, Lin, Rin, len, shape, invmapper_L, invmapper_R); - else if (type == 2) - cytnx::linalg_internal::SubInternalImpl( - out, Lin, Rin, len, shape, invmapper_L, invmapper_R); - else if (type == 3) - cytnx::linalg_internal::DivInternalImpl( - out, Lin, Rin, len, shape, invmapper_L, invmapper_R); - else if (type == 4) - cytnx::linalg_internal::CprInternalImpl( - out, Lin, Rin, len, shape, invmapper_L, invmapper_R); - else - cytnx::linalg_internal::ModInternalImpl( - out, Lin, Rin, len, shape, invmapper_L, invmapper_R); - } - void Arithmetic_internal_u32ti16( - boost::intrusive_ptr &out, boost::intrusive_ptr &Lin, - boost::intrusive_ptr &Rin, const unsigned long long &len, - const std::vector &shape, const std::vector &invmapper_L, - const std::vector &invmapper_R, const char &type) { - if (type == 0) - cytnx::linalg_internal::AddInternalImpl( - 
out, Lin, Rin, len, shape, invmapper_L, invmapper_R); - else if (type == 1) - cytnx::linalg_internal::MulInternalImpl( - out, Lin, Rin, len, shape, invmapper_L, invmapper_R); - else if (type == 2) - cytnx::linalg_internal::SubInternalImpl( - out, Lin, Rin, len, shape, invmapper_L, invmapper_R); - else if (type == 3) - cytnx::linalg_internal::DivInternalImpl( - out, Lin, Rin, len, shape, invmapper_L, invmapper_R); - else if (type == 4) - cytnx::linalg_internal::CprInternalImpl( - out, Lin, Rin, len, shape, invmapper_L, invmapper_R); - else - cytnx::linalg_internal::ModInternalImpl( - out, Lin, Rin, len, shape, invmapper_L, invmapper_R); - } - void Arithmetic_internal_u32tu16( - boost::intrusive_ptr &out, boost::intrusive_ptr &Lin, - boost::intrusive_ptr &Rin, const unsigned long long &len, - const std::vector &shape, const std::vector &invmapper_L, - const std::vector &invmapper_R, const char &type) { - if (type == 0) - cytnx::linalg_internal::AddInternalImpl( - out, Lin, Rin, len, shape, invmapper_L, invmapper_R); - else if (type == 1) - cytnx::linalg_internal::MulInternalImpl( - out, Lin, Rin, len, shape, invmapper_L, invmapper_R); - else if (type == 2) - cytnx::linalg_internal::SubInternalImpl( - out, Lin, Rin, len, shape, invmapper_L, invmapper_R); - else if (type == 3) - cytnx::linalg_internal::DivInternalImpl( - out, Lin, Rin, len, shape, invmapper_L, invmapper_R); - else if (type == 4) - cytnx::linalg_internal::CprInternalImpl( - out, Lin, Rin, len, shape, invmapper_L, invmapper_R); - else - cytnx::linalg_internal::ModInternalImpl( - out, Lin, Rin, len, shape, invmapper_L, invmapper_R); - } - void Arithmetic_internal_u32tb(boost::intrusive_ptr &out, - boost::intrusive_ptr &Lin, - boost::intrusive_ptr &Rin, - const unsigned long long &len, - const std::vector &shape, - const std::vector &invmapper_L, - const std::vector &invmapper_R, const char &type) { - if (type == 0) - cytnx::linalg_internal::AddInternalImpl(out, Lin, Rin, len, shape, - invmapper_L, 
invmapper_R); - else if (type == 1) - cytnx::linalg_internal::MulInternalImpl(out, Lin, Rin, len, shape, - invmapper_L, invmapper_R); - else if (type == 2) - cytnx::linalg_internal::SubInternalImpl(out, Lin, Rin, len, shape, - invmapper_L, invmapper_R); - else if (type == 3) - cytnx::linalg_internal::DivInternalImpl(out, Lin, Rin, len, shape, - invmapper_L, invmapper_R); - else if (type == 4) - cytnx::linalg_internal::CprInternalImpl(out, Lin, Rin, len, shape, - invmapper_L, invmapper_R); - else - cytnx::linalg_internal::ModInternalImpl(out, Lin, Rin, len, shape, - invmapper_L, invmapper_R); - } - - //---------------------- - void Arithmetic_internal_i32tcd( - boost::intrusive_ptr &out, boost::intrusive_ptr &Lin, - boost::intrusive_ptr &Rin, const unsigned long long &len, - const std::vector &shape, const std::vector &invmapper_L, - const std::vector &invmapper_R, const char &type) { - if (type == 0) - cytnx::linalg_internal::AddInternalImpl( - out, Lin, Rin, len, shape, invmapper_L, invmapper_R); - else if (type == 1) - cytnx::linalg_internal::MulInternalImpl( - out, Lin, Rin, len, shape, invmapper_L, invmapper_R); - else if (type == 2) - cytnx::linalg_internal::SubInternalImpl( - out, Lin, Rin, len, shape, invmapper_L, invmapper_R); - else if (type == 3) - cytnx::linalg_internal::DivInternalImpl( - out, Lin, Rin, len, shape, invmapper_L, invmapper_R); - else if (type == 4) - cytnx::linalg_internal::CprInternalImpl( - out, Lin, Rin, len, shape, invmapper_L, invmapper_R); - else - cytnx::linalg_internal::ModInternalImpl( - out, Lin, Rin, len, shape, invmapper_L, invmapper_R); - } - void Arithmetic_internal_i32tcf( - boost::intrusive_ptr &out, boost::intrusive_ptr &Lin, - boost::intrusive_ptr &Rin, const unsigned long long &len, - const std::vector &shape, const std::vector &invmapper_L, - const std::vector &invmapper_R, const char &type) { - if (type == 0) - cytnx::linalg_internal::AddInternalImpl( - out, Lin, Rin, len, shape, invmapper_L, invmapper_R); - else if 
(type == 1) - cytnx::linalg_internal::MulInternalImpl( - out, Lin, Rin, len, shape, invmapper_L, invmapper_R); - else if (type == 2) - cytnx::linalg_internal::SubInternalImpl( - out, Lin, Rin, len, shape, invmapper_L, invmapper_R); - else if (type == 3) - cytnx::linalg_internal::DivInternalImpl( - out, Lin, Rin, len, shape, invmapper_L, invmapper_R); - else if (type == 4) - cytnx::linalg_internal::CprInternalImpl( - out, Lin, Rin, len, shape, invmapper_L, invmapper_R); - else - cytnx::linalg_internal::ModInternalImpl( - out, Lin, Rin, len, shape, invmapper_L, invmapper_R); - } - void Arithmetic_internal_i32td(boost::intrusive_ptr &out, - boost::intrusive_ptr &Lin, - boost::intrusive_ptr &Rin, - const unsigned long long &len, - const std::vector &shape, - const std::vector &invmapper_L, - const std::vector &invmapper_R, const char &type) { - if (type == 0) - cytnx::linalg_internal::AddInternalImpl( - out, Lin, Rin, len, shape, invmapper_L, invmapper_R); - else if (type == 1) - cytnx::linalg_internal::MulInternalImpl( - out, Lin, Rin, len, shape, invmapper_L, invmapper_R); - else if (type == 2) - cytnx::linalg_internal::SubInternalImpl( - out, Lin, Rin, len, shape, invmapper_L, invmapper_R); - else if (type == 3) - cytnx::linalg_internal::DivInternalImpl( - out, Lin, Rin, len, shape, invmapper_L, invmapper_R); - else if (type == 4) - cytnx::linalg_internal::CprInternalImpl( - out, Lin, Rin, len, shape, invmapper_L, invmapper_R); - else - cytnx::linalg_internal::ModInternalImpl( - out, Lin, Rin, len, shape, invmapper_L, invmapper_R); - } - void Arithmetic_internal_i32tf(boost::intrusive_ptr &out, - boost::intrusive_ptr &Lin, - boost::intrusive_ptr &Rin, - const unsigned long long &len, - const std::vector &shape, - const std::vector &invmapper_L, - const std::vector &invmapper_R, const char &type) { - if (type == 0) - cytnx::linalg_internal::AddInternalImpl(out, Lin, Rin, len, shape, - invmapper_L, invmapper_R); - else if (type == 1) - 
cytnx::linalg_internal::MulInternalImpl(out, Lin, Rin, len, shape, - invmapper_L, invmapper_R); - else if (type == 2) - cytnx::linalg_internal::SubInternalImpl(out, Lin, Rin, len, shape, - invmapper_L, invmapper_R); - else if (type == 3) - cytnx::linalg_internal::DivInternalImpl(out, Lin, Rin, len, shape, - invmapper_L, invmapper_R); - else if (type == 4) - cytnx::linalg_internal::CprInternalImpl(out, Lin, Rin, len, shape, - invmapper_L, invmapper_R); - else - cytnx::linalg_internal::ModInternalImpl(out, Lin, Rin, len, shape, - invmapper_L, invmapper_R); - } - void Arithmetic_internal_i32tu64( - boost::intrusive_ptr &out, boost::intrusive_ptr &Lin, - boost::intrusive_ptr &Rin, const unsigned long long &len, - const std::vector &shape, const std::vector &invmapper_L, - const std::vector &invmapper_R, const char &type) { - if (type == 0) - cytnx::linalg_internal::AddInternalImpl( - out, Lin, Rin, len, shape, invmapper_L, invmapper_R); - else if (type == 1) - cytnx::linalg_internal::MulInternalImpl( - out, Lin, Rin, len, shape, invmapper_L, invmapper_R); - else if (type == 2) - cytnx::linalg_internal::SubInternalImpl( - out, Lin, Rin, len, shape, invmapper_L, invmapper_R); - else if (type == 3) - cytnx::linalg_internal::DivInternalImpl( - out, Lin, Rin, len, shape, invmapper_L, invmapper_R); - else if (type == 4) - cytnx::linalg_internal::CprInternalImpl( - out, Lin, Rin, len, shape, invmapper_L, invmapper_R); - else - cytnx::linalg_internal::ModInternalImpl( - out, Lin, Rin, len, shape, invmapper_L, invmapper_R); - } - void Arithmetic_internal_i32ti64( - boost::intrusive_ptr &out, boost::intrusive_ptr &Lin, - boost::intrusive_ptr &Rin, const unsigned long long &len, - const std::vector &shape, const std::vector &invmapper_L, - const std::vector &invmapper_R, const char &type) { - if (type == 0) - cytnx::linalg_internal::AddInternalImpl(out, Lin, Rin, len, shape, - invmapper_L, invmapper_R); - else if (type == 1) - cytnx::linalg_internal::MulInternalImpl(out, Lin, 
Rin, len, shape, - invmapper_L, invmapper_R); - else if (type == 2) - cytnx::linalg_internal::SubInternalImpl(out, Lin, Rin, len, shape, - invmapper_L, invmapper_R); - else if (type == 3) - cytnx::linalg_internal::DivInternalImpl(out, Lin, Rin, len, shape, - invmapper_L, invmapper_R); - else if (type == 4) - cytnx::linalg_internal::CprInternalImpl(out, Lin, Rin, len, shape, - invmapper_L, invmapper_R); - else - cytnx::linalg_internal::ModInternalImpl(out, Lin, Rin, len, shape, - invmapper_L, invmapper_R); - } - void Arithmetic_internal_i32tu32( - boost::intrusive_ptr &out, boost::intrusive_ptr &Lin, - boost::intrusive_ptr &Rin, const unsigned long long &len, - const std::vector &shape, const std::vector &invmapper_L, - const std::vector &invmapper_R, const char &type) { - if (type == 0) - cytnx::linalg_internal::AddInternalImpl( - out, Lin, Rin, len, shape, invmapper_L, invmapper_R); - else if (type == 1) - cytnx::linalg_internal::MulInternalImpl( - out, Lin, Rin, len, shape, invmapper_L, invmapper_R); - else if (type == 2) - cytnx::linalg_internal::SubInternalImpl( - out, Lin, Rin, len, shape, invmapper_L, invmapper_R); - else if (type == 3) - cytnx::linalg_internal::DivInternalImpl( - out, Lin, Rin, len, shape, invmapper_L, invmapper_R); - else if (type == 4) - cytnx::linalg_internal::CprInternalImpl( - out, Lin, Rin, len, shape, invmapper_L, invmapper_R); - else - cytnx::linalg_internal::ModInternalImpl( - out, Lin, Rin, len, shape, invmapper_L, invmapper_R); - } - void Arithmetic_internal_i32ti32( - boost::intrusive_ptr &out, boost::intrusive_ptr &Lin, - boost::intrusive_ptr &Rin, const unsigned long long &len, - const std::vector &shape, const std::vector &invmapper_L, - const std::vector &invmapper_R, const char &type) { - if (type == 0) - cytnx::linalg_internal::AddInternalImpl(out, Lin, Rin, len, shape, - invmapper_L, invmapper_R); - else if (type == 1) - cytnx::linalg_internal::MulInternalImpl(out, Lin, Rin, len, shape, - invmapper_L, invmapper_R); - else 
if (type == 2) - cytnx::linalg_internal::SubInternalImpl(out, Lin, Rin, len, shape, - invmapper_L, invmapper_R); - else if (type == 3) - cytnx::linalg_internal::DivInternalImpl(out, Lin, Rin, len, shape, - invmapper_L, invmapper_R); - else if (type == 4) - cytnx::linalg_internal::CprInternalImpl(out, Lin, Rin, len, shape, - invmapper_L, invmapper_R); - else - cytnx::linalg_internal::ModInternalImpl(out, Lin, Rin, len, shape, - invmapper_L, invmapper_R); - } - void Arithmetic_internal_i32ti16( - boost::intrusive_ptr &out, boost::intrusive_ptr &Lin, - boost::intrusive_ptr &Rin, const unsigned long long &len, - const std::vector &shape, const std::vector &invmapper_L, - const std::vector &invmapper_R, const char &type) { - if (type == 0) - cytnx::linalg_internal::AddInternalImpl(out, Lin, Rin, len, shape, - invmapper_L, invmapper_R); - else if (type == 1) - cytnx::linalg_internal::MulInternalImpl(out, Lin, Rin, len, shape, - invmapper_L, invmapper_R); - else if (type == 2) - cytnx::linalg_internal::SubInternalImpl(out, Lin, Rin, len, shape, - invmapper_L, invmapper_R); - else if (type == 3) - cytnx::linalg_internal::DivInternalImpl(out, Lin, Rin, len, shape, - invmapper_L, invmapper_R); - else if (type == 4) - cytnx::linalg_internal::CprInternalImpl(out, Lin, Rin, len, shape, - invmapper_L, invmapper_R); - else - cytnx::linalg_internal::ModInternalImpl(out, Lin, Rin, len, shape, - invmapper_L, invmapper_R); - } - void Arithmetic_internal_i32tu16( - boost::intrusive_ptr &out, boost::intrusive_ptr &Lin, - boost::intrusive_ptr &Rin, const unsigned long long &len, - const std::vector &shape, const std::vector &invmapper_L, - const std::vector &invmapper_R, const char &type) { - if (type == 0) - cytnx::linalg_internal::AddInternalImpl( - out, Lin, Rin, len, shape, invmapper_L, invmapper_R); - else if (type == 1) - cytnx::linalg_internal::MulInternalImpl( - out, Lin, Rin, len, shape, invmapper_L, invmapper_R); - else if (type == 2) - cytnx::linalg_internal::SubInternalImpl( 
- out, Lin, Rin, len, shape, invmapper_L, invmapper_R); - else if (type == 3) - cytnx::linalg_internal::DivInternalImpl( - out, Lin, Rin, len, shape, invmapper_L, invmapper_R); - else if (type == 4) - cytnx::linalg_internal::CprInternalImpl( - out, Lin, Rin, len, shape, invmapper_L, invmapper_R); - } - void Arithmetic_internal_i32tb(boost::intrusive_ptr &out, - boost::intrusive_ptr &Lin, - boost::intrusive_ptr &Rin, - const unsigned long long &len, - const std::vector &shape, - const std::vector &invmapper_L, - const std::vector &invmapper_R, const char &type) { - if (type == 0) - cytnx::linalg_internal::AddInternalImpl(out, Lin, Rin, len, shape, - invmapper_L, invmapper_R); - else if (type == 1) - cytnx::linalg_internal::MulInternalImpl(out, Lin, Rin, len, shape, - invmapper_L, invmapper_R); - else if (type == 2) - cytnx::linalg_internal::SubInternalImpl(out, Lin, Rin, len, shape, - invmapper_L, invmapper_R); - else if (type == 3) - cytnx::linalg_internal::DivInternalImpl(out, Lin, Rin, len, shape, - invmapper_L, invmapper_R); - else if (type == 4) - cytnx::linalg_internal::CprInternalImpl(out, Lin, Rin, len, shape, - invmapper_L, invmapper_R); - else - cytnx::linalg_internal::ModInternalImpl(out, Lin, Rin, len, shape, - invmapper_L, invmapper_R); - } - //---------------------- - void Arithmetic_internal_i16tcd( - boost::intrusive_ptr &out, boost::intrusive_ptr &Lin, - boost::intrusive_ptr &Rin, const unsigned long long &len, - const std::vector &shape, const std::vector &invmapper_L, - const std::vector &invmapper_R, const char &type) { - if (type == 0) - cytnx::linalg_internal::AddInternalImpl( - out, Lin, Rin, len, shape, invmapper_L, invmapper_R); - else if (type == 1) - cytnx::linalg_internal::MulInternalImpl( - out, Lin, Rin, len, shape, invmapper_L, invmapper_R); - else if (type == 2) - cytnx::linalg_internal::SubInternalImpl( - out, Lin, Rin, len, shape, invmapper_L, invmapper_R); - else if (type == 3) - cytnx::linalg_internal::DivInternalImpl( - out, Lin, 
Rin, len, shape, invmapper_L, invmapper_R); - else if (type == 4) - cytnx::linalg_internal::CprInternalImpl( - out, Lin, Rin, len, shape, invmapper_L, invmapper_R); - else - cytnx::linalg_internal::ModInternalImpl( - out, Lin, Rin, len, shape, invmapper_L, invmapper_R); - } - void Arithmetic_internal_i16tcf( - boost::intrusive_ptr &out, boost::intrusive_ptr &Lin, - boost::intrusive_ptr &Rin, const unsigned long long &len, - const std::vector &shape, const std::vector &invmapper_L, - const std::vector &invmapper_R, const char &type) { - if (type == 0) - cytnx::linalg_internal::AddInternalImpl( - out, Lin, Rin, len, shape, invmapper_L, invmapper_R); - else if (type == 1) - cytnx::linalg_internal::MulInternalImpl( - out, Lin, Rin, len, shape, invmapper_L, invmapper_R); - else if (type == 2) - cytnx::linalg_internal::SubInternalImpl( - out, Lin, Rin, len, shape, invmapper_L, invmapper_R); - else if (type == 3) - cytnx::linalg_internal::DivInternalImpl( - out, Lin, Rin, len, shape, invmapper_L, invmapper_R); - else if (type == 4) - cytnx::linalg_internal::CprInternalImpl( - out, Lin, Rin, len, shape, invmapper_L, invmapper_R); - else - cytnx::linalg_internal::ModInternalImpl( - out, Lin, Rin, len, shape, invmapper_L, invmapper_R); - } - void Arithmetic_internal_i16td(boost::intrusive_ptr &out, - boost::intrusive_ptr &Lin, - boost::intrusive_ptr &Rin, - const unsigned long long &len, - const std::vector &shape, - const std::vector &invmapper_L, - const std::vector &invmapper_R, const char &type) { - if (type == 0) - cytnx::linalg_internal::AddInternalImpl( - out, Lin, Rin, len, shape, invmapper_L, invmapper_R); - else if (type == 1) - cytnx::linalg_internal::MulInternalImpl( - out, Lin, Rin, len, shape, invmapper_L, invmapper_R); - else if (type == 2) - cytnx::linalg_internal::SubInternalImpl( - out, Lin, Rin, len, shape, invmapper_L, invmapper_R); - else if (type == 3) - cytnx::linalg_internal::DivInternalImpl( - out, Lin, Rin, len, shape, invmapper_L, invmapper_R); - 
else if (type == 4) - cytnx::linalg_internal::CprInternalImpl( - out, Lin, Rin, len, shape, invmapper_L, invmapper_R); - else - cytnx::linalg_internal::ModInternalImpl( - out, Lin, Rin, len, shape, invmapper_L, invmapper_R); - } - void Arithmetic_internal_i16tf(boost::intrusive_ptr &out, - boost::intrusive_ptr &Lin, - boost::intrusive_ptr &Rin, - const unsigned long long &len, - const std::vector &shape, - const std::vector &invmapper_L, - const std::vector &invmapper_R, const char &type) { - if (type == 0) - cytnx::linalg_internal::AddInternalImpl(out, Lin, Rin, len, shape, - invmapper_L, invmapper_R); - else if (type == 1) - cytnx::linalg_internal::MulInternalImpl(out, Lin, Rin, len, shape, - invmapper_L, invmapper_R); - else if (type == 2) - cytnx::linalg_internal::SubInternalImpl(out, Lin, Rin, len, shape, - invmapper_L, invmapper_R); - else if (type == 3) - cytnx::linalg_internal::DivInternalImpl(out, Lin, Rin, len, shape, - invmapper_L, invmapper_R); - else if (type == 4) - cytnx::linalg_internal::CprInternalImpl(out, Lin, Rin, len, shape, - invmapper_L, invmapper_R); - else - cytnx::linalg_internal::ModInternalImpl(out, Lin, Rin, len, shape, - invmapper_L, invmapper_R); - } - void Arithmetic_internal_i16tu64( - boost::intrusive_ptr &out, boost::intrusive_ptr &Lin, - boost::intrusive_ptr &Rin, const unsigned long long &len, - const std::vector &shape, const std::vector &invmapper_L, - const std::vector &invmapper_R, const char &type) { - if (type == 0) - cytnx::linalg_internal::AddInternalImpl( - out, Lin, Rin, len, shape, invmapper_L, invmapper_R); - else if (type == 1) - cytnx::linalg_internal::MulInternalImpl( - out, Lin, Rin, len, shape, invmapper_L, invmapper_R); - else if (type == 2) - cytnx::linalg_internal::SubInternalImpl( - out, Lin, Rin, len, shape, invmapper_L, invmapper_R); - else if (type == 3) - cytnx::linalg_internal::DivInternalImpl( - out, Lin, Rin, len, shape, invmapper_L, invmapper_R); - else if (type == 4) - 
cytnx::linalg_internal::CprInternalImpl( - out, Lin, Rin, len, shape, invmapper_L, invmapper_R); - else - cytnx::linalg_internal::ModInternalImpl( - out, Lin, Rin, len, shape, invmapper_L, invmapper_R); - } - void Arithmetic_internal_i16ti64( - boost::intrusive_ptr &out, boost::intrusive_ptr &Lin, - boost::intrusive_ptr &Rin, const unsigned long long &len, - const std::vector &shape, const std::vector &invmapper_L, - const std::vector &invmapper_R, const char &type) { - if (type == 0) - cytnx::linalg_internal::AddInternalImpl(out, Lin, Rin, len, shape, - invmapper_L, invmapper_R); - else if (type == 1) - cytnx::linalg_internal::MulInternalImpl(out, Lin, Rin, len, shape, - invmapper_L, invmapper_R); - else if (type == 2) - cytnx::linalg_internal::SubInternalImpl(out, Lin, Rin, len, shape, - invmapper_L, invmapper_R); - else if (type == 3) - cytnx::linalg_internal::DivInternalImpl(out, Lin, Rin, len, shape, - invmapper_L, invmapper_R); - else if (type == 4) - cytnx::linalg_internal::CprInternalImpl(out, Lin, Rin, len, shape, - invmapper_L, invmapper_R); - else - cytnx::linalg_internal::ModInternalImpl(out, Lin, Rin, len, shape, - invmapper_L, invmapper_R); - } - void Arithmetic_internal_i16tu32( - boost::intrusive_ptr &out, boost::intrusive_ptr &Lin, - boost::intrusive_ptr &Rin, const unsigned long long &len, - const std::vector &shape, const std::vector &invmapper_L, - const std::vector &invmapper_R, const char &type) { - if (type == 0) - cytnx::linalg_internal::AddInternalImpl( - out, Lin, Rin, len, shape, invmapper_L, invmapper_R); - else if (type == 1) - cytnx::linalg_internal::MulInternalImpl( - out, Lin, Rin, len, shape, invmapper_L, invmapper_R); - else if (type == 2) - cytnx::linalg_internal::SubInternalImpl( - out, Lin, Rin, len, shape, invmapper_L, invmapper_R); - else if (type == 3) - cytnx::linalg_internal::DivInternalImpl( - out, Lin, Rin, len, shape, invmapper_L, invmapper_R); - else if (type == 4) - cytnx::linalg_internal::CprInternalImpl( - out, Lin, 
Rin, len, shape, invmapper_L, invmapper_R); - else - cytnx::linalg_internal::ModInternalImpl( - out, Lin, Rin, len, shape, invmapper_L, invmapper_R); - } - void Arithmetic_internal_i16ti32( - boost::intrusive_ptr &out, boost::intrusive_ptr &Lin, - boost::intrusive_ptr &Rin, const unsigned long long &len, - const std::vector &shape, const std::vector &invmapper_L, - const std::vector &invmapper_R, const char &type) { - if (type == 0) - cytnx::linalg_internal::AddInternalImpl(out, Lin, Rin, len, shape, - invmapper_L, invmapper_R); - else if (type == 1) - cytnx::linalg_internal::MulInternalImpl(out, Lin, Rin, len, shape, - invmapper_L, invmapper_R); - else if (type == 2) - cytnx::linalg_internal::SubInternalImpl(out, Lin, Rin, len, shape, - invmapper_L, invmapper_R); - else if (type == 3) - cytnx::linalg_internal::DivInternalImpl(out, Lin, Rin, len, shape, - invmapper_L, invmapper_R); - else if (type == 4) - cytnx::linalg_internal::CprInternalImpl(out, Lin, Rin, len, shape, - invmapper_L, invmapper_R); - else - cytnx::linalg_internal::ModInternalImpl(out, Lin, Rin, len, shape, - invmapper_L, invmapper_R); - } - void Arithmetic_internal_i16ti16( - boost::intrusive_ptr &out, boost::intrusive_ptr &Lin, - boost::intrusive_ptr &Rin, const unsigned long long &len, - const std::vector &shape, const std::vector &invmapper_L, - const std::vector &invmapper_R, const char &type) { - if (type == 0) - cytnx::linalg_internal::AddInternalImpl(out, Lin, Rin, len, shape, - invmapper_L, invmapper_R); - else if (type == 1) - cytnx::linalg_internal::MulInternalImpl(out, Lin, Rin, len, shape, - invmapper_L, invmapper_R); - else if (type == 2) - cytnx::linalg_internal::SubInternalImpl(out, Lin, Rin, len, shape, - invmapper_L, invmapper_R); - else if (type == 3) - cytnx::linalg_internal::DivInternalImpl(out, Lin, Rin, len, shape, - invmapper_L, invmapper_R); - else if (type == 4) - cytnx::linalg_internal::CprInternalImpl(out, Lin, Rin, len, shape, - invmapper_L, invmapper_R); - else - 
cytnx::linalg_internal::ModInternalImpl(out, Lin, Rin, len, shape, - invmapper_L, invmapper_R); - } - void Arithmetic_internal_i16tu16( - boost::intrusive_ptr &out, boost::intrusive_ptr &Lin, - boost::intrusive_ptr &Rin, const unsigned long long &len, - const std::vector &shape, const std::vector &invmapper_L, - const std::vector &invmapper_R, const char &type) { - if (type == 0) - cytnx::linalg_internal::AddInternalImpl( - out, Lin, Rin, len, shape, invmapper_L, invmapper_R); - else if (type == 1) - cytnx::linalg_internal::MulInternalImpl( - out, Lin, Rin, len, shape, invmapper_L, invmapper_R); - else if (type == 2) - cytnx::linalg_internal::SubInternalImpl( - out, Lin, Rin, len, shape, invmapper_L, invmapper_R); - else if (type == 3) - cytnx::linalg_internal::DivInternalImpl( - out, Lin, Rin, len, shape, invmapper_L, invmapper_R); - else if (type == 4) - cytnx::linalg_internal::CprInternalImpl( - out, Lin, Rin, len, shape, invmapper_L, invmapper_R); - else - cytnx::linalg_internal::ModInternalImpl( - out, Lin, Rin, len, shape, invmapper_L, invmapper_R); - } - void Arithmetic_internal_i16tb(boost::intrusive_ptr &out, - boost::intrusive_ptr &Lin, - boost::intrusive_ptr &Rin, - const unsigned long long &len, - const std::vector &shape, - const std::vector &invmapper_L, - const std::vector &invmapper_R, const char &type) { - if (type == 0) - cytnx::linalg_internal::AddInternalImpl(out, Lin, Rin, len, shape, - invmapper_L, invmapper_R); - else if (type == 1) - cytnx::linalg_internal::MulInternalImpl(out, Lin, Rin, len, shape, - invmapper_L, invmapper_R); - else if (type == 2) - cytnx::linalg_internal::SubInternalImpl(out, Lin, Rin, len, shape, - invmapper_L, invmapper_R); - else if (type == 3) - cytnx::linalg_internal::DivInternalImpl(out, Lin, Rin, len, shape, - invmapper_L, invmapper_R); - else if (type == 4) - cytnx::linalg_internal::CprInternalImpl(out, Lin, Rin, len, shape, - invmapper_L, invmapper_R); - else - cytnx::linalg_internal::ModInternalImpl(out, Lin, 
Rin, len, shape, - invmapper_L, invmapper_R); - } - - //---------------------- - void Arithmetic_internal_u16tcd( - boost::intrusive_ptr &out, boost::intrusive_ptr &Lin, - boost::intrusive_ptr &Rin, const unsigned long long &len, - const std::vector &shape, const std::vector &invmapper_L, - const std::vector &invmapper_R, const char &type) { - if (type == 0) - cytnx::linalg_internal::AddInternalImpl( - out, Lin, Rin, len, shape, invmapper_L, invmapper_R); - else if (type == 1) - cytnx::linalg_internal::MulInternalImpl( - out, Lin, Rin, len, shape, invmapper_L, invmapper_R); - else if (type == 2) - cytnx::linalg_internal::SubInternalImpl( - out, Lin, Rin, len, shape, invmapper_L, invmapper_R); - else if (type == 3) - cytnx::linalg_internal::DivInternalImpl( - out, Lin, Rin, len, shape, invmapper_L, invmapper_R); - else if (type == 4) - cytnx::linalg_internal::CprInternalImpl( - out, Lin, Rin, len, shape, invmapper_L, invmapper_R); - else - cytnx::linalg_internal::ModInternalImpl( - out, Lin, Rin, len, shape, invmapper_L, invmapper_R); - } - void Arithmetic_internal_u16tcf( - boost::intrusive_ptr &out, boost::intrusive_ptr &Lin, - boost::intrusive_ptr &Rin, const unsigned long long &len, - const std::vector &shape, const std::vector &invmapper_L, - const std::vector &invmapper_R, const char &type) { - if (type == 0) - cytnx::linalg_internal::AddInternalImpl( - out, Lin, Rin, len, shape, invmapper_L, invmapper_R); - else if (type == 1) - cytnx::linalg_internal::MulInternalImpl( - out, Lin, Rin, len, shape, invmapper_L, invmapper_R); - else if (type == 2) - cytnx::linalg_internal::SubInternalImpl( - out, Lin, Rin, len, shape, invmapper_L, invmapper_R); - else if (type == 3) - cytnx::linalg_internal::DivInternalImpl( - out, Lin, Rin, len, shape, invmapper_L, invmapper_R); - else if (type == 4) - cytnx::linalg_internal::CprInternalImpl( - out, Lin, Rin, len, shape, invmapper_L, invmapper_R); - else - cytnx::linalg_internal::ModInternalImpl( - out, Lin, Rin, len, shape, 
invmapper_L, invmapper_R); - } - void Arithmetic_internal_u16td(boost::intrusive_ptr &out, - boost::intrusive_ptr &Lin, - boost::intrusive_ptr &Rin, - const unsigned long long &len, - const std::vector &shape, - const std::vector &invmapper_L, - const std::vector &invmapper_R, const char &type) { - if (type == 0) - cytnx::linalg_internal::AddInternalImpl( - out, Lin, Rin, len, shape, invmapper_L, invmapper_R); - else if (type == 1) - cytnx::linalg_internal::MulInternalImpl( - out, Lin, Rin, len, shape, invmapper_L, invmapper_R); - else if (type == 2) - cytnx::linalg_internal::SubInternalImpl( - out, Lin, Rin, len, shape, invmapper_L, invmapper_R); - else if (type == 3) - cytnx::linalg_internal::DivInternalImpl( - out, Lin, Rin, len, shape, invmapper_L, invmapper_R); - else if (type == 4) - cytnx::linalg_internal::CprInternalImpl( - out, Lin, Rin, len, shape, invmapper_L, invmapper_R); - else - cytnx::linalg_internal::ModInternalImpl( - out, Lin, Rin, len, shape, invmapper_L, invmapper_R); - } - void Arithmetic_internal_u16tf(boost::intrusive_ptr &out, - boost::intrusive_ptr &Lin, - boost::intrusive_ptr &Rin, - const unsigned long long &len, - const std::vector &shape, - const std::vector &invmapper_L, - const std::vector &invmapper_R, const char &type) { - if (type == 0) - cytnx::linalg_internal::AddInternalImpl( - out, Lin, Rin, len, shape, invmapper_L, invmapper_R); - else if (type == 1) - cytnx::linalg_internal::MulInternalImpl( - out, Lin, Rin, len, shape, invmapper_L, invmapper_R); - else if (type == 2) - cytnx::linalg_internal::SubInternalImpl( - out, Lin, Rin, len, shape, invmapper_L, invmapper_R); - else if (type == 3) - cytnx::linalg_internal::DivInternalImpl( - out, Lin, Rin, len, shape, invmapper_L, invmapper_R); - else if (type == 4) - cytnx::linalg_internal::CprInternalImpl( - out, Lin, Rin, len, shape, invmapper_L, invmapper_R); - else - cytnx::linalg_internal::ModInternalImpl( - out, Lin, Rin, len, shape, invmapper_L, invmapper_R); - } - void 
Arithmetic_internal_u16tu64( - boost::intrusive_ptr &out, boost::intrusive_ptr &Lin, - boost::intrusive_ptr &Rin, const unsigned long long &len, - const std::vector &shape, const std::vector &invmapper_L, - const std::vector &invmapper_R, const char &type) { - if (type == 0) - cytnx::linalg_internal::AddInternalImpl( - out, Lin, Rin, len, shape, invmapper_L, invmapper_R); - else if (type == 1) - cytnx::linalg_internal::MulInternalImpl( - out, Lin, Rin, len, shape, invmapper_L, invmapper_R); - else if (type == 2) - cytnx::linalg_internal::SubInternalImpl( - out, Lin, Rin, len, shape, invmapper_L, invmapper_R); - else if (type == 3) - cytnx::linalg_internal::DivInternalImpl( - out, Lin, Rin, len, shape, invmapper_L, invmapper_R); - else if (type == 4) - cytnx::linalg_internal::CprInternalImpl( - out, Lin, Rin, len, shape, invmapper_L, invmapper_R); - else - cytnx::linalg_internal::ModInternalImpl( - out, Lin, Rin, len, shape, invmapper_L, invmapper_R); - } - void Arithmetic_internal_u16ti64( - boost::intrusive_ptr &out, boost::intrusive_ptr &Lin, - boost::intrusive_ptr &Rin, const unsigned long long &len, - const std::vector &shape, const std::vector &invmapper_L, - const std::vector &invmapper_R, const char &type) { - if (type == 0) - cytnx::linalg_internal::AddInternalImpl( - out, Lin, Rin, len, shape, invmapper_L, invmapper_R); - else if (type == 1) - cytnx::linalg_internal::MulInternalImpl( - out, Lin, Rin, len, shape, invmapper_L, invmapper_R); - else if (type == 2) - cytnx::linalg_internal::SubInternalImpl( - out, Lin, Rin, len, shape, invmapper_L, invmapper_R); - else if (type == 3) - cytnx::linalg_internal::DivInternalImpl( - out, Lin, Rin, len, shape, invmapper_L, invmapper_R); - else if (type == 4) - cytnx::linalg_internal::CprInternalImpl( - out, Lin, Rin, len, shape, invmapper_L, invmapper_R); - else - cytnx::linalg_internal::ModInternalImpl( - out, Lin, Rin, len, shape, invmapper_L, invmapper_R); - } - void Arithmetic_internal_u16tu32( - 
boost::intrusive_ptr &out, boost::intrusive_ptr &Lin, - boost::intrusive_ptr &Rin, const unsigned long long &len, - const std::vector &shape, const std::vector &invmapper_L, - const std::vector &invmapper_R, const char &type) { - if (type == 0) - cytnx::linalg_internal::AddInternalImpl( - out, Lin, Rin, len, shape, invmapper_L, invmapper_R); - else if (type == 1) - cytnx::linalg_internal::MulInternalImpl( - out, Lin, Rin, len, shape, invmapper_L, invmapper_R); - else if (type == 2) - cytnx::linalg_internal::SubInternalImpl( - out, Lin, Rin, len, shape, invmapper_L, invmapper_R); - else if (type == 3) - cytnx::linalg_internal::DivInternalImpl( - out, Lin, Rin, len, shape, invmapper_L, invmapper_R); - else if (type == 4) - cytnx::linalg_internal::CprInternalImpl( - out, Lin, Rin, len, shape, invmapper_L, invmapper_R); - else - cytnx::linalg_internal::ModInternalImpl( - out, Lin, Rin, len, shape, invmapper_L, invmapper_R); - } - void Arithmetic_internal_u16ti32( - boost::intrusive_ptr &out, boost::intrusive_ptr &Lin, - boost::intrusive_ptr &Rin, const unsigned long long &len, - const std::vector &shape, const std::vector &invmapper_L, - const std::vector &invmapper_R, const char &type) { - if (type == 0) - cytnx::linalg_internal::AddInternalImpl( - out, Lin, Rin, len, shape, invmapper_L, invmapper_R); - else if (type == 1) - cytnx::linalg_internal::MulInternalImpl( - out, Lin, Rin, len, shape, invmapper_L, invmapper_R); - else if (type == 2) - cytnx::linalg_internal::SubInternalImpl( - out, Lin, Rin, len, shape, invmapper_L, invmapper_R); - else if (type == 3) - cytnx::linalg_internal::DivInternalImpl( - out, Lin, Rin, len, shape, invmapper_L, invmapper_R); - else if (type == 4) - cytnx::linalg_internal::CprInternalImpl( - out, Lin, Rin, len, shape, invmapper_L, invmapper_R); - else - cytnx::linalg_internal::ModInternalImpl( - out, Lin, Rin, len, shape, invmapper_L, invmapper_R); - } - void Arithmetic_internal_u16ti16( - boost::intrusive_ptr &out, boost::intrusive_ptr 
&Lin, - boost::intrusive_ptr &Rin, const unsigned long long &len, - const std::vector &shape, const std::vector &invmapper_L, - const std::vector &invmapper_R, const char &type) { - if (type == 0) - cytnx::linalg_internal::AddInternalImpl( - out, Lin, Rin, len, shape, invmapper_L, invmapper_R); - else if (type == 1) - cytnx::linalg_internal::MulInternalImpl( - out, Lin, Rin, len, shape, invmapper_L, invmapper_R); - else if (type == 2) - cytnx::linalg_internal::SubInternalImpl( - out, Lin, Rin, len, shape, invmapper_L, invmapper_R); - else if (type == 3) - cytnx::linalg_internal::DivInternalImpl( - out, Lin, Rin, len, shape, invmapper_L, invmapper_R); - else if (type == 4) - cytnx::linalg_internal::CprInternalImpl( - out, Lin, Rin, len, shape, invmapper_L, invmapper_R); - else - cytnx::linalg_internal::ModInternalImpl( - out, Lin, Rin, len, shape, invmapper_L, invmapper_R); - } - void Arithmetic_internal_u16tu16( - boost::intrusive_ptr &out, boost::intrusive_ptr &Lin, - boost::intrusive_ptr &Rin, const unsigned long long &len, - const std::vector &shape, const std::vector &invmapper_L, - const std::vector &invmapper_R, const char &type) { - if (type == 0) - cytnx::linalg_internal::AddInternalImpl( - out, Lin, Rin, len, shape, invmapper_L, invmapper_R); - else if (type == 1) - cytnx::linalg_internal::MulInternalImpl( - out, Lin, Rin, len, shape, invmapper_L, invmapper_R); - else if (type == 2) - cytnx::linalg_internal::SubInternalImpl( - out, Lin, Rin, len, shape, invmapper_L, invmapper_R); - else if (type == 3) - cytnx::linalg_internal::DivInternalImpl( - out, Lin, Rin, len, shape, invmapper_L, invmapper_R); - else if (type == 4) - cytnx::linalg_internal::CprInternalImpl( - out, Lin, Rin, len, shape, invmapper_L, invmapper_R); - else - cytnx::linalg_internal::ModInternalImpl( - out, Lin, Rin, len, shape, invmapper_L, invmapper_R); - } - void Arithmetic_internal_u16tb(boost::intrusive_ptr &out, - boost::intrusive_ptr &Lin, - boost::intrusive_ptr &Rin, - const 
unsigned long long &len, - const std::vector &shape, - const std::vector &invmapper_L, - const std::vector &invmapper_R, const char &type) { - if (type == 0) - cytnx::linalg_internal::AddInternalImpl(out, Lin, Rin, len, shape, - invmapper_L, invmapper_R); - else if (type == 1) - cytnx::linalg_internal::MulInternalImpl(out, Lin, Rin, len, shape, - invmapper_L, invmapper_R); - else if (type == 2) - cytnx::linalg_internal::SubInternalImpl(out, Lin, Rin, len, shape, - invmapper_L, invmapper_R); - else if (type == 3) - cytnx::linalg_internal::DivInternalImpl(out, Lin, Rin, len, shape, - invmapper_L, invmapper_R); - else if (type == 4) - cytnx::linalg_internal::CprInternalImpl(out, Lin, Rin, len, shape, - invmapper_L, invmapper_R); - else - cytnx::linalg_internal::ModInternalImpl(out, Lin, Rin, len, shape, - invmapper_L, invmapper_R); - } - //---------------------- - void Arithmetic_internal_btcd(boost::intrusive_ptr &out, - boost::intrusive_ptr &Lin, - boost::intrusive_ptr &Rin, - const unsigned long long &len, - const std::vector &shape, - const std::vector &invmapper_L, - const std::vector &invmapper_R, const char &type) { - if (type == 0) - cytnx::linalg_internal::AddInternalImpl( - out, Lin, Rin, len, shape, invmapper_L, invmapper_R); - else if (type == 1) - cytnx::linalg_internal::MulInternalImpl( - out, Lin, Rin, len, shape, invmapper_L, invmapper_R); - else if (type == 2) - cytnx::linalg_internal::SubInternalImpl( - out, Lin, Rin, len, shape, invmapper_L, invmapper_R); - else if (type == 3) - cytnx::linalg_internal::DivInternalImpl( - out, Lin, Rin, len, shape, invmapper_L, invmapper_R); - else if (type == 4) - cytnx::linalg_internal::CprInternalImpl( - out, Lin, Rin, len, shape, invmapper_L, invmapper_R); - else - cytnx::linalg_internal::ModInternalImpl( - out, Lin, Rin, len, shape, invmapper_L, invmapper_R); - } - void Arithmetic_internal_btcf(boost::intrusive_ptr &out, - boost::intrusive_ptr &Lin, - boost::intrusive_ptr &Rin, - const unsigned long long &len, - 
const std::vector &shape, - const std::vector &invmapper_L, - const std::vector &invmapper_R, const char &type) { - if (type == 0) - cytnx::linalg_internal::AddInternalImpl( - out, Lin, Rin, len, shape, invmapper_L, invmapper_R); - else if (type == 1) - cytnx::linalg_internal::MulInternalImpl( - out, Lin, Rin, len, shape, invmapper_L, invmapper_R); - else if (type == 2) - cytnx::linalg_internal::SubInternalImpl( - out, Lin, Rin, len, shape, invmapper_L, invmapper_R); - else if (type == 3) - cytnx::linalg_internal::DivInternalImpl( - out, Lin, Rin, len, shape, invmapper_L, invmapper_R); - else if (type == 4) - cytnx::linalg_internal::CprInternalImpl( - out, Lin, Rin, len, shape, invmapper_L, invmapper_R); - else - cytnx::linalg_internal::ModInternalImpl( - out, Lin, Rin, len, shape, invmapper_L, invmapper_R); - } - void Arithmetic_internal_btd(boost::intrusive_ptr &out, - boost::intrusive_ptr &Lin, - boost::intrusive_ptr &Rin, - const unsigned long long &len, - const std::vector &shape, - const std::vector &invmapper_L, - const std::vector &invmapper_R, const char &type) { - if (type == 0) - cytnx::linalg_internal::AddInternalImpl(out, Lin, Rin, len, shape, - invmapper_L, invmapper_R); - else if (type == 1) - cytnx::linalg_internal::MulInternalImpl(out, Lin, Rin, len, shape, - invmapper_L, invmapper_R); - else if (type == 2) - cytnx::linalg_internal::SubInternalImpl(out, Lin, Rin, len, shape, - invmapper_L, invmapper_R); - else if (type == 3) - cytnx::linalg_internal::DivInternalImpl(out, Lin, Rin, len, shape, - invmapper_L, invmapper_R); - else if (type == 4) - cytnx::linalg_internal::CprInternalImpl(out, Lin, Rin, len, shape, - invmapper_L, invmapper_R); - else - cytnx::linalg_internal::ModInternalImpl(out, Lin, Rin, len, shape, - invmapper_L, invmapper_R); - } - void Arithmetic_internal_btf(boost::intrusive_ptr &out, - boost::intrusive_ptr &Lin, - boost::intrusive_ptr &Rin, - const unsigned long long &len, - const std::vector &shape, - const std::vector 
&invmapper_L, - const std::vector &invmapper_R, const char &type) { - if (type == 0) - cytnx::linalg_internal::AddInternalImpl(out, Lin, Rin, len, shape, - invmapper_L, invmapper_R); - else if (type == 1) - cytnx::linalg_internal::MulInternalImpl(out, Lin, Rin, len, shape, - invmapper_L, invmapper_R); - else if (type == 2) - cytnx::linalg_internal::SubInternalImpl(out, Lin, Rin, len, shape, - invmapper_L, invmapper_R); - else if (type == 3) - cytnx::linalg_internal::DivInternalImpl(out, Lin, Rin, len, shape, - invmapper_L, invmapper_R); - else if (type == 4) - cytnx::linalg_internal::CprInternalImpl(out, Lin, Rin, len, shape, - invmapper_L, invmapper_R); - else - cytnx::linalg_internal::ModInternalImpl(out, Lin, Rin, len, shape, - invmapper_L, invmapper_R); - } - void Arithmetic_internal_btu64(boost::intrusive_ptr &out, - boost::intrusive_ptr &Lin, - boost::intrusive_ptr &Rin, - const unsigned long long &len, - const std::vector &shape, - const std::vector &invmapper_L, - const std::vector &invmapper_R, const char &type) { - if (type == 0) - cytnx::linalg_internal::AddInternalImpl(out, Lin, Rin, len, shape, - invmapper_L, invmapper_R); - else if (type == 1) - cytnx::linalg_internal::MulInternalImpl(out, Lin, Rin, len, shape, - invmapper_L, invmapper_R); - else if (type == 2) - cytnx::linalg_internal::SubInternalImpl(out, Lin, Rin, len, shape, - invmapper_L, invmapper_R); - else if (type == 3) - cytnx::linalg_internal::DivInternalImpl(out, Lin, Rin, len, shape, - invmapper_L, invmapper_R); - else if (type == 4) - cytnx::linalg_internal::CprInternalImpl(out, Lin, Rin, len, shape, - invmapper_L, invmapper_R); - else - cytnx::linalg_internal::ModInternalImpl(out, Lin, Rin, len, shape, - invmapper_L, invmapper_R); - } - void Arithmetic_internal_bti64(boost::intrusive_ptr &out, - boost::intrusive_ptr &Lin, - boost::intrusive_ptr &Rin, - const unsigned long long &len, - const std::vector &shape, - const std::vector &invmapper_L, - const std::vector &invmapper_R, const 
char &type) { - if (type == 0) - cytnx::linalg_internal::AddInternalImpl(out, Lin, Rin, len, shape, - invmapper_L, invmapper_R); - else if (type == 1) - cytnx::linalg_internal::MulInternalImpl(out, Lin, Rin, len, shape, - invmapper_L, invmapper_R); - else if (type == 2) - cytnx::linalg_internal::SubInternalImpl(out, Lin, Rin, len, shape, - invmapper_L, invmapper_R); - else if (type == 3) - cytnx::linalg_internal::DivInternalImpl(out, Lin, Rin, len, shape, - invmapper_L, invmapper_R); - else if (type == 4) - cytnx::linalg_internal::CprInternalImpl(out, Lin, Rin, len, shape, - invmapper_L, invmapper_R); - else - cytnx::linalg_internal::ModInternalImpl(out, Lin, Rin, len, shape, - invmapper_L, invmapper_R); - } - void Arithmetic_internal_btu32(boost::intrusive_ptr &out, - boost::intrusive_ptr &Lin, - boost::intrusive_ptr &Rin, - const unsigned long long &len, - const std::vector &shape, - const std::vector &invmapper_L, - const std::vector &invmapper_R, const char &type) { - if (type == 0) - cytnx::linalg_internal::AddInternalImpl(out, Lin, Rin, len, shape, - invmapper_L, invmapper_R); - else if (type == 1) - cytnx::linalg_internal::MulInternalImpl(out, Lin, Rin, len, shape, - invmapper_L, invmapper_R); - else if (type == 2) - cytnx::linalg_internal::SubInternalImpl(out, Lin, Rin, len, shape, - invmapper_L, invmapper_R); - else if (type == 3) - cytnx::linalg_internal::DivInternalImpl(out, Lin, Rin, len, shape, - invmapper_L, invmapper_R); - else if (type == 4) - cytnx::linalg_internal::CprInternalImpl(out, Lin, Rin, len, shape, - invmapper_L, invmapper_R); - else - cytnx::linalg_internal::ModInternalImpl(out, Lin, Rin, len, shape, - invmapper_L, invmapper_R); - } - void Arithmetic_internal_bti32(boost::intrusive_ptr &out, - boost::intrusive_ptr &Lin, - boost::intrusive_ptr &Rin, - const unsigned long long &len, - const std::vector &shape, - const std::vector &invmapper_L, - const std::vector &invmapper_R, const char &type) { - if (type == 0) - 
cytnx::linalg_internal::AddInternalImpl(out, Lin, Rin, len, shape, - invmapper_L, invmapper_R); - else if (type == 1) - cytnx::linalg_internal::MulInternalImpl(out, Lin, Rin, len, shape, - invmapper_L, invmapper_R); - else if (type == 2) - cytnx::linalg_internal::SubInternalImpl(out, Lin, Rin, len, shape, - invmapper_L, invmapper_R); - else if (type == 3) - cytnx::linalg_internal::DivInternalImpl(out, Lin, Rin, len, shape, - invmapper_L, invmapper_R); - else if (type == 4) - cytnx::linalg_internal::CprInternalImpl(out, Lin, Rin, len, shape, - invmapper_L, invmapper_R); - else - cytnx::linalg_internal::ModInternalImpl(out, Lin, Rin, len, shape, - invmapper_L, invmapper_R); - } - void Arithmetic_internal_bti16(boost::intrusive_ptr &out, - boost::intrusive_ptr &Lin, - boost::intrusive_ptr &Rin, - const unsigned long long &len, - const std::vector &shape, - const std::vector &invmapper_L, - const std::vector &invmapper_R, const char &type) { - if (type == 0) - cytnx::linalg_internal::AddInternalImpl(out, Lin, Rin, len, shape, - invmapper_L, invmapper_R); - else if (type == 1) - cytnx::linalg_internal::MulInternalImpl(out, Lin, Rin, len, shape, - invmapper_L, invmapper_R); - else if (type == 2) - cytnx::linalg_internal::SubInternalImpl(out, Lin, Rin, len, shape, - invmapper_L, invmapper_R); - else if (type == 3) - cytnx::linalg_internal::DivInternalImpl(out, Lin, Rin, len, shape, - invmapper_L, invmapper_R); - else if (type == 4) - cytnx::linalg_internal::CprInternalImpl(out, Lin, Rin, len, shape, - invmapper_L, invmapper_R); - else - cytnx::linalg_internal::ModInternalImpl(out, Lin, Rin, len, shape, - invmapper_L, invmapper_R); - } - void Arithmetic_internal_btu16(boost::intrusive_ptr &out, - boost::intrusive_ptr &Lin, - boost::intrusive_ptr &Rin, - const unsigned long long &len, - const std::vector &shape, - const std::vector &invmapper_L, - const std::vector &invmapper_R, const char &type) { - if (type == 0) - cytnx::linalg_internal::AddInternalImpl(out, Lin, Rin, 
len, shape, - invmapper_L, invmapper_R); - else if (type == 1) - cytnx::linalg_internal::MulInternalImpl(out, Lin, Rin, len, shape, - invmapper_L, invmapper_R); - else if (type == 2) - cytnx::linalg_internal::SubInternalImpl(out, Lin, Rin, len, shape, - invmapper_L, invmapper_R); - else if (type == 3) - cytnx::linalg_internal::DivInternalImpl(out, Lin, Rin, len, shape, - invmapper_L, invmapper_R); - else if (type == 4) - cytnx::linalg_internal::CprInternalImpl(out, Lin, Rin, len, shape, - invmapper_L, invmapper_R); - else - cytnx::linalg_internal::ModInternalImpl(out, Lin, Rin, len, shape, - invmapper_L, invmapper_R); - } - void Arithmetic_internal_btb(boost::intrusive_ptr &out, - boost::intrusive_ptr &Lin, - boost::intrusive_ptr &Rin, - const unsigned long long &len, - const std::vector &shape, - const std::vector &invmapper_L, - const std::vector &invmapper_R, const char &type) { - if (type == 0) - cytnx::linalg_internal::AddInternalImpl(out, Lin, Rin, len, shape, - invmapper_L, invmapper_R); - else if (type == 1) - cytnx::linalg_internal::MulInternalImpl(out, Lin, Rin, len, shape, - invmapper_L, invmapper_R); - else if (type == 2) - cytnx::linalg_internal::SubInternalImpl(out, Lin, Rin, len, shape, - invmapper_L, invmapper_R); - else if (type == 3) - cytnx::linalg_internal::DivInternalImpl(out, Lin, Rin, len, shape, - invmapper_L, invmapper_R); - else if (type == 4) - cytnx::linalg_internal::CprInternalImpl(out, Lin, Rin, len, shape, - invmapper_L, invmapper_R); - else - cytnx::linalg_internal::ModInternalImpl(out, Lin, Rin, len, shape, - invmapper_L, invmapper_R); - } - - } // namespace linalg_internal -} // namespace cytnx diff --git a/src/backend/linalg_internal_cpu/Arithmetic_internal.hpp b/src/backend/linalg_internal_cpu/Arithmetic_internal.hpp deleted file mode 100644 index 0ed4add51..000000000 --- a/src/backend/linalg_internal_cpu/Arithmetic_internal.hpp +++ /dev/null @@ -1,805 +0,0 @@ -#ifndef CYTNX_BACKEND_LINALG_INTERNAL_CPU_ARITHMETIC_INTERNAL_H_ 
-#define CYTNX_BACKEND_LINALG_INTERNAL_CPU_ARITHMETIC_INTERNAL_H_ - -#include "Type.hpp" -#include "backend/Storage.hpp" -#include "Add_internal.hpp" -#include "Mul_internal.hpp" -#include "Sub_internal.hpp" -#include "Div_internal.hpp" -#include "Cpr_internal.hpp" -#include "Mod_internal.hpp" -namespace cytnx { - namespace linalg_internal { - - /// Arithmetic - void Arithmetic_internal_cdtcd(boost::intrusive_ptr &out, - boost::intrusive_ptr &Lin, - boost::intrusive_ptr &Rin, - const unsigned long long &len, - const std::vector &shape, - const std::vector &invmapper_L, - const std::vector &invmapper_R, const char &type); - void Arithmetic_internal_cdtcf(boost::intrusive_ptr &out, - boost::intrusive_ptr &Lin, - boost::intrusive_ptr &Rin, - const unsigned long long &len, - const std::vector &shape, - const std::vector &invmapper_L, - const std::vector &invmapper_R, const char &type); - void Arithmetic_internal_cdtd(boost::intrusive_ptr &out, - boost::intrusive_ptr &Lin, - boost::intrusive_ptr &Rin, - const unsigned long long &len, - const std::vector &shape, - const std::vector &invmapper_L, - const std::vector &invmapper_R, const char &type); - void Arithmetic_internal_cdtf(boost::intrusive_ptr &out, - boost::intrusive_ptr &Lin, - boost::intrusive_ptr &Rin, - const unsigned long long &len, - const std::vector &shape, - const std::vector &invmapper_L, - const std::vector &invmapper_R, const char &type); - void Arithmetic_internal_cdti64(boost::intrusive_ptr &out, - boost::intrusive_ptr &Lin, - boost::intrusive_ptr &Rin, - const unsigned long long &len, - const std::vector &shape, - const std::vector &invmapper_L, - const std::vector &invmapper_R, const char &type); - void Arithmetic_internal_cdtu64(boost::intrusive_ptr &out, - boost::intrusive_ptr &Lin, - boost::intrusive_ptr &Rin, - const unsigned long long &len, - const std::vector &shape, - const std::vector &invmapper_L, - const std::vector &invmapper_R, const char &type); - void 
Arithmetic_internal_cdti32(boost::intrusive_ptr &out, - boost::intrusive_ptr &Lin, - boost::intrusive_ptr &Rin, - const unsigned long long &len, - const std::vector &shape, - const std::vector &invmapper_L, - const std::vector &invmapper_R, const char &type); - void Arithmetic_internal_cdtu32(boost::intrusive_ptr &out, - boost::intrusive_ptr &Lin, - boost::intrusive_ptr &Rin, - const unsigned long long &len, - const std::vector &shape, - const std::vector &invmapper_L, - const std::vector &invmapper_R, const char &type); - void Arithmetic_internal_cdtu16(boost::intrusive_ptr &out, - boost::intrusive_ptr &Lin, - boost::intrusive_ptr &Rin, - const unsigned long long &len, - const std::vector &shape, - const std::vector &invmapper_L, - const std::vector &invmapper_R, const char &type); - void Arithmetic_internal_cdti16(boost::intrusive_ptr &out, - boost::intrusive_ptr &Lin, - boost::intrusive_ptr &Rin, - const unsigned long long &len, - const std::vector &shape, - const std::vector &invmapper_L, - const std::vector &invmapper_R, const char &type); - void Arithmetic_internal_cdtb(boost::intrusive_ptr &out, - boost::intrusive_ptr &Lin, - boost::intrusive_ptr &Rin, - const unsigned long long &len, - const std::vector &shape, - const std::vector &invmapper_L, - const std::vector &invmapper_R, const char &type); - - void Arithmetic_internal_cftcd(boost::intrusive_ptr &out, - boost::intrusive_ptr &Lin, - boost::intrusive_ptr &Rin, - const unsigned long long &len, - const std::vector &shape, - const std::vector &invmapper_L, - const std::vector &invmapper_R, const char &type); - void Arithmetic_internal_cftcf(boost::intrusive_ptr &out, - boost::intrusive_ptr &Lin, - boost::intrusive_ptr &Rin, - const unsigned long long &len, - const std::vector &shape, - const std::vector &invmapper_L, - const std::vector &invmapper_R, const char &type); - void Arithmetic_internal_cftd(boost::intrusive_ptr &out, - boost::intrusive_ptr &Lin, - boost::intrusive_ptr &Rin, - const unsigned long 
long &len, - const std::vector &shape, - const std::vector &invmapper_L, - const std::vector &invmapper_R, const char &type); - void Arithmetic_internal_cftf(boost::intrusive_ptr &out, - boost::intrusive_ptr &Lin, - boost::intrusive_ptr &Rin, - const unsigned long long &len, - const std::vector &shape, - const std::vector &invmapper_L, - const std::vector &invmapper_R, const char &type); - void Arithmetic_internal_cfti64(boost::intrusive_ptr &out, - boost::intrusive_ptr &Lin, - boost::intrusive_ptr &Rin, - const unsigned long long &len, - const std::vector &shape, - const std::vector &invmapper_L, - const std::vector &invmapper_R, const char &type); - void Arithmetic_internal_cftu64(boost::intrusive_ptr &out, - boost::intrusive_ptr &Lin, - boost::intrusive_ptr &Rin, - const unsigned long long &len, - const std::vector &shape, - const std::vector &invmapper_L, - const std::vector &invmapper_R, const char &type); - void Arithmetic_internal_cfti32(boost::intrusive_ptr &out, - boost::intrusive_ptr &Lin, - boost::intrusive_ptr &Rin, - const unsigned long long &len, - const std::vector &shape, - const std::vector &invmapper_L, - const std::vector &invmapper_R, const char &type); - void Arithmetic_internal_cftu32(boost::intrusive_ptr &out, - boost::intrusive_ptr &Lin, - boost::intrusive_ptr &Rin, - const unsigned long long &len, - const std::vector &shape, - const std::vector &invmapper_L, - const std::vector &invmapper_R, const char &type); - void Arithmetic_internal_cftu16(boost::intrusive_ptr &out, - boost::intrusive_ptr &Lin, - boost::intrusive_ptr &Rin, - const unsigned long long &len, - const std::vector &shape, - const std::vector &invmapper_L, - const std::vector &invmapper_R, const char &type); - void Arithmetic_internal_cfti16(boost::intrusive_ptr &out, - boost::intrusive_ptr &Lin, - boost::intrusive_ptr &Rin, - const unsigned long long &len, - const std::vector &shape, - const std::vector &invmapper_L, - const std::vector &invmapper_R, const char &type); - void 
Arithmetic_internal_cftb(boost::intrusive_ptr &out, - boost::intrusive_ptr &Lin, - boost::intrusive_ptr &Rin, - const unsigned long long &len, - const std::vector &shape, - const std::vector &invmapper_L, - const std::vector &invmapper_R, const char &type); - - void Arithmetic_internal_dtcd(boost::intrusive_ptr &out, - boost::intrusive_ptr &Lin, - boost::intrusive_ptr &Rin, - const unsigned long long &len, - const std::vector &shape, - const std::vector &invmapper_L, - const std::vector &invmapper_R, const char &type); - void Arithmetic_internal_dtcf(boost::intrusive_ptr &out, - boost::intrusive_ptr &Lin, - boost::intrusive_ptr &Rin, - const unsigned long long &len, - const std::vector &shape, - const std::vector &invmapper_L, - const std::vector &invmapper_R, const char &type); - void Arithmetic_internal_dtd(boost::intrusive_ptr &out, - boost::intrusive_ptr &Lin, - boost::intrusive_ptr &Rin, - const unsigned long long &len, - const std::vector &shape, - const std::vector &invmapper_L, - const std::vector &invmapper_R, const char &type); - void Arithmetic_internal_dtf(boost::intrusive_ptr &out, - boost::intrusive_ptr &Lin, - boost::intrusive_ptr &Rin, - const unsigned long long &len, - const std::vector &shape, - const std::vector &invmapper_L, - const std::vector &invmapper_R, const char &type); - void Arithmetic_internal_dti64(boost::intrusive_ptr &out, - boost::intrusive_ptr &Lin, - boost::intrusive_ptr &Rin, - const unsigned long long &len, - const std::vector &shape, - const std::vector &invmapper_L, - const std::vector &invmapper_R, const char &type); - void Arithmetic_internal_dtu64(boost::intrusive_ptr &out, - boost::intrusive_ptr &Lin, - boost::intrusive_ptr &Rin, - const unsigned long long &len, - const std::vector &shape, - const std::vector &invmapper_L, - const std::vector &invmapper_R, const char &type); - void Arithmetic_internal_dti32(boost::intrusive_ptr &out, - boost::intrusive_ptr &Lin, - boost::intrusive_ptr &Rin, - const unsigned long long 
&len, - const std::vector &shape, - const std::vector &invmapper_L, - const std::vector &invmapper_R, const char &type); - void Arithmetic_internal_dtu32(boost::intrusive_ptr &out, - boost::intrusive_ptr &Lin, - boost::intrusive_ptr &Rin, - const unsigned long long &len, - const std::vector &shape, - const std::vector &invmapper_L, - const std::vector &invmapper_R, const char &type); - void Arithmetic_internal_dtu16(boost::intrusive_ptr &out, - boost::intrusive_ptr &Lin, - boost::intrusive_ptr &Rin, - const unsigned long long &len, - const std::vector &shape, - const std::vector &invmapper_L, - const std::vector &invmapper_R, const char &type); - void Arithmetic_internal_dti16(boost::intrusive_ptr &out, - boost::intrusive_ptr &Lin, - boost::intrusive_ptr &Rin, - const unsigned long long &len, - const std::vector &shape, - const std::vector &invmapper_L, - const std::vector &invmapper_R, const char &type); - void Arithmetic_internal_dtb(boost::intrusive_ptr &out, - boost::intrusive_ptr &Lin, - boost::intrusive_ptr &Rin, - const unsigned long long &len, - const std::vector &shape, - const std::vector &invmapper_L, - const std::vector &invmapper_R, const char &type); - - void Arithmetic_internal_ftcd(boost::intrusive_ptr &out, - boost::intrusive_ptr &Lin, - boost::intrusive_ptr &Rin, - const unsigned long long &len, - const std::vector &shape, - const std::vector &invmapper_L, - const std::vector &invmapper_R, const char &type); - void Arithmetic_internal_ftcf(boost::intrusive_ptr &out, - boost::intrusive_ptr &Lin, - boost::intrusive_ptr &Rin, - const unsigned long long &len, - const std::vector &shape, - const std::vector &invmapper_L, - const std::vector &invmapper_R, const char &type); - void Arithmetic_internal_ftd(boost::intrusive_ptr &out, - boost::intrusive_ptr &Lin, - boost::intrusive_ptr &Rin, - const unsigned long long &len, - const std::vector &shape, - const std::vector &invmapper_L, - const std::vector &invmapper_R, const char &type); - void 
Arithmetic_internal_ftf(boost::intrusive_ptr &out, - boost::intrusive_ptr &Lin, - boost::intrusive_ptr &Rin, - const unsigned long long &len, - const std::vector &shape, - const std::vector &invmapper_L, - const std::vector &invmapper_R, const char &type); - void Arithmetic_internal_fti64(boost::intrusive_ptr &out, - boost::intrusive_ptr &Lin, - boost::intrusive_ptr &Rin, - const unsigned long long &len, - const std::vector &shape, - const std::vector &invmapper_L, - const std::vector &invmapper_R, const char &type); - void Arithmetic_internal_ftu64(boost::intrusive_ptr &out, - boost::intrusive_ptr &Lin, - boost::intrusive_ptr &Rin, - const unsigned long long &len, - const std::vector &shape, - const std::vector &invmapper_L, - const std::vector &invmapper_R, const char &type); - void Arithmetic_internal_fti32(boost::intrusive_ptr &out, - boost::intrusive_ptr &Lin, - boost::intrusive_ptr &Rin, - const unsigned long long &len, - const std::vector &shape, - const std::vector &invmapper_L, - const std::vector &invmapper_R, const char &type); - void Arithmetic_internal_ftu32(boost::intrusive_ptr &out, - boost::intrusive_ptr &Lin, - boost::intrusive_ptr &Rin, - const unsigned long long &len, - const std::vector &shape, - const std::vector &invmapper_L, - const std::vector &invmapper_R, const char &type); - void Arithmetic_internal_ftu16(boost::intrusive_ptr &out, - boost::intrusive_ptr &Lin, - boost::intrusive_ptr &Rin, - const unsigned long long &len, - const std::vector &shape, - const std::vector &invmapper_L, - const std::vector &invmapper_R, const char &type); - void Arithmetic_internal_fti16(boost::intrusive_ptr &out, - boost::intrusive_ptr &Lin, - boost::intrusive_ptr &Rin, - const unsigned long long &len, - const std::vector &shape, - const std::vector &invmapper_L, - const std::vector &invmapper_R, const char &type); - void Arithmetic_internal_ftb(boost::intrusive_ptr &out, - boost::intrusive_ptr &Lin, - boost::intrusive_ptr &Rin, - const unsigned long long 
&len, - const std::vector &shape, - const std::vector &invmapper_L, - const std::vector &invmapper_R, const char &type); - - void Arithmetic_internal_i64tcd(boost::intrusive_ptr &out, - boost::intrusive_ptr &Lin, - boost::intrusive_ptr &Rin, - const unsigned long long &len, - const std::vector &shape, - const std::vector &invmapper_L, - const std::vector &invmapper_R, const char &type); - void Arithmetic_internal_i64tcf(boost::intrusive_ptr &out, - boost::intrusive_ptr &Lin, - boost::intrusive_ptr &Rin, - const unsigned long long &len, - const std::vector &shape, - const std::vector &invmapper_L, - const std::vector &invmapper_R, const char &type); - void Arithmetic_internal_i64td(boost::intrusive_ptr &out, - boost::intrusive_ptr &Lin, - boost::intrusive_ptr &Rin, - const unsigned long long &len, - const std::vector &shape, - const std::vector &invmapper_L, - const std::vector &invmapper_R, const char &type); - void Arithmetic_internal_i64tf(boost::intrusive_ptr &out, - boost::intrusive_ptr &Lin, - boost::intrusive_ptr &Rin, - const unsigned long long &len, - const std::vector &shape, - const std::vector &invmapper_L, - const std::vector &invmapper_R, const char &type); - void Arithmetic_internal_i64ti64( - boost::intrusive_ptr &out, boost::intrusive_ptr &Lin, - boost::intrusive_ptr &Rin, const unsigned long long &len, - const std::vector &shape, const std::vector &invmapper_L, - const std::vector &invmapper_R, const char &type); - void Arithmetic_internal_i64tu64( - boost::intrusive_ptr &out, boost::intrusive_ptr &Lin, - boost::intrusive_ptr &Rin, const unsigned long long &len, - const std::vector &shape, const std::vector &invmapper_L, - const std::vector &invmapper_R, const char &type); - void Arithmetic_internal_i64ti32( - boost::intrusive_ptr &out, boost::intrusive_ptr &Lin, - boost::intrusive_ptr &Rin, const unsigned long long &len, - const std::vector &shape, const std::vector &invmapper_L, - const std::vector &invmapper_R, const char &type); - void 
Arithmetic_internal_i64tu32( - boost::intrusive_ptr &out, boost::intrusive_ptr &Lin, - boost::intrusive_ptr &Rin, const unsigned long long &len, - const std::vector &shape, const std::vector &invmapper_L, - const std::vector &invmapper_R, const char &type); - void Arithmetic_internal_i64tu16( - boost::intrusive_ptr &out, boost::intrusive_ptr &Lin, - boost::intrusive_ptr &Rin, const unsigned long long &len, - const std::vector &shape, const std::vector &invmapper_L, - const std::vector &invmapper_R, const char &type); - void Arithmetic_internal_i64ti16( - boost::intrusive_ptr &out, boost::intrusive_ptr &Lin, - boost::intrusive_ptr &Rin, const unsigned long long &len, - const std::vector &shape, const std::vector &invmapper_L, - const std::vector &invmapper_R, const char &type); - void Arithmetic_internal_i64tb(boost::intrusive_ptr &out, - boost::intrusive_ptr &Lin, - boost::intrusive_ptr &Rin, - const unsigned long long &len, - const std::vector &shape, - const std::vector &invmapper_L, - const std::vector &invmapper_R, const char &type); - - void Arithmetic_internal_u64tcd(boost::intrusive_ptr &out, - boost::intrusive_ptr &Lin, - boost::intrusive_ptr &Rin, - const unsigned long long &len, - const std::vector &shape, - const std::vector &invmapper_L, - const std::vector &invmapper_R, const char &type); - void Arithmetic_internal_u64tcf(boost::intrusive_ptr &out, - boost::intrusive_ptr &Lin, - boost::intrusive_ptr &Rin, - const unsigned long long &len, - const std::vector &shape, - const std::vector &invmapper_L, - const std::vector &invmapper_R, const char &type); - void Arithmetic_internal_u64td(boost::intrusive_ptr &out, - boost::intrusive_ptr &Lin, - boost::intrusive_ptr &Rin, - const unsigned long long &len, - const std::vector &shape, - const std::vector &invmapper_L, - const std::vector &invmapper_R, const char &type); - void Arithmetic_internal_u64tf(boost::intrusive_ptr &out, - boost::intrusive_ptr &Lin, - boost::intrusive_ptr &Rin, - const unsigned long 
long &len, - const std::vector &shape, - const std::vector &invmapper_L, - const std::vector &invmapper_R, const char &type); - void Arithmetic_internal_u64ti64( - boost::intrusive_ptr &out, boost::intrusive_ptr &Lin, - boost::intrusive_ptr &Rin, const unsigned long long &len, - const std::vector &shape, const std::vector &invmapper_L, - const std::vector &invmapper_R, const char &type); - void Arithmetic_internal_u64tu64( - boost::intrusive_ptr &out, boost::intrusive_ptr &Lin, - boost::intrusive_ptr &Rin, const unsigned long long &len, - const std::vector &shape, const std::vector &invmapper_L, - const std::vector &invmapper_R, const char &type); - void Arithmetic_internal_u64ti32( - boost::intrusive_ptr &out, boost::intrusive_ptr &Lin, - boost::intrusive_ptr &Rin, const unsigned long long &len, - const std::vector &shape, const std::vector &invmapper_L, - const std::vector &invmapper_R, const char &type); - void Arithmetic_internal_u64tu32( - boost::intrusive_ptr &out, boost::intrusive_ptr &Lin, - boost::intrusive_ptr &Rin, const unsigned long long &len, - const std::vector &shape, const std::vector &invmapper_L, - const std::vector &invmapper_R, const char &type); - void Arithmetic_internal_u64ti16( - boost::intrusive_ptr &out, boost::intrusive_ptr &Lin, - boost::intrusive_ptr &Rin, const unsigned long long &len, - const std::vector &shape, const std::vector &invmapper_L, - const std::vector &invmapper_R, const char &type); - void Arithmetic_internal_u64tu16( - boost::intrusive_ptr &out, boost::intrusive_ptr &Lin, - boost::intrusive_ptr &Rin, const unsigned long long &len, - const std::vector &shape, const std::vector &invmapper_L, - const std::vector &invmapper_R, const char &type); - void Arithmetic_internal_u64tb(boost::intrusive_ptr &out, - boost::intrusive_ptr &Lin, - boost::intrusive_ptr &Rin, - const unsigned long long &len, - const std::vector &shape, - const std::vector &invmapper_L, - const std::vector &invmapper_R, const char &type); - - void 
Arithmetic_internal_i32tcd(boost::intrusive_ptr &out, - boost::intrusive_ptr &Lin, - boost::intrusive_ptr &Rin, - const unsigned long long &len, - const std::vector &shape, - const std::vector &invmapper_L, - const std::vector &invmapper_R, const char &type); - void Arithmetic_internal_i32tcf(boost::intrusive_ptr &out, - boost::intrusive_ptr &Lin, - boost::intrusive_ptr &Rin, - const unsigned long long &len, - const std::vector &shape, - const std::vector &invmapper_L, - const std::vector &invmapper_R, const char &type); - void Arithmetic_internal_i32td(boost::intrusive_ptr &out, - boost::intrusive_ptr &Lin, - boost::intrusive_ptr &Rin, - const unsigned long long &len, - const std::vector &shape, - const std::vector &invmapper_L, - const std::vector &invmapper_R, const char &type); - void Arithmetic_internal_i32tf(boost::intrusive_ptr &out, - boost::intrusive_ptr &Lin, - boost::intrusive_ptr &Rin, - const unsigned long long &len, - const std::vector &shape, - const std::vector &invmapper_L, - const std::vector &invmapper_R, const char &type); - void Arithmetic_internal_i32ti64( - boost::intrusive_ptr &out, boost::intrusive_ptr &Lin, - boost::intrusive_ptr &Rin, const unsigned long long &len, - const std::vector &shape, const std::vector &invmapper_L, - const std::vector &invmapper_R, const char &type); - void Arithmetic_internal_i32tu64( - boost::intrusive_ptr &out, boost::intrusive_ptr &Lin, - boost::intrusive_ptr &Rin, const unsigned long long &len, - const std::vector &shape, const std::vector &invmapper_L, - const std::vector &invmapper_R, const char &type); - void Arithmetic_internal_i32ti32( - boost::intrusive_ptr &out, boost::intrusive_ptr &Lin, - boost::intrusive_ptr &Rin, const unsigned long long &len, - const std::vector &shape, const std::vector &invmapper_L, - const std::vector &invmapper_R, const char &type); - void Arithmetic_internal_i32tu32( - boost::intrusive_ptr &out, boost::intrusive_ptr &Lin, - boost::intrusive_ptr &Rin, const unsigned long long 
&len, - const std::vector &shape, const std::vector &invmapper_L, - const std::vector &invmapper_R, const char &type); - void Arithmetic_internal_i32tu16( - boost::intrusive_ptr &out, boost::intrusive_ptr &Lin, - boost::intrusive_ptr &Rin, const unsigned long long &len, - const std::vector &shape, const std::vector &invmapper_L, - const std::vector &invmapper_R, const char &type); - void Arithmetic_internal_i32ti16( - boost::intrusive_ptr &out, boost::intrusive_ptr &Lin, - boost::intrusive_ptr &Rin, const unsigned long long &len, - const std::vector &shape, const std::vector &invmapper_L, - const std::vector &invmapper_R, const char &type); - void Arithmetic_internal_i32tb(boost::intrusive_ptr &out, - boost::intrusive_ptr &Lin, - boost::intrusive_ptr &Rin, - const unsigned long long &len, - const std::vector &shape, - const std::vector &invmapper_L, - const std::vector &invmapper_R, const char &type); - - void Arithmetic_internal_u32tcd(boost::intrusive_ptr &out, - boost::intrusive_ptr &Lin, - boost::intrusive_ptr &Rin, - const unsigned long long &len, - const std::vector &shape, - const std::vector &invmapper_L, - const std::vector &invmapper_R, const char &type); - void Arithmetic_internal_u32tcf(boost::intrusive_ptr &out, - boost::intrusive_ptr &Lin, - boost::intrusive_ptr &Rin, - const unsigned long long &len, - const std::vector &shape, - const std::vector &invmapper_L, - const std::vector &invmapper_R, const char &type); - void Arithmetic_internal_u32td(boost::intrusive_ptr &out, - boost::intrusive_ptr &Lin, - boost::intrusive_ptr &Rin, - const unsigned long long &len, - const std::vector &shape, - const std::vector &invmapper_L, - const std::vector &invmapper_R, const char &type); - void Arithmetic_internal_u32tf(boost::intrusive_ptr &out, - boost::intrusive_ptr &Lin, - boost::intrusive_ptr &Rin, - const unsigned long long &len, - const std::vector &shape, - const std::vector &invmapper_L, - const std::vector &invmapper_R, const char &type); - void 
Arithmetic_internal_u32ti64( - boost::intrusive_ptr &out, boost::intrusive_ptr &Lin, - boost::intrusive_ptr &Rin, const unsigned long long &len, - const std::vector &shape, const std::vector &invmapper_L, - const std::vector &invmapper_R, const char &type); - void Arithmetic_internal_u32tu64( - boost::intrusive_ptr &out, boost::intrusive_ptr &Lin, - boost::intrusive_ptr &Rin, const unsigned long long &len, - const std::vector &shape, const std::vector &invmapper_L, - const std::vector &invmapper_R, const char &type); - void Arithmetic_internal_u32ti32( - boost::intrusive_ptr &out, boost::intrusive_ptr &Lin, - boost::intrusive_ptr &Rin, const unsigned long long &len, - const std::vector &shape, const std::vector &invmapper_L, - const std::vector &invmapper_R, const char &type); - void Arithmetic_internal_u32tu32( - boost::intrusive_ptr &out, boost::intrusive_ptr &Lin, - boost::intrusive_ptr &Rin, const unsigned long long &len, - const std::vector &shape, const std::vector &invmapper_L, - const std::vector &invmapper_R, const char &type); - void Arithmetic_internal_u32tu16( - boost::intrusive_ptr &out, boost::intrusive_ptr &Lin, - boost::intrusive_ptr &Rin, const unsigned long long &len, - const std::vector &shape, const std::vector &invmapper_L, - const std::vector &invmapper_R, const char &type); - void Arithmetic_internal_u32ti16( - boost::intrusive_ptr &out, boost::intrusive_ptr &Lin, - boost::intrusive_ptr &Rin, const unsigned long long &len, - const std::vector &shape, const std::vector &invmapper_L, - const std::vector &invmapper_R, const char &type); - void Arithmetic_internal_u32tb(boost::intrusive_ptr &out, - boost::intrusive_ptr &Lin, - boost::intrusive_ptr &Rin, - const unsigned long long &len, - const std::vector &shape, - const std::vector &invmapper_L, - const std::vector &invmapper_R, const char &type); - - void Arithmetic_internal_i16tcd(boost::intrusive_ptr &out, - boost::intrusive_ptr &Lin, - boost::intrusive_ptr &Rin, - const unsigned long long 
&len, - const std::vector &shape, - const std::vector &invmapper_L, - const std::vector &invmapper_R, const char &type); - void Arithmetic_internal_i16tcf(boost::intrusive_ptr &out, - boost::intrusive_ptr &Lin, - boost::intrusive_ptr &Rin, - const unsigned long long &len, - const std::vector &shape, - const std::vector &invmapper_L, - const std::vector &invmapper_R, const char &type); - void Arithmetic_internal_i16td(boost::intrusive_ptr &out, - boost::intrusive_ptr &Lin, - boost::intrusive_ptr &Rin, - const unsigned long long &len, - const std::vector &shape, - const std::vector &invmapper_L, - const std::vector &invmapper_R, const char &type); - void Arithmetic_internal_i16tf(boost::intrusive_ptr &out, - boost::intrusive_ptr &Lin, - boost::intrusive_ptr &Rin, - const unsigned long long &len, - const std::vector &shape, - const std::vector &invmapper_L, - const std::vector &invmapper_R, const char &type); - void Arithmetic_internal_i16ti64( - boost::intrusive_ptr &out, boost::intrusive_ptr &Lin, - boost::intrusive_ptr &Rin, const unsigned long long &len, - const std::vector &shape, const std::vector &invmapper_L, - const std::vector &invmapper_R, const char &type); - void Arithmetic_internal_i16tu64( - boost::intrusive_ptr &out, boost::intrusive_ptr &Lin, - boost::intrusive_ptr &Rin, const unsigned long long &len, - const std::vector &shape, const std::vector &invmapper_L, - const std::vector &invmapper_R, const char &type); - void Arithmetic_internal_i16ti32( - boost::intrusive_ptr &out, boost::intrusive_ptr &Lin, - boost::intrusive_ptr &Rin, const unsigned long long &len, - const std::vector &shape, const std::vector &invmapper_L, - const std::vector &invmapper_R, const char &type); - void Arithmetic_internal_i16tu32( - boost::intrusive_ptr &out, boost::intrusive_ptr &Lin, - boost::intrusive_ptr &Rin, const unsigned long long &len, - const std::vector &shape, const std::vector &invmapper_L, - const std::vector &invmapper_R, const char &type); - void 
Arithmetic_internal_i16tu16( - boost::intrusive_ptr &out, boost::intrusive_ptr &Lin, - boost::intrusive_ptr &Rin, const unsigned long long &len, - const std::vector &shape, const std::vector &invmapper_L, - const std::vector &invmapper_R, const char &type); - void Arithmetic_internal_i16ti16( - boost::intrusive_ptr &out, boost::intrusive_ptr &Lin, - boost::intrusive_ptr &Rin, const unsigned long long &len, - const std::vector &shape, const std::vector &invmapper_L, - const std::vector &invmapper_R, const char &type); - void Arithmetic_internal_i16tb(boost::intrusive_ptr &out, - boost::intrusive_ptr &Lin, - boost::intrusive_ptr &Rin, - const unsigned long long &len, - const std::vector &shape, - const std::vector &invmapper_L, - const std::vector &invmapper_R, const char &type); - - void Arithmetic_internal_u16tcd(boost::intrusive_ptr &out, - boost::intrusive_ptr &Lin, - boost::intrusive_ptr &Rin, - const unsigned long long &len, - const std::vector &shape, - const std::vector &invmapper_L, - const std::vector &invmapper_R, const char &type); - void Arithmetic_internal_u16tcf(boost::intrusive_ptr &out, - boost::intrusive_ptr &Lin, - boost::intrusive_ptr &Rin, - const unsigned long long &len, - const std::vector &shape, - const std::vector &invmapper_L, - const std::vector &invmapper_R, const char &type); - void Arithmetic_internal_u16td(boost::intrusive_ptr &out, - boost::intrusive_ptr &Lin, - boost::intrusive_ptr &Rin, - const unsigned long long &len, - const std::vector &shape, - const std::vector &invmapper_L, - const std::vector &invmapper_R, const char &type); - void Arithmetic_internal_u16tf(boost::intrusive_ptr &out, - boost::intrusive_ptr &Lin, - boost::intrusive_ptr &Rin, - const unsigned long long &len, - const std::vector &shape, - const std::vector &invmapper_L, - const std::vector &invmapper_R, const char &type); - void Arithmetic_internal_u16ti64( - boost::intrusive_ptr &out, boost::intrusive_ptr &Lin, - boost::intrusive_ptr &Rin, const unsigned long 
long &len, - const std::vector &shape, const std::vector &invmapper_L, - const std::vector &invmapper_R, const char &type); - void Arithmetic_internal_u16tu64( - boost::intrusive_ptr &out, boost::intrusive_ptr &Lin, - boost::intrusive_ptr &Rin, const unsigned long long &len, - const std::vector &shape, const std::vector &invmapper_L, - const std::vector &invmapper_R, const char &type); - void Arithmetic_internal_u16ti32( - boost::intrusive_ptr &out, boost::intrusive_ptr &Lin, - boost::intrusive_ptr &Rin, const unsigned long long &len, - const std::vector &shape, const std::vector &invmapper_L, - const std::vector &invmapper_R, const char &type); - void Arithmetic_internal_u16tu32( - boost::intrusive_ptr &out, boost::intrusive_ptr &Lin, - boost::intrusive_ptr &Rin, const unsigned long long &len, - const std::vector &shape, const std::vector &invmapper_L, - const std::vector &invmapper_R, const char &type); - void Arithmetic_internal_u16tu16( - boost::intrusive_ptr &out, boost::intrusive_ptr &Lin, - boost::intrusive_ptr &Rin, const unsigned long long &len, - const std::vector &shape, const std::vector &invmapper_L, - const std::vector &invmapper_R, const char &type); - void Arithmetic_internal_u16ti16( - boost::intrusive_ptr &out, boost::intrusive_ptr &Lin, - boost::intrusive_ptr &Rin, const unsigned long long &len, - const std::vector &shape, const std::vector &invmapper_L, - const std::vector &invmapper_R, const char &type); - void Arithmetic_internal_u16tb(boost::intrusive_ptr &out, - boost::intrusive_ptr &Lin, - boost::intrusive_ptr &Rin, - const unsigned long long &len, - const std::vector &shape, - const std::vector &invmapper_L, - const std::vector &invmapper_R, const char &type); - - void Arithmetic_internal_btcd(boost::intrusive_ptr &out, - boost::intrusive_ptr &Lin, - boost::intrusive_ptr &Rin, - const unsigned long long &len, - const std::vector &shape, - const std::vector &invmapper_L, - const std::vector &invmapper_R, const char &type); - void 
Arithmetic_internal_btcf(boost::intrusive_ptr &out, - boost::intrusive_ptr &Lin, - boost::intrusive_ptr &Rin, - const unsigned long long &len, - const std::vector &shape, - const std::vector &invmapper_L, - const std::vector &invmapper_R, const char &type); - void Arithmetic_internal_btd(boost::intrusive_ptr &out, - boost::intrusive_ptr &Lin, - boost::intrusive_ptr &Rin, - const unsigned long long &len, - const std::vector &shape, - const std::vector &invmapper_L, - const std::vector &invmapper_R, const char &type); - void Arithmetic_internal_btf(boost::intrusive_ptr &out, - boost::intrusive_ptr &Lin, - boost::intrusive_ptr &Rin, - const unsigned long long &len, - const std::vector &shape, - const std::vector &invmapper_L, - const std::vector &invmapper_R, const char &type); - void Arithmetic_internal_bti64(boost::intrusive_ptr &out, - boost::intrusive_ptr &Lin, - boost::intrusive_ptr &Rin, - const unsigned long long &len, - const std::vector &shape, - const std::vector &invmapper_L, - const std::vector &invmapper_R, const char &type); - void Arithmetic_internal_btu64(boost::intrusive_ptr &out, - boost::intrusive_ptr &Lin, - boost::intrusive_ptr &Rin, - const unsigned long long &len, - const std::vector &shape, - const std::vector &invmapper_L, - const std::vector &invmapper_R, const char &type); - void Arithmetic_internal_bti32(boost::intrusive_ptr &out, - boost::intrusive_ptr &Lin, - boost::intrusive_ptr &Rin, - const unsigned long long &len, - const std::vector &shape, - const std::vector &invmapper_L, - const std::vector &invmapper_R, const char &type); - void Arithmetic_internal_btu32(boost::intrusive_ptr &out, - boost::intrusive_ptr &Lin, - boost::intrusive_ptr &Rin, - const unsigned long long &len, - const std::vector &shape, - const std::vector &invmapper_L, - const std::vector &invmapper_R, const char &type); - void Arithmetic_internal_btu16(boost::intrusive_ptr &out, - boost::intrusive_ptr &Lin, - boost::intrusive_ptr &Rin, - const unsigned long long 
&len, - const std::vector &shape, - const std::vector &invmapper_L, - const std::vector &invmapper_R, const char &type); - void Arithmetic_internal_bti16(boost::intrusive_ptr &out, - boost::intrusive_ptr &Lin, - boost::intrusive_ptr &Rin, - const unsigned long long &len, - const std::vector &shape, - const std::vector &invmapper_L, - const std::vector &invmapper_R, const char &type); - void Arithmetic_internal_btb(boost::intrusive_ptr &out, - boost::intrusive_ptr &Lin, - boost::intrusive_ptr &Rin, - const unsigned long long &len, - const std::vector &shape, - const std::vector &invmapper_L, - const std::vector &invmapper_R, const char &type); - - } // namespace linalg_internal -} // namespace cytnx - -#endif // CYTNX_BACKEND_LINALG_INTERNAL_CPU_ARITHMETIC_INTERNAL_H_ diff --git a/src/backend/linalg_internal_cpu/CMakeLists.txt b/src/backend/linalg_internal_cpu/CMakeLists.txt index 29aa58257..3f00ecb4f 100644 --- a/src/backend/linalg_internal_cpu/CMakeLists.txt +++ b/src/backend/linalg_internal_cpu/CMakeLists.txt @@ -3,7 +3,6 @@ target_sources_local(cytnx Norm_internal.hpp Add_internal.hpp iAdd_internal.hpp - Arithmetic_internal.hpp iArithmetic_internal.hpp Conj_inplace_internal.hpp Cpr_internal.hpp @@ -46,7 +45,6 @@ target_sources_local(cytnx memcpyTruncation.hpp iAdd_internal.cpp - Arithmetic_internal.cpp iArithmetic_internal.cpp Conj_inplace_internal.cpp Diag_internal.cpp diff --git a/src/backend/linalg_internal_interface.cpp b/src/backend/linalg_internal_interface.cpp index a2080c87a..fbb800d98 100644 --- a/src/backend/linalg_internal_interface.cpp +++ b/src/backend/linalg_internal_interface.cpp @@ -13,140 +13,6 @@ namespace cytnx { linalg_internal_interface::~linalg_internal_interface() {} linalg_internal_interface::linalg_internal_interface() { - Ari_ii = vector>(N_Type, vector(N_Type, NULL)); - - Ari_ii[Type.ComplexDouble][Type.ComplexDouble] = Arithmetic_internal_cdtcd; - Ari_ii[Type.ComplexDouble][Type.ComplexFloat] = Arithmetic_internal_cdtcf; - 
Ari_ii[Type.ComplexDouble][Type.Double] = Arithmetic_internal_cdtd; - Ari_ii[Type.ComplexDouble][Type.Float] = Arithmetic_internal_cdtf; - Ari_ii[Type.ComplexDouble][Type.Int64] = Arithmetic_internal_cdti64; - Ari_ii[Type.ComplexDouble][Type.Uint64] = Arithmetic_internal_cdtu64; - Ari_ii[Type.ComplexDouble][Type.Int32] = Arithmetic_internal_cdti32; - Ari_ii[Type.ComplexDouble][Type.Uint32] = Arithmetic_internal_cdtu32; - Ari_ii[Type.ComplexDouble][Type.Uint16] = Arithmetic_internal_cdtu16; - Ari_ii[Type.ComplexDouble][Type.Int16] = Arithmetic_internal_cdti16; - Ari_ii[Type.ComplexDouble][Type.Bool] = Arithmetic_internal_cdtb; - - Ari_ii[Type.ComplexFloat][Type.ComplexDouble] = Arithmetic_internal_cftcd; - Ari_ii[Type.ComplexFloat][Type.ComplexFloat] = Arithmetic_internal_cftcf; - Ari_ii[Type.ComplexFloat][Type.Double] = Arithmetic_internal_cftd; - Ari_ii[Type.ComplexFloat][Type.Float] = Arithmetic_internal_cftf; - Ari_ii[Type.ComplexFloat][Type.Int64] = Arithmetic_internal_cfti64; - Ari_ii[Type.ComplexFloat][Type.Uint64] = Arithmetic_internal_cftu64; - Ari_ii[Type.ComplexFloat][Type.Int32] = Arithmetic_internal_cfti32; - Ari_ii[Type.ComplexFloat][Type.Uint32] = Arithmetic_internal_cftu32; - Ari_ii[Type.ComplexFloat][Type.Uint16] = Arithmetic_internal_cftu16; - Ari_ii[Type.ComplexFloat][Type.Int16] = Arithmetic_internal_cfti16; - Ari_ii[Type.ComplexFloat][Type.Bool] = Arithmetic_internal_cftb; - - Ari_ii[Type.Double][Type.ComplexDouble] = Arithmetic_internal_dtcd; - Ari_ii[Type.Double][Type.ComplexFloat] = Arithmetic_internal_dtcf; - Ari_ii[Type.Double][Type.Double] = Arithmetic_internal_dtd; - Ari_ii[Type.Double][Type.Float] = Arithmetic_internal_dtf; - Ari_ii[Type.Double][Type.Int64] = Arithmetic_internal_dti64; - Ari_ii[Type.Double][Type.Uint64] = Arithmetic_internal_dtu64; - Ari_ii[Type.Double][Type.Int32] = Arithmetic_internal_dti32; - Ari_ii[Type.Double][Type.Uint32] = Arithmetic_internal_dtu32; - Ari_ii[Type.Double][Type.Uint16] = Arithmetic_internal_dtu16; - 
Ari_ii[Type.Double][Type.Int16] = Arithmetic_internal_dti16; - Ari_ii[Type.Double][Type.Bool] = Arithmetic_internal_dtb; - - Ari_ii[Type.Float][Type.ComplexDouble] = Arithmetic_internal_ftcd; - Ari_ii[Type.Float][Type.ComplexFloat] = Arithmetic_internal_ftcf; - Ari_ii[Type.Float][Type.Double] = Arithmetic_internal_ftd; - Ari_ii[Type.Float][Type.Float] = Arithmetic_internal_ftf; - Ari_ii[Type.Float][Type.Int64] = Arithmetic_internal_fti64; - Ari_ii[Type.Float][Type.Uint64] = Arithmetic_internal_ftu64; - Ari_ii[Type.Float][Type.Int32] = Arithmetic_internal_fti32; - Ari_ii[Type.Float][Type.Uint32] = Arithmetic_internal_ftu32; - Ari_ii[Type.Float][Type.Uint16] = Arithmetic_internal_ftu16; - Ari_ii[Type.Float][Type.Int16] = Arithmetic_internal_fti16; - Ari_ii[Type.Float][Type.Bool] = Arithmetic_internal_ftb; - - Ari_ii[Type.Int64][Type.ComplexDouble] = Arithmetic_internal_i64tcd; - Ari_ii[Type.Int64][Type.ComplexFloat] = Arithmetic_internal_i64tcf; - Ari_ii[Type.Int64][Type.Double] = Arithmetic_internal_i64td; - Ari_ii[Type.Int64][Type.Float] = Arithmetic_internal_i64tf; - Ari_ii[Type.Int64][Type.Int64] = Arithmetic_internal_i64ti64; - Ari_ii[Type.Int64][Type.Uint64] = Arithmetic_internal_i64tu64; - Ari_ii[Type.Int64][Type.Int32] = Arithmetic_internal_i64ti32; - Ari_ii[Type.Int64][Type.Uint32] = Arithmetic_internal_i64tu32; - Ari_ii[Type.Int64][Type.Uint16] = Arithmetic_internal_i64tu16; - Ari_ii[Type.Int64][Type.Int16] = Arithmetic_internal_i64ti16; - Ari_ii[Type.Int64][Type.Bool] = Arithmetic_internal_i64tb; - - Ari_ii[Type.Uint64][Type.ComplexDouble] = Arithmetic_internal_u64tcd; - Ari_ii[Type.Uint64][Type.ComplexFloat] = Arithmetic_internal_u64tcf; - Ari_ii[Type.Uint64][Type.Double] = Arithmetic_internal_u64td; - Ari_ii[Type.Uint64][Type.Float] = Arithmetic_internal_u64tf; - Ari_ii[Type.Uint64][Type.Int64] = Arithmetic_internal_u64ti64; - Ari_ii[Type.Uint64][Type.Uint64] = Arithmetic_internal_u64tu64; - Ari_ii[Type.Uint64][Type.Int32] = Arithmetic_internal_u64ti32; 
- Ari_ii[Type.Uint64][Type.Uint32] = Arithmetic_internal_u64tu32; - Ari_ii[Type.Uint64][Type.Uint16] = Arithmetic_internal_u64tu16; - Ari_ii[Type.Uint64][Type.Int16] = Arithmetic_internal_u64ti16; - Ari_ii[Type.Uint64][Type.Bool] = Arithmetic_internal_u64tb; - - Ari_ii[Type.Int32][Type.ComplexDouble] = Arithmetic_internal_i32tcd; - Ari_ii[Type.Int32][Type.ComplexFloat] = Arithmetic_internal_i32tcf; - Ari_ii[Type.Int32][Type.Double] = Arithmetic_internal_i32td; - Ari_ii[Type.Int32][Type.Float] = Arithmetic_internal_i32tf; - Ari_ii[Type.Int32][Type.Int64] = Arithmetic_internal_i32ti64; - Ari_ii[Type.Int32][Type.Uint64] = Arithmetic_internal_i32tu64; - Ari_ii[Type.Int32][Type.Int32] = Arithmetic_internal_i32ti32; - Ari_ii[Type.Int32][Type.Uint32] = Arithmetic_internal_i32tu32; - Ari_ii[Type.Int32][Type.Uint16] = Arithmetic_internal_i32tu16; - Ari_ii[Type.Int32][Type.Int16] = Arithmetic_internal_i32ti16; - Ari_ii[Type.Int32][Type.Bool] = Arithmetic_internal_i32tb; - - Ari_ii[Type.Uint32][Type.ComplexDouble] = Arithmetic_internal_u32tcd; - Ari_ii[Type.Uint32][Type.ComplexFloat] = Arithmetic_internal_u32tcf; - Ari_ii[Type.Uint32][Type.Double] = Arithmetic_internal_u32td; - Ari_ii[Type.Uint32][Type.Float] = Arithmetic_internal_u32tf; - Ari_ii[Type.Uint32][Type.Int64] = Arithmetic_internal_u32ti64; - Ari_ii[Type.Uint32][Type.Uint64] = Arithmetic_internal_u32tu64; - Ari_ii[Type.Uint32][Type.Int32] = Arithmetic_internal_u32ti32; - Ari_ii[Type.Uint32][Type.Uint32] = Arithmetic_internal_u32tu32; - Ari_ii[Type.Uint32][Type.Uint16] = Arithmetic_internal_u32tu16; - Ari_ii[Type.Uint32][Type.Int16] = Arithmetic_internal_u32ti16; - Ari_ii[Type.Uint32][Type.Bool] = Arithmetic_internal_u32tb; - - Ari_ii[Type.Int16][Type.ComplexDouble] = Arithmetic_internal_i16tcd; - Ari_ii[Type.Int16][Type.ComplexFloat] = Arithmetic_internal_i16tcf; - Ari_ii[Type.Int16][Type.Double] = Arithmetic_internal_i16td; - Ari_ii[Type.Int16][Type.Float] = Arithmetic_internal_i16tf; - 
Ari_ii[Type.Int16][Type.Int64] = Arithmetic_internal_i16ti64; - Ari_ii[Type.Int16][Type.Uint64] = Arithmetic_internal_i16tu64; - Ari_ii[Type.Int16][Type.Int32] = Arithmetic_internal_i16ti32; - Ari_ii[Type.Int16][Type.Uint32] = Arithmetic_internal_i16tu32; - Ari_ii[Type.Int16][Type.Uint16] = Arithmetic_internal_i16tu16; - Ari_ii[Type.Int16][Type.Int16] = Arithmetic_internal_i16ti16; - Ari_ii[Type.Int16][Type.Bool] = Arithmetic_internal_i16tb; - - Ari_ii[Type.Uint16][Type.ComplexDouble] = Arithmetic_internal_u16tcd; - Ari_ii[Type.Uint16][Type.ComplexFloat] = Arithmetic_internal_u16tcf; - Ari_ii[Type.Uint16][Type.Double] = Arithmetic_internal_u16td; - Ari_ii[Type.Uint16][Type.Float] = Arithmetic_internal_u16tf; - Ari_ii[Type.Uint16][Type.Int64] = Arithmetic_internal_u16ti64; - Ari_ii[Type.Uint16][Type.Uint64] = Arithmetic_internal_u16tu64; - Ari_ii[Type.Uint16][Type.Int32] = Arithmetic_internal_u16ti32; - Ari_ii[Type.Uint16][Type.Uint32] = Arithmetic_internal_u16tu32; - Ari_ii[Type.Uint16][Type.Uint16] = Arithmetic_internal_u16tu16; - Ari_ii[Type.Uint16][Type.Int16] = Arithmetic_internal_u16ti16; - Ari_ii[Type.Uint16][Type.Bool] = Arithmetic_internal_u16tb; - - Ari_ii[Type.Bool][Type.ComplexDouble] = Arithmetic_internal_btcd; - Ari_ii[Type.Bool][Type.ComplexFloat] = Arithmetic_internal_btcf; - Ari_ii[Type.Bool][Type.Double] = Arithmetic_internal_btd; - Ari_ii[Type.Bool][Type.Float] = Arithmetic_internal_btf; - Ari_ii[Type.Bool][Type.Int64] = Arithmetic_internal_bti64; - Ari_ii[Type.Bool][Type.Uint64] = Arithmetic_internal_btu64; - Ari_ii[Type.Bool][Type.Int32] = Arithmetic_internal_bti32; - Ari_ii[Type.Bool][Type.Uint32] = Arithmetic_internal_btu32; - Ari_ii[Type.Bool][Type.Uint16] = Arithmetic_internal_btu16; - Ari_ii[Type.Bool][Type.Int16] = Arithmetic_internal_bti16; - Ari_ii[Type.Bool][Type.Bool] = Arithmetic_internal_btb; - iAri_ii = vector>(N_Type, vector(N_Type, NULL)); diff --git a/src/backend/linalg_internal_interface.hpp 
b/src/backend/linalg_internal_interface.hpp index 5835a7ecb..df53fa504 100644 --- a/src/backend/linalg_internal_interface.hpp +++ b/src/backend/linalg_internal_interface.hpp @@ -8,11 +8,13 @@ #include "backend/Scalar.hpp" #include "backend/Storage.hpp" #include "linalg_internal_cpu/Abs_internal.hpp" -#include "linalg_internal_cpu/Arithmetic_internal.hpp" +#include "linalg_internal_cpu/Add_internal.hpp" #include "linalg_internal_cpu/Axpy_internal.hpp" #include "linalg_internal_cpu/Conj_inplace_internal.hpp" +#include "linalg_internal_cpu/Cpr_internal.hpp" #include "linalg_internal_cpu/Det_internal.hpp" #include "linalg_internal_cpu/Diag_internal.hpp" +#include "linalg_internal_cpu/Div_internal.hpp" #include "linalg_internal_cpu/Eig_internal.hpp" #include "linalg_internal_cpu/Eigh_internal.hpp" #include "linalg_internal_cpu/Exp_internal.hpp" @@ -27,11 +29,14 @@ #include "linalg_internal_cpu/Matmul_internal.hpp" #include "linalg_internal_cpu/Matvec_internal.hpp" #include "linalg_internal_cpu/MaxMin_internal.hpp" +#include "linalg_internal_cpu/Mod_internal.hpp" +#include "linalg_internal_cpu/Mul_internal.hpp" #include "linalg_internal_cpu/Norm_internal.hpp" #include "linalg_internal_cpu/Outer_internal.hpp" #include "linalg_internal_cpu/Pow_internal.hpp" #include "linalg_internal_cpu/QR_internal.hpp" #include "linalg_internal_cpu/Sdd_internal.hpp" +#include "linalg_internal_cpu/Sub_internal.hpp" #include "linalg_internal_cpu/Sum_internal.hpp" #include "linalg_internal_cpu/Trace_internal.hpp" #include "linalg_internal_cpu/Tridiag_internal.hpp" @@ -212,7 +217,6 @@ namespace cytnx { #endif class linalg_internal_interface { public: - std::vector> Ari_ii; std::vector> iAri_ii; std::vector Sdd_ii; std::vector Gesvd_ii; diff --git a/src/linalg/Add.cpp b/src/linalg/Add.cpp index e0193e76f..4ee3bad05 100644 --- a/src/linalg/Add.cpp +++ b/src/linalg/Add.cpp @@ -41,9 +41,19 @@ namespace cytnx { if ((Lt.is_contiguous() && Rt.is_contiguous()) || icnst) { // contiguous section. 
if (Lt.device() == Device.cpu) { - cytnx::linalg_internal::lii.Ari_ii[Lt.dtype()][Rt.dtype()]( - out._impl->storage()._impl, Lt._impl->storage()._impl, Rt._impl->storage()._impl, - out._impl->storage()._impl->size(), {}, {}, {}, 0); + std::visit( + [&](auto *lptr) { + using TL = std::remove_pointer_t; + std::visit( + [&](auto *rptr) { + using TR = std::remove_pointer_t; + cytnx::linalg_internal::AddInternalImpl( + out._impl->storage()._impl, Lt._impl->storage()._impl, + Rt._impl->storage()._impl, out._impl->storage()._impl->size(), {}, {}, {}); + }, + Rt.ptr()); + }, + Lt.ptr()); } else { #ifdef UNI_GPU checkCudaErrors(cudaSetDevice(Rt.device())); @@ -58,10 +68,20 @@ namespace cytnx { } else { // non-contiguous section if (Lt.device() == Device.cpu) { - linalg_internal::lii.Ari_ii[Lt.dtype()][Rt.dtype()]( - out._impl->storage()._impl, Lt._impl->storage()._impl, Rt._impl->storage()._impl, - Lt._impl->storage()._impl->size(), Lt._impl->shape(), Lt._impl->invmapper(), - Rt._impl->invmapper(), 0); + std::visit( + [&](auto *lptr) { + using TL = std::remove_pointer_t; + std::visit( + [&](auto *rptr) { + using TR = std::remove_pointer_t; + cytnx::linalg_internal::AddInternalImpl( + out._impl->storage()._impl, Lt._impl->storage()._impl, + Rt._impl->storage()._impl, Lt._impl->storage()._impl->size(), Lt._impl->shape(), + Lt._impl->invmapper(), Rt._impl->invmapper()); + }, + Rt.ptr()); + }, + Lt.ptr()); } else { #ifdef UNI_GPU linalg_internal::lii.cuAri_ii[Lt.dtype()][Rt.dtype()]( @@ -88,9 +108,14 @@ namespace cytnx { // Tensor out(Rt.shape(),Type.ComplexDouble,Rt.device()); if (Rt.device() == Device.cpu) { - linalg_internal::lii.Ari_ii[Type.ComplexDouble][Rt.dtype()]( - out._impl->storage()._impl, Cnst._impl, Rt._impl->storage()._impl, - Rt._impl->storage()._impl->size(), {}, {}, {}, 0); + std::visit( + [&](auto *rptr) { + using TR = std::remove_pointer_t; + cytnx::linalg_internal::AddInternalImpl( + out._impl->storage()._impl, Cnst._impl, Rt._impl->storage()._impl, + 
Rt._impl->storage()._impl->size(), {}, {}, {}); + }, + Rt.ptr()); } else { #ifdef UNI_GPU checkCudaErrors(cudaSetDevice(Rt.device())); @@ -119,9 +144,14 @@ namespace cytnx { // Rt.dtype()?Type.ComplexFloat:Rt.dtype(),Rt.device()); if (Rt.device() == Device.cpu) { - linalg_internal::lii.Ari_ii[Type.ComplexFloat][Rt.dtype()]( - out._impl->storage()._impl, Cnst._impl, Rt._impl->storage()._impl, - Rt._impl->storage()._impl->size(), {}, {}, {}, 0); + std::visit( + [&](auto *rptr) { + using TR = std::remove_pointer_t; + cytnx::linalg_internal::AddInternalImpl( + out._impl->storage()._impl, Cnst._impl, Rt._impl->storage()._impl, + Rt._impl->storage()._impl->size(), {}, {}, {}); + }, + Rt.ptr()); } else { #ifdef UNI_GPU checkCudaErrors(cudaSetDevice(Rt.device())); @@ -149,9 +179,14 @@ namespace cytnx { // Tensor out(Rt.shape(),Type.Double < Rt.dtype()?Type.Double:Rt.dtype(),Rt.device()); if (Rt.device() == Device.cpu) { - linalg_internal::lii.Ari_ii[Type.Double][Rt.dtype()]( - out._impl->storage()._impl, Cnst._impl, Rt._impl->storage()._impl, - Rt._impl->storage()._impl->size(), {}, {}, {}, 0); + std::visit( + [&](auto *rptr) { + using TR = std::remove_pointer_t; + cytnx::linalg_internal::AddInternalImpl( + out._impl->storage()._impl, Cnst._impl, Rt._impl->storage()._impl, + Rt._impl->storage()._impl->size(), {}, {}, {}); + }, + Rt.ptr()); } else { #ifdef UNI_GPU checkCudaErrors(cudaSetDevice(Rt.device())); @@ -178,9 +213,14 @@ namespace cytnx { // Tensor out(Rt.shape(),Type.Float < Rt.dtype()?Type.Float:Rt.dtype(),Rt.device()); if (Rt.device() == Device.cpu) { - linalg_internal::lii.Ari_ii[Type.Float][Rt.dtype()]( - out._impl->storage()._impl, Cnst._impl, Rt._impl->storage()._impl, - Rt._impl->storage()._impl->size(), {}, {}, {}, 0); + std::visit( + [&](auto *rptr) { + using TR = std::remove_pointer_t; + cytnx::linalg_internal::AddInternalImpl( + out._impl->storage()._impl, Cnst._impl, Rt._impl->storage()._impl, + Rt._impl->storage()._impl->size(), {}, {}, {}); + }, + 
Rt.ptr()); } else { #ifdef UNI_GPU checkCudaErrors(cudaSetDevice(Rt.device())); @@ -207,9 +247,14 @@ namespace cytnx { // Tensor out(Rt.shape(),Type.Int64 < Rt.dtype()?Type.Int64:Rt.dtype(),Rt.device()); if (Rt.device() == Device.cpu) { - linalg_internal::lii.Ari_ii[Type.Int64][Rt.dtype()]( - out._impl->storage()._impl, Cnst._impl, Rt._impl->storage()._impl, - Rt._impl->storage()._impl->size(), {}, {}, {}, 0); + std::visit( + [&](auto *rptr) { + using TR = std::remove_pointer_t; + cytnx::linalg_internal::AddInternalImpl( + out._impl->storage()._impl, Cnst._impl, Rt._impl->storage()._impl, + Rt._impl->storage()._impl->size(), {}, {}, {}); + }, + Rt.ptr()); } else { #ifdef UNI_GPU checkCudaErrors(cudaSetDevice(Rt.device())); @@ -237,9 +282,14 @@ namespace cytnx { // Tensor out(Rt.shape(),Type.Uint64 < Rt.dtype()?Type.Uint64:Rt.dtype(),Rt.device()); if (Rt.device() == Device.cpu) { - linalg_internal::lii.Ari_ii[Type.Uint64][Rt.dtype()]( - out._impl->storage()._impl, Cnst._impl, Rt._impl->storage()._impl, - Rt._impl->storage()._impl->size(), {}, {}, {}, 0); + std::visit( + [&](auto *rptr) { + using TR = std::remove_pointer_t; + cytnx::linalg_internal::AddInternalImpl( + out._impl->storage()._impl, Cnst._impl, Rt._impl->storage()._impl, + Rt._impl->storage()._impl->size(), {}, {}, {}); + }, + Rt.ptr()); } else { #ifdef UNI_GPU checkCudaErrors(cudaSetDevice(Rt.device())); @@ -266,9 +316,14 @@ namespace cytnx { // Tensor out(Rt.shape(),Type.Int32 < Rt.dtype()?Type.Int32:Rt.dtype(),Rt.device()); if (Rt.device() == Device.cpu) { - linalg_internal::lii.Ari_ii[Type.Int32][Rt.dtype()]( - out._impl->storage()._impl, Cnst._impl, Rt._impl->storage()._impl, - Rt._impl->storage()._impl->size(), {}, {}, {}, 0); + std::visit( + [&](auto *rptr) { + using TR = std::remove_pointer_t; + cytnx::linalg_internal::AddInternalImpl( + out._impl->storage()._impl, Cnst._impl, Rt._impl->storage()._impl, + Rt._impl->storage()._impl->size(), {}, {}, {}); + }, + Rt.ptr()); } else { #ifdef UNI_GPU 
checkCudaErrors(cudaSetDevice(Rt.device())); @@ -296,9 +351,14 @@ namespace cytnx { // Tensor out(Rt.shape(),Type.Uint32 < Rt.dtype()?Type.Uint32:Rt.dtype(),Rt.device()); if (Rt.device() == Device.cpu) { - linalg_internal::lii.Ari_ii[Type.Uint32][Rt.dtype()]( - out._impl->storage()._impl, Cnst._impl, Rt._impl->storage()._impl, - Rt._impl->storage()._impl->size(), {}, {}, {}, 0); + std::visit( + [&](auto *rptr) { + using TR = std::remove_pointer_t; + cytnx::linalg_internal::AddInternalImpl( + out._impl->storage()._impl, Cnst._impl, Rt._impl->storage()._impl, + Rt._impl->storage()._impl->size(), {}, {}, {}); + }, + Rt.ptr()); } else { #ifdef UNI_GPU checkCudaErrors(cudaSetDevice(Rt.device())); @@ -325,9 +385,14 @@ namespace cytnx { // Tensor out(Rt.shape(),Type.Int16 < Rt.dtype()?Type.Int16:Rt.dtype(),Rt.device()); if (Rt.device() == Device.cpu) { - linalg_internal::lii.Ari_ii[Type.Int16][Rt.dtype()]( - out._impl->storage()._impl, Cnst._impl, Rt._impl->storage()._impl, - Rt._impl->storage()._impl->size(), {}, {}, {}, 0); + std::visit( + [&](auto *rptr) { + using TR = std::remove_pointer_t; + cytnx::linalg_internal::AddInternalImpl( + out._impl->storage()._impl, Cnst._impl, Rt._impl->storage()._impl, + Rt._impl->storage()._impl->size(), {}, {}, {}); + }, + Rt.ptr()); } else { #ifdef UNI_GPU checkCudaErrors(cudaSetDevice(Rt.device())); @@ -355,9 +420,14 @@ namespace cytnx { // Tensor out(Rt.shape(),Type.Uint16 < Rt.dtype()?Type.Uint16:Rt.dtype(),Rt.device()); if (Rt.device() == Device.cpu) { - linalg_internal::lii.Ari_ii[Type.Uint16][Rt.dtype()]( - out._impl->storage()._impl, Cnst._impl, Rt._impl->storage()._impl, - Rt._impl->storage()._impl->size(), {}, {}, {}, 0); + std::visit( + [&](auto *rptr) { + using TR = std::remove_pointer_t; + cytnx::linalg_internal::AddInternalImpl( + out._impl->storage()._impl, Cnst._impl, Rt._impl->storage()._impl, + Rt._impl->storage()._impl->size(), {}, {}, {}); + }, + Rt.ptr()); } else { #ifdef UNI_GPU 
checkCudaErrors(cudaSetDevice(Rt.device())); @@ -384,9 +454,14 @@ namespace cytnx { // Tensor out(Rt.shape(),Type.Bool < Rt.dtype()?Type.Bool:Rt.dtype(),Rt.device()); if (Rt.device() == Device.cpu) { - linalg_internal::lii.Ari_ii[Type.Bool][Rt.dtype()]( - out._impl->storage()._impl, Cnst._impl, Rt._impl->storage()._impl, - Rt._impl->storage()._impl->size(), {}, {}, {}, 0); + std::visit( + [&](auto *rptr) { + using TR = std::remove_pointer_t; + cytnx::linalg_internal::AddInternalImpl( + out._impl->storage()._impl, Cnst._impl, Rt._impl->storage()._impl, + Rt._impl->storage()._impl->size(), {}, {}, {}); + }, + Rt.ptr()); } else { #ifdef UNI_GPU checkCudaErrors(cudaSetDevice(Rt.device())); @@ -413,9 +488,19 @@ namespace cytnx { Rt._impl->storage().size(), lc.dtype() < Rt.dtype() ? lc.dtype() : Rt.dtype(), Rt.device()); if (Rt.device() == Device.cpu) { - linalg_internal::lii.Ari_ii[lc.dtype()][Rt.dtype()]( - out._impl->storage()._impl, Cnst._impl, Rt._impl->storage()._impl, - Rt._impl->storage()._impl->size(), {}, {}, {}, 0); + std::visit( + [&](auto *lptr) { + using TL = std::remove_pointer_t; + std::visit( + [&](auto *rptr) { + using TR = std::remove_pointer_t; + cytnx::linalg_internal::AddInternalImpl( + out._impl->storage()._impl, Cnst._impl, Rt._impl->storage()._impl, + Rt._impl->storage()._impl->size(), {}, {}, {}); + }, + Rt.ptr()); + }, + Tensor::from_storage(Cnst).ptr()); } else { #ifdef UNI_GPU checkCudaErrors(cudaSetDevice(Rt.device())); diff --git a/src/linalg/Cpr.cpp b/src/linalg/Cpr.cpp index e6214b3a6..6692ca870 100644 --- a/src/linalg/Cpr.cpp +++ b/src/linalg/Cpr.cpp @@ -22,9 +22,19 @@ namespace cytnx { if (Lt.is_contiguous() && Rt.is_contiguous()) { // contiguous section. 
if (Lt.device() == Device.cpu) { - cytnx::linalg_internal::lii.Ari_ii[Lt.dtype()][Rt.dtype()]( - out._impl->storage()._impl, Lt._impl->storage()._impl, Rt._impl->storage()._impl, - Lt._impl->storage()._impl->size(), {}, {}, {}, 4); + std::visit( + [&](auto *lptr) { + using TL = std::remove_pointer_t; + std::visit( + [&](auto *rptr) { + using TR = std::remove_pointer_t; + cytnx::linalg_internal::CprInternalImpl( + out._impl->storage()._impl, Lt._impl->storage()._impl, + Rt._impl->storage()._impl, Lt._impl->storage()._impl->size(), {}, {}, {}); + }, + Rt.ptr()); + }, + Lt.ptr()); } else { #ifdef UNI_GPU checkCudaErrors(cudaSetDevice(Rt.device())); @@ -39,10 +49,20 @@ namespace cytnx { } else { // non-contiguous section if (Lt.device() == Device.cpu) { - cytnx::linalg_internal::lii.Ari_ii[Lt.dtype()][Rt.dtype()]( - out._impl->storage()._impl, Lt._impl->storage()._impl, Rt._impl->storage()._impl, - Lt._impl->storage()._impl->size(), Lt._impl->shape(), Lt._impl->invmapper(), - Rt._impl->invmapper(), 4); + std::visit( + [&](auto *lptr) { + using TL = std::remove_pointer_t; + std::visit( + [&](auto *rptr) { + using TR = std::remove_pointer_t; + cytnx::linalg_internal::CprInternalImpl( + out._impl->storage()._impl, Lt._impl->storage()._impl, + Rt._impl->storage()._impl, Lt._impl->storage()._impl->size(), Lt._impl->shape(), + Lt._impl->invmapper(), Rt._impl->invmapper()); + }, + Rt.ptr()); + }, + Lt.ptr()); } else { #ifdef UNI_GPU cytnx_error_msg(true, @@ -67,9 +87,14 @@ namespace cytnx { Tensor out(Rt.shape(), Type.Bool, Rt.device()); if (Rt.device() == Device.cpu) { - cytnx::linalg_internal::lii.Ari_ii[Type.ComplexDouble][Rt.dtype()]( - out._impl->storage()._impl, Cnst._impl, Rt._impl->storage()._impl, - Rt._impl->storage()._impl->size(), {}, {}, {}, 4); + std::visit( + [&](auto *rptr) { + using TR = std::remove_pointer_t; + cytnx::linalg_internal::CprInternalImpl( + out._impl->storage()._impl, Cnst._impl, Rt._impl->storage()._impl, + Rt._impl->storage()._impl->size(), 
{}, {}, {}); + }, + Rt.ptr()); } else { #ifdef UNI_GPU checkCudaErrors(cudaSetDevice(Rt.device())); @@ -93,9 +118,14 @@ namespace cytnx { Tensor out(Rt.shape(), Type.Bool, Rt.device()); if (Rt.device() == Device.cpu) { - cytnx::linalg_internal::lii.Ari_ii[Type.ComplexFloat][Rt.dtype()]( - out._impl->storage()._impl, Cnst._impl, Rt._impl->storage()._impl, - Rt._impl->storage()._impl->size(), {}, {}, {}, 4); + std::visit( + [&](auto *rptr) { + using TR = std::remove_pointer_t; + cytnx::linalg_internal::CprInternalImpl( + out._impl->storage()._impl, Cnst._impl, Rt._impl->storage()._impl, + Rt._impl->storage()._impl->size(), {}, {}, {}); + }, + Rt.ptr()); } else { #ifdef UNI_GPU checkCudaErrors(cudaSetDevice(Rt.device())); @@ -119,9 +149,14 @@ namespace cytnx { Tensor out(Rt.shape(), Type.Bool, Rt.device()); if (Rt.device() == Device.cpu) { - cytnx::linalg_internal::lii.Ari_ii[Type.Double][Rt.dtype()]( - out._impl->storage()._impl, Cnst._impl, Rt._impl->storage()._impl, - Rt._impl->storage()._impl->size(), {}, {}, {}, 4); + std::visit( + [&](auto *rptr) { + using TR = std::remove_pointer_t; + cytnx::linalg_internal::CprInternalImpl( + out._impl->storage()._impl, Cnst._impl, Rt._impl->storage()._impl, + Rt._impl->storage()._impl->size(), {}, {}, {}); + }, + Rt.ptr()); } else { #ifdef UNI_GPU checkCudaErrors(cudaSetDevice(Rt.device())); @@ -145,9 +180,14 @@ namespace cytnx { Tensor out(Rt.shape(), Type.Bool, Rt.device()); if (Rt.device() == Device.cpu) { - cytnx::linalg_internal::lii.Ari_ii[Type.Float][Rt.dtype()]( - out._impl->storage()._impl, Cnst._impl, Rt._impl->storage()._impl, - Rt._impl->storage()._impl->size(), {}, {}, {}, 4); + std::visit( + [&](auto *rptr) { + using TR = std::remove_pointer_t; + cytnx::linalg_internal::CprInternalImpl( + out._impl->storage()._impl, Cnst._impl, Rt._impl->storage()._impl, + Rt._impl->storage()._impl->size(), {}, {}, {}); + }, + Rt.ptr()); } else { #ifdef UNI_GPU checkCudaErrors(cudaSetDevice(Rt.device())); @@ -171,9 +211,14 @@ 
namespace cytnx { Tensor out(Rt.shape(), Type.Bool, Rt.device()); if (Rt.device() == Device.cpu) { - cytnx::linalg_internal::lii.Ari_ii[Type.Int64][Rt.dtype()]( - out._impl->storage()._impl, Cnst._impl, Rt._impl->storage()._impl, - Rt._impl->storage()._impl->size(), {}, {}, {}, 4); + std::visit( + [&](auto *rptr) { + using TR = std::remove_pointer_t; + cytnx::linalg_internal::CprInternalImpl( + out._impl->storage()._impl, Cnst._impl, Rt._impl->storage()._impl, + Rt._impl->storage()._impl->size(), {}, {}, {}); + }, + Rt.ptr()); } else { #ifdef UNI_GPU checkCudaErrors(cudaSetDevice(Rt.device())); @@ -197,9 +242,14 @@ namespace cytnx { Tensor out(Rt.shape(), Type.Bool, Rt.device()); if (Rt.device() == Device.cpu) { - cytnx::linalg_internal::lii.Ari_ii[Type.Uint64][Rt.dtype()]( - out._impl->storage()._impl, Cnst._impl, Rt._impl->storage()._impl, - Rt._impl->storage()._impl->size(), {}, {}, {}, 4); + std::visit( + [&](auto *rptr) { + using TR = std::remove_pointer_t; + cytnx::linalg_internal::CprInternalImpl( + out._impl->storage()._impl, Cnst._impl, Rt._impl->storage()._impl, + Rt._impl->storage()._impl->size(), {}, {}, {}); + }, + Rt.ptr()); } else { #ifdef UNI_GPU checkCudaErrors(cudaSetDevice(Rt.device())); @@ -223,9 +273,14 @@ namespace cytnx { Tensor out(Rt.shape(), Type.Bool, Rt.device()); if (Rt.device() == Device.cpu) { - cytnx::linalg_internal::lii.Ari_ii[Type.Int32][Rt.dtype()]( - out._impl->storage()._impl, Cnst._impl, Rt._impl->storage()._impl, - Rt._impl->storage()._impl->size(), {}, {}, {}, 4); + std::visit( + [&](auto *rptr) { + using TR = std::remove_pointer_t; + cytnx::linalg_internal::CprInternalImpl( + out._impl->storage()._impl, Cnst._impl, Rt._impl->storage()._impl, + Rt._impl->storage()._impl->size(), {}, {}, {}); + }, + Rt.ptr()); } else { #ifdef UNI_GPU checkCudaErrors(cudaSetDevice(Rt.device())); @@ -249,9 +304,14 @@ namespace cytnx { Tensor out(Rt.shape(), Type.Bool, Rt.device()); if (Rt.device() == Device.cpu) { - 
cytnx::linalg_internal::lii.Ari_ii[Type.Uint32][Rt.dtype()]( - out._impl->storage()._impl, Cnst._impl, Rt._impl->storage()._impl, - Rt._impl->storage()._impl->size(), {}, {}, {}, 4); + std::visit( + [&](auto *rptr) { + using TR = std::remove_pointer_t; + cytnx::linalg_internal::CprInternalImpl( + out._impl->storage()._impl, Cnst._impl, Rt._impl->storage()._impl, + Rt._impl->storage()._impl->size(), {}, {}, {}); + }, + Rt.ptr()); } else { #ifdef UNI_GPU checkCudaErrors(cudaSetDevice(Rt.device())); @@ -275,9 +335,14 @@ namespace cytnx { Tensor out(Rt.shape(), Type.Bool, Rt.device()); if (Rt.device() == Device.cpu) { - cytnx::linalg_internal::lii.Ari_ii[Type.Int16][Rt.dtype()]( - out._impl->storage()._impl, Cnst._impl, Rt._impl->storage()._impl, - Rt._impl->storage()._impl->size(), {}, {}, {}, 4); + std::visit( + [&](auto *rptr) { + using TR = std::remove_pointer_t; + cytnx::linalg_internal::CprInternalImpl( + out._impl->storage()._impl, Cnst._impl, Rt._impl->storage()._impl, + Rt._impl->storage()._impl->size(), {}, {}, {}); + }, + Rt.ptr()); } else { #ifdef UNI_GPU checkCudaErrors(cudaSetDevice(Rt.device())); @@ -301,9 +366,14 @@ namespace cytnx { Tensor out(Rt.shape(), Type.Bool, Rt.device()); if (Rt.device() == Device.cpu) { - cytnx::linalg_internal::lii.Ari_ii[Type.Uint16][Rt.dtype()]( - out._impl->storage()._impl, Cnst._impl, Rt._impl->storage()._impl, - Rt._impl->storage()._impl->size(), {}, {}, {}, 4); + std::visit( + [&](auto *rptr) { + using TR = std::remove_pointer_t; + cytnx::linalg_internal::CprInternalImpl( + out._impl->storage()._impl, Cnst._impl, Rt._impl->storage()._impl, + Rt._impl->storage()._impl->size(), {}, {}, {}); + }, + Rt.ptr()); } else { #ifdef UNI_GPU checkCudaErrors(cudaSetDevice(Rt.device())); @@ -327,9 +397,14 @@ namespace cytnx { Tensor out(Rt.shape(), Type.Bool, Rt.device()); if (Rt.device() == Device.cpu) { - cytnx::linalg_internal::lii.Ari_ii[Type.Bool][Rt.dtype()]( - out._impl->storage()._impl, Cnst._impl, Rt._impl->storage()._impl, 
- Rt._impl->storage()._impl->size(), {}, {}, {}, 4); + std::visit( + [&](auto *rptr) { + using TR = std::remove_pointer_t; + cytnx::linalg_internal::CprInternalImpl( + out._impl->storage()._impl, Cnst._impl, Rt._impl->storage()._impl, + Rt._impl->storage()._impl->size(), {}, {}, {}); + }, + Rt.ptr()); } else { #ifdef UNI_GPU checkCudaErrors(cudaSetDevice(Rt.device())); @@ -353,9 +428,19 @@ namespace cytnx { Tensor out(Rt.shape(), Type.Bool, Rt.device()); if (Rt.device() == Device.cpu) { - cytnx::linalg_internal::lii.Ari_ii[lc.dtype()][Rt.dtype()]( - out._impl->storage()._impl, Cnst._impl, Rt._impl->storage()._impl, - Rt._impl->storage()._impl->size(), {}, {}, {}, 4); + std::visit( + [&](auto *lptr) { + using TL = std::remove_pointer_t; + std::visit( + [&](auto *rptr) { + using TR = std::remove_pointer_t; + cytnx::linalg_internal::CprInternalImpl( + out._impl->storage()._impl, Cnst._impl, Rt._impl->storage()._impl, + Rt._impl->storage()._impl->size(), {}, {}, {}); + }, + Rt.ptr()); + }, + Tensor::from_storage(Cnst).ptr()); } else { #ifdef UNI_GPU checkCudaErrors(cudaSetDevice(Rt.device())); diff --git a/src/linalg/Div.cpp b/src/linalg/Div.cpp index 2dad1e956..80d7fa0af 100644 --- a/src/linalg/Div.cpp +++ b/src/linalg/Div.cpp @@ -37,9 +37,19 @@ namespace cytnx { if ((Lt.is_contiguous() && Rt.is_contiguous()) || icnst) { // contiguous section if (Lt.device() == Device.cpu) { - cytnx::linalg_internal::lii.Ari_ii[Lt.dtype()][Rt.dtype()]( - out._impl->storage()._impl, Lt._impl->storage()._impl, Rt._impl->storage()._impl, - out._impl->storage()._impl->size(), {}, {}, {}, 3); + std::visit( + [&](auto *lptr) { + using TL = std::remove_pointer_t; + std::visit( + [&](auto *rptr) { + using TR = std::remove_pointer_t; + cytnx::linalg_internal::DivInternalImpl( + out._impl->storage()._impl, Lt._impl->storage()._impl, + Rt._impl->storage()._impl, out._impl->storage()._impl->size(), {}, {}, {}); + }, + Rt.ptr()); + }, + Lt.ptr()); } else { #ifdef UNI_GPU 
checkCudaErrors(cudaSetDevice(Rt.device())); @@ -54,10 +64,20 @@ namespace cytnx { } else { // non-contiguous section if (Lt.device() == Device.cpu) { - cytnx::linalg_internal::lii.Ari_ii[Lt.dtype()][Rt.dtype()]( - out._impl->storage()._impl, Lt._impl->storage()._impl, Rt._impl->storage()._impl, - Lt._impl->storage()._impl->size(), Lt._impl->shape(), Lt._impl->invmapper(), - Rt._impl->invmapper(), 3); + std::visit( + [&](auto *lptr) { + using TL = std::remove_pointer_t; + std::visit( + [&](auto *rptr) { + using TR = std::remove_pointer_t; + cytnx::linalg_internal::DivInternalImpl( + out._impl->storage()._impl, Lt._impl->storage()._impl, + Rt._impl->storage()._impl, Lt._impl->storage()._impl->size(), Lt._impl->shape(), + Lt._impl->invmapper(), Rt._impl->invmapper()); + }, + Rt.ptr()); + }, + Lt.ptr()); } else { #ifdef UNI_GPU cytnx_error_msg(true, @@ -84,9 +104,14 @@ namespace cytnx { out._impl->storage() = Storage(Rt._impl->storage().size(), Type.ComplexDouble, Rt.device()); if (Rt.device() == Device.cpu) { - cytnx::linalg_internal::lii.Ari_ii[Type.ComplexDouble][Rt.dtype()]( - out._impl->storage()._impl, Cnst._impl, Rt._impl->storage()._impl, - Rt._impl->storage()._impl->size(), {}, {}, {}, 3); + std::visit( + [&](auto *rptr) { + using TR = std::remove_pointer_t; + cytnx::linalg_internal::DivInternalImpl( + out._impl->storage()._impl, Cnst._impl, Rt._impl->storage()._impl, + Rt._impl->storage()._impl->size(), {}, {}, {}); + }, + Rt.ptr()); } else { #ifdef UNI_GPU checkCudaErrors(cudaSetDevice(Rt.device())); @@ -116,9 +141,14 @@ namespace cytnx { // Rt.dtype()?Type.ComplexFloat:Rt.dtype(),Rt.device()); if (Rt.device() == Device.cpu) { - cytnx::linalg_internal::lii.Ari_ii[Type.ComplexFloat][Rt.dtype()]( - out._impl->storage()._impl, Cnst._impl, Rt._impl->storage()._impl, - Rt._impl->storage()._impl->size(), {}, {}, {}, 3); + std::visit( + [&](auto *rptr) { + using TR = std::remove_pointer_t; + cytnx::linalg_internal::DivInternalImpl( + out._impl->storage()._impl, 
Cnst._impl, Rt._impl->storage()._impl, + Rt._impl->storage()._impl->size(), {}, {}, {}); + }, + Rt.ptr()); } else { #ifdef UNI_GPU checkCudaErrors(cudaSetDevice(Rt.device())); @@ -146,9 +176,14 @@ namespace cytnx { // Tensor out(Rt.shape(),Type.Double < Rt.dtype()?Type.Double:Rt.dtype(),Rt.device()); if (Rt.device() == Device.cpu) { - cytnx::linalg_internal::lii.Ari_ii[Type.Double][Rt.dtype()]( - out._impl->storage()._impl, Cnst._impl, Rt._impl->storage()._impl, - Rt._impl->storage()._impl->size(), {}, {}, {}, 3); + std::visit( + [&](auto *rptr) { + using TR = std::remove_pointer_t; + cytnx::linalg_internal::DivInternalImpl( + out._impl->storage()._impl, Cnst._impl, Rt._impl->storage()._impl, + Rt._impl->storage()._impl->size(), {}, {}, {}); + }, + Rt.ptr()); } else { #ifdef UNI_GPU cytnx::linalg_internal::lii.cuAri_ii[Type.Double][Rt.dtype()]( @@ -174,9 +209,14 @@ namespace cytnx { // Tensor out(Rt.shape(),Type.Float < Rt.dtype()?Type.Float:Rt.dtype(),Rt.device()); if (Rt.device() == Device.cpu) { - cytnx::linalg_internal::lii.Ari_ii[Type.Float][Rt.dtype()]( - out._impl->storage()._impl, Cnst._impl, Rt._impl->storage()._impl, - Rt._impl->storage()._impl->size(), {}, {}, {}, 3); + std::visit( + [&](auto *rptr) { + using TR = std::remove_pointer_t; + cytnx::linalg_internal::DivInternalImpl( + out._impl->storage()._impl, Cnst._impl, Rt._impl->storage()._impl, + Rt._impl->storage()._impl->size(), {}, {}, {}); + }, + Rt.ptr()); } else { #ifdef UNI_GPU checkCudaErrors(cudaSetDevice(Rt.device())); @@ -203,9 +243,14 @@ namespace cytnx { // Tensor out(Rt.shape(),Type.Int64 < Rt.dtype()?Type.Int64:Rt.dtype(),Rt.device()); if (Rt.device() == Device.cpu) { - cytnx::linalg_internal::lii.Ari_ii[Type.Int64][Rt.dtype()]( - out._impl->storage()._impl, Cnst._impl, Rt._impl->storage()._impl, - Rt._impl->storage()._impl->size(), {}, {}, {}, 3); + std::visit( + [&](auto *rptr) { + using TR = std::remove_pointer_t; + cytnx::linalg_internal::DivInternalImpl( + 
out._impl->storage()._impl, Cnst._impl, Rt._impl->storage()._impl, + Rt._impl->storage()._impl->size(), {}, {}, {}); + }, + Rt.ptr()); } else { #ifdef UNI_GPU cytnx::linalg_internal::lii.cuAri_ii[Type.Int64][Rt.dtype()]( @@ -232,9 +277,14 @@ namespace cytnx { // Tensor out(Rt.shape(),Type.Uint64 < Rt.dtype()?Type.Uint64:Rt.dtype(),Rt.device()); if (Rt.device() == Device.cpu) { - cytnx::linalg_internal::lii.Ari_ii[Type.Uint64][Rt.dtype()]( - out._impl->storage()._impl, Cnst._impl, Rt._impl->storage()._impl, - Rt._impl->storage()._impl->size(), {}, {}, {}, 3); + std::visit( + [&](auto *rptr) { + using TR = std::remove_pointer_t; + cytnx::linalg_internal::DivInternalImpl( + out._impl->storage()._impl, Cnst._impl, Rt._impl->storage()._impl, + Rt._impl->storage()._impl->size(), {}, {}, {}); + }, + Rt.ptr()); } else { #ifdef UNI_GPU checkCudaErrors(cudaSetDevice(Rt.device())); @@ -261,9 +311,14 @@ namespace cytnx { // Tensor out(Rt.shape(),Type.Int32 < Rt.dtype()?Type.Int32:Rt.dtype(),Rt.device()); if (Rt.device() == Device.cpu) { - cytnx::linalg_internal::lii.Ari_ii[Type.Int32][Rt.dtype()]( - out._impl->storage()._impl, Cnst._impl, Rt._impl->storage()._impl, - Rt._impl->storage()._impl->size(), {}, {}, {}, 3); + std::visit( + [&](auto *rptr) { + using TR = std::remove_pointer_t; + cytnx::linalg_internal::DivInternalImpl( + out._impl->storage()._impl, Cnst._impl, Rt._impl->storage()._impl, + Rt._impl->storage()._impl->size(), {}, {}, {}); + }, + Rt.ptr()); } else { #ifdef UNI_GPU checkCudaErrors(cudaSetDevice(Rt.device())); @@ -291,9 +346,14 @@ namespace cytnx { // Tensor out(Rt.shape(),Type.Uint32 < Rt.dtype()?Type.Uint32:Rt.dtype(),Rt.device()); if (Rt.device() == Device.cpu) { - cytnx::linalg_internal::lii.Ari_ii[Type.Uint32][Rt.dtype()]( - out._impl->storage()._impl, Cnst._impl, Rt._impl->storage()._impl, - Rt._impl->storage()._impl->size(), {}, {}, {}, 3); + std::visit( + [&](auto *rptr) { + using TR = std::remove_pointer_t; + 
cytnx::linalg_internal::DivInternalImpl( + out._impl->storage()._impl, Cnst._impl, Rt._impl->storage()._impl, + Rt._impl->storage()._impl->size(), {}, {}, {}); + }, + Rt.ptr()); } else { #ifdef UNI_GPU checkCudaErrors(cudaSetDevice(Rt.device())); @@ -320,9 +380,14 @@ namespace cytnx { // Tensor out(Rt.shape(),Type.Int16 < Rt.dtype()?Type.Int16:Rt.dtype(),Rt.device()); if (Rt.device() == Device.cpu) { - cytnx::linalg_internal::lii.Ari_ii[Type.Int16][Rt.dtype()]( - out._impl->storage()._impl, Cnst._impl, Rt._impl->storage()._impl, - Rt._impl->storage()._impl->size(), {}, {}, {}, 3); + std::visit( + [&](auto *rptr) { + using TR = std::remove_pointer_t; + cytnx::linalg_internal::DivInternalImpl( + out._impl->storage()._impl, Cnst._impl, Rt._impl->storage()._impl, + Rt._impl->storage()._impl->size(), {}, {}, {}); + }, + Rt.ptr()); } else { #ifdef UNI_GPU checkCudaErrors(cudaSetDevice(Rt.device())); @@ -350,9 +415,14 @@ namespace cytnx { // Tensor out(Rt.shape(),Type.Uint16 < Rt.dtype()?Type.Uint16:Rt.dtype(),Rt.device()); if (Rt.device() == Device.cpu) { - cytnx::linalg_internal::lii.Ari_ii[Type.Uint16][Rt.dtype()]( - out._impl->storage()._impl, Cnst._impl, Rt._impl->storage()._impl, - Rt._impl->storage()._impl->size(), {}, {}, {}, 3); + std::visit( + [&](auto *rptr) { + using TR = std::remove_pointer_t; + cytnx::linalg_internal::DivInternalImpl( + out._impl->storage()._impl, Cnst._impl, Rt._impl->storage()._impl, + Rt._impl->storage()._impl->size(), {}, {}, {}); + }, + Rt.ptr()); } else { #ifdef UNI_GPU checkCudaErrors(cudaSetDevice(Rt.device())); @@ -379,9 +449,14 @@ namespace cytnx { // Tensor out(Rt.shape(),Type.Bool < Rt.dtype()?Type.Bool:Rt.dtype(),Rt.device()); if (Rt.device() == Device.cpu) { - cytnx::linalg_internal::lii.Ari_ii[Type.Bool][Rt.dtype()]( - out._impl->storage()._impl, Cnst._impl, Rt._impl->storage()._impl, - Rt._impl->storage()._impl->size(), {}, {}, {}, 3); + std::visit( + [&](auto *rptr) { + using TR = std::remove_pointer_t; + 
cytnx::linalg_internal::DivInternalImpl( + out._impl->storage()._impl, Cnst._impl, Rt._impl->storage()._impl, + Rt._impl->storage()._impl->size(), {}, {}, {}); + }, + Rt.ptr()); } else { #ifdef UNI_GPU checkCudaErrors(cudaSetDevice(Rt.device())); @@ -408,9 +483,19 @@ namespace cytnx { Rt._impl->storage().size(), lc.dtype() < Rt.dtype() ? lc.dtype() : Rt.dtype(), Rt.device()); if (Rt.device() == Device.cpu) { - linalg_internal::lii.Ari_ii[lc.dtype()][Rt.dtype()]( - out._impl->storage()._impl, Cnst._impl, Rt._impl->storage()._impl, - Rt._impl->storage()._impl->size(), {}, {}, {}, 3); + std::visit( + [&](auto *lptr) { + using TL = std::remove_pointer_t; + std::visit( + [&](auto *rptr) { + using TR = std::remove_pointer_t; + cytnx::linalg_internal::DivInternalImpl( + out._impl->storage()._impl, Cnst._impl, Rt._impl->storage()._impl, + Rt._impl->storage()._impl->size(), {}, {}, {}); + }, + Rt.ptr()); + }, + Tensor::from_storage(Cnst).ptr()); } else { #ifdef UNI_GPU checkCudaErrors(cudaSetDevice(Rt.device())); @@ -436,9 +521,14 @@ namespace cytnx { // Tensor out(Lt.shape(),Type.ComplexDouble,Lt.device()); if (Lt.device() == Device.cpu) { - cytnx::linalg_internal::lii.Ari_ii[Lt.dtype()][Type.ComplexDouble]( - out._impl->storage()._impl, Lt._impl->storage()._impl, Cnst._impl, - Lt._impl->storage()._impl->size(), {}, {}, {}, 3); + std::visit( + [&](auto *lptr) { + using TL = std::remove_pointer_t; + cytnx::linalg_internal::DivInternalImpl( + out._impl->storage()._impl, Lt._impl->storage()._impl, Cnst._impl, + Lt._impl->storage()._impl->size(), {}, {}, {}); + }, + Lt.ptr()); } else { #ifdef UNI_GPU checkCudaErrors(cudaSetDevice(Lt.device())); @@ -466,9 +556,14 @@ namespace cytnx { // Lt.dtype()?Type.ComplexFloat:Lt.dtype(),Lt.device()); if (Lt.device() == Device.cpu) { - cytnx::linalg_internal::lii.Ari_ii[Lt.dtype()][Type.ComplexFloat]( - out._impl->storage()._impl, Lt._impl->storage()._impl, Cnst._impl, - Lt._impl->storage()._impl->size(), {}, {}, {}, 3); + std::visit( + 
[&](auto *lptr) { + using TL = std::remove_pointer_t; + cytnx::linalg_internal::DivInternalImpl( + out._impl->storage()._impl, Lt._impl->storage()._impl, Cnst._impl, + Lt._impl->storage()._impl->size(), {}, {}, {}); + }, + Lt.ptr()); } else { #ifdef UNI_GPU checkCudaErrors(cudaSetDevice(Lt.device())); @@ -496,9 +591,14 @@ namespace cytnx { // Tensor out(Lt.shape(),Type.Double < Lt.dtype()?Type.Double:Lt.dtype(),Lt.device()); if (Lt.device() == Device.cpu) { - cytnx::linalg_internal::lii.Ari_ii[Lt.dtype()][Type.Double]( - out._impl->storage()._impl, Lt._impl->storage()._impl, Cnst._impl, - Lt._impl->storage()._impl->size(), {}, {}, {}, 3); + std::visit( + [&](auto *lptr) { + using TL = std::remove_pointer_t; + cytnx::linalg_internal::DivInternalImpl( + out._impl->storage()._impl, Lt._impl->storage()._impl, Cnst._impl, + Lt._impl->storage()._impl->size(), {}, {}, {}); + }, + Lt.ptr()); } else { #ifdef UNI_GPU checkCudaErrors(cudaSetDevice(Lt.device())); @@ -524,9 +624,14 @@ namespace cytnx { // Tensor out(Lt.shape(),Type.Float < Lt.dtype()?Type.Float:Lt.dtype(),Lt.device()); if (Lt.device() == Device.cpu) { - cytnx::linalg_internal::lii.Ari_ii[Lt.dtype()][Type.Float]( - out._impl->storage()._impl, Lt._impl->storage()._impl, Cnst._impl, - Lt._impl->storage()._impl->size(), {}, {}, {}, 3); + std::visit( + [&](auto *lptr) { + using TL = std::remove_pointer_t; + cytnx::linalg_internal::DivInternalImpl( + out._impl->storage()._impl, Lt._impl->storage()._impl, Cnst._impl, + Lt._impl->storage()._impl->size(), {}, {}, {}); + }, + Lt.ptr()); } else { #ifdef UNI_GPU checkCudaErrors(cudaSetDevice(Lt.device())); @@ -552,9 +657,14 @@ namespace cytnx { // Tensor out(Lt.shape(),Type.Int64 < Lt.dtype()?Type.Int64:Lt.dtype(),Lt.device()); if (Lt.device() == Device.cpu) { - cytnx::linalg_internal::lii.Ari_ii[Lt.dtype()][Type.Int64]( - out._impl->storage()._impl, Lt._impl->storage()._impl, Cnst._impl, - Lt._impl->storage()._impl->size(), {}, {}, {}, 3); + std::visit( + [&](auto *lptr) 
{ + using TL = std::remove_pointer_t; + cytnx::linalg_internal::DivInternalImpl( + out._impl->storage()._impl, Lt._impl->storage()._impl, Cnst._impl, + Lt._impl->storage()._impl->size(), {}, {}, {}); + }, + Lt.ptr()); } else { #ifdef UNI_GPU checkCudaErrors(cudaSetDevice(Lt.device())); @@ -581,9 +691,14 @@ namespace cytnx { // Tensor out(Lt.shape(),Type.Uint64 < Lt.dtype()?Type.Uint64:Lt.dtype(),Lt.device()); if (Lt.device() == Device.cpu) { - cytnx::linalg_internal::lii.Ari_ii[Lt.dtype()][Type.Uint64]( - out._impl->storage()._impl, Lt._impl->storage()._impl, Cnst._impl, - Lt._impl->storage()._impl->size(), {}, {}, {}, 3); + std::visit( + [&](auto *lptr) { + using TL = std::remove_pointer_t; + cytnx::linalg_internal::DivInternalImpl( + out._impl->storage()._impl, Lt._impl->storage()._impl, Cnst._impl, + Lt._impl->storage()._impl->size(), {}, {}, {}); + }, + Lt.ptr()); } else { #ifdef UNI_GPU checkCudaErrors(cudaSetDevice(Lt.device())); @@ -610,9 +725,14 @@ namespace cytnx { if (Lt.device() == Device.cpu) { // std::cout << "chk" << std::endl; - cytnx::linalg_internal::lii.Ari_ii[Lt.dtype()][Type.Int32]( - out._impl->storage()._impl, Lt._impl->storage()._impl, Cnst._impl, - Lt._impl->storage()._impl->size(), {}, {}, {}, 3); + std::visit( + [&](auto *lptr) { + using TL = std::remove_pointer_t; + cytnx::linalg_internal::DivInternalImpl( + out._impl->storage()._impl, Lt._impl->storage()._impl, Cnst._impl, + Lt._impl->storage()._impl->size(), {}, {}, {}); + }, + Lt.ptr()); } else { #ifdef UNI_GPU checkCudaErrors(cudaSetDevice(Lt.device())); @@ -639,9 +759,14 @@ namespace cytnx { // Tensor out(Lt.shape(),Type.Uint32 < Lt.dtype()?Type.Uint32:Lt.dtype(),Lt.device()); if (Lt.device() == Device.cpu) { - cytnx::linalg_internal::lii.Ari_ii[Lt.dtype()][Type.Uint32]( - out._impl->storage()._impl, Lt._impl->storage()._impl, Cnst._impl, - Lt._impl->storage()._impl->size(), {}, {}, {}, 3); + std::visit( + [&](auto *lptr) { + using TL = std::remove_pointer_t; + 
cytnx::linalg_internal::DivInternalImpl( + out._impl->storage()._impl, Lt._impl->storage()._impl, Cnst._impl, + Lt._impl->storage()._impl->size(), {}, {}, {}); + }, + Lt.ptr()); } else { #ifdef UNI_GPU checkCudaErrors(cudaSetDevice(Lt.device())); @@ -668,9 +793,14 @@ namespace cytnx { // Tensor out(Lt.shape(),Type.Int16 < Lt.dtype()?Type.Int16:Lt.dtype(),Lt.device()); if (Lt.device() == Device.cpu) { - cytnx::linalg_internal::lii.Ari_ii[Lt.dtype()][Type.Int16]( - out._impl->storage()._impl, Lt._impl->storage()._impl, Cnst._impl, - Lt._impl->storage()._impl->size(), {}, {}, {}, 3); + std::visit( + [&](auto *lptr) { + using TL = std::remove_pointer_t; + cytnx::linalg_internal::DivInternalImpl( + out._impl->storage()._impl, Lt._impl->storage()._impl, Cnst._impl, + Lt._impl->storage()._impl->size(), {}, {}, {}); + }, + Lt.ptr()); } else { #ifdef UNI_GPU checkCudaErrors(cudaSetDevice(Lt.device())); @@ -697,9 +827,14 @@ namespace cytnx { // Tensor out(Lt.shape(),Type.Uint16 < Lt.dtype()?Type.Uint16:Lt.dtype(),Lt.device()); if (Lt.device() == Device.cpu) { - cytnx::linalg_internal::lii.Ari_ii[Lt.dtype()][Type.Uint16]( - out._impl->storage()._impl, Lt._impl->storage()._impl, Cnst._impl, - Lt._impl->storage()._impl->size(), {}, {}, {}, 3); + std::visit( + [&](auto *lptr) { + using TL = std::remove_pointer_t; + cytnx::linalg_internal::DivInternalImpl( + out._impl->storage()._impl, Lt._impl->storage()._impl, Cnst._impl, + Lt._impl->storage()._impl->size(), {}, {}, {}); + }, + Lt.ptr()); } else { #ifdef UNI_GPU checkCudaErrors(cudaSetDevice(Lt.device())); @@ -725,9 +860,14 @@ namespace cytnx { // Tensor out(Lt.shape(),Type.Bool < Lt.dtype()?Type.Bool:Lt.dtype(),Lt.device()); if (Lt.device() == Device.cpu) { - cytnx::linalg_internal::lii.Ari_ii[Lt.dtype()][Type.Bool]( - out._impl->storage()._impl, Lt._impl->storage()._impl, Cnst._impl, - Lt._impl->storage()._impl->size(), {}, {}, {}, 3); + std::visit( + [&](auto *lptr) { + using TL = std::remove_pointer_t; + 
cytnx::linalg_internal::DivInternalImpl( + out._impl->storage()._impl, Lt._impl->storage()._impl, Cnst._impl, + Lt._impl->storage()._impl->size(), {}, {}, {}); + }, + Lt.ptr()); } else { #ifdef UNI_GPU checkCudaErrors(cudaSetDevice(Lt.device())); @@ -754,9 +894,19 @@ namespace cytnx { Lt._impl->storage().size(), Lt.dtype() < rc.dtype() ? Lt.dtype() : rc.dtype(), Lt.device()); if (Lt.device() == Device.cpu) { - linalg_internal::lii.Ari_ii[Lt.dtype()][rc.dtype()]( - out._impl->storage()._impl, Lt._impl->storage()._impl, Cnst._impl, - Lt._impl->storage()._impl->size(), {}, {}, {}, 3); + std::visit( + [&](auto *lptr) { + using TL = std::remove_pointer_t; + std::visit( + [&](auto *rptr) { + using TR = std::remove_pointer_t; + cytnx::linalg_internal::DivInternalImpl( + out._impl->storage()._impl, Lt._impl->storage()._impl, Cnst._impl, + Lt._impl->storage()._impl->size(), {}, {}, {}); + }, + Tensor::from_storage(Cnst).ptr()); + }, + Lt.ptr()); } else { #ifdef UNI_GPU checkCudaErrors(cudaSetDevice(Lt.device())); diff --git a/src/linalg/Mod.cpp b/src/linalg/Mod.cpp index 80880a20f..e26496ead 100644 --- a/src/linalg/Mod.cpp +++ b/src/linalg/Mod.cpp @@ -30,9 +30,19 @@ namespace cytnx { if ((Lt.is_contiguous() && Rt.is_contiguous()) || icnst) { // contiguous section if (Lt.device() == Device.cpu) { - cytnx::linalg_internal::lii.Ari_ii[Lt.dtype()][Rt.dtype()]( - out._impl->storage()._impl, Lt._impl->storage()._impl, Rt._impl->storage()._impl, - out._impl->storage()._impl->size(), {}, {}, {}, 5); + std::visit( + [&](auto *lptr) { + using TL = std::remove_pointer_t; + std::visit( + [&](auto *rptr) { + using TR = std::remove_pointer_t; + cytnx::linalg_internal::ModInternalImpl( + out._impl->storage()._impl, Lt._impl->storage()._impl, + Rt._impl->storage()._impl, out._impl->storage()._impl->size(), {}, {}, {}); + }, + Rt.ptr()); + }, + Lt.ptr()); } else { #ifdef UNI_GPU checkCudaErrors(cudaSetDevice(Rt.device())); @@ -47,10 +57,20 @@ namespace cytnx { } else { // non-contiguous 
section if (Lt.device() == Device.cpu) { - cytnx::linalg_internal::lii.Ari_ii[Lt.dtype()][Rt.dtype()]( - out._impl->storage()._impl, Lt._impl->storage()._impl, Rt._impl->storage()._impl, - Lt._impl->storage()._impl->size(), Lt._impl->shape(), Lt._impl->invmapper(), - Rt._impl->invmapper(), 5); + std::visit( + [&](auto *lptr) { + using TL = std::remove_pointer_t; + std::visit( + [&](auto *rptr) { + using TR = std::remove_pointer_t; + cytnx::linalg_internal::ModInternalImpl( + out._impl->storage()._impl, Lt._impl->storage()._impl, + Rt._impl->storage()._impl, Lt._impl->storage()._impl->size(), Lt._impl->shape(), + Lt._impl->invmapper(), Rt._impl->invmapper()); + }, + Rt.ptr()); + }, + Lt.ptr()); } else { #ifdef UNI_GPU cytnx_error_msg(true, @@ -77,9 +97,14 @@ namespace cytnx { out._impl->storage() = Storage(Rt._impl->storage().size(), Type.ComplexDouble, Rt.device()); if (Rt.device() == Device.cpu) { - cytnx::linalg_internal::lii.Ari_ii[Type.ComplexDouble][Rt.dtype()]( - out._impl->storage()._impl, Cnst._impl, Rt._impl->storage()._impl, - Rt._impl->storage()._impl->size(), {}, {}, {}, 5); + std::visit( + [&](auto *rptr) { + using TR = std::remove_pointer_t; + cytnx::linalg_internal::ModInternalImpl( + out._impl->storage()._impl, Cnst._impl, Rt._impl->storage()._impl, + Rt._impl->storage()._impl->size(), {}, {}, {}); + }, + Rt.ptr()); } else { #ifdef UNI_GPU checkCudaErrors(cudaSetDevice(Rt.device())); @@ -109,9 +134,14 @@ namespace cytnx { // Rt.dtype()?Type.ComplexFloat:Rt.dtype(),Rt.device()); if (Rt.device() == Device.cpu) { - cytnx::linalg_internal::lii.Ari_ii[Type.ComplexFloat][Rt.dtype()]( - out._impl->storage()._impl, Cnst._impl, Rt._impl->storage()._impl, - Rt._impl->storage()._impl->size(), {}, {}, {}, 5); + std::visit( + [&](auto *rptr) { + using TR = std::remove_pointer_t; + cytnx::linalg_internal::ModInternalImpl( + out._impl->storage()._impl, Cnst._impl, Rt._impl->storage()._impl, + Rt._impl->storage()._impl->size(), {}, {}, {}); + }, + Rt.ptr()); } else 
{ #ifdef UNI_GPU checkCudaErrors(cudaSetDevice(Rt.device())); @@ -139,9 +169,14 @@ namespace cytnx { // Tensor out(Rt.shape(),Type.Double < Rt.dtype()?Type.Double:Rt.dtype(),Rt.device()); if (Rt.device() == Device.cpu) { - cytnx::linalg_internal::lii.Ari_ii[Type.Double][Rt.dtype()]( - out._impl->storage()._impl, Cnst._impl, Rt._impl->storage()._impl, - Rt._impl->storage()._impl->size(), {}, {}, {}, 5); + std::visit( + [&](auto *rptr) { + using TR = std::remove_pointer_t; + cytnx::linalg_internal::ModInternalImpl( + out._impl->storage()._impl, Cnst._impl, Rt._impl->storage()._impl, + Rt._impl->storage()._impl->size(), {}, {}, {}); + }, + Rt.ptr()); } else { #ifdef UNI_GPU cytnx::linalg_internal::lii.cuAri_ii[Type.Double][Rt.dtype()]( @@ -167,9 +202,14 @@ namespace cytnx { // Tensor out(Rt.shape(),Type.Float < Rt.dtype()?Type.Float:Rt.dtype(),Rt.device()); if (Rt.device() == Device.cpu) { - cytnx::linalg_internal::lii.Ari_ii[Type.Float][Rt.dtype()]( - out._impl->storage()._impl, Cnst._impl, Rt._impl->storage()._impl, - Rt._impl->storage()._impl->size(), {}, {}, {}, 5); + std::visit( + [&](auto *rptr) { + using TR = std::remove_pointer_t; + cytnx::linalg_internal::ModInternalImpl( + out._impl->storage()._impl, Cnst._impl, Rt._impl->storage()._impl, + Rt._impl->storage()._impl->size(), {}, {}, {}); + }, + Rt.ptr()); } else { #ifdef UNI_GPU checkCudaErrors(cudaSetDevice(Rt.device())); @@ -196,9 +236,14 @@ namespace cytnx { // Tensor out(Rt.shape(),Type.Int64 < Rt.dtype()?Type.Int64:Rt.dtype(),Rt.device()); if (Rt.device() == Device.cpu) { - cytnx::linalg_internal::lii.Ari_ii[Type.Int64][Rt.dtype()]( - out._impl->storage()._impl, Cnst._impl, Rt._impl->storage()._impl, - Rt._impl->storage()._impl->size(), {}, {}, {}, 5); + std::visit( + [&](auto *rptr) { + using TR = std::remove_pointer_t; + cytnx::linalg_internal::ModInternalImpl( + out._impl->storage()._impl, Cnst._impl, Rt._impl->storage()._impl, + Rt._impl->storage()._impl->size(), {}, {}, {}); + }, + Rt.ptr()); } 
else { #ifdef UNI_GPU cytnx::linalg_internal::lii.cuAri_ii[Type.Int64][Rt.dtype()]( @@ -225,9 +270,14 @@ namespace cytnx { // Tensor out(Rt.shape(),Type.Uint64 < Rt.dtype()?Type.Uint64:Rt.dtype(),Rt.device()); if (Rt.device() == Device.cpu) { - cytnx::linalg_internal::lii.Ari_ii[Type.Uint64][Rt.dtype()]( - out._impl->storage()._impl, Cnst._impl, Rt._impl->storage()._impl, - Rt._impl->storage()._impl->size(), {}, {}, {}, 5); + std::visit( + [&](auto *rptr) { + using TR = std::remove_pointer_t; + cytnx::linalg_internal::ModInternalImpl( + out._impl->storage()._impl, Cnst._impl, Rt._impl->storage()._impl, + Rt._impl->storage()._impl->size(), {}, {}, {}); + }, + Rt.ptr()); } else { #ifdef UNI_GPU checkCudaErrors(cudaSetDevice(Rt.device())); @@ -254,9 +304,14 @@ namespace cytnx { // Tensor out(Rt.shape(),Type.Int32 < Rt.dtype()?Type.Int32:Rt.dtype(),Rt.device()); if (Rt.device() == Device.cpu) { - cytnx::linalg_internal::lii.Ari_ii[Type.Int32][Rt.dtype()]( - out._impl->storage()._impl, Cnst._impl, Rt._impl->storage()._impl, - Rt._impl->storage()._impl->size(), {}, {}, {}, 5); + std::visit( + [&](auto *rptr) { + using TR = std::remove_pointer_t; + cytnx::linalg_internal::ModInternalImpl( + out._impl->storage()._impl, Cnst._impl, Rt._impl->storage()._impl, + Rt._impl->storage()._impl->size(), {}, {}, {}); + }, + Rt.ptr()); } else { #ifdef UNI_GPU checkCudaErrors(cudaSetDevice(Rt.device())); @@ -284,9 +339,14 @@ namespace cytnx { // Tensor out(Rt.shape(),Type.Uint32 < Rt.dtype()?Type.Uint32:Rt.dtype(),Rt.device()); if (Rt.device() == Device.cpu) { - cytnx::linalg_internal::lii.Ari_ii[Type.Uint32][Rt.dtype()]( - out._impl->storage()._impl, Cnst._impl, Rt._impl->storage()._impl, - Rt._impl->storage()._impl->size(), {}, {}, {}, 5); + std::visit( + [&](auto *rptr) { + using TR = std::remove_pointer_t; + cytnx::linalg_internal::ModInternalImpl( + out._impl->storage()._impl, Cnst._impl, Rt._impl->storage()._impl, + Rt._impl->storage()._impl->size(), {}, {}, {}); + }, + 
Rt.ptr()); } else { #ifdef UNI_GPU checkCudaErrors(cudaSetDevice(Rt.device())); @@ -313,9 +373,14 @@ namespace cytnx { // Tensor out(Rt.shape(),Type.Int16 < Rt.dtype()?Type.Int16:Rt.dtype(),Rt.device()); if (Rt.device() == Device.cpu) { - cytnx::linalg_internal::lii.Ari_ii[Type.Int16][Rt.dtype()]( - out._impl->storage()._impl, Cnst._impl, Rt._impl->storage()._impl, - Rt._impl->storage()._impl->size(), {}, {}, {}, 5); + std::visit( + [&](auto *rptr) { + using TR = std::remove_pointer_t; + cytnx::linalg_internal::ModInternalImpl( + out._impl->storage()._impl, Cnst._impl, Rt._impl->storage()._impl, + Rt._impl->storage()._impl->size(), {}, {}, {}); + }, + Rt.ptr()); } else { #ifdef UNI_GPU checkCudaErrors(cudaSetDevice(Rt.device())); @@ -343,9 +408,14 @@ namespace cytnx { // Tensor out(Rt.shape(),Type.Uint16 < Rt.dtype()?Type.Uint16:Rt.dtype(),Rt.device()); if (Rt.device() == Device.cpu) { - cytnx::linalg_internal::lii.Ari_ii[Type.Uint16][Rt.dtype()]( - out._impl->storage()._impl, Cnst._impl, Rt._impl->storage()._impl, - Rt._impl->storage()._impl->size(), {}, {}, {}, 5); + std::visit( + [&](auto *rptr) { + using TR = std::remove_pointer_t; + cytnx::linalg_internal::ModInternalImpl( + out._impl->storage()._impl, Cnst._impl, Rt._impl->storage()._impl, + Rt._impl->storage()._impl->size(), {}, {}, {}); + }, + Rt.ptr()); } else { #ifdef UNI_GPU checkCudaErrors(cudaSetDevice(Rt.device())); @@ -372,9 +442,14 @@ namespace cytnx { // Tensor out(Rt.shape(),Type.Bool < Rt.dtype()?Type.Bool:Rt.dtype(),Rt.device()); if (Rt.device() == Device.cpu) { - cytnx::linalg_internal::lii.Ari_ii[Type.Bool][Rt.dtype()]( - out._impl->storage()._impl, Cnst._impl, Rt._impl->storage()._impl, - Rt._impl->storage()._impl->size(), {}, {}, {}, 5); + std::visit( + [&](auto *rptr) { + using TR = std::remove_pointer_t; + cytnx::linalg_internal::ModInternalImpl( + out._impl->storage()._impl, Cnst._impl, Rt._impl->storage()._impl, + Rt._impl->storage()._impl->size(), {}, {}, {}); + }, + Rt.ptr()); } else { 
#ifdef UNI_GPU checkCudaErrors(cudaSetDevice(Rt.device())); @@ -402,9 +477,19 @@ namespace cytnx { // Tensor out(Rt.shape(),Type.Bool < Rt.dtype()?Type.Bool:Rt.dtype(),Rt.device()); if (Rt.device() == Device.cpu) { - cytnx::linalg_internal::lii.Ari_ii[lc.dtype()][Rt.dtype()]( - out._impl->storage()._impl, Cnst._impl, Rt._impl->storage()._impl, - Rt._impl->storage()._impl->size(), {}, {}, {}, 5); + std::visit( + [&](auto *lptr) { + using TL = std::remove_pointer_t; + std::visit( + [&](auto *rptr) { + using TR = std::remove_pointer_t; + cytnx::linalg_internal::ModInternalImpl( + out._impl->storage()._impl, Cnst._impl, Rt._impl->storage()._impl, + Rt._impl->storage()._impl->size(), {}, {}, {}); + }, + Rt.ptr()); + }, + Tensor::from_storage(Cnst).ptr()); } else { #ifdef UNI_GPU checkCudaErrors(cudaSetDevice(Rt.device())); @@ -431,9 +516,14 @@ namespace cytnx { // Tensor out(Lt.shape(),Type.ComplexDouble,Lt.device()); if (Lt.device() == Device.cpu) { - cytnx::linalg_internal::lii.Ari_ii[Lt.dtype()][Type.ComplexDouble]( - out._impl->storage()._impl, Lt._impl->storage()._impl, Cnst._impl, - Lt._impl->storage()._impl->size(), {}, {}, {}, 5); + std::visit( + [&](auto *lptr) { + using TL = std::remove_pointer_t; + cytnx::linalg_internal::ModInternalImpl( + out._impl->storage()._impl, Lt._impl->storage()._impl, Cnst._impl, + Lt._impl->storage()._impl->size(), {}, {}, {}); + }, + Lt.ptr()); } else { #ifdef UNI_GPU checkCudaErrors(cudaSetDevice(Lt.device())); @@ -461,9 +551,14 @@ namespace cytnx { // Lt.dtype()?Type.ComplexFloat:Lt.dtype(),Lt.device()); if (Lt.device() == Device.cpu) { - cytnx::linalg_internal::lii.Ari_ii[Lt.dtype()][Type.ComplexFloat]( - out._impl->storage()._impl, Lt._impl->storage()._impl, Cnst._impl, - Lt._impl->storage()._impl->size(), {}, {}, {}, 5); + std::visit( + [&](auto *lptr) { + using TL = std::remove_pointer_t; + cytnx::linalg_internal::ModInternalImpl( + out._impl->storage()._impl, Lt._impl->storage()._impl, Cnst._impl, + 
Lt._impl->storage()._impl->size(), {}, {}, {}); + }, + Lt.ptr()); } else { #ifdef UNI_GPU checkCudaErrors(cudaSetDevice(Lt.device())); @@ -491,9 +586,14 @@ namespace cytnx { // Tensor out(Lt.shape(),Type.Double < Lt.dtype()?Type.Double:Lt.dtype(),Lt.device()); if (Lt.device() == Device.cpu) { - cytnx::linalg_internal::lii.Ari_ii[Lt.dtype()][Type.Double]( - out._impl->storage()._impl, Lt._impl->storage()._impl, Cnst._impl, - Lt._impl->storage()._impl->size(), {}, {}, {}, 5); + std::visit( + [&](auto *lptr) { + using TL = std::remove_pointer_t; + cytnx::linalg_internal::ModInternalImpl( + out._impl->storage()._impl, Lt._impl->storage()._impl, Cnst._impl, + Lt._impl->storage()._impl->size(), {}, {}, {}); + }, + Lt.ptr()); } else { #ifdef UNI_GPU checkCudaErrors(cudaSetDevice(Lt.device())); @@ -519,9 +619,14 @@ namespace cytnx { // Tensor out(Lt.shape(),Type.Float < Lt.dtype()?Type.Float:Lt.dtype(),Lt.device()); if (Lt.device() == Device.cpu) { - cytnx::linalg_internal::lii.Ari_ii[Lt.dtype()][Type.Float]( - out._impl->storage()._impl, Lt._impl->storage()._impl, Cnst._impl, - Lt._impl->storage()._impl->size(), {}, {}, {}, 5); + std::visit( + [&](auto *lptr) { + using TL = std::remove_pointer_t; + cytnx::linalg_internal::ModInternalImpl( + out._impl->storage()._impl, Lt._impl->storage()._impl, Cnst._impl, + Lt._impl->storage()._impl->size(), {}, {}, {}); + }, + Lt.ptr()); } else { #ifdef UNI_GPU checkCudaErrors(cudaSetDevice(Lt.device())); @@ -547,9 +652,14 @@ namespace cytnx { // Tensor out(Lt.shape(),Type.Int64 < Lt.dtype()?Type.Int64:Lt.dtype(),Lt.device()); if (Lt.device() == Device.cpu) { - cytnx::linalg_internal::lii.Ari_ii[Lt.dtype()][Type.Int64]( - out._impl->storage()._impl, Lt._impl->storage()._impl, Cnst._impl, - Lt._impl->storage()._impl->size(), {}, {}, {}, 5); + std::visit( + [&](auto *lptr) { + using TL = std::remove_pointer_t; + cytnx::linalg_internal::ModInternalImpl( + out._impl->storage()._impl, Lt._impl->storage()._impl, Cnst._impl, + 
Lt._impl->storage()._impl->size(), {}, {}, {}); + }, + Lt.ptr()); } else { #ifdef UNI_GPU checkCudaErrors(cudaSetDevice(Lt.device())); @@ -576,9 +686,14 @@ namespace cytnx { // Tensor out(Lt.shape(),Type.Uint64 < Lt.dtype()?Type.Uint64:Lt.dtype(),Lt.device()); if (Lt.device() == Device.cpu) { - cytnx::linalg_internal::lii.Ari_ii[Lt.dtype()][Type.Uint64]( - out._impl->storage()._impl, Lt._impl->storage()._impl, Cnst._impl, - Lt._impl->storage()._impl->size(), {}, {}, {}, 5); + std::visit( + [&](auto *lptr) { + using TL = std::remove_pointer_t; + cytnx::linalg_internal::ModInternalImpl( + out._impl->storage()._impl, Lt._impl->storage()._impl, Cnst._impl, + Lt._impl->storage()._impl->size(), {}, {}, {}); + }, + Lt.ptr()); } else { #ifdef UNI_GPU checkCudaErrors(cudaSetDevice(Lt.device())); @@ -604,9 +719,14 @@ namespace cytnx { // Tensor out(Lt.shape(),Type.Int32 < Lt.dtype()?Type.Int32:Lt.dtype(),Lt.device()); if (Lt.device() == Device.cpu) { - cytnx::linalg_internal::lii.Ari_ii[Lt.dtype()][Type.Int32]( - out._impl->storage()._impl, Lt._impl->storage()._impl, Cnst._impl, - Lt._impl->storage()._impl->size(), {}, {}, {}, 5); + std::visit( + [&](auto *lptr) { + using TL = std::remove_pointer_t; + cytnx::linalg_internal::ModInternalImpl( + out._impl->storage()._impl, Lt._impl->storage()._impl, Cnst._impl, + Lt._impl->storage()._impl->size(), {}, {}, {}); + }, + Lt.ptr()); } else { #ifdef UNI_GPU checkCudaErrors(cudaSetDevice(Lt.device())); @@ -633,9 +753,14 @@ namespace cytnx { // Tensor out(Lt.shape(),Type.Uint32 < Lt.dtype()?Type.Uint32:Lt.dtype(),Lt.device()); if (Lt.device() == Device.cpu) { - cytnx::linalg_internal::lii.Ari_ii[Lt.dtype()][Type.Uint32]( - out._impl->storage()._impl, Lt._impl->storage()._impl, Cnst._impl, - Lt._impl->storage()._impl->size(), {}, {}, {}, 5); + std::visit( + [&](auto *lptr) { + using TL = std::remove_pointer_t; + cytnx::linalg_internal::ModInternalImpl( + out._impl->storage()._impl, Lt._impl->storage()._impl, Cnst._impl, + 
Lt._impl->storage()._impl->size(), {}, {}, {}); + }, + Lt.ptr()); } else { #ifdef UNI_GPU checkCudaErrors(cudaSetDevice(Lt.device())); @@ -662,9 +787,14 @@ namespace cytnx { // Tensor out(Lt.shape(),Type.Int16 < Lt.dtype()?Type.Int16:Lt.dtype(),Lt.device()); if (Lt.device() == Device.cpu) { - cytnx::linalg_internal::lii.Ari_ii[Lt.dtype()][Type.Int16]( - out._impl->storage()._impl, Lt._impl->storage()._impl, Cnst._impl, - Lt._impl->storage()._impl->size(), {}, {}, {}, 5); + std::visit( + [&](auto *lptr) { + using TL = std::remove_pointer_t; + cytnx::linalg_internal::ModInternalImpl( + out._impl->storage()._impl, Lt._impl->storage()._impl, Cnst._impl, + Lt._impl->storage()._impl->size(), {}, {}, {}); + }, + Lt.ptr()); } else { #ifdef UNI_GPU checkCudaErrors(cudaSetDevice(Lt.device())); @@ -691,9 +821,14 @@ namespace cytnx { // Tensor out(Lt.shape(),Type.Uint16 < Lt.dtype()?Type.Uint16:Lt.dtype(),Lt.device()); if (Lt.device() == Device.cpu) { - cytnx::linalg_internal::lii.Ari_ii[Lt.dtype()][Type.Uint16]( - out._impl->storage()._impl, Lt._impl->storage()._impl, Cnst._impl, - Lt._impl->storage()._impl->size(), {}, {}, {}, 5); + std::visit( + [&](auto *lptr) { + using TL = std::remove_pointer_t; + cytnx::linalg_internal::ModInternalImpl( + out._impl->storage()._impl, Lt._impl->storage()._impl, Cnst._impl, + Lt._impl->storage()._impl->size(), {}, {}, {}); + }, + Lt.ptr()); } else { #ifdef UNI_GPU checkCudaErrors(cudaSetDevice(Lt.device())); @@ -719,9 +854,14 @@ namespace cytnx { // Tensor out(Lt.shape(),Type.Bool < Lt.dtype()?Type.Bool:Lt.dtype(),Lt.device()); if (Lt.device() == Device.cpu) { - cytnx::linalg_internal::lii.Ari_ii[Lt.dtype()][Type.Bool]( - out._impl->storage()._impl, Lt._impl->storage()._impl, Cnst._impl, - Lt._impl->storage()._impl->size(), {}, {}, {}, 5); + std::visit( + [&](auto *lptr) { + using TL = std::remove_pointer_t; + cytnx::linalg_internal::ModInternalImpl( + out._impl->storage()._impl, Lt._impl->storage()._impl, Cnst._impl, + 
Lt._impl->storage()._impl->size(), {}, {}, {}); + }, + Lt.ptr()); } else { #ifdef UNI_GPU checkCudaErrors(cudaSetDevice(Lt.device())); @@ -749,9 +889,19 @@ namespace cytnx { // Tensor out(Lt.shape(),Type.Bool < Lt.dtype()?Type.Bool:Lt.dtype(),Lt.device()); if (Lt.device() == Device.cpu) { - cytnx::linalg_internal::lii.Ari_ii[Lt.dtype()][rc.dtype()]( - out._impl->storage()._impl, Lt._impl->storage()._impl, Cnst._impl, - Lt._impl->storage()._impl->size(), {}, {}, {}, 5); + std::visit( + [&](auto *lptr) { + using TL = std::remove_pointer_t; + std::visit( + [&](auto *rptr) { + using TR = std::remove_pointer_t; + cytnx::linalg_internal::ModInternalImpl( + out._impl->storage()._impl, Lt._impl->storage()._impl, Cnst._impl, + Lt._impl->storage()._impl->size(), {}, {}, {}); + }, + Tensor::from_storage(Cnst).ptr()); + }, + Lt.ptr()); } else { #ifdef UNI_GPU checkCudaErrors(cudaSetDevice(Lt.device())); diff --git a/src/linalg/Mul.cpp b/src/linalg/Mul.cpp index 7469a3747..76555a127 100644 --- a/src/linalg/Mul.cpp +++ b/src/linalg/Mul.cpp @@ -37,9 +37,19 @@ namespace cytnx { if ((Lt.is_contiguous() && Rt.is_contiguous()) || icnst) { // contiguous section if (Lt.device() == Device.cpu) { - cytnx::linalg_internal::lii.Ari_ii[Lt.dtype()][Rt.dtype()]( - out._impl->storage()._impl, Lt._impl->storage()._impl, Rt._impl->storage()._impl, - out._impl->storage()._impl->size(), {}, {}, {}, 1); + std::visit( + [&](auto *lptr) { + using TL = std::remove_pointer_t; + std::visit( + [&](auto *rptr) { + using TR = std::remove_pointer_t; + cytnx::linalg_internal::MulInternalImpl( + out._impl->storage()._impl, Lt._impl->storage()._impl, + Rt._impl->storage()._impl, out._impl->storage()._impl->size(), {}, {}, {}); + }, + Rt.ptr()); + }, + Lt.ptr()); } else { #ifdef UNI_GPU checkCudaErrors(cudaSetDevice(Rt.device())); @@ -54,10 +64,20 @@ namespace cytnx { } else { // non-contiguous section if (Lt.device() == Device.cpu) { - cytnx::linalg_internal::lii.Ari_ii[Lt.dtype()][Rt.dtype()]( - 
out._impl->storage()._impl, Lt._impl->storage()._impl, Rt._impl->storage()._impl, - Lt._impl->storage()._impl->size(), Lt._impl->shape(), Lt._impl->invmapper(), - Rt._impl->invmapper(), 1); + std::visit( + [&](auto *lptr) { + using TL = std::remove_pointer_t; + std::visit( + [&](auto *rptr) { + using TR = std::remove_pointer_t; + cytnx::linalg_internal::MulInternalImpl( + out._impl->storage()._impl, Lt._impl->storage()._impl, + Rt._impl->storage()._impl, Lt._impl->storage()._impl->size(), Lt._impl->shape(), + Lt._impl->invmapper(), Rt._impl->invmapper()); + }, + Rt.ptr()); + }, + Lt.ptr()); } else { #ifdef UNI_GPU cytnx_error_msg(true, @@ -85,9 +105,14 @@ namespace cytnx { // Tensor out(Rt.shape(), Type.ComplexDouble, Rt.device()); if (Rt.device() == Device.cpu) { - cytnx::linalg_internal::lii.Ari_ii[Type.ComplexDouble][Rt.dtype()]( - out._impl->storage()._impl, Cnst._impl, Rt._impl->storage()._impl, - Rt._impl->storage()._impl->size(), {}, {}, {}, 1); + std::visit( + [&](auto *rptr) { + using TR = std::remove_pointer_t; + cytnx::linalg_internal::MulInternalImpl( + out._impl->storage()._impl, Cnst._impl, Rt._impl->storage()._impl, + Rt._impl->storage()._impl->size(), {}, {}, {}); + }, + Rt.ptr()); } else { #ifdef UNI_GPU checkCudaErrors(cudaSetDevice(Rt.device())); @@ -116,9 +141,14 @@ namespace cytnx { // Rt.device()); if (Rt.device() == Device.cpu) { - cytnx::linalg_internal::lii.Ari_ii[Type.ComplexFloat][Rt.dtype()]( - out._impl->storage()._impl, Cnst._impl, Rt._impl->storage()._impl, - Rt._impl->storage()._impl->size(), {}, {}, {}, 1); + std::visit( + [&](auto *rptr) { + using TR = std::remove_pointer_t; + cytnx::linalg_internal::MulInternalImpl( + out._impl->storage()._impl, Cnst._impl, Rt._impl->storage()._impl, + Rt._impl->storage()._impl->size(), {}, {}, {}); + }, + Rt.ptr()); } else { #ifdef UNI_GPU checkCudaErrors(cudaSetDevice(Rt.device())); @@ -146,9 +176,14 @@ namespace cytnx { // Tensor out(Rt.shape(), Type.Double < Rt.dtype() ? 
Type.Double : Rt.dtype(), Rt.device()); if (Rt.device() == Device.cpu) { - cytnx::linalg_internal::lii.Ari_ii[Type.Double][Rt.dtype()]( - out._impl->storage()._impl, Cnst._impl, Rt._impl->storage()._impl, - Rt._impl->storage()._impl->size(), {}, {}, {}, 1); + std::visit( + [&](auto *rptr) { + using TR = std::remove_pointer_t; + cytnx::linalg_internal::MulInternalImpl( + out._impl->storage()._impl, Cnst._impl, Rt._impl->storage()._impl, + Rt._impl->storage()._impl->size(), {}, {}, {}); + }, + Rt.ptr()); } else { #ifdef UNI_GPU checkCudaErrors(cudaSetDevice(Rt.device())); @@ -175,9 +210,14 @@ namespace cytnx { // Tensor out(Rt.shape(), Type.Float < Rt.dtype() ? Type.Float : Rt.dtype(), Rt.device()); if (Rt.device() == Device.cpu) { - cytnx::linalg_internal::lii.Ari_ii[Type.Float][Rt.dtype()]( - out._impl->storage()._impl, Cnst._impl, Rt._impl->storage()._impl, - Rt._impl->storage()._impl->size(), {}, {}, {}, 1); + std::visit( + [&](auto *rptr) { + using TR = std::remove_pointer_t; + cytnx::linalg_internal::MulInternalImpl( + out._impl->storage()._impl, Cnst._impl, Rt._impl->storage()._impl, + Rt._impl->storage()._impl->size(), {}, {}, {}); + }, + Rt.ptr()); } else { #ifdef UNI_GPU checkCudaErrors(cudaSetDevice(Rt.device())); @@ -204,9 +244,14 @@ namespace cytnx { // Tensor out(Rt.shape(), Type.Int64 < Rt.dtype() ? 
Type.Int64 : Rt.dtype(), Rt.device()); if (Rt.device() == Device.cpu) { - cytnx::linalg_internal::lii.Ari_ii[Type.Int64][Rt.dtype()]( - out._impl->storage()._impl, Cnst._impl, Rt._impl->storage()._impl, - Rt._impl->storage()._impl->size(), {}, {}, {}, 1); + std::visit( + [&](auto *rptr) { + using TR = std::remove_pointer_t; + cytnx::linalg_internal::MulInternalImpl( + out._impl->storage()._impl, Cnst._impl, Rt._impl->storage()._impl, + Rt._impl->storage()._impl->size(), {}, {}, {}); + }, + Rt.ptr()); } else { #ifdef UNI_GPU checkCudaErrors(cudaSetDevice(Rt.device())); @@ -234,9 +279,14 @@ namespace cytnx { // Tensor out(Rt.shape(), Type.Uint64 < Rt.dtype() ? Type.Uint64 : Rt.dtype(), Rt.device()); if (Rt.device() == Device.cpu) { - cytnx::linalg_internal::lii.Ari_ii[Type.Uint64][Rt.dtype()]( - out._impl->storage()._impl, Cnst._impl, Rt._impl->storage()._impl, - Rt._impl->storage()._impl->size(), {}, {}, {}, 1); + std::visit( + [&](auto *rptr) { + using TR = std::remove_pointer_t; + cytnx::linalg_internal::MulInternalImpl( + out._impl->storage()._impl, Cnst._impl, Rt._impl->storage()._impl, + Rt._impl->storage()._impl->size(), {}, {}, {}); + }, + Rt.ptr()); } else { #ifdef UNI_GPU checkCudaErrors(cudaSetDevice(Rt.device())); @@ -263,9 +313,14 @@ namespace cytnx { // Tensor out(Rt.shape(), Type.Int32 < Rt.dtype() ? 
Type.Int32 : Rt.dtype(), Rt.device()); if (Rt.device() == Device.cpu) { - cytnx::linalg_internal::lii.Ari_ii[Type.Int32][Rt.dtype()]( - out._impl->storage()._impl, Cnst._impl, Rt._impl->storage()._impl, - Rt._impl->storage()._impl->size(), {}, {}, {}, 1); + std::visit( + [&](auto *rptr) { + using TR = std::remove_pointer_t; + cytnx::linalg_internal::MulInternalImpl( + out._impl->storage()._impl, Cnst._impl, Rt._impl->storage()._impl, + Rt._impl->storage()._impl->size(), {}, {}, {}); + }, + Rt.ptr()); } else { #ifdef UNI_GPU checkCudaErrors(cudaSetDevice(Rt.device())); @@ -293,9 +348,14 @@ namespace cytnx { // Tensor out(Rt.shape(), Type.Uint32 < Rt.dtype() ? Type.Uint32 : Rt.dtype(), Rt.device()); if (Rt.device() == Device.cpu) { - cytnx::linalg_internal::lii.Ari_ii[Type.Uint32][Rt.dtype()]( - out._impl->storage()._impl, Cnst._impl, Rt._impl->storage()._impl, - Rt._impl->storage()._impl->size(), {}, {}, {}, 1); + std::visit( + [&](auto *rptr) { + using TR = std::remove_pointer_t; + cytnx::linalg_internal::MulInternalImpl( + out._impl->storage()._impl, Cnst._impl, Rt._impl->storage()._impl, + Rt._impl->storage()._impl->size(), {}, {}, {}); + }, + Rt.ptr()); } else { #ifdef UNI_GPU checkCudaErrors(cudaSetDevice(Rt.device())); @@ -322,9 +382,14 @@ namespace cytnx { // Tensor out(Rt.shape(), Type.Int16 < Rt.dtype() ? 
Type.Int16 : Rt.dtype(), Rt.device()); if (Rt.device() == Device.cpu) { - cytnx::linalg_internal::lii.Ari_ii[Type.Int16][Rt.dtype()]( - out._impl->storage()._impl, Cnst._impl, Rt._impl->storage()._impl, - Rt._impl->storage()._impl->size(), {}, {}, {}, 1); + std::visit( + [&](auto *rptr) { + using TR = std::remove_pointer_t; + cytnx::linalg_internal::MulInternalImpl( + out._impl->storage()._impl, Cnst._impl, Rt._impl->storage()._impl, + Rt._impl->storage()._impl->size(), {}, {}, {}); + }, + Rt.ptr()); } else { #ifdef UNI_GPU checkCudaErrors(cudaSetDevice(Rt.device())); @@ -352,9 +417,14 @@ namespace cytnx { // Tensor out(Rt.shape(), Type.Uint16 < Rt.dtype() ? Type.Uint16 : Rt.dtype(), Rt.device()); if (Rt.device() == Device.cpu) { - cytnx::linalg_internal::lii.Ari_ii[Type.Uint16][Rt.dtype()]( - out._impl->storage()._impl, Cnst._impl, Rt._impl->storage()._impl, - Rt._impl->storage()._impl->size(), {}, {}, {}, 1); + std::visit( + [&](auto *rptr) { + using TR = std::remove_pointer_t; + cytnx::linalg_internal::MulInternalImpl( + out._impl->storage()._impl, Cnst._impl, Rt._impl->storage()._impl, + Rt._impl->storage()._impl->size(), {}, {}, {}); + }, + Rt.ptr()); } else { #ifdef UNI_GPU checkCudaErrors(cudaSetDevice(Rt.device())); @@ -381,9 +451,14 @@ namespace cytnx { // Tensor out(Rt.shape(), Type.Bool < Rt.dtype() ? Type.Bool : Rt.dtype(), Rt.device()); if (Rt.device() == Device.cpu) { - cytnx::linalg_internal::lii.Ari_ii[Type.Bool][Rt.dtype()]( - out._impl->storage()._impl, Cnst._impl, Rt._impl->storage()._impl, - Rt._impl->storage()._impl->size(), {}, {}, {}, 1); + std::visit( + [&](auto *rptr) { + using TR = std::remove_pointer_t; + cytnx::linalg_internal::MulInternalImpl( + out._impl->storage()._impl, Cnst._impl, Rt._impl->storage()._impl, + Rt._impl->storage()._impl->size(), {}, {}, {}); + }, + Rt.ptr()); } else { #ifdef UNI_GPU checkCudaErrors(cudaSetDevice(Rt.device())); @@ -410,9 +485,19 @@ namespace cytnx { Rt._impl->storage().size(), lc.dtype() < Rt.dtype() ? 
lc.dtype() : Rt.dtype(), Rt.device()); if (Rt.device() == Device.cpu) { - linalg_internal::lii.Ari_ii[lc.dtype()][Rt.dtype()]( - out._impl->storage()._impl, Cnst._impl, Rt._impl->storage()._impl, - Rt._impl->storage()._impl->size(), {}, {}, {}, 1); + std::visit( + [&](auto *lptr) { + using TL = std::remove_pointer_t; + std::visit( + [&](auto *rptr) { + using TR = std::remove_pointer_t; + cytnx::linalg_internal::MulInternalImpl( + out._impl->storage()._impl, Cnst._impl, Rt._impl->storage()._impl, + Rt._impl->storage()._impl->size(), {}, {}, {}); + }, + Rt.ptr()); + }, + Tensor::from_storage(Cnst).ptr()); } else { #ifdef UNI_GPU checkCudaErrors(cudaSetDevice(Rt.device())); diff --git a/src/linalg/Sub.cpp b/src/linalg/Sub.cpp index 1f5d068a4..0947733b6 100644 --- a/src/linalg/Sub.cpp +++ b/src/linalg/Sub.cpp @@ -35,9 +35,19 @@ namespace cytnx { if ((Lt.is_contiguous() && Rt.is_contiguous()) || icnst) { // contiguous section if (Lt.device() == Device.cpu) { - cytnx::linalg_internal::lii.Ari_ii[Lt.dtype()][Rt.dtype()]( - out._impl->storage()._impl, Lt._impl->storage()._impl, Rt._impl->storage()._impl, - out._impl->storage()._impl->size(), {}, {}, {}, 2); + std::visit( + [&](auto *lptr) { + using TL = std::remove_pointer_t; + std::visit( + [&](auto *rptr) { + using TR = std::remove_pointer_t; + cytnx::linalg_internal::SubInternalImpl( + out._impl->storage()._impl, Lt._impl->storage()._impl, + Rt._impl->storage()._impl, out._impl->storage()._impl->size(), {}, {}, {}); + }, + Rt.ptr()); + }, + Lt.ptr()); } else { #ifdef UNI_GPU checkCudaErrors(cudaSetDevice(Rt.device())); @@ -52,10 +62,20 @@ namespace cytnx { } else { // non-contiguous section if (Lt.device() == Device.cpu) { - cytnx::linalg_internal::lii.Ari_ii[Lt.dtype()][Rt.dtype()]( - out._impl->storage()._impl, Lt._impl->storage()._impl, Rt._impl->storage()._impl, - Rt._impl->storage()._impl->size(), Lt._impl->shape(), Lt._impl->invmapper(), - Rt._impl->invmapper(), 2); + std::visit( + [&](auto *lptr) { + using TL = 
std::remove_pointer_t; + std::visit( + [&](auto *rptr) { + using TR = std::remove_pointer_t; + cytnx::linalg_internal::SubInternalImpl( + out._impl->storage()._impl, Lt._impl->storage()._impl, + Rt._impl->storage()._impl, Rt._impl->storage()._impl->size(), Lt._impl->shape(), + Lt._impl->invmapper(), Rt._impl->invmapper()); + }, + Rt.ptr()); + }, + Lt.ptr()); } else { #ifdef UNI_GPU checkCudaErrors(cudaSetDevice(Rt.device())); @@ -83,9 +103,14 @@ namespace cytnx { // Tensor out(Rt.shape(), Type.ComplexDouble, Rt.device()); if (Rt.device() == Device.cpu) { - cytnx::linalg_internal::lii.Ari_ii[Type.ComplexDouble][Rt.dtype()]( - out._impl->storage()._impl, Cnst._impl, Rt._impl->storage()._impl, - Rt._impl->storage()._impl->size(), {}, {}, {}, 2); + std::visit( + [&](auto *rptr) { + using TR = std::remove_pointer_t; + cytnx::linalg_internal::SubInternalImpl( + out._impl->storage()._impl, Cnst._impl, Rt._impl->storage()._impl, + Rt._impl->storage()._impl->size(), {}, {}, {}); + }, + Rt.ptr()); } else { #ifdef UNI_GPU checkCudaErrors(cudaSetDevice(Rt.device())); @@ -114,9 +139,14 @@ namespace cytnx { // Rt.device()); if (Rt.device() == Device.cpu) { - cytnx::linalg_internal::lii.Ari_ii[Type.ComplexFloat][Rt.dtype()]( - out._impl->storage()._impl, Cnst._impl, Rt._impl->storage()._impl, - Rt._impl->storage()._impl->size(), {}, {}, {}, 2); + std::visit( + [&](auto *rptr) { + using TR = std::remove_pointer_t; + cytnx::linalg_internal::SubInternalImpl( + out._impl->storage()._impl, Cnst._impl, Rt._impl->storage()._impl, + Rt._impl->storage()._impl->size(), {}, {}, {}); + }, + Rt.ptr()); } else { #ifdef UNI_GPU checkCudaErrors(cudaSetDevice(Rt.device())); @@ -144,9 +174,14 @@ namespace cytnx { // Tensor out(Rt.shape(), Type.Double < Rt.dtype() ? 
Type.Double : Rt.dtype(), Rt.device()); if (Rt.device() == Device.cpu) { - cytnx::linalg_internal::lii.Ari_ii[Type.Double][Rt.dtype()]( - out._impl->storage()._impl, Cnst._impl, Rt._impl->storage()._impl, - Rt._impl->storage()._impl->size(), {}, {}, {}, 2); + std::visit( + [&](auto *rptr) { + using TR = std::remove_pointer_t; + cytnx::linalg_internal::SubInternalImpl( + out._impl->storage()._impl, Cnst._impl, Rt._impl->storage()._impl, + Rt._impl->storage()._impl->size(), {}, {}, {}); + }, + Rt.ptr()); } else { #ifdef UNI_GPU checkCudaErrors(cudaSetDevice(Rt.device())); @@ -173,9 +208,14 @@ namespace cytnx { // Tensor out(Rt.shape(), Type.Float < Rt.dtype() ? Type.Float : Rt.dtype(), Rt.device()); if (Rt.device() == Device.cpu) { - cytnx::linalg_internal::lii.Ari_ii[Type.Float][Rt.dtype()]( - out._impl->storage()._impl, Cnst._impl, Rt._impl->storage()._impl, - Rt._impl->storage()._impl->size(), {}, {}, {}, 2); + std::visit( + [&](auto *rptr) { + using TR = std::remove_pointer_t; + cytnx::linalg_internal::SubInternalImpl( + out._impl->storage()._impl, Cnst._impl, Rt._impl->storage()._impl, + Rt._impl->storage()._impl->size(), {}, {}, {}); + }, + Rt.ptr()); } else { #ifdef UNI_GPU checkCudaErrors(cudaSetDevice(Rt.device())); @@ -202,9 +242,14 @@ namespace cytnx { // Tensor out(Rt.shape(), Type.Int64 < Rt.dtype() ? 
Type.Int64 : Rt.dtype(), Rt.device()); if (Rt.device() == Device.cpu) { - cytnx::linalg_internal::lii.Ari_ii[Type.Int64][Rt.dtype()]( - out._impl->storage()._impl, Cnst._impl, Rt._impl->storage()._impl, - Rt._impl->storage()._impl->size(), {}, {}, {}, 2); + std::visit( + [&](auto *rptr) { + using TR = std::remove_pointer_t; + cytnx::linalg_internal::SubInternalImpl( + out._impl->storage()._impl, Cnst._impl, Rt._impl->storage()._impl, + Rt._impl->storage()._impl->size(), {}, {}, {}); + }, + Rt.ptr()); } else { #ifdef UNI_GPU checkCudaErrors(cudaSetDevice(Rt.device())); @@ -232,9 +277,14 @@ namespace cytnx { // Tensor out(Rt.shape(), Type.Uint64 < Rt.dtype() ? Type.Uint64 : Rt.dtype(), Rt.device()); if (Rt.device() == Device.cpu) { - cytnx::linalg_internal::lii.Ari_ii[Type.Uint64][Rt.dtype()]( - out._impl->storage()._impl, Cnst._impl, Rt._impl->storage()._impl, - Rt._impl->storage()._impl->size(), {}, {}, {}, 2); + std::visit( + [&](auto *rptr) { + using TR = std::remove_pointer_t; + cytnx::linalg_internal::SubInternalImpl( + out._impl->storage()._impl, Cnst._impl, Rt._impl->storage()._impl, + Rt._impl->storage()._impl->size(), {}, {}, {}); + }, + Rt.ptr()); } else { #ifdef UNI_GPU checkCudaErrors(cudaSetDevice(Rt.device())); @@ -261,9 +311,14 @@ namespace cytnx { // Tensor out(Rt.shape(), Type.Int32 < Rt.dtype() ? 
Type.Int32 : Rt.dtype(), Rt.device()); if (Rt.device() == Device.cpu) { - cytnx::linalg_internal::lii.Ari_ii[Type.Int32][Rt.dtype()]( - out._impl->storage()._impl, Cnst._impl, Rt._impl->storage()._impl, - Rt._impl->storage()._impl->size(), {}, {}, {}, 2); + std::visit( + [&](auto *rptr) { + using TR = std::remove_pointer_t; + cytnx::linalg_internal::SubInternalImpl( + out._impl->storage()._impl, Cnst._impl, Rt._impl->storage()._impl, + Rt._impl->storage()._impl->size(), {}, {}, {}); + }, + Rt.ptr()); } else { #ifdef UNI_GPU checkCudaErrors(cudaSetDevice(Rt.device())); @@ -291,9 +346,14 @@ namespace cytnx { // Tensor out(Rt.shape(), Type.Uint32 < Rt.dtype() ? Type.Uint32 : Rt.dtype(), Rt.device()); if (Rt.device() == Device.cpu) { - cytnx::linalg_internal::lii.Ari_ii[Type.Uint32][Rt.dtype()]( - out._impl->storage()._impl, Cnst._impl, Rt._impl->storage()._impl, - Rt._impl->storage()._impl->size(), {}, {}, {}, 2); + std::visit( + [&](auto *rptr) { + using TR = std::remove_pointer_t; + cytnx::linalg_internal::SubInternalImpl( + out._impl->storage()._impl, Cnst._impl, Rt._impl->storage()._impl, + Rt._impl->storage()._impl->size(), {}, {}, {}); + }, + Rt.ptr()); } else { #ifdef UNI_GPU checkCudaErrors(cudaSetDevice(Rt.device())); @@ -320,9 +380,14 @@ namespace cytnx { // Tensor out(Rt.shape(), Type.Int16 < Rt.dtype() ? 
Type.Int16 : Rt.dtype(), Rt.device()); if (Rt.device() == Device.cpu) { - cytnx::linalg_internal::lii.Ari_ii[Type.Int16][Rt.dtype()]( - out._impl->storage()._impl, Cnst._impl, Rt._impl->storage()._impl, - Rt._impl->storage()._impl->size(), {}, {}, {}, 2); + std::visit( + [&](auto *rptr) { + using TR = std::remove_pointer_t; + cytnx::linalg_internal::SubInternalImpl( + out._impl->storage()._impl, Cnst._impl, Rt._impl->storage()._impl, + Rt._impl->storage()._impl->size(), {}, {}, {}); + }, + Rt.ptr()); } else { #ifdef UNI_GPU checkCudaErrors(cudaSetDevice(Rt.device())); @@ -350,9 +415,14 @@ namespace cytnx { // Tensor out(Rt.shape(), Type.Uint16 < Rt.dtype() ? Type.Uint16 : Rt.dtype(), Rt.device()); if (Rt.device() == Device.cpu) { - cytnx::linalg_internal::lii.Ari_ii[Type.Uint16][Rt.dtype()]( - out._impl->storage()._impl, Cnst._impl, Rt._impl->storage()._impl, - Rt._impl->storage()._impl->size(), {}, {}, {}, 2); + std::visit( + [&](auto *rptr) { + using TR = std::remove_pointer_t; + cytnx::linalg_internal::SubInternalImpl( + out._impl->storage()._impl, Cnst._impl, Rt._impl->storage()._impl, + Rt._impl->storage()._impl->size(), {}, {}, {}); + }, + Rt.ptr()); } else { #ifdef UNI_GPU checkCudaErrors(cudaSetDevice(Rt.device())); @@ -379,9 +449,14 @@ namespace cytnx { // Tensor out(Rt.shape(), Type.Bool < Rt.dtype() ? Type.Bool : Rt.dtype(), Rt.device()); if (Rt.device() == Device.cpu) { - cytnx::linalg_internal::lii.Ari_ii[Type.Bool][Rt.dtype()]( - out._impl->storage()._impl, Cnst._impl, Rt._impl->storage()._impl, - Rt._impl->storage()._impl->size(), {}, {}, {}, 2); + std::visit( + [&](auto *rptr) { + using TR = std::remove_pointer_t; + cytnx::linalg_internal::SubInternalImpl( + out._impl->storage()._impl, Cnst._impl, Rt._impl->storage()._impl, + Rt._impl->storage()._impl->size(), {}, {}, {}); + }, + Rt.ptr()); } else { #ifdef UNI_GPU checkCudaErrors(cudaSetDevice(Rt.device())); @@ -408,9 +483,19 @@ namespace cytnx { Rt._impl->storage().size(), lc.dtype() < Rt.dtype() ? 
lc.dtype() : Rt.dtype(), Rt.device()); if (Rt.device() == Device.cpu) { - linalg_internal::lii.Ari_ii[lc.dtype()][Rt.dtype()]( - out._impl->storage()._impl, Cnst._impl, Rt._impl->storage()._impl, - Rt._impl->storage()._impl->size(), {}, {}, {}, 2); + std::visit( + [&](auto *lptr) { + using TL = std::remove_pointer_t; + std::visit( + [&](auto *rptr) { + using TR = std::remove_pointer_t; + cytnx::linalg_internal::SubInternalImpl( + out._impl->storage()._impl, Cnst._impl, Rt._impl->storage()._impl, + Rt._impl->storage()._impl->size(), {}, {}, {}); + }, + Rt.ptr()); + }, + Tensor::from_storage(Cnst).ptr()); } else { #ifdef UNI_GPU checkCudaErrors(cudaSetDevice(Rt.device())); @@ -437,9 +522,14 @@ namespace cytnx { // Tensor out(Lt.shape(), Type.ComplexDouble, Lt.device()); if (Lt.device() == Device.cpu) { - cytnx::linalg_internal::lii.Ari_ii[Lt.dtype()][Type.ComplexDouble]( - out._impl->storage()._impl, Lt._impl->storage()._impl, Cnst._impl, - Lt._impl->storage()._impl->size(), {}, {}, {}, 2); + std::visit( + [&](auto *lptr) { + using TL = std::remove_pointer_t; + cytnx::linalg_internal::SubInternalImpl( + out._impl->storage()._impl, Lt._impl->storage()._impl, Cnst._impl, + Lt._impl->storage()._impl->size(), {}, {}, {}); + }, + Lt.ptr()); } else { #ifdef UNI_GPU checkCudaErrors(cudaSetDevice(Lt.device())); @@ -468,9 +558,14 @@ namespace cytnx { // Lt.device()); if (Lt.device() == Device.cpu) { - cytnx::linalg_internal::lii.Ari_ii[Lt.dtype()][Type.ComplexFloat]( - out._impl->storage()._impl, Lt._impl->storage()._impl, Cnst._impl, - Lt._impl->storage()._impl->size(), {}, {}, {}, 2); + std::visit( + [&](auto *lptr) { + using TL = std::remove_pointer_t; + cytnx::linalg_internal::SubInternalImpl( + out._impl->storage()._impl, Lt._impl->storage()._impl, Cnst._impl, + Lt._impl->storage()._impl->size(), {}, {}, {}); + }, + Lt.ptr()); } else { #ifdef UNI_GPU checkCudaErrors(cudaSetDevice(Lt.device())); @@ -498,9 +593,14 @@ namespace cytnx { // Tensor out(Lt.shape(), Type.Double 
< Lt.dtype() ? Type.Double : Lt.dtype(), Lt.device()); if (Lt.device() == Device.cpu) { - cytnx::linalg_internal::lii.Ari_ii[Lt.dtype()][Type.Double]( - out._impl->storage()._impl, Lt._impl->storage()._impl, Cnst._impl, - Lt._impl->storage()._impl->size(), {}, {}, {}, 2); + std::visit( + [&](auto *lptr) { + using TL = std::remove_pointer_t; + cytnx::linalg_internal::SubInternalImpl( + out._impl->storage()._impl, Lt._impl->storage()._impl, Cnst._impl, + Lt._impl->storage()._impl->size(), {}, {}, {}); + }, + Lt.ptr()); } else { #ifdef UNI_GPU checkCudaErrors(cudaSetDevice(Lt.device())); @@ -527,9 +627,14 @@ namespace cytnx { // Tensor out(Lt.shape(), Type.Float < Lt.dtype() ? Type.Float : Lt.dtype(), Lt.device()); if (Lt.device() == Device.cpu) { - cytnx::linalg_internal::lii.Ari_ii[Lt.dtype()][Type.Float]( - out._impl->storage()._impl, Lt._impl->storage()._impl, Cnst._impl, - Lt._impl->storage()._impl->size(), {}, {}, {}, 2); + std::visit( + [&](auto *lptr) { + using TL = std::remove_pointer_t; + cytnx::linalg_internal::SubInternalImpl( + out._impl->storage()._impl, Lt._impl->storage()._impl, Cnst._impl, + Lt._impl->storage()._impl->size(), {}, {}, {}); + }, + Lt.ptr()); } else { #ifdef UNI_GPU checkCudaErrors(cudaSetDevice(Lt.device())); @@ -556,9 +661,14 @@ namespace cytnx { // Tensor out(Lt.shape(), Type.Int64 < Lt.dtype() ? 
Type.Int64 : Lt.dtype(), Lt.device()); if (Lt.device() == Device.cpu) { - cytnx::linalg_internal::lii.Ari_ii[Lt.dtype()][Type.Int64]( - out._impl->storage()._impl, Lt._impl->storage()._impl, Cnst._impl, - Lt._impl->storage()._impl->size(), {}, {}, {}, 2); + std::visit( + [&](auto *lptr) { + using TL = std::remove_pointer_t; + cytnx::linalg_internal::SubInternalImpl( + out._impl->storage()._impl, Lt._impl->storage()._impl, Cnst._impl, + Lt._impl->storage()._impl->size(), {}, {}, {}); + }, + Lt.ptr()); } else { #ifdef UNI_GPU checkCudaErrors(cudaSetDevice(Lt.device())); @@ -586,9 +696,14 @@ namespace cytnx { // Tensor out(Lt.shape(), Type.Uint64 < Lt.dtype() ? Type.Uint64 : Lt.dtype(), Lt.device()); if (Lt.device() == Device.cpu) { - cytnx::linalg_internal::lii.Ari_ii[Lt.dtype()][Type.Uint64]( - out._impl->storage()._impl, Lt._impl->storage()._impl, Cnst._impl, - Lt._impl->storage()._impl->size(), {}, {}, {}, 2); + std::visit( + [&](auto *lptr) { + using TL = std::remove_pointer_t; + cytnx::linalg_internal::SubInternalImpl( + out._impl->storage()._impl, Lt._impl->storage()._impl, Cnst._impl, + Lt._impl->storage()._impl->size(), {}, {}, {}); + }, + Lt.ptr()); } else { #ifdef UNI_GPU checkCudaErrors(cudaSetDevice(Lt.device())); @@ -615,9 +730,14 @@ namespace cytnx { // Tensor out(Lt.shape(), Type.Int32 < Lt.dtype() ? 
Type.Int32 : Lt.dtype(), Lt.device()); if (Lt.device() == Device.cpu) { - cytnx::linalg_internal::lii.Ari_ii[Lt.dtype()][Type.Int32]( - out._impl->storage()._impl, Lt._impl->storage()._impl, Cnst._impl, - Lt._impl->storage()._impl->size(), {}, {}, {}, 2); + std::visit( + [&](auto *lptr) { + using TL = std::remove_pointer_t; + cytnx::linalg_internal::SubInternalImpl( + out._impl->storage()._impl, Lt._impl->storage()._impl, Cnst._impl, + Lt._impl->storage()._impl->size(), {}, {}, {}); + }, + Lt.ptr()); } else { #ifdef UNI_GPU checkCudaErrors(cudaSetDevice(Lt.device())); @@ -645,9 +765,14 @@ namespace cytnx { // Tensor out(Lt.shape(), Type.Uint32 < Lt.dtype() ? Type.Uint32 : Lt.dtype(), Lt.device()); if (Lt.device() == Device.cpu) { - cytnx::linalg_internal::lii.Ari_ii[Lt.dtype()][Type.Uint32]( - out._impl->storage()._impl, Lt._impl->storage()._impl, Cnst._impl, - Lt._impl->storage()._impl->size(), {}, {}, {}, 2); + std::visit( + [&](auto *lptr) { + using TL = std::remove_pointer_t; + cytnx::linalg_internal::SubInternalImpl( + out._impl->storage()._impl, Lt._impl->storage()._impl, Cnst._impl, + Lt._impl->storage()._impl->size(), {}, {}, {}); + }, + Lt.ptr()); } else { #ifdef UNI_GPU checkCudaErrors(cudaSetDevice(Lt.device())); @@ -674,9 +799,14 @@ namespace cytnx { // Tensor out(Lt.shape(), Type.Int16 < Lt.dtype() ? 
Type.Int16 : Lt.dtype(), Lt.device()); if (Lt.device() == Device.cpu) { - cytnx::linalg_internal::lii.Ari_ii[Lt.dtype()][Type.Int16]( - out._impl->storage()._impl, Lt._impl->storage()._impl, Cnst._impl, - Lt._impl->storage()._impl->size(), {}, {}, {}, 2); + std::visit( + [&](auto *lptr) { + using TL = std::remove_pointer_t; + cytnx::linalg_internal::SubInternalImpl( + out._impl->storage()._impl, Lt._impl->storage()._impl, Cnst._impl, + Lt._impl->storage()._impl->size(), {}, {}, {}); + }, + Lt.ptr()); } else { #ifdef UNI_GPU checkCudaErrors(cudaSetDevice(Lt.device())); @@ -704,9 +834,14 @@ namespace cytnx { // Tensor out(Lt.shape(), Type.Uint16 < Lt.dtype() ? Type.Uint16 : Lt.dtype(), Lt.device()); if (Lt.device() == Device.cpu) { - cytnx::linalg_internal::lii.Ari_ii[Lt.dtype()][Type.Uint16]( - out._impl->storage()._impl, Lt._impl->storage()._impl, Cnst._impl, - Lt._impl->storage()._impl->size(), {}, {}, {}, 2); + std::visit( + [&](auto *lptr) { + using TL = std::remove_pointer_t; + cytnx::linalg_internal::SubInternalImpl( + out._impl->storage()._impl, Lt._impl->storage()._impl, Cnst._impl, + Lt._impl->storage()._impl->size(), {}, {}, {}); + }, + Lt.ptr()); } else { #ifdef UNI_GPU checkCudaErrors(cudaSetDevice(Lt.device())); @@ -733,9 +868,14 @@ namespace cytnx { // Tensor out(Lt.shape(), Type.Bool < Lt.dtype() ? Type.Bool : Lt.dtype(), Lt.device()); if (Lt.device() == Device.cpu) { - cytnx::linalg_internal::lii.Ari_ii[Lt.dtype()][Type.Bool]( - out._impl->storage()._impl, Lt._impl->storage()._impl, Cnst._impl, - Lt._impl->storage()._impl->size(), {}, {}, {}, 2); + std::visit( + [&](auto *lptr) { + using TL = std::remove_pointer_t; + cytnx::linalg_internal::SubInternalImpl( + out._impl->storage()._impl, Lt._impl->storage()._impl, Cnst._impl, + Lt._impl->storage()._impl->size(), {}, {}, {}); + }, + Lt.ptr()); } else { #ifdef UNI_GPU checkCudaErrors(cudaSetDevice(Lt.device())); @@ -762,9 +902,19 @@ namespace cytnx { Lt._impl->storage().size(), Lt.dtype() < rc.dtype() ? 
Lt.dtype() : rc.dtype(), Lt.device()); if (Lt.device() == Device.cpu) { - linalg_internal::lii.Ari_ii[Lt.dtype()][rc.dtype()]( - out._impl->storage()._impl, Lt._impl->storage()._impl, Cnst._impl, - Lt._impl->storage()._impl->size(), {}, {}, {}, 2); + std::visit( + [&](auto *lptr) { + using TL = std::remove_pointer_t; + std::visit( + [&](auto *rptr) { + using TR = std::remove_pointer_t; + cytnx::linalg_internal::SubInternalImpl( + out._impl->storage()._impl, Lt._impl->storage()._impl, Cnst._impl, + Lt._impl->storage()._impl->size(), {}, {}, {}); + }, + Tensor::from_storage(Cnst).ptr()); + }, + Lt.ptr()); } else { #ifdef UNI_GPU checkCudaErrors(cudaSetDevice(Lt.device())); From c483252c77f525b3964118a6df769e2eee40817e Mon Sep 17 00:00:00 2001 From: Ying-Jer Kao Date: Mon, 30 Mar 2026 14:56:05 +0800 Subject: [PATCH 2/7] linalg: use std::visit for inplace cpu arithmetic dispatch --- src/linalg/iAdd.cpp | 16 ++--- src/linalg/iArithmetic_visit.hpp | 111 +++++++++++++++++++++++++++++++ src/linalg/iDiv.cpp | 13 ++-- src/linalg/iMul.cpp | 13 ++-- src/linalg/iSub.cpp | 13 ++-- 5 files changed, 132 insertions(+), 34 deletions(-) create mode 100644 src/linalg/iArithmetic_visit.hpp diff --git a/src/linalg/iAdd.cpp b/src/linalg/iAdd.cpp index ab062fcbb..183163404 100644 --- a/src/linalg/iAdd.cpp +++ b/src/linalg/iAdd.cpp @@ -5,6 +5,7 @@ #ifdef BACKEND_TORCH #else #include "backend/linalg_internal_interface.hpp" + #include "iArithmetic_visit.hpp" namespace cytnx { namespace linalg { @@ -19,9 +20,6 @@ namespace cytnx { Lt.shape().size(), Rt.shape().size(), "\n"); } - // std::cout << "iadd entry" << std::endl; - Storage nulls; - Tensor R; if (Lt._impl->storage()._impl == Rt._impl->storage()._impl) { R = Rt.clone(); @@ -29,13 +27,13 @@ namespace cytnx { R = Rt; } + static const std::vector empty_mapper; + // if contiguous, then no need to calculate the mappers if ((Lt.is_contiguous() && Rt.is_contiguous())) { // contiguous section. 
if (Lt.device() == Device.cpu) { - cytnx::linalg_internal::lii.iAri_ii[Lt.dtype()][Rt.dtype()]( - nulls._impl, Lt._impl->storage()._impl, R._impl->storage()._impl, - Lt._impl->storage()._impl->size(), {}, {}, {}, 0); + detail::DispatchInplaceArithmeticCPU<0>(Lt, R, empty_mapper, empty_mapper, empty_mapper); } else { #ifdef UNI_GPU checkCudaErrors(cudaSetDevice(Rt.device())); @@ -59,10 +57,8 @@ namespace cytnx { } else { // non-contiguous section if (Lt.device() == Device.cpu) { - linalg_internal::lii.iAri_ii[Lt.dtype()][Rt.dtype()]( - nulls._impl, Lt._impl->storage()._impl, R._impl->storage()._impl, - Lt._impl->storage()._impl->size(), Lt._impl->shape(), Lt._impl->invmapper(), - Rt._impl->invmapper(), 0); + detail::DispatchInplaceArithmeticCPU<0>(Lt, R, Lt._impl->shape(), Lt._impl->invmapper(), + Rt._impl->invmapper()); } else { #ifdef UNI_GPU checkCudaErrors(cudaSetDevice(Rt.device())); diff --git a/src/linalg/iArithmetic_visit.hpp b/src/linalg/iArithmetic_visit.hpp new file mode 100644 index 000000000..33225edb3 --- /dev/null +++ b/src/linalg/iArithmetic_visit.hpp @@ -0,0 +1,111 @@ +#ifndef CYTNX_LINALG_IARITHMETIC_VISIT_HPP_ +#define CYTNX_LINALG_IARITHMETIC_VISIT_HPP_ + +#include +#include + +#include "Tensor.hpp" +#include "utils/utils.hpp" + +namespace cytnx { + namespace linalg { + namespace detail { + /* Applies one in-place element update to lhs using rhs; op_code picks the operator at compile time: 0 add, 1 mul, 2 sub, 3 div. */ + template + inline void ApplyInplaceArithmeticOp(TLin &lhs, const TRin &rhs) { + if constexpr (!cytnx::is_complex_v && cytnx::is_complex_v) { /* real lhs with complex rhs: raise an error whose text names the operator that was requested */ + if constexpr (op_code == 0) { + cytnx_error_msg(true, "[ERROR][iadd] Cannot perform real+=complex%s", "\n"); + } else if constexpr (op_code == 1) { + cytnx_error_msg(true, "[ERROR][imul] Cannot perform real*=complex%s", "\n"); + } else if constexpr (op_code == 2) { + cytnx_error_msg(true, "[ERROR][isub] Cannot perform real-=complex%s", "\n"); + } else if constexpr (op_code == 3) { + cytnx_error_msg(true, "[ERROR][idiv] Cannot perform real/=complex%s", "\n"); + } + } else { + if constexpr (op_code == 0) { + lhs += rhs; 
+ } else if constexpr (op_code == 1) { + lhs *= rhs; + } else if constexpr (op_code == 2) { + lhs -= rhs; + } else if constexpr (op_code == 3) { + lhs /= rhs; + } + } + } + /* Applies the element update to len entries of lhs. rhs is read as rhs[0] for every entry when rhs_is_scalar, index for index when shape is empty, and otherwise at offsets computed from shape and the two inverse mappers. */ + template + inline void ApplyInplaceArithmeticKernel( + TLin *lhs, const TRin *rhs, const cytnx_uint64 &len, const bool &rhs_is_scalar, + const std::vector &shape, const std::vector &invmapper_L, + const std::vector &invmapper_R) { + if (rhs_is_scalar) { + for (cytnx_uint64 i = 0; i < len; i++) { + ApplyInplaceArithmeticOp(lhs[i], rhs[0]); + } + return; + } + + if (shape.empty()) { + for (cytnx_uint64 i = 0; i < len; i++) { + ApplyInplaceArithmeticOp(lhs[i], rhs[i]); + } + return; + } + + std::vector accu_shape(shape.size()); + std::vector old_accu_shapeL(shape.size()), old_accu_shapeR(shape.size()); + cytnx_uint64 tmp1 = 1, tmp2 = 1, tmp3 = 1; + for (cytnx_uint64 i = 0; i < shape.size(); i++) { + accu_shape[shape.size() - 1 - i] = tmp1; + tmp1 *= shape[shape.size() - 1 - i]; + + old_accu_shapeL[shape.size() - 1 - i] = tmp2; + tmp2 *= shape[invmapper_L[shape.size() - 1 - i]]; + + old_accu_shapeR[shape.size() - 1 - i] = tmp3; + tmp3 *= shape[invmapper_R[shape.size() - 1 - i]]; + } + /* Per position i: c2cartesian expands i into coordinates, vec_map reorders them by each inverse mapper, cartesian2c folds them back into one offset per operand (helper semantics inferred from their names; confirm in utils). */ + for (cytnx_uint64 i = 0; i < len; i++) { + std::vector tmpv = cytnx::c2cartesian(i, accu_shape); + cytnx_uint64 idx_L = + cytnx::cartesian2c(cytnx::vec_map(tmpv, invmapper_L), old_accu_shapeL); + cytnx_uint64 idx_R = + cytnx::cartesian2c(cytnx::vec_map(tmpv, invmapper_R), old_accu_shapeR); + ApplyInplaceArithmeticOp(lhs[idx_L], rhs[idx_R]); + } + } + /* CPU entry point: takes len from the Lt storage size, treats a one-element Rt storage as a scalar operand, then uses nested std::visit over Lt.ptr() and Rt.ptr() to call the kernel with the concrete pointer types. */ + template + inline void DispatchInplaceArithmeticCPU(Tensor &Lt, const Tensor &Rt, + const std::vector &shape, + const std::vector &invmapper_L, + const std::vector &invmapper_R) { + const cytnx_uint64 len = Lt._impl->storage()._impl->size(); + const bool rhs_is_scalar = (Rt._impl->storage()._impl->size() == 1); + + std::visit( + [&](auto *lptr) { + using TL = std::remove_pointer_t; + static_assert(!std::is_same_v); + + std::visit( + [&](auto *rptr) { + using TR = 
std::remove_pointer_t; + static_assert(!std::is_same_v); + ApplyInplaceArithmeticKernel(lptr, rptr, len, rhs_is_scalar, shape, + invmapper_L, invmapper_R); + }, + Rt.ptr()); + }, + Lt.ptr()); + } + + } // namespace detail + } // namespace linalg +} // namespace cytnx + +#endif // CYTNX_LINALG_IARITHMETIC_VISIT_HPP_ diff --git a/src/linalg/iDiv.cpp b/src/linalg/iDiv.cpp index 0cb35a999..89abe4b29 100644 --- a/src/linalg/iDiv.cpp +++ b/src/linalg/iDiv.cpp @@ -5,6 +5,7 @@ #ifdef BACKEND_TORCH #else #include "backend/linalg_internal_interface.hpp" + #include "iArithmetic_visit.hpp" namespace cytnx { namespace linalg { @@ -27,14 +28,12 @@ namespace cytnx { R = Rt; } - Storage nulls; + static const std::vector empty_mapper; // if contiguous, then no need to calculate the mappers if ((Lt.is_contiguous() && Rt.is_contiguous())) { // contiguous section. if (Lt.device() == Device.cpu) { - cytnx::linalg_internal::lii.iAri_ii[Lt.dtype()][Rt.dtype()]( - nulls._impl, Lt._impl->storage()._impl, Rt._impl->storage()._impl, - Lt._impl->storage()._impl->size(), {}, {}, {}, 3); + detail::DispatchInplaceArithmeticCPU<3>(Lt, R, empty_mapper, empty_mapper, empty_mapper); } else { #ifdef UNI_GPU checkCudaErrors(cudaSetDevice(Rt.device())); @@ -58,10 +57,8 @@ namespace cytnx { } else { // non-contiguous section if (Lt.device() == Device.cpu) { - linalg_internal::lii.iAri_ii[Lt.dtype()][Rt.dtype()]( - nulls._impl, Lt._impl->storage()._impl, R._impl->storage()._impl, - Lt._impl->storage()._impl->size(), Lt._impl->shape(), Lt._impl->invmapper(), - Rt._impl->invmapper(), 3); + detail::DispatchInplaceArithmeticCPU<3>(Lt, R, Lt._impl->shape(), Lt._impl->invmapper(), + Rt._impl->invmapper()); } else { #ifdef UNI_GPU cytnx_error_msg(true, diff --git a/src/linalg/iMul.cpp b/src/linalg/iMul.cpp index b6d8e47dd..b54993ccf 100644 --- a/src/linalg/iMul.cpp +++ b/src/linalg/iMul.cpp @@ -5,6 +5,7 @@ #ifdef BACKEND_TORCH #else #include "backend/linalg_internal_interface.hpp" + #include 
"iArithmetic_visit.hpp" namespace cytnx { namespace linalg { @@ -27,14 +28,12 @@ namespace cytnx { R = Rt; } - Storage nulls; + static const std::vector empty_mapper; // if contiguous, then no need to calculate the mappers if ((Lt.is_contiguous() && Rt.is_contiguous())) { // contiguous section. if (Lt.device() == Device.cpu) { - cytnx::linalg_internal::lii.iAri_ii[Lt.dtype()][Rt.dtype()]( - nulls._impl, Lt._impl->storage()._impl, R._impl->storage()._impl, - Lt._impl->storage()._impl->size(), {}, {}, {}, 1); + detail::DispatchInplaceArithmeticCPU<1>(Lt, R, empty_mapper, empty_mapper, empty_mapper); } else { #ifdef UNI_GPU checkCudaErrors(cudaSetDevice(Rt.device())); @@ -58,10 +57,8 @@ namespace cytnx { } else { // non-contiguous section if (Lt.device() == Device.cpu) { - linalg_internal::lii.iAri_ii[Lt.dtype()][Rt.dtype()]( - nulls._impl, Lt._impl->storage()._impl, R._impl->storage()._impl, - Lt._impl->storage()._impl->size(), Lt._impl->shape(), Lt._impl->invmapper(), - Rt._impl->invmapper(), 1); + detail::DispatchInplaceArithmeticCPU<1>(Lt, R, Lt._impl->shape(), Lt._impl->invmapper(), + Rt._impl->invmapper()); } else { #ifdef UNI_GPU cytnx_error_msg(true, diff --git a/src/linalg/iSub.cpp b/src/linalg/iSub.cpp index f1a8c52c4..f98a5ad45 100644 --- a/src/linalg/iSub.cpp +++ b/src/linalg/iSub.cpp @@ -5,6 +5,7 @@ #ifdef BACKEND_TORCH #else #include "backend/linalg_internal_interface.hpp" + #include "iArithmetic_visit.hpp" namespace cytnx { namespace linalg { @@ -27,14 +28,12 @@ namespace cytnx { R = Rt; } - Storage nulls; + static const std::vector empty_mapper; // if contiguous, then no need to calculate the mappers if ((Lt.is_contiguous() && Rt.is_contiguous())) { // contiguous section. 
if (Lt.device() == Device.cpu) { - cytnx::linalg_internal::lii.iAri_ii[Lt.dtype()][Rt.dtype()]( - nulls._impl, Lt._impl->storage()._impl, R._impl->storage()._impl, - Lt._impl->storage()._impl->size(), {}, {}, {}, 2); + detail::DispatchInplaceArithmeticCPU<2>(Lt, R, empty_mapper, empty_mapper, empty_mapper); } else { #ifdef UNI_GPU checkCudaErrors(cudaSetDevice(Rt.device())); @@ -58,10 +57,8 @@ namespace cytnx { } else { // non-contiguous section if (Lt.device() == Device.cpu) { - linalg_internal::lii.iAri_ii[Lt.dtype()][Rt.dtype()]( - nulls._impl, Lt._impl->storage()._impl, R._impl->storage()._impl, - Lt._impl->storage()._impl->size(), Lt._impl->shape(), Lt._impl->invmapper(), - Rt._impl->invmapper(), 2); + detail::DispatchInplaceArithmeticCPU<2>(Lt, R, Lt._impl->shape(), Lt._impl->invmapper(), + Rt._impl->invmapper()); } else { #ifdef UNI_GPU checkCudaErrors(cudaSetDevice(Rt.device())); From 17b60364d02505fb989cb11ae0c7d90cda426765 Mon Sep 17 00:00:00 2001 From: Ying-Jer Kao Date: Mon, 30 Mar 2026 16:00:33 +0800 Subject: [PATCH 3/7] linalg: remove dead inplace arithmetic dispatch table --- .../linalg_internal_cpu/CMakeLists.txt | 2 - .../iArithmetic_internal.cpp | 2239 ----------------- .../iArithmetic_internal.hpp | 755 ------ src/backend/linalg_internal_interface.cpp | 135 - src/backend/linalg_internal_interface.hpp | 2 - 5 files changed, 3133 deletions(-) delete mode 100644 src/backend/linalg_internal_cpu/iArithmetic_internal.cpp delete mode 100644 src/backend/linalg_internal_cpu/iArithmetic_internal.hpp diff --git a/src/backend/linalg_internal_cpu/CMakeLists.txt b/src/backend/linalg_internal_cpu/CMakeLists.txt index 3f00ecb4f..0c0bc89c7 100644 --- a/src/backend/linalg_internal_cpu/CMakeLists.txt +++ b/src/backend/linalg_internal_cpu/CMakeLists.txt @@ -3,7 +3,6 @@ target_sources_local(cytnx Norm_internal.hpp Add_internal.hpp iAdd_internal.hpp - iArithmetic_internal.hpp Conj_inplace_internal.hpp Cpr_internal.hpp Diag_internal.hpp @@ -45,7 +44,6 @@ 
target_sources_local(cytnx memcpyTruncation.hpp iAdd_internal.cpp - iArithmetic_internal.cpp Conj_inplace_internal.cpp Diag_internal.cpp iDiv_internal.cpp diff --git a/src/backend/linalg_internal_cpu/iArithmetic_internal.cpp b/src/backend/linalg_internal_cpu/iArithmetic_internal.cpp deleted file mode 100644 index 1b42efecf..000000000 --- a/src/backend/linalg_internal_cpu/iArithmetic_internal.cpp +++ /dev/null @@ -1,2239 +0,0 @@ -#include "iArithmetic_internal.hpp" - -namespace cytnx { - namespace linalg_internal { - - void iArithmetic_internal_cdtcd( - boost::intrusive_ptr &out, boost::intrusive_ptr &Lin, - boost::intrusive_ptr &Rin, const unsigned long long &len, - const std::vector &shape, const std::vector &invmapper_L, - const std::vector &invmapper_R, const char &type) { - if (type == 0) - cytnx::linalg_internal::iAdd_internal_cdtcd(out, Lin, Rin, len, shape, invmapper_L, - invmapper_R); - else if (type == 1) - cytnx::linalg_internal::iMul_internal_cdtcd(out, Lin, Rin, len, shape, invmapper_L, - invmapper_R); - else if (type == 2) - cytnx::linalg_internal::iSub_internal_cdtcd(out, Lin, Rin, len, shape, invmapper_L, - invmapper_R); - else if (type == 3) - cytnx::linalg_internal::iDiv_internal_cdtcd(out, Lin, Rin, len, shape, invmapper_L, - invmapper_R); - } - void iArithmetic_internal_cdtcf( - boost::intrusive_ptr &out, boost::intrusive_ptr &Lin, - boost::intrusive_ptr &Rin, const unsigned long long &len, - const std::vector &shape, const std::vector &invmapper_L, - const std::vector &invmapper_R, const char &type) { - if (type == 0) - cytnx::linalg_internal::iAdd_internal_cdtcf(out, Lin, Rin, len, shape, invmapper_L, - invmapper_R); - else if (type == 1) - cytnx::linalg_internal::iMul_internal_cdtcf(out, Lin, Rin, len, shape, invmapper_L, - invmapper_R); - else if (type == 2) - cytnx::linalg_internal::iSub_internal_cdtcf(out, Lin, Rin, len, shape, invmapper_L, - invmapper_R); - else if (type == 3) - cytnx::linalg_internal::iDiv_internal_cdtcf(out, Lin, Rin, 
len, shape, invmapper_L, - invmapper_R); - } - void iArithmetic_internal_cdtd(boost::intrusive_ptr &out, - boost::intrusive_ptr &Lin, - boost::intrusive_ptr &Rin, - const unsigned long long &len, - const std::vector &shape, - const std::vector &invmapper_L, - const std::vector &invmapper_R, const char &type) { - if (type == 0) - cytnx::linalg_internal::iAdd_internal_cdtd(out, Lin, Rin, len, shape, invmapper_L, - invmapper_R); - else if (type == 1) - cytnx::linalg_internal::iMul_internal_cdtd(out, Lin, Rin, len, shape, invmapper_L, - invmapper_R); - else if (type == 2) - cytnx::linalg_internal::iSub_internal_cdtd(out, Lin, Rin, len, shape, invmapper_L, - invmapper_R); - else if (type == 3) - cytnx::linalg_internal::iDiv_internal_cdtd(out, Lin, Rin, len, shape, invmapper_L, - invmapper_R); - } - void iArithmetic_internal_cdtf(boost::intrusive_ptr &out, - boost::intrusive_ptr &Lin, - boost::intrusive_ptr &Rin, - const unsigned long long &len, - const std::vector &shape, - const std::vector &invmapper_L, - const std::vector &invmapper_R, const char &type) { - if (type == 0) - cytnx::linalg_internal::iAdd_internal_cdtf(out, Lin, Rin, len, shape, invmapper_L, - invmapper_R); - else if (type == 1) - cytnx::linalg_internal::iMul_internal_cdtf(out, Lin, Rin, len, shape, invmapper_L, - invmapper_R); - else if (type == 2) - cytnx::linalg_internal::iSub_internal_cdtf(out, Lin, Rin, len, shape, invmapper_L, - invmapper_R); - else if (type == 3) - cytnx::linalg_internal::iDiv_internal_cdtf(out, Lin, Rin, len, shape, invmapper_L, - invmapper_R); - } - void iArithmetic_internal_cdtu64( - boost::intrusive_ptr &out, boost::intrusive_ptr &Lin, - boost::intrusive_ptr &Rin, const unsigned long long &len, - const std::vector &shape, const std::vector &invmapper_L, - const std::vector &invmapper_R, const char &type) { - if (type == 0) - cytnx::linalg_internal::iAdd_internal_cdtu64(out, Lin, Rin, len, shape, invmapper_L, - invmapper_R); - else if (type == 1) - 
cytnx::linalg_internal::iMul_internal_cdtu64(out, Lin, Rin, len, shape, invmapper_L, - invmapper_R); - else if (type == 2) - cytnx::linalg_internal::iSub_internal_cdtu64(out, Lin, Rin, len, shape, invmapper_L, - invmapper_R); - else if (type == 3) - cytnx::linalg_internal::iDiv_internal_cdtu64(out, Lin, Rin, len, shape, invmapper_L, - invmapper_R); - } - void iArithmetic_internal_cdti64( - boost::intrusive_ptr &out, boost::intrusive_ptr &Lin, - boost::intrusive_ptr &Rin, const unsigned long long &len, - const std::vector &shape, const std::vector &invmapper_L, - const std::vector &invmapper_R, const char &type) { - if (type == 0) - cytnx::linalg_internal::iAdd_internal_cdti64(out, Lin, Rin, len, shape, invmapper_L, - invmapper_R); - else if (type == 1) - cytnx::linalg_internal::iMul_internal_cdti64(out, Lin, Rin, len, shape, invmapper_L, - invmapper_R); - else if (type == 2) - cytnx::linalg_internal::iSub_internal_cdti64(out, Lin, Rin, len, shape, invmapper_L, - invmapper_R); - else if (type == 3) - cytnx::linalg_internal::iDiv_internal_cdti64(out, Lin, Rin, len, shape, invmapper_L, - invmapper_R); - } - void iArithmetic_internal_cdtu32( - boost::intrusive_ptr &out, boost::intrusive_ptr &Lin, - boost::intrusive_ptr &Rin, const unsigned long long &len, - const std::vector &shape, const std::vector &invmapper_L, - const std::vector &invmapper_R, const char &type) { - if (type == 0) - cytnx::linalg_internal::iAdd_internal_cdtu32(out, Lin, Rin, len, shape, invmapper_L, - invmapper_R); - else if (type == 1) - cytnx::linalg_internal::iMul_internal_cdtu32(out, Lin, Rin, len, shape, invmapper_L, - invmapper_R); - else if (type == 2) - cytnx::linalg_internal::iSub_internal_cdtu32(out, Lin, Rin, len, shape, invmapper_L, - invmapper_R); - else if (type == 3) - cytnx::linalg_internal::iDiv_internal_cdtu32(out, Lin, Rin, len, shape, invmapper_L, - invmapper_R); - } - void iArithmetic_internal_cdti32( - boost::intrusive_ptr &out, boost::intrusive_ptr &Lin, - boost::intrusive_ptr 
&Rin, const unsigned long long &len, - const std::vector &shape, const std::vector &invmapper_L, - const std::vector &invmapper_R, const char &type) { - if (type == 0) - cytnx::linalg_internal::iAdd_internal_cdti32(out, Lin, Rin, len, shape, invmapper_L, - invmapper_R); - else if (type == 1) - cytnx::linalg_internal::iMul_internal_cdti32(out, Lin, Rin, len, shape, invmapper_L, - invmapper_R); - else if (type == 2) - cytnx::linalg_internal::iSub_internal_cdti32(out, Lin, Rin, len, shape, invmapper_L, - invmapper_R); - else if (type == 3) - cytnx::linalg_internal::iDiv_internal_cdti32(out, Lin, Rin, len, shape, invmapper_L, - invmapper_R); - } - void iArithmetic_internal_cdti16( - boost::intrusive_ptr &out, boost::intrusive_ptr &Lin, - boost::intrusive_ptr &Rin, const unsigned long long &len, - const std::vector &shape, const std::vector &invmapper_L, - const std::vector &invmapper_R, const char &type) { - if (type == 0) - cytnx::linalg_internal::iAdd_internal_cdti16(out, Lin, Rin, len, shape, invmapper_L, - invmapper_R); - else if (type == 1) - cytnx::linalg_internal::iMul_internal_cdti16(out, Lin, Rin, len, shape, invmapper_L, - invmapper_R); - else if (type == 2) - cytnx::linalg_internal::iSub_internal_cdti16(out, Lin, Rin, len, shape, invmapper_L, - invmapper_R); - else if (type == 3) - cytnx::linalg_internal::iDiv_internal_cdti16(out, Lin, Rin, len, shape, invmapper_L, - invmapper_R); - } - void iArithmetic_internal_cdtu16( - boost::intrusive_ptr &out, boost::intrusive_ptr &Lin, - boost::intrusive_ptr &Rin, const unsigned long long &len, - const std::vector &shape, const std::vector &invmapper_L, - const std::vector &invmapper_R, const char &type) { - if (type == 0) - cytnx::linalg_internal::iAdd_internal_cdtu16(out, Lin, Rin, len, shape, invmapper_L, - invmapper_R); - else if (type == 1) - cytnx::linalg_internal::iMul_internal_cdtu16(out, Lin, Rin, len, shape, invmapper_L, - invmapper_R); - else if (type == 2) - cytnx::linalg_internal::iSub_internal_cdtu16(out, 
Lin, Rin, len, shape, invmapper_L, - invmapper_R); - else if (type == 3) - cytnx::linalg_internal::iDiv_internal_cdtu16(out, Lin, Rin, len, shape, invmapper_L, - invmapper_R); - } - void iArithmetic_internal_cdtb(boost::intrusive_ptr &out, - boost::intrusive_ptr &Lin, - boost::intrusive_ptr &Rin, - const unsigned long long &len, - const std::vector &shape, - const std::vector &invmapper_L, - const std::vector &invmapper_R, const char &type) { - if (type == 0) - cytnx::linalg_internal::iAdd_internal_cdtb(out, Lin, Rin, len, shape, invmapper_L, - invmapper_R); - else if (type == 1) - cytnx::linalg_internal::iMul_internal_cdtb(out, Lin, Rin, len, shape, invmapper_L, - invmapper_R); - else if (type == 2) - cytnx::linalg_internal::iSub_internal_cdtb(out, Lin, Rin, len, shape, invmapper_L, - invmapper_R); - else if (type == 3) - cytnx::linalg_internal::iDiv_internal_cdtb(out, Lin, Rin, len, shape, invmapper_L, - invmapper_R); - } - //----------------------- - void iArithmetic_internal_cftcd( - boost::intrusive_ptr &out, boost::intrusive_ptr &Lin, - boost::intrusive_ptr &Rin, const unsigned long long &len, - const std::vector &shape, const std::vector &invmapper_L, - const std::vector &invmapper_R, const char &type) { - if (type == 0) - cytnx::linalg_internal::iAdd_internal_cftcd(out, Lin, Rin, len, shape, invmapper_L, - invmapper_R); - else if (type == 1) - cytnx::linalg_internal::iMul_internal_cftcd(out, Lin, Rin, len, shape, invmapper_L, - invmapper_R); - else if (type == 2) - cytnx::linalg_internal::iSub_internal_cftcd(out, Lin, Rin, len, shape, invmapper_L, - invmapper_R); - else if (type == 3) - cytnx::linalg_internal::iDiv_internal_cftcd(out, Lin, Rin, len, shape, invmapper_L, - invmapper_R); - } - void iArithmetic_internal_cftcf( - boost::intrusive_ptr &out, boost::intrusive_ptr &Lin, - boost::intrusive_ptr &Rin, const unsigned long long &len, - const std::vector &shape, const std::vector &invmapper_L, - const std::vector &invmapper_R, const char &type) { - if 
(type == 0) - cytnx::linalg_internal::iAdd_internal_cftcf(out, Lin, Rin, len, shape, invmapper_L, - invmapper_R); - else if (type == 1) - cytnx::linalg_internal::iMul_internal_cftcf(out, Lin, Rin, len, shape, invmapper_L, - invmapper_R); - else if (type == 2) - cytnx::linalg_internal::iSub_internal_cftcf(out, Lin, Rin, len, shape, invmapper_L, - invmapper_R); - else if (type == 3) - cytnx::linalg_internal::iDiv_internal_cftcf(out, Lin, Rin, len, shape, invmapper_L, - invmapper_R); - } - void iArithmetic_internal_cftd(boost::intrusive_ptr &out, - boost::intrusive_ptr &Lin, - boost::intrusive_ptr &Rin, - const unsigned long long &len, - const std::vector &shape, - const std::vector &invmapper_L, - const std::vector &invmapper_R, const char &type) { - if (type == 0) - cytnx::linalg_internal::iAdd_internal_cftd(out, Lin, Rin, len, shape, invmapper_L, - invmapper_R); - else if (type == 1) - cytnx::linalg_internal::iMul_internal_cftd(out, Lin, Rin, len, shape, invmapper_L, - invmapper_R); - else if (type == 2) - cytnx::linalg_internal::iSub_internal_cftd(out, Lin, Rin, len, shape, invmapper_L, - invmapper_R); - else if (type == 3) - cytnx::linalg_internal::iDiv_internal_cftd(out, Lin, Rin, len, shape, invmapper_L, - invmapper_R); - } - void iArithmetic_internal_cftf(boost::intrusive_ptr &out, - boost::intrusive_ptr &Lin, - boost::intrusive_ptr &Rin, - const unsigned long long &len, - const std::vector &shape, - const std::vector &invmapper_L, - const std::vector &invmapper_R, const char &type) { - if (type == 0) - cytnx::linalg_internal::iAdd_internal_cftf(out, Lin, Rin, len, shape, invmapper_L, - invmapper_R); - else if (type == 1) - cytnx::linalg_internal::iMul_internal_cftf(out, Lin, Rin, len, shape, invmapper_L, - invmapper_R); - else if (type == 2) - cytnx::linalg_internal::iSub_internal_cftf(out, Lin, Rin, len, shape, invmapper_L, - invmapper_R); - else if (type == 3) - cytnx::linalg_internal::iDiv_internal_cftf(out, Lin, Rin, len, shape, invmapper_L, - 
invmapper_R); - } - void iArithmetic_internal_cftu64( - boost::intrusive_ptr &out, boost::intrusive_ptr &Lin, - boost::intrusive_ptr &Rin, const unsigned long long &len, - const std::vector &shape, const std::vector &invmapper_L, - const std::vector &invmapper_R, const char &type) { - if (type == 0) - cytnx::linalg_internal::iAdd_internal_cftu64(out, Lin, Rin, len, shape, invmapper_L, - invmapper_R); - else if (type == 1) - cytnx::linalg_internal::iMul_internal_cftu64(out, Lin, Rin, len, shape, invmapper_L, - invmapper_R); - else if (type == 2) - cytnx::linalg_internal::iSub_internal_cftu64(out, Lin, Rin, len, shape, invmapper_L, - invmapper_R); - else if (type == 3) - cytnx::linalg_internal::iDiv_internal_cftu64(out, Lin, Rin, len, shape, invmapper_L, - invmapper_R); - } - void iArithmetic_internal_cfti64( - boost::intrusive_ptr &out, boost::intrusive_ptr &Lin, - boost::intrusive_ptr &Rin, const unsigned long long &len, - const std::vector &shape, const std::vector &invmapper_L, - const std::vector &invmapper_R, const char &type) { - if (type == 0) - cytnx::linalg_internal::iAdd_internal_cfti64(out, Lin, Rin, len, shape, invmapper_L, - invmapper_R); - else if (type == 1) - cytnx::linalg_internal::iMul_internal_cfti64(out, Lin, Rin, len, shape, invmapper_L, - invmapper_R); - else if (type == 2) - cytnx::linalg_internal::iSub_internal_cfti64(out, Lin, Rin, len, shape, invmapper_L, - invmapper_R); - else if (type == 3) - cytnx::linalg_internal::iDiv_internal_cfti64(out, Lin, Rin, len, shape, invmapper_L, - invmapper_R); - } - void iArithmetic_internal_cftu32( - boost::intrusive_ptr &out, boost::intrusive_ptr &Lin, - boost::intrusive_ptr &Rin, const unsigned long long &len, - const std::vector &shape, const std::vector &invmapper_L, - const std::vector &invmapper_R, const char &type) { - if (type == 0) - cytnx::linalg_internal::iAdd_internal_cftu32(out, Lin, Rin, len, shape, invmapper_L, - invmapper_R); - else if (type == 1) - 
cytnx::linalg_internal::iMul_internal_cftu32(out, Lin, Rin, len, shape, invmapper_L, - invmapper_R); - else if (type == 2) - cytnx::linalg_internal::iSub_internal_cftu32(out, Lin, Rin, len, shape, invmapper_L, - invmapper_R); - else if (type == 3) - cytnx::linalg_internal::iDiv_internal_cftu32(out, Lin, Rin, len, shape, invmapper_L, - invmapper_R); - } - void iArithmetic_internal_cfti32( - boost::intrusive_ptr &out, boost::intrusive_ptr &Lin, - boost::intrusive_ptr &Rin, const unsigned long long &len, - const std::vector &shape, const std::vector &invmapper_L, - const std::vector &invmapper_R, const char &type) { - if (type == 0) - cytnx::linalg_internal::iAdd_internal_cfti32(out, Lin, Rin, len, shape, invmapper_L, - invmapper_R); - else if (type == 1) - cytnx::linalg_internal::iMul_internal_cfti32(out, Lin, Rin, len, shape, invmapper_L, - invmapper_R); - else if (type == 2) - cytnx::linalg_internal::iSub_internal_cfti32(out, Lin, Rin, len, shape, invmapper_L, - invmapper_R); - else if (type == 3) - cytnx::linalg_internal::iDiv_internal_cfti32(out, Lin, Rin, len, shape, invmapper_L, - invmapper_R); - } - void iArithmetic_internal_cfti16( - boost::intrusive_ptr &out, boost::intrusive_ptr &Lin, - boost::intrusive_ptr &Rin, const unsigned long long &len, - const std::vector &shape, const std::vector &invmapper_L, - const std::vector &invmapper_R, const char &type) { - if (type == 0) - cytnx::linalg_internal::iAdd_internal_cfti16(out, Lin, Rin, len, shape, invmapper_L, - invmapper_R); - else if (type == 1) - cytnx::linalg_internal::iMul_internal_cfti16(out, Lin, Rin, len, shape, invmapper_L, - invmapper_R); - else if (type == 2) - cytnx::linalg_internal::iSub_internal_cfti16(out, Lin, Rin, len, shape, invmapper_L, - invmapper_R); - else if (type == 3) - cytnx::linalg_internal::iDiv_internal_cfti16(out, Lin, Rin, len, shape, invmapper_L, - invmapper_R); - } - void iArithmetic_internal_cftu16( - boost::intrusive_ptr &out, boost::intrusive_ptr &Lin, - boost::intrusive_ptr 
&Rin, const unsigned long long &len, - const std::vector &shape, const std::vector &invmapper_L, - const std::vector &invmapper_R, const char &type) { - if (type == 0) - cytnx::linalg_internal::iAdd_internal_cftu16(out, Lin, Rin, len, shape, invmapper_L, - invmapper_R); - else if (type == 1) - cytnx::linalg_internal::iMul_internal_cftu16(out, Lin, Rin, len, shape, invmapper_L, - invmapper_R); - else if (type == 2) - cytnx::linalg_internal::iSub_internal_cftu16(out, Lin, Rin, len, shape, invmapper_L, - invmapper_R); - else if (type == 3) - cytnx::linalg_internal::iDiv_internal_cftu16(out, Lin, Rin, len, shape, invmapper_L, - invmapper_R); - } - void iArithmetic_internal_cftb(boost::intrusive_ptr &out, - boost::intrusive_ptr &Lin, - boost::intrusive_ptr &Rin, - const unsigned long long &len, - const std::vector &shape, - const std::vector &invmapper_L, - const std::vector &invmapper_R, const char &type) { - if (type == 0) - cytnx::linalg_internal::iAdd_internal_cftb(out, Lin, Rin, len, shape, invmapper_L, - invmapper_R); - else if (type == 1) - cytnx::linalg_internal::iMul_internal_cftb(out, Lin, Rin, len, shape, invmapper_L, - invmapper_R); - else if (type == 2) - cytnx::linalg_internal::iSub_internal_cftb(out, Lin, Rin, len, shape, invmapper_L, - invmapper_R); - else if (type == 3) - cytnx::linalg_internal::iDiv_internal_cftb(out, Lin, Rin, len, shape, invmapper_L, - invmapper_R); - } - //--------------------------- - void iArithmetic_internal_dtcd(boost::intrusive_ptr &out, - boost::intrusive_ptr &Lin, - boost::intrusive_ptr &Rin, - const unsigned long long &len, - const std::vector &shape, - const std::vector &invmapper_L, - const std::vector &invmapper_R, const char &type) { - if (type == 0) - cytnx::linalg_internal::iAdd_internal_dtcd(out, Lin, Rin, len, shape, invmapper_L, - invmapper_R); - else if (type == 1) - cytnx::linalg_internal::iMul_internal_dtcd(out, Lin, Rin, len, shape, invmapper_L, - invmapper_R); - else if (type == 2) - 
cytnx::linalg_internal::iSub_internal_dtcd(out, Lin, Rin, len, shape, invmapper_L, - invmapper_R); - else if (type == 3) - cytnx::linalg_internal::iDiv_internal_dtcd(out, Lin, Rin, len, shape, invmapper_L, - invmapper_R); - } - void iArithmetic_internal_dtcf(boost::intrusive_ptr &out, - boost::intrusive_ptr &Lin, - boost::intrusive_ptr &Rin, - const unsigned long long &len, - const std::vector &shape, - const std::vector &invmapper_L, - const std::vector &invmapper_R, const char &type) { - if (type == 0) - cytnx::linalg_internal::iAdd_internal_dtcf(out, Lin, Rin, len, shape, invmapper_L, - invmapper_R); - else if (type == 1) - cytnx::linalg_internal::iMul_internal_dtcf(out, Lin, Rin, len, shape, invmapper_L, - invmapper_R); - else if (type == 2) - cytnx::linalg_internal::iSub_internal_dtcf(out, Lin, Rin, len, shape, invmapper_L, - invmapper_R); - else if (type == 3) - cytnx::linalg_internal::iDiv_internal_dtcf(out, Lin, Rin, len, shape, invmapper_L, - invmapper_R); - } - void iArithmetic_internal_dtd(boost::intrusive_ptr &out, - boost::intrusive_ptr &Lin, - boost::intrusive_ptr &Rin, - const unsigned long long &len, - const std::vector &shape, - const std::vector &invmapper_L, - const std::vector &invmapper_R, const char &type) { - if (type == 0) - cytnx::linalg_internal::iAdd_internal_dtd(out, Lin, Rin, len, shape, invmapper_L, - invmapper_R); - else if (type == 1) - cytnx::linalg_internal::iMul_internal_dtd(out, Lin, Rin, len, shape, invmapper_L, - invmapper_R); - else if (type == 2) - cytnx::linalg_internal::iSub_internal_dtd(out, Lin, Rin, len, shape, invmapper_L, - invmapper_R); - else if (type == 3) - cytnx::linalg_internal::iDiv_internal_dtd(out, Lin, Rin, len, shape, invmapper_L, - invmapper_R); - } - void iArithmetic_internal_dtf(boost::intrusive_ptr &out, - boost::intrusive_ptr &Lin, - boost::intrusive_ptr &Rin, - const unsigned long long &len, - const std::vector &shape, - const std::vector &invmapper_L, - const std::vector &invmapper_R, const char 
&type) { - if (type == 0) - cytnx::linalg_internal::iAdd_internal_dtf(out, Lin, Rin, len, shape, invmapper_L, - invmapper_R); - else if (type == 1) - cytnx::linalg_internal::iMul_internal_dtf(out, Lin, Rin, len, shape, invmapper_L, - invmapper_R); - else if (type == 2) - cytnx::linalg_internal::iSub_internal_dtf(out, Lin, Rin, len, shape, invmapper_L, - invmapper_R); - else if (type == 3) - cytnx::linalg_internal::iDiv_internal_dtf(out, Lin, Rin, len, shape, invmapper_L, - invmapper_R); - } - void iArithmetic_internal_dtu64( - boost::intrusive_ptr &out, boost::intrusive_ptr &Lin, - boost::intrusive_ptr &Rin, const unsigned long long &len, - const std::vector &shape, const std::vector &invmapper_L, - const std::vector &invmapper_R, const char &type) { - if (type == 0) - cytnx::linalg_internal::iAdd_internal_dtu64(out, Lin, Rin, len, shape, invmapper_L, - invmapper_R); - else if (type == 1) - cytnx::linalg_internal::iMul_internal_dtu64(out, Lin, Rin, len, shape, invmapper_L, - invmapper_R); - else if (type == 2) - cytnx::linalg_internal::iSub_internal_dtu64(out, Lin, Rin, len, shape, invmapper_L, - invmapper_R); - else if (type == 3) - cytnx::linalg_internal::iDiv_internal_dtu64(out, Lin, Rin, len, shape, invmapper_L, - invmapper_R); - } - void iArithmetic_internal_dti64( - boost::intrusive_ptr &out, boost::intrusive_ptr &Lin, - boost::intrusive_ptr &Rin, const unsigned long long &len, - const std::vector &shape, const std::vector &invmapper_L, - const std::vector &invmapper_R, const char &type) { - if (type == 0) - cytnx::linalg_internal::iAdd_internal_dti64(out, Lin, Rin, len, shape, invmapper_L, - invmapper_R); - else if (type == 1) - cytnx::linalg_internal::iMul_internal_dti64(out, Lin, Rin, len, shape, invmapper_L, - invmapper_R); - else if (type == 2) - cytnx::linalg_internal::iSub_internal_dti64(out, Lin, Rin, len, shape, invmapper_L, - invmapper_R); - else if (type == 3) - cytnx::linalg_internal::iDiv_internal_dti64(out, Lin, Rin, len, shape, invmapper_L, - 
invmapper_R); - } - void iArithmetic_internal_dtu32( - boost::intrusive_ptr &out, boost::intrusive_ptr &Lin, - boost::intrusive_ptr &Rin, const unsigned long long &len, - const std::vector &shape, const std::vector &invmapper_L, - const std::vector &invmapper_R, const char &type) { - if (type == 0) - cytnx::linalg_internal::iAdd_internal_dtu32(out, Lin, Rin, len, shape, invmapper_L, - invmapper_R); - else if (type == 1) - cytnx::linalg_internal::iMul_internal_dtu32(out, Lin, Rin, len, shape, invmapper_L, - invmapper_R); - else if (type == 2) - cytnx::linalg_internal::iSub_internal_dtu32(out, Lin, Rin, len, shape, invmapper_L, - invmapper_R); - else if (type == 3) - cytnx::linalg_internal::iDiv_internal_dtu32(out, Lin, Rin, len, shape, invmapper_L, - invmapper_R); - } - void iArithmetic_internal_dti32( - boost::intrusive_ptr &out, boost::intrusive_ptr &Lin, - boost::intrusive_ptr &Rin, const unsigned long long &len, - const std::vector &shape, const std::vector &invmapper_L, - const std::vector &invmapper_R, const char &type) { - if (type == 0) - cytnx::linalg_internal::iAdd_internal_dti32(out, Lin, Rin, len, shape, invmapper_L, - invmapper_R); - else if (type == 1) - cytnx::linalg_internal::iMul_internal_dti32(out, Lin, Rin, len, shape, invmapper_L, - invmapper_R); - else if (type == 2) - cytnx::linalg_internal::iSub_internal_dti32(out, Lin, Rin, len, shape, invmapper_L, - invmapper_R); - else if (type == 3) - cytnx::linalg_internal::iDiv_internal_dti32(out, Lin, Rin, len, shape, invmapper_L, - invmapper_R); - } - void iArithmetic_internal_dti16( - boost::intrusive_ptr &out, boost::intrusive_ptr &Lin, - boost::intrusive_ptr &Rin, const unsigned long long &len, - const std::vector &shape, const std::vector &invmapper_L, - const std::vector &invmapper_R, const char &type) { - if (type == 0) - cytnx::linalg_internal::iAdd_internal_dti16(out, Lin, Rin, len, shape, invmapper_L, - invmapper_R); - else if (type == 1) - cytnx::linalg_internal::iMul_internal_dti16(out, Lin, 
Rin, len, shape, invmapper_L, - invmapper_R); - else if (type == 2) - cytnx::linalg_internal::iSub_internal_dti16(out, Lin, Rin, len, shape, invmapper_L, - invmapper_R); - else if (type == 3) - cytnx::linalg_internal::iDiv_internal_dti16(out, Lin, Rin, len, shape, invmapper_L, - invmapper_R); - } - void iArithmetic_internal_dtu16( - boost::intrusive_ptr &out, boost::intrusive_ptr &Lin, - boost::intrusive_ptr &Rin, const unsigned long long &len, - const std::vector &shape, const std::vector &invmapper_L, - const std::vector &invmapper_R, const char &type) { - if (type == 0) - cytnx::linalg_internal::iAdd_internal_dtu16(out, Lin, Rin, len, shape, invmapper_L, - invmapper_R); - else if (type == 1) - cytnx::linalg_internal::iMul_internal_dtu16(out, Lin, Rin, len, shape, invmapper_L, - invmapper_R); - else if (type == 2) - cytnx::linalg_internal::iSub_internal_dtu16(out, Lin, Rin, len, shape, invmapper_L, - invmapper_R); - else if (type == 3) - cytnx::linalg_internal::iDiv_internal_dtu16(out, Lin, Rin, len, shape, invmapper_L, - invmapper_R); - } - void iArithmetic_internal_dtb(boost::intrusive_ptr &out, - boost::intrusive_ptr &Lin, - boost::intrusive_ptr &Rin, - const unsigned long long &len, - const std::vector &shape, - const std::vector &invmapper_L, - const std::vector &invmapper_R, const char &type) { - if (type == 0) - cytnx::linalg_internal::iAdd_internal_dtb(out, Lin, Rin, len, shape, invmapper_L, - invmapper_R); - else if (type == 1) - cytnx::linalg_internal::iMul_internal_dtb(out, Lin, Rin, len, shape, invmapper_L, - invmapper_R); - else if (type == 2) - cytnx::linalg_internal::iSub_internal_dtb(out, Lin, Rin, len, shape, invmapper_L, - invmapper_R); - else if (type == 3) - cytnx::linalg_internal::iDiv_internal_dtb(out, Lin, Rin, len, shape, invmapper_L, - invmapper_R); - } - //---------------------- - void iArithmetic_internal_ftcd(boost::intrusive_ptr &out, - boost::intrusive_ptr &Lin, - boost::intrusive_ptr &Rin, - const unsigned long long &len, - const 
std::vector &shape, - const std::vector &invmapper_L, - const std::vector &invmapper_R, const char &type) { - if (type == 0) - cytnx::linalg_internal::iAdd_internal_ftcd(out, Lin, Rin, len, shape, invmapper_L, - invmapper_R); - else if (type == 1) - cytnx::linalg_internal::iMul_internal_ftcd(out, Lin, Rin, len, shape, invmapper_L, - invmapper_R); - else if (type == 2) - cytnx::linalg_internal::iSub_internal_ftcd(out, Lin, Rin, len, shape, invmapper_L, - invmapper_R); - else if (type == 3) - cytnx::linalg_internal::iDiv_internal_ftcd(out, Lin, Rin, len, shape, invmapper_L, - invmapper_R); - } - void iArithmetic_internal_ftcf(boost::intrusive_ptr &out, - boost::intrusive_ptr &Lin, - boost::intrusive_ptr &Rin, - const unsigned long long &len, - const std::vector &shape, - const std::vector &invmapper_L, - const std::vector &invmapper_R, const char &type) { - if (type == 0) - cytnx::linalg_internal::iAdd_internal_ftcf(out, Lin, Rin, len, shape, invmapper_L, - invmapper_R); - else if (type == 1) - cytnx::linalg_internal::iMul_internal_ftcf(out, Lin, Rin, len, shape, invmapper_L, - invmapper_R); - else if (type == 2) - cytnx::linalg_internal::iSub_internal_ftcf(out, Lin, Rin, len, shape, invmapper_L, - invmapper_R); - else if (type == 3) - cytnx::linalg_internal::iDiv_internal_ftcf(out, Lin, Rin, len, shape, invmapper_L, - invmapper_R); - } - void iArithmetic_internal_ftd(boost::intrusive_ptr &out, - boost::intrusive_ptr &Lin, - boost::intrusive_ptr &Rin, - const unsigned long long &len, - const std::vector &shape, - const std::vector &invmapper_L, - const std::vector &invmapper_R, const char &type) { - if (type == 0) - cytnx::linalg_internal::iAdd_internal_ftd(out, Lin, Rin, len, shape, invmapper_L, - invmapper_R); - else if (type == 1) - cytnx::linalg_internal::iMul_internal_ftd(out, Lin, Rin, len, shape, invmapper_L, - invmapper_R); - else if (type == 2) - cytnx::linalg_internal::iSub_internal_ftd(out, Lin, Rin, len, shape, invmapper_L, - invmapper_R); - else if (type 
== 3) - cytnx::linalg_internal::iDiv_internal_ftd(out, Lin, Rin, len, shape, invmapper_L, - invmapper_R); - } - void iArithmetic_internal_ftf(boost::intrusive_ptr &out, - boost::intrusive_ptr &Lin, - boost::intrusive_ptr &Rin, - const unsigned long long &len, - const std::vector &shape, - const std::vector &invmapper_L, - const std::vector &invmapper_R, const char &type) { - if (type == 0) - cytnx::linalg_internal::iAdd_internal_ftf(out, Lin, Rin, len, shape, invmapper_L, - invmapper_R); - else if (type == 1) - cytnx::linalg_internal::iMul_internal_ftf(out, Lin, Rin, len, shape, invmapper_L, - invmapper_R); - else if (type == 2) - cytnx::linalg_internal::iSub_internal_ftf(out, Lin, Rin, len, shape, invmapper_L, - invmapper_R); - else if (type == 3) - cytnx::linalg_internal::iDiv_internal_ftf(out, Lin, Rin, len, shape, invmapper_L, - invmapper_R); - } - void iArithmetic_internal_ftu64( - boost::intrusive_ptr &out, boost::intrusive_ptr &Lin, - boost::intrusive_ptr &Rin, const unsigned long long &len, - const std::vector &shape, const std::vector &invmapper_L, - const std::vector &invmapper_R, const char &type) { - if (type == 0) - cytnx::linalg_internal::iAdd_internal_ftu64(out, Lin, Rin, len, shape, invmapper_L, - invmapper_R); - else if (type == 1) - cytnx::linalg_internal::iMul_internal_ftu64(out, Lin, Rin, len, shape, invmapper_L, - invmapper_R); - else if (type == 2) - cytnx::linalg_internal::iSub_internal_ftu64(out, Lin, Rin, len, shape, invmapper_L, - invmapper_R); - else if (type == 3) - cytnx::linalg_internal::iDiv_internal_ftu64(out, Lin, Rin, len, shape, invmapper_L, - invmapper_R); - } - void iArithmetic_internal_fti64( - boost::intrusive_ptr &out, boost::intrusive_ptr &Lin, - boost::intrusive_ptr &Rin, const unsigned long long &len, - const std::vector &shape, const std::vector &invmapper_L, - const std::vector &invmapper_R, const char &type) { - if (type == 0) - cytnx::linalg_internal::iAdd_internal_fti64(out, Lin, Rin, len, shape, invmapper_L, - 
invmapper_R); - else if (type == 1) - cytnx::linalg_internal::iMul_internal_fti64(out, Lin, Rin, len, shape, invmapper_L, - invmapper_R); - else if (type == 2) - cytnx::linalg_internal::iSub_internal_fti64(out, Lin, Rin, len, shape, invmapper_L, - invmapper_R); - else if (type == 3) - cytnx::linalg_internal::iDiv_internal_fti64(out, Lin, Rin, len, shape, invmapper_L, - invmapper_R); - } - void iArithmetic_internal_ftu32( - boost::intrusive_ptr &out, boost::intrusive_ptr &Lin, - boost::intrusive_ptr &Rin, const unsigned long long &len, - const std::vector &shape, const std::vector &invmapper_L, - const std::vector &invmapper_R, const char &type) { - if (type == 0) - cytnx::linalg_internal::iAdd_internal_ftu32(out, Lin, Rin, len, shape, invmapper_L, - invmapper_R); - else if (type == 1) - cytnx::linalg_internal::iMul_internal_ftu32(out, Lin, Rin, len, shape, invmapper_L, - invmapper_R); - else if (type == 2) - cytnx::linalg_internal::iSub_internal_ftu32(out, Lin, Rin, len, shape, invmapper_L, - invmapper_R); - else if (type == 3) - cytnx::linalg_internal::iDiv_internal_ftu32(out, Lin, Rin, len, shape, invmapper_L, - invmapper_R); - } - void iArithmetic_internal_fti32( - boost::intrusive_ptr &out, boost::intrusive_ptr &Lin, - boost::intrusive_ptr &Rin, const unsigned long long &len, - const std::vector &shape, const std::vector &invmapper_L, - const std::vector &invmapper_R, const char &type) { - if (type == 0) - cytnx::linalg_internal::iAdd_internal_fti32(out, Lin, Rin, len, shape, invmapper_L, - invmapper_R); - else if (type == 1) - cytnx::linalg_internal::iMul_internal_fti32(out, Lin, Rin, len, shape, invmapper_L, - invmapper_R); - else if (type == 2) - cytnx::linalg_internal::iSub_internal_fti32(out, Lin, Rin, len, shape, invmapper_L, - invmapper_R); - else if (type == 3) - cytnx::linalg_internal::iDiv_internal_fti32(out, Lin, Rin, len, shape, invmapper_L, - invmapper_R); - } - void iArithmetic_internal_fti16( - boost::intrusive_ptr &out, boost::intrusive_ptr 
&Lin, - boost::intrusive_ptr &Rin, const unsigned long long &len, - const std::vector &shape, const std::vector &invmapper_L, - const std::vector &invmapper_R, const char &type) { - if (type == 0) - cytnx::linalg_internal::iAdd_internal_fti16(out, Lin, Rin, len, shape, invmapper_L, - invmapper_R); - else if (type == 1) - cytnx::linalg_internal::iMul_internal_fti16(out, Lin, Rin, len, shape, invmapper_L, - invmapper_R); - else if (type == 2) - cytnx::linalg_internal::iSub_internal_fti16(out, Lin, Rin, len, shape, invmapper_L, - invmapper_R); - else if (type == 3) - cytnx::linalg_internal::iDiv_internal_fti16(out, Lin, Rin, len, shape, invmapper_L, - invmapper_R); - } - void iArithmetic_internal_ftu16( - boost::intrusive_ptr &out, boost::intrusive_ptr &Lin, - boost::intrusive_ptr &Rin, const unsigned long long &len, - const std::vector &shape, const std::vector &invmapper_L, - const std::vector &invmapper_R, const char &type) { - if (type == 0) - cytnx::linalg_internal::iAdd_internal_ftu16(out, Lin, Rin, len, shape, invmapper_L, - invmapper_R); - else if (type == 1) - cytnx::linalg_internal::iMul_internal_ftu16(out, Lin, Rin, len, shape, invmapper_L, - invmapper_R); - else if (type == 2) - cytnx::linalg_internal::iSub_internal_ftu16(out, Lin, Rin, len, shape, invmapper_L, - invmapper_R); - else if (type == 3) - cytnx::linalg_internal::iDiv_internal_ftu16(out, Lin, Rin, len, shape, invmapper_L, - invmapper_R); - } - void iArithmetic_internal_ftb(boost::intrusive_ptr &out, - boost::intrusive_ptr &Lin, - boost::intrusive_ptr &Rin, - const unsigned long long &len, - const std::vector &shape, - const std::vector &invmapper_L, - const std::vector &invmapper_R, const char &type) { - if (type == 0) - cytnx::linalg_internal::iAdd_internal_ftb(out, Lin, Rin, len, shape, invmapper_L, - invmapper_R); - else if (type == 1) - cytnx::linalg_internal::iMul_internal_ftb(out, Lin, Rin, len, shape, invmapper_L, - invmapper_R); - else if (type == 2) - 
cytnx::linalg_internal::iSub_internal_ftb(out, Lin, Rin, len, shape, invmapper_L, - invmapper_R); - else if (type == 3) - cytnx::linalg_internal::iDiv_internal_ftb(out, Lin, Rin, len, shape, invmapper_L, - invmapper_R); - } - - //----------------------- - void iArithmetic_internal_u64tcd( - boost::intrusive_ptr &out, boost::intrusive_ptr &Lin, - boost::intrusive_ptr &Rin, const unsigned long long &len, - const std::vector &shape, const std::vector &invmapper_L, - const std::vector &invmapper_R, const char &type) { - if (type == 0) - cytnx::linalg_internal::iAdd_internal_u64tcd(out, Lin, Rin, len, shape, invmapper_L, - invmapper_R); - else if (type == 1) - cytnx::linalg_internal::iMul_internal_u64tcd(out, Lin, Rin, len, shape, invmapper_L, - invmapper_R); - else if (type == 2) - cytnx::linalg_internal::iSub_internal_u64tcd(out, Lin, Rin, len, shape, invmapper_L, - invmapper_R); - else if (type == 3) - cytnx::linalg_internal::iDiv_internal_u64tcd(out, Lin, Rin, len, shape, invmapper_L, - invmapper_R); - } - void iArithmetic_internal_u64tcf( - boost::intrusive_ptr &out, boost::intrusive_ptr &Lin, - boost::intrusive_ptr &Rin, const unsigned long long &len, - const std::vector &shape, const std::vector &invmapper_L, - const std::vector &invmapper_R, const char &type) { - if (type == 0) - cytnx::linalg_internal::iAdd_internal_u64tcf(out, Lin, Rin, len, shape, invmapper_L, - invmapper_R); - else if (type == 1) - cytnx::linalg_internal::iMul_internal_u64tcf(out, Lin, Rin, len, shape, invmapper_L, - invmapper_R); - else if (type == 2) - cytnx::linalg_internal::iSub_internal_u64tcf(out, Lin, Rin, len, shape, invmapper_L, - invmapper_R); - else if (type == 3) - cytnx::linalg_internal::iDiv_internal_u64tcf(out, Lin, Rin, len, shape, invmapper_L, - invmapper_R); - } - void iArithmetic_internal_u64td( - boost::intrusive_ptr &out, boost::intrusive_ptr &Lin, - boost::intrusive_ptr &Rin, const unsigned long long &len, - const std::vector &shape, const std::vector &invmapper_L, - 
const std::vector &invmapper_R, const char &type) { - if (type == 0) - cytnx::linalg_internal::iAdd_internal_u64td(out, Lin, Rin, len, shape, invmapper_L, - invmapper_R); - else if (type == 1) - cytnx::linalg_internal::iMul_internal_u64td(out, Lin, Rin, len, shape, invmapper_L, - invmapper_R); - else if (type == 2) - cytnx::linalg_internal::iSub_internal_u64td(out, Lin, Rin, len, shape, invmapper_L, - invmapper_R); - else if (type == 3) - cytnx::linalg_internal::iDiv_internal_u64td(out, Lin, Rin, len, shape, invmapper_L, - invmapper_R); - } - void iArithmetic_internal_u64tf( - boost::intrusive_ptr &out, boost::intrusive_ptr &Lin, - boost::intrusive_ptr &Rin, const unsigned long long &len, - const std::vector &shape, const std::vector &invmapper_L, - const std::vector &invmapper_R, const char &type) { - if (type == 0) - cytnx::linalg_internal::iAdd_internal_u64tf(out, Lin, Rin, len, shape, invmapper_L, - invmapper_R); - else if (type == 1) - cytnx::linalg_internal::iMul_internal_u64tf(out, Lin, Rin, len, shape, invmapper_L, - invmapper_R); - else if (type == 2) - cytnx::linalg_internal::iSub_internal_u64tf(out, Lin, Rin, len, shape, invmapper_L, - invmapper_R); - else if (type == 3) - cytnx::linalg_internal::iDiv_internal_u64tf(out, Lin, Rin, len, shape, invmapper_L, - invmapper_R); - } - void iArithmetic_internal_u64tu64( - boost::intrusive_ptr &out, boost::intrusive_ptr &Lin, - boost::intrusive_ptr &Rin, const unsigned long long &len, - const std::vector &shape, const std::vector &invmapper_L, - const std::vector &invmapper_R, const char &type) { - if (type == 0) - cytnx::linalg_internal::iAdd_internal_u64tu64(out, Lin, Rin, len, shape, invmapper_L, - invmapper_R); - else if (type == 1) - cytnx::linalg_internal::iMul_internal_u64tu64(out, Lin, Rin, len, shape, invmapper_L, - invmapper_R); - else if (type == 2) - cytnx::linalg_internal::iSub_internal_u64tu64(out, Lin, Rin, len, shape, invmapper_L, - invmapper_R); - else if (type == 3) - 
cytnx::linalg_internal::iDiv_internal_u64tu64(out, Lin, Rin, len, shape, invmapper_L, - invmapper_R); - } - void iArithmetic_internal_u64ti64( - boost::intrusive_ptr &out, boost::intrusive_ptr &Lin, - boost::intrusive_ptr &Rin, const unsigned long long &len, - const std::vector &shape, const std::vector &invmapper_L, - const std::vector &invmapper_R, const char &type) { - if (type == 0) - cytnx::linalg_internal::iAdd_internal_u64ti64(out, Lin, Rin, len, shape, invmapper_L, - invmapper_R); - else if (type == 1) - cytnx::linalg_internal::iMul_internal_u64ti64(out, Lin, Rin, len, shape, invmapper_L, - invmapper_R); - else if (type == 2) - cytnx::linalg_internal::iSub_internal_u64ti64(out, Lin, Rin, len, shape, invmapper_L, - invmapper_R); - else if (type == 3) - cytnx::linalg_internal::iDiv_internal_u64ti64(out, Lin, Rin, len, shape, invmapper_L, - invmapper_R); - } - void iArithmetic_internal_u64tu32( - boost::intrusive_ptr &out, boost::intrusive_ptr &Lin, - boost::intrusive_ptr &Rin, const unsigned long long &len, - const std::vector &shape, const std::vector &invmapper_L, - const std::vector &invmapper_R, const char &type) { - if (type == 0) - cytnx::linalg_internal::iAdd_internal_u64tu32(out, Lin, Rin, len, shape, invmapper_L, - invmapper_R); - else if (type == 1) - cytnx::linalg_internal::iMul_internal_u64tu32(out, Lin, Rin, len, shape, invmapper_L, - invmapper_R); - else if (type == 2) - cytnx::linalg_internal::iSub_internal_u64tu32(out, Lin, Rin, len, shape, invmapper_L, - invmapper_R); - else if (type == 3) - cytnx::linalg_internal::iDiv_internal_u64tu32(out, Lin, Rin, len, shape, invmapper_L, - invmapper_R); - } - void iArithmetic_internal_u64ti32( - boost::intrusive_ptr &out, boost::intrusive_ptr &Lin, - boost::intrusive_ptr &Rin, const unsigned long long &len, - const std::vector &shape, const std::vector &invmapper_L, - const std::vector &invmapper_R, const char &type) { - if (type == 0) - cytnx::linalg_internal::iAdd_internal_u64ti32(out, Lin, Rin, len, 
shape, invmapper_L, - invmapper_R); - else if (type == 1) - cytnx::linalg_internal::iMul_internal_u64ti32(out, Lin, Rin, len, shape, invmapper_L, - invmapper_R); - else if (type == 2) - cytnx::linalg_internal::iSub_internal_u64ti32(out, Lin, Rin, len, shape, invmapper_L, - invmapper_R); - else if (type == 3) - cytnx::linalg_internal::iDiv_internal_u64ti32(out, Lin, Rin, len, shape, invmapper_L, - invmapper_R); - } - void iArithmetic_internal_u64ti16( - boost::intrusive_ptr &out, boost::intrusive_ptr &Lin, - boost::intrusive_ptr &Rin, const unsigned long long &len, - const std::vector &shape, const std::vector &invmapper_L, - const std::vector &invmapper_R, const char &type) { - if (type == 0) - cytnx::linalg_internal::iAdd_internal_u64ti16(out, Lin, Rin, len, shape, invmapper_L, - invmapper_R); - else if (type == 1) - cytnx::linalg_internal::iMul_internal_u64ti16(out, Lin, Rin, len, shape, invmapper_L, - invmapper_R); - else if (type == 2) - cytnx::linalg_internal::iSub_internal_u64ti16(out, Lin, Rin, len, shape, invmapper_L, - invmapper_R); - else if (type == 3) - cytnx::linalg_internal::iDiv_internal_u64ti16(out, Lin, Rin, len, shape, invmapper_L, - invmapper_R); - } - void iArithmetic_internal_u64tu16( - boost::intrusive_ptr &out, boost::intrusive_ptr &Lin, - boost::intrusive_ptr &Rin, const unsigned long long &len, - const std::vector &shape, const std::vector &invmapper_L, - const std::vector &invmapper_R, const char &type) { - if (type == 0) - cytnx::linalg_internal::iAdd_internal_u64tu16(out, Lin, Rin, len, shape, invmapper_L, - invmapper_R); - else if (type == 1) - cytnx::linalg_internal::iMul_internal_u64tu16(out, Lin, Rin, len, shape, invmapper_L, - invmapper_R); - else if (type == 2) - cytnx::linalg_internal::iSub_internal_u64tu16(out, Lin, Rin, len, shape, invmapper_L, - invmapper_R); - else if (type == 3) - cytnx::linalg_internal::iDiv_internal_u64tu16(out, Lin, Rin, len, shape, invmapper_L, - invmapper_R); - } - void iArithmetic_internal_u64tb( - 
boost::intrusive_ptr &out, boost::intrusive_ptr &Lin, - boost::intrusive_ptr &Rin, const unsigned long long &len, - const std::vector &shape, const std::vector &invmapper_L, - const std::vector &invmapper_R, const char &type) { - if (type == 0) - cytnx::linalg_internal::iAdd_internal_u64tb(out, Lin, Rin, len, shape, invmapper_L, - invmapper_R); - else if (type == 1) - cytnx::linalg_internal::iMul_internal_u64tb(out, Lin, Rin, len, shape, invmapper_L, - invmapper_R); - else if (type == 2) - cytnx::linalg_internal::iSub_internal_u64tb(out, Lin, Rin, len, shape, invmapper_L, - invmapper_R); - else if (type == 3) - cytnx::linalg_internal::iDiv_internal_u64tb(out, Lin, Rin, len, shape, invmapper_L, - invmapper_R); - } - //---------------------- - void iArithmetic_internal_i64tcd( - boost::intrusive_ptr &out, boost::intrusive_ptr &Lin, - boost::intrusive_ptr &Rin, const unsigned long long &len, - const std::vector &shape, const std::vector &invmapper_L, - const std::vector &invmapper_R, const char &type) { - if (type == 0) - cytnx::linalg_internal::iAdd_internal_i64tcd(out, Lin, Rin, len, shape, invmapper_L, - invmapper_R); - else if (type == 1) - cytnx::linalg_internal::iMul_internal_i64tcd(out, Lin, Rin, len, shape, invmapper_L, - invmapper_R); - else if (type == 2) - cytnx::linalg_internal::iSub_internal_i64tcd(out, Lin, Rin, len, shape, invmapper_L, - invmapper_R); - else if (type == 3) - cytnx::linalg_internal::iDiv_internal_i64tcd(out, Lin, Rin, len, shape, invmapper_L, - invmapper_R); - } - void iArithmetic_internal_i64tcf( - boost::intrusive_ptr &out, boost::intrusive_ptr &Lin, - boost::intrusive_ptr &Rin, const unsigned long long &len, - const std::vector &shape, const std::vector &invmapper_L, - const std::vector &invmapper_R, const char &type) { - if (type == 0) - cytnx::linalg_internal::iAdd_internal_i64tcf(out, Lin, Rin, len, shape, invmapper_L, - invmapper_R); - else if (type == 1) - cytnx::linalg_internal::iMul_internal_i64tcf(out, Lin, Rin, len, shape, 
invmapper_L, - invmapper_R); - else if (type == 2) - cytnx::linalg_internal::iSub_internal_i64tcf(out, Lin, Rin, len, shape, invmapper_L, - invmapper_R); - else if (type == 3) - cytnx::linalg_internal::iDiv_internal_i64tcf(out, Lin, Rin, len, shape, invmapper_L, - invmapper_R); - } - void iArithmetic_internal_i64td( - boost::intrusive_ptr &out, boost::intrusive_ptr &Lin, - boost::intrusive_ptr &Rin, const unsigned long long &len, - const std::vector &shape, const std::vector &invmapper_L, - const std::vector &invmapper_R, const char &type) { - if (type == 0) - cytnx::linalg_internal::iAdd_internal_i64td(out, Lin, Rin, len, shape, invmapper_L, - invmapper_R); - else if (type == 1) - cytnx::linalg_internal::iMul_internal_i64td(out, Lin, Rin, len, shape, invmapper_L, - invmapper_R); - else if (type == 2) - cytnx::linalg_internal::iSub_internal_i64td(out, Lin, Rin, len, shape, invmapper_L, - invmapper_R); - else if (type == 3) - cytnx::linalg_internal::iDiv_internal_i64td(out, Lin, Rin, len, shape, invmapper_L, - invmapper_R); - } - void iArithmetic_internal_i64tf( - boost::intrusive_ptr &out, boost::intrusive_ptr &Lin, - boost::intrusive_ptr &Rin, const unsigned long long &len, - const std::vector &shape, const std::vector &invmapper_L, - const std::vector &invmapper_R, const char &type) { - if (type == 0) - cytnx::linalg_internal::iAdd_internal_i64tf(out, Lin, Rin, len, shape, invmapper_L, - invmapper_R); - else if (type == 1) - cytnx::linalg_internal::iMul_internal_i64tf(out, Lin, Rin, len, shape, invmapper_L, - invmapper_R); - else if (type == 2) - cytnx::linalg_internal::iSub_internal_i64tf(out, Lin, Rin, len, shape, invmapper_L, - invmapper_R); - else if (type == 3) - cytnx::linalg_internal::iDiv_internal_i64tf(out, Lin, Rin, len, shape, invmapper_L, - invmapper_R); - } - void iArithmetic_internal_i64tu64( - boost::intrusive_ptr &out, boost::intrusive_ptr &Lin, - boost::intrusive_ptr &Rin, const unsigned long long &len, - const std::vector &shape, const 
std::vector &invmapper_L, - const std::vector &invmapper_R, const char &type) { - if (type == 0) - cytnx::linalg_internal::iAdd_internal_i64tu64(out, Lin, Rin, len, shape, invmapper_L, - invmapper_R); - else if (type == 1) - cytnx::linalg_internal::iMul_internal_i64tu64(out, Lin, Rin, len, shape, invmapper_L, - invmapper_R); - else if (type == 2) - cytnx::linalg_internal::iSub_internal_i64tu64(out, Lin, Rin, len, shape, invmapper_L, - invmapper_R); - else if (type == 3) - cytnx::linalg_internal::iDiv_internal_i64tu64(out, Lin, Rin, len, shape, invmapper_L, - invmapper_R); - } - void iArithmetic_internal_i64ti64( - boost::intrusive_ptr &out, boost::intrusive_ptr &Lin, - boost::intrusive_ptr &Rin, const unsigned long long &len, - const std::vector &shape, const std::vector &invmapper_L, - const std::vector &invmapper_R, const char &type) { - if (type == 0) - cytnx::linalg_internal::iAdd_internal_i64ti64(out, Lin, Rin, len, shape, invmapper_L, - invmapper_R); - else if (type == 1) - cytnx::linalg_internal::iMul_internal_i64ti64(out, Lin, Rin, len, shape, invmapper_L, - invmapper_R); - else if (type == 2) - cytnx::linalg_internal::iSub_internal_i64ti64(out, Lin, Rin, len, shape, invmapper_L, - invmapper_R); - else if (type == 3) - cytnx::linalg_internal::iDiv_internal_i64ti64(out, Lin, Rin, len, shape, invmapper_L, - invmapper_R); - } - void iArithmetic_internal_i64tu32( - boost::intrusive_ptr &out, boost::intrusive_ptr &Lin, - boost::intrusive_ptr &Rin, const unsigned long long &len, - const std::vector &shape, const std::vector &invmapper_L, - const std::vector &invmapper_R, const char &type) { - if (type == 0) - cytnx::linalg_internal::iAdd_internal_i64tu32(out, Lin, Rin, len, shape, invmapper_L, - invmapper_R); - else if (type == 1) - cytnx::linalg_internal::iMul_internal_i64tu32(out, Lin, Rin, len, shape, invmapper_L, - invmapper_R); - else if (type == 2) - cytnx::linalg_internal::iSub_internal_i64tu32(out, Lin, Rin, len, shape, invmapper_L, - invmapper_R); - else 
if (type == 3) - cytnx::linalg_internal::iDiv_internal_i64tu32(out, Lin, Rin, len, shape, invmapper_L, - invmapper_R); - } - void iArithmetic_internal_i64ti32( - boost::intrusive_ptr &out, boost::intrusive_ptr &Lin, - boost::intrusive_ptr &Rin, const unsigned long long &len, - const std::vector &shape, const std::vector &invmapper_L, - const std::vector &invmapper_R, const char &type) { - if (type == 0) - cytnx::linalg_internal::iAdd_internal_i64ti32(out, Lin, Rin, len, shape, invmapper_L, - invmapper_R); - else if (type == 1) - cytnx::linalg_internal::iMul_internal_i64ti32(out, Lin, Rin, len, shape, invmapper_L, - invmapper_R); - else if (type == 2) - cytnx::linalg_internal::iSub_internal_i64ti32(out, Lin, Rin, len, shape, invmapper_L, - invmapper_R); - else if (type == 3) - cytnx::linalg_internal::iDiv_internal_i64ti32(out, Lin, Rin, len, shape, invmapper_L, - invmapper_R); - } - void iArithmetic_internal_i64ti16( - boost::intrusive_ptr &out, boost::intrusive_ptr &Lin, - boost::intrusive_ptr &Rin, const unsigned long long &len, - const std::vector &shape, const std::vector &invmapper_L, - const std::vector &invmapper_R, const char &type) { - if (type == 0) - cytnx::linalg_internal::iAdd_internal_i64ti16(out, Lin, Rin, len, shape, invmapper_L, - invmapper_R); - else if (type == 1) - cytnx::linalg_internal::iMul_internal_i64ti16(out, Lin, Rin, len, shape, invmapper_L, - invmapper_R); - else if (type == 2) - cytnx::linalg_internal::iSub_internal_i64ti16(out, Lin, Rin, len, shape, invmapper_L, - invmapper_R); - } - void iArithmetic_internal_i64tu16( - boost::intrusive_ptr &out, boost::intrusive_ptr &Lin, - boost::intrusive_ptr &Rin, const unsigned long long &len, - const std::vector &shape, const std::vector &invmapper_L, - const std::vector &invmapper_R, const char &type) { - if (type == 0) - cytnx::linalg_internal::iAdd_internal_i64tu16(out, Lin, Rin, len, shape, invmapper_L, - invmapper_R); - else if (type == 1) - cytnx::linalg_internal::iMul_internal_i64tu16(out, 
Lin, Rin, len, shape, invmapper_L, - invmapper_R); - else if (type == 2) - cytnx::linalg_internal::iSub_internal_i64tu16(out, Lin, Rin, len, shape, invmapper_L, - invmapper_R); - else if (type == 3) - cytnx::linalg_internal::iDiv_internal_i64tu16(out, Lin, Rin, len, shape, invmapper_L, - invmapper_R); - } - void iArithmetic_internal_i64tb( - boost::intrusive_ptr &out, boost::intrusive_ptr &Lin, - boost::intrusive_ptr &Rin, const unsigned long long &len, - const std::vector &shape, const std::vector &invmapper_L, - const std::vector &invmapper_R, const char &type) { - if (type == 0) - cytnx::linalg_internal::iAdd_internal_i64tb(out, Lin, Rin, len, shape, invmapper_L, - invmapper_R); - else if (type == 1) - cytnx::linalg_internal::iMul_internal_i64tb(out, Lin, Rin, len, shape, invmapper_L, - invmapper_R); - else if (type == 2) - cytnx::linalg_internal::iSub_internal_i64tb(out, Lin, Rin, len, shape, invmapper_L, - invmapper_R); - else if (type == 3) - cytnx::linalg_internal::iDiv_internal_i64tb(out, Lin, Rin, len, shape, invmapper_L, - invmapper_R); - } - - //------------------- - void iArithmetic_internal_u32tcd( - boost::intrusive_ptr &out, boost::intrusive_ptr &Lin, - boost::intrusive_ptr &Rin, const unsigned long long &len, - const std::vector &shape, const std::vector &invmapper_L, - const std::vector &invmapper_R, const char &type) { - if (type == 0) - cytnx::linalg_internal::iAdd_internal_u32tcd(out, Lin, Rin, len, shape, invmapper_L, - invmapper_R); - else if (type == 1) - cytnx::linalg_internal::iMul_internal_u32tcd(out, Lin, Rin, len, shape, invmapper_L, - invmapper_R); - else if (type == 2) - cytnx::linalg_internal::iSub_internal_u32tcd(out, Lin, Rin, len, shape, invmapper_L, - invmapper_R); - else if (type == 3) - cytnx::linalg_internal::iDiv_internal_u32tcd(out, Lin, Rin, len, shape, invmapper_L, - invmapper_R); - } - void iArithmetic_internal_u32tcf( - boost::intrusive_ptr &out, boost::intrusive_ptr &Lin, - boost::intrusive_ptr &Rin, const unsigned long 
long &len, - const std::vector &shape, const std::vector &invmapper_L, - const std::vector &invmapper_R, const char &type) { - if (type == 0) - cytnx::linalg_internal::iAdd_internal_u32tcf(out, Lin, Rin, len, shape, invmapper_L, - invmapper_R); - else if (type == 1) - cytnx::linalg_internal::iMul_internal_u32tcf(out, Lin, Rin, len, shape, invmapper_L, - invmapper_R); - else if (type == 2) - cytnx::linalg_internal::iSub_internal_u32tcf(out, Lin, Rin, len, shape, invmapper_L, - invmapper_R); - else if (type == 3) - cytnx::linalg_internal::iDiv_internal_u32tcf(out, Lin, Rin, len, shape, invmapper_L, - invmapper_R); - } - void iArithmetic_internal_u32td( - boost::intrusive_ptr &out, boost::intrusive_ptr &Lin, - boost::intrusive_ptr &Rin, const unsigned long long &len, - const std::vector &shape, const std::vector &invmapper_L, - const std::vector &invmapper_R, const char &type) { - if (type == 0) - cytnx::linalg_internal::iAdd_internal_u32td(out, Lin, Rin, len, shape, invmapper_L, - invmapper_R); - else if (type == 1) - cytnx::linalg_internal::iMul_internal_u32td(out, Lin, Rin, len, shape, invmapper_L, - invmapper_R); - else if (type == 2) - cytnx::linalg_internal::iSub_internal_u32td(out, Lin, Rin, len, shape, invmapper_L, - invmapper_R); - else if (type == 3) - cytnx::linalg_internal::iDiv_internal_u32td(out, Lin, Rin, len, shape, invmapper_L, - invmapper_R); - } - void iArithmetic_internal_u32tf( - boost::intrusive_ptr &out, boost::intrusive_ptr &Lin, - boost::intrusive_ptr &Rin, const unsigned long long &len, - const std::vector &shape, const std::vector &invmapper_L, - const std::vector &invmapper_R, const char &type) { - if (type == 0) - cytnx::linalg_internal::iAdd_internal_u32tf(out, Lin, Rin, len, shape, invmapper_L, - invmapper_R); - else if (type == 1) - cytnx::linalg_internal::iMul_internal_u32tf(out, Lin, Rin, len, shape, invmapper_L, - invmapper_R); - else if (type == 2) - cytnx::linalg_internal::iSub_internal_u32tf(out, Lin, Rin, len, shape, invmapper_L, 
- invmapper_R); - else if (type == 3) - cytnx::linalg_internal::iDiv_internal_u32tf(out, Lin, Rin, len, shape, invmapper_L, - invmapper_R); - } - void iArithmetic_internal_u32tu64( - boost::intrusive_ptr &out, boost::intrusive_ptr &Lin, - boost::intrusive_ptr &Rin, const unsigned long long &len, - const std::vector &shape, const std::vector &invmapper_L, - const std::vector &invmapper_R, const char &type) { - if (type == 0) - cytnx::linalg_internal::iAdd_internal_u32tu64(out, Lin, Rin, len, shape, invmapper_L, - invmapper_R); - else if (type == 1) - cytnx::linalg_internal::iMul_internal_u32tu64(out, Lin, Rin, len, shape, invmapper_L, - invmapper_R); - else if (type == 2) - cytnx::linalg_internal::iSub_internal_u32tu64(out, Lin, Rin, len, shape, invmapper_L, - invmapper_R); - else if (type == 3) - cytnx::linalg_internal::iDiv_internal_u32tu64(out, Lin, Rin, len, shape, invmapper_L, - invmapper_R); - } - void iArithmetic_internal_u32ti64( - boost::intrusive_ptr &out, boost::intrusive_ptr &Lin, - boost::intrusive_ptr &Rin, const unsigned long long &len, - const std::vector &shape, const std::vector &invmapper_L, - const std::vector &invmapper_R, const char &type) { - if (type == 0) - cytnx::linalg_internal::iAdd_internal_u32ti64(out, Lin, Rin, len, shape, invmapper_L, - invmapper_R); - else if (type == 1) - cytnx::linalg_internal::iMul_internal_u32ti64(out, Lin, Rin, len, shape, invmapper_L, - invmapper_R); - else if (type == 2) - cytnx::linalg_internal::iSub_internal_u32ti64(out, Lin, Rin, len, shape, invmapper_L, - invmapper_R); - else if (type == 3) - cytnx::linalg_internal::iDiv_internal_u32ti64(out, Lin, Rin, len, shape, invmapper_L, - invmapper_R); - } - void iArithmetic_internal_u32tu32( - boost::intrusive_ptr &out, boost::intrusive_ptr &Lin, - boost::intrusive_ptr &Rin, const unsigned long long &len, - const std::vector &shape, const std::vector &invmapper_L, - const std::vector &invmapper_R, const char &type) { - if (type == 0) - 
cytnx::linalg_internal::iAdd_internal_u32tu32(out, Lin, Rin, len, shape, invmapper_L, - invmapper_R); - else if (type == 1) - cytnx::linalg_internal::iMul_internal_u32tu32(out, Lin, Rin, len, shape, invmapper_L, - invmapper_R); - else if (type == 2) - cytnx::linalg_internal::iSub_internal_u32tu32(out, Lin, Rin, len, shape, invmapper_L, - invmapper_R); - else if (type == 3) - cytnx::linalg_internal::iDiv_internal_u32tu32(out, Lin, Rin, len, shape, invmapper_L, - invmapper_R); - } - void iArithmetic_internal_u32ti32( - boost::intrusive_ptr &out, boost::intrusive_ptr &Lin, - boost::intrusive_ptr &Rin, const unsigned long long &len, - const std::vector &shape, const std::vector &invmapper_L, - const std::vector &invmapper_R, const char &type) { - if (type == 0) - cytnx::linalg_internal::iAdd_internal_u32ti32(out, Lin, Rin, len, shape, invmapper_L, - invmapper_R); - else if (type == 1) - cytnx::linalg_internal::iMul_internal_u32ti32(out, Lin, Rin, len, shape, invmapper_L, - invmapper_R); - else if (type == 2) - cytnx::linalg_internal::iSub_internal_u32ti32(out, Lin, Rin, len, shape, invmapper_L, - invmapper_R); - else if (type == 3) - cytnx::linalg_internal::iDiv_internal_u32ti32(out, Lin, Rin, len, shape, invmapper_L, - invmapper_R); - } - void iArithmetic_internal_u32ti16( - boost::intrusive_ptr &out, boost::intrusive_ptr &Lin, - boost::intrusive_ptr &Rin, const unsigned long long &len, - const std::vector &shape, const std::vector &invmapper_L, - const std::vector &invmapper_R, const char &type) { - if (type == 0) - cytnx::linalg_internal::iAdd_internal_u32ti16(out, Lin, Rin, len, shape, invmapper_L, - invmapper_R); - else if (type == 1) - cytnx::linalg_internal::iMul_internal_u32ti16(out, Lin, Rin, len, shape, invmapper_L, - invmapper_R); - else if (type == 2) - cytnx::linalg_internal::iSub_internal_u32ti16(out, Lin, Rin, len, shape, invmapper_L, - invmapper_R); - else if (type == 3) - cytnx::linalg_internal::iDiv_internal_u32ti16(out, Lin, Rin, len, shape, 
invmapper_L, - invmapper_R); - } - void iArithmetic_internal_u32tu16( - boost::intrusive_ptr &out, boost::intrusive_ptr &Lin, - boost::intrusive_ptr &Rin, const unsigned long long &len, - const std::vector &shape, const std::vector &invmapper_L, - const std::vector &invmapper_R, const char &type) { - if (type == 0) - cytnx::linalg_internal::iAdd_internal_u32tu16(out, Lin, Rin, len, shape, invmapper_L, - invmapper_R); - else if (type == 1) - cytnx::linalg_internal::iMul_internal_u32tu16(out, Lin, Rin, len, shape, invmapper_L, - invmapper_R); - else if (type == 2) - cytnx::linalg_internal::iSub_internal_u32tu16(out, Lin, Rin, len, shape, invmapper_L, - invmapper_R); - else if (type == 3) - cytnx::linalg_internal::iDiv_internal_u32tu16(out, Lin, Rin, len, shape, invmapper_L, - invmapper_R); - } - void iArithmetic_internal_u32tb( - boost::intrusive_ptr &out, boost::intrusive_ptr &Lin, - boost::intrusive_ptr &Rin, const unsigned long long &len, - const std::vector &shape, const std::vector &invmapper_L, - const std::vector &invmapper_R, const char &type) { - if (type == 0) - cytnx::linalg_internal::iAdd_internal_u32tb(out, Lin, Rin, len, shape, invmapper_L, - invmapper_R); - else if (type == 1) - cytnx::linalg_internal::iMul_internal_u32tb(out, Lin, Rin, len, shape, invmapper_L, - invmapper_R); - else if (type == 2) - cytnx::linalg_internal::iSub_internal_u32tb(out, Lin, Rin, len, shape, invmapper_L, - invmapper_R); - else if (type == 3) - cytnx::linalg_internal::iDiv_internal_u32tb(out, Lin, Rin, len, shape, invmapper_L, - invmapper_R); - } - - //---------------------- - void iArithmetic_internal_i32tcd( - boost::intrusive_ptr &out, boost::intrusive_ptr &Lin, - boost::intrusive_ptr &Rin, const unsigned long long &len, - const std::vector &shape, const std::vector &invmapper_L, - const std::vector &invmapper_R, const char &type) { - if (type == 0) - cytnx::linalg_internal::iAdd_internal_i32tcd(out, Lin, Rin, len, shape, invmapper_L, - invmapper_R); - else if (type == 1) 
- cytnx::linalg_internal::iMul_internal_i32tcd(out, Lin, Rin, len, shape, invmapper_L, - invmapper_R); - else if (type == 2) - cytnx::linalg_internal::iSub_internal_i32tcd(out, Lin, Rin, len, shape, invmapper_L, - invmapper_R); - else if (type == 3) - cytnx::linalg_internal::iDiv_internal_i32tcd(out, Lin, Rin, len, shape, invmapper_L, - invmapper_R); - } - void iArithmetic_internal_i32tcf( - boost::intrusive_ptr &out, boost::intrusive_ptr &Lin, - boost::intrusive_ptr &Rin, const unsigned long long &len, - const std::vector &shape, const std::vector &invmapper_L, - const std::vector &invmapper_R, const char &type) { - if (type == 0) - cytnx::linalg_internal::iAdd_internal_i32tcf(out, Lin, Rin, len, shape, invmapper_L, - invmapper_R); - else if (type == 1) - cytnx::linalg_internal::iMul_internal_i32tcf(out, Lin, Rin, len, shape, invmapper_L, - invmapper_R); - else if (type == 2) - cytnx::linalg_internal::iSub_internal_i32tcf(out, Lin, Rin, len, shape, invmapper_L, - invmapper_R); - else if (type == 3) - cytnx::linalg_internal::iDiv_internal_i32tcf(out, Lin, Rin, len, shape, invmapper_L, - invmapper_R); - } - void iArithmetic_internal_i32td( - boost::intrusive_ptr &out, boost::intrusive_ptr &Lin, - boost::intrusive_ptr &Rin, const unsigned long long &len, - const std::vector &shape, const std::vector &invmapper_L, - const std::vector &invmapper_R, const char &type) { - if (type == 0) - cytnx::linalg_internal::iAdd_internal_i32td(out, Lin, Rin, len, shape, invmapper_L, - invmapper_R); - else if (type == 1) - cytnx::linalg_internal::iMul_internal_i32td(out, Lin, Rin, len, shape, invmapper_L, - invmapper_R); - else if (type == 2) - cytnx::linalg_internal::iSub_internal_i32td(out, Lin, Rin, len, shape, invmapper_L, - invmapper_R); - else if (type == 3) - cytnx::linalg_internal::iDiv_internal_i32td(out, Lin, Rin, len, shape, invmapper_L, - invmapper_R); - } - void iArithmetic_internal_i32tf( - boost::intrusive_ptr &out, boost::intrusive_ptr &Lin, - boost::intrusive_ptr 
&Rin, const unsigned long long &len, - const std::vector &shape, const std::vector &invmapper_L, - const std::vector &invmapper_R, const char &type) { - if (type == 0) - cytnx::linalg_internal::iAdd_internal_i32tf(out, Lin, Rin, len, shape, invmapper_L, - invmapper_R); - else if (type == 1) - cytnx::linalg_internal::iMul_internal_i32tf(out, Lin, Rin, len, shape, invmapper_L, - invmapper_R); - else if (type == 2) - cytnx::linalg_internal::iSub_internal_i32tf(out, Lin, Rin, len, shape, invmapper_L, - invmapper_R); - else if (type == 3) - cytnx::linalg_internal::iDiv_internal_i32tf(out, Lin, Rin, len, shape, invmapper_L, - invmapper_R); - } - void iArithmetic_internal_i32tu64( - boost::intrusive_ptr &out, boost::intrusive_ptr &Lin, - boost::intrusive_ptr &Rin, const unsigned long long &len, - const std::vector &shape, const std::vector &invmapper_L, - const std::vector &invmapper_R, const char &type) { - if (type == 0) - cytnx::linalg_internal::iAdd_internal_i32tu64(out, Lin, Rin, len, shape, invmapper_L, - invmapper_R); - else if (type == 1) - cytnx::linalg_internal::iMul_internal_i32tu64(out, Lin, Rin, len, shape, invmapper_L, - invmapper_R); - else if (type == 2) - cytnx::linalg_internal::iSub_internal_i32tu64(out, Lin, Rin, len, shape, invmapper_L, - invmapper_R); - else if (type == 3) - cytnx::linalg_internal::iDiv_internal_i32tu64(out, Lin, Rin, len, shape, invmapper_L, - invmapper_R); - } - void iArithmetic_internal_i32ti64( - boost::intrusive_ptr &out, boost::intrusive_ptr &Lin, - boost::intrusive_ptr &Rin, const unsigned long long &len, - const std::vector &shape, const std::vector &invmapper_L, - const std::vector &invmapper_R, const char &type) { - if (type == 0) - cytnx::linalg_internal::iAdd_internal_i32ti64(out, Lin, Rin, len, shape, invmapper_L, - invmapper_R); - else if (type == 1) - cytnx::linalg_internal::iMul_internal_i32ti64(out, Lin, Rin, len, shape, invmapper_L, - invmapper_R); - else if (type == 2) - 
cytnx::linalg_internal::iSub_internal_i32ti64(out, Lin, Rin, len, shape, invmapper_L, - invmapper_R); - else if (type == 3) - cytnx::linalg_internal::iDiv_internal_i32ti64(out, Lin, Rin, len, shape, invmapper_L, - invmapper_R); - } - void iArithmetic_internal_i32tu32( - boost::intrusive_ptr &out, boost::intrusive_ptr &Lin, - boost::intrusive_ptr &Rin, const unsigned long long &len, - const std::vector &shape, const std::vector &invmapper_L, - const std::vector &invmapper_R, const char &type) { - if (type == 0) - cytnx::linalg_internal::iAdd_internal_i32tu32(out, Lin, Rin, len, shape, invmapper_L, - invmapper_R); - else if (type == 1) - cytnx::linalg_internal::iMul_internal_i32tu32(out, Lin, Rin, len, shape, invmapper_L, - invmapper_R); - else if (type == 2) - cytnx::linalg_internal::iSub_internal_i32tu32(out, Lin, Rin, len, shape, invmapper_L, - invmapper_R); - else if (type == 3) - cytnx::linalg_internal::iDiv_internal_i32tu32(out, Lin, Rin, len, shape, invmapper_L, - invmapper_R); - } - void iArithmetic_internal_i32ti32( - boost::intrusive_ptr &out, boost::intrusive_ptr &Lin, - boost::intrusive_ptr &Rin, const unsigned long long &len, - const std::vector &shape, const std::vector &invmapper_L, - const std::vector &invmapper_R, const char &type) { - if (type == 0) - cytnx::linalg_internal::iAdd_internal_i32ti32(out, Lin, Rin, len, shape, invmapper_L, - invmapper_R); - else if (type == 1) - cytnx::linalg_internal::iMul_internal_i32ti32(out, Lin, Rin, len, shape, invmapper_L, - invmapper_R); - else if (type == 2) - cytnx::linalg_internal::iSub_internal_i32ti32(out, Lin, Rin, len, shape, invmapper_L, - invmapper_R); - else if (type == 3) - cytnx::linalg_internal::iDiv_internal_i32ti32(out, Lin, Rin, len, shape, invmapper_L, - invmapper_R); - } - void iArithmetic_internal_i32ti16( - boost::intrusive_ptr &out, boost::intrusive_ptr &Lin, - boost::intrusive_ptr &Rin, const unsigned long long &len, - const std::vector &shape, const std::vector &invmapper_L, - const 
std::vector &invmapper_R, const char &type) { - if (type == 0) - cytnx::linalg_internal::iAdd_internal_i32ti16(out, Lin, Rin, len, shape, invmapper_L, - invmapper_R); - else if (type == 1) - cytnx::linalg_internal::iMul_internal_i32ti16(out, Lin, Rin, len, shape, invmapper_L, - invmapper_R); - else if (type == 2) - cytnx::linalg_internal::iSub_internal_i32ti16(out, Lin, Rin, len, shape, invmapper_L, - invmapper_R); - else if (type == 3) - cytnx::linalg_internal::iDiv_internal_i32ti16(out, Lin, Rin, len, shape, invmapper_L, - invmapper_R); - } - void iArithmetic_internal_i32tu16( - boost::intrusive_ptr &out, boost::intrusive_ptr &Lin, - boost::intrusive_ptr &Rin, const unsigned long long &len, - const std::vector &shape, const std::vector &invmapper_L, - const std::vector &invmapper_R, const char &type) { - if (type == 0) - cytnx::linalg_internal::iAdd_internal_i32tu16(out, Lin, Rin, len, shape, invmapper_L, - invmapper_R); - else if (type == 1) - cytnx::linalg_internal::iMul_internal_i32tu16(out, Lin, Rin, len, shape, invmapper_L, - invmapper_R); - else if (type == 2) - cytnx::linalg_internal::iSub_internal_i32tu16(out, Lin, Rin, len, shape, invmapper_L, - invmapper_R); - else if (type == 3) - cytnx::linalg_internal::iDiv_internal_i32tu16(out, Lin, Rin, len, shape, invmapper_L, - invmapper_R); - } - void iArithmetic_internal_i32tb( - boost::intrusive_ptr &out, boost::intrusive_ptr &Lin, - boost::intrusive_ptr &Rin, const unsigned long long &len, - const std::vector &shape, const std::vector &invmapper_L, - const std::vector &invmapper_R, const char &type) { - if (type == 0) - cytnx::linalg_internal::iAdd_internal_i32tb(out, Lin, Rin, len, shape, invmapper_L, - invmapper_R); - else if (type == 1) - cytnx::linalg_internal::iMul_internal_i32tb(out, Lin, Rin, len, shape, invmapper_L, - invmapper_R); - else if (type == 2) - cytnx::linalg_internal::iSub_internal_i32tb(out, Lin, Rin, len, shape, invmapper_L, - invmapper_R); - else if (type == 3) - 
cytnx::linalg_internal::iDiv_internal_i32tb(out, Lin, Rin, len, shape, invmapper_L, - invmapper_R); - } - //---------------------- - void iArithmetic_internal_i16tcd( - boost::intrusive_ptr &out, boost::intrusive_ptr &Lin, - boost::intrusive_ptr &Rin, const unsigned long long &len, - const std::vector &shape, const std::vector &invmapper_L, - const std::vector &invmapper_R, const char &type) { - if (type == 0) - cytnx::linalg_internal::iAdd_internal_i16tcd(out, Lin, Rin, len, shape, invmapper_L, - invmapper_R); - else if (type == 1) - cytnx::linalg_internal::iMul_internal_i16tcd(out, Lin, Rin, len, shape, invmapper_L, - invmapper_R); - else if (type == 2) - cytnx::linalg_internal::iSub_internal_i16tcd(out, Lin, Rin, len, shape, invmapper_L, - invmapper_R); - else if (type == 3) - cytnx::linalg_internal::iDiv_internal_i16tcd(out, Lin, Rin, len, shape, invmapper_L, - invmapper_R); - } - void iArithmetic_internal_i16tcf( - boost::intrusive_ptr &out, boost::intrusive_ptr &Lin, - boost::intrusive_ptr &Rin, const unsigned long long &len, - const std::vector &shape, const std::vector &invmapper_L, - const std::vector &invmapper_R, const char &type) { - if (type == 0) - cytnx::linalg_internal::iAdd_internal_i16tcf(out, Lin, Rin, len, shape, invmapper_L, - invmapper_R); - else if (type == 1) - cytnx::linalg_internal::iMul_internal_i16tcf(out, Lin, Rin, len, shape, invmapper_L, - invmapper_R); - else if (type == 2) - cytnx::linalg_internal::iSub_internal_i16tcf(out, Lin, Rin, len, shape, invmapper_L, - invmapper_R); - else if (type == 3) - cytnx::linalg_internal::iDiv_internal_i16tcf(out, Lin, Rin, len, shape, invmapper_L, - invmapper_R); - } - void iArithmetic_internal_i16td( - boost::intrusive_ptr &out, boost::intrusive_ptr &Lin, - boost::intrusive_ptr &Rin, const unsigned long long &len, - const std::vector &shape, const std::vector &invmapper_L, - const std::vector &invmapper_R, const char &type) { - if (type == 0) - cytnx::linalg_internal::iAdd_internal_i16td(out, Lin, 
Rin, len, shape, invmapper_L, - invmapper_R); - else if (type == 1) - cytnx::linalg_internal::iMul_internal_i16td(out, Lin, Rin, len, shape, invmapper_L, - invmapper_R); - else if (type == 2) - cytnx::linalg_internal::iSub_internal_i16td(out, Lin, Rin, len, shape, invmapper_L, - invmapper_R); - else if (type == 3) - cytnx::linalg_internal::iDiv_internal_i16td(out, Lin, Rin, len, shape, invmapper_L, - invmapper_R); - } - void iArithmetic_internal_i16tf( - boost::intrusive_ptr &out, boost::intrusive_ptr &Lin, - boost::intrusive_ptr &Rin, const unsigned long long &len, - const std::vector &shape, const std::vector &invmapper_L, - const std::vector &invmapper_R, const char &type) { - if (type == 0) - cytnx::linalg_internal::iAdd_internal_i16tf(out, Lin, Rin, len, shape, invmapper_L, - invmapper_R); - else if (type == 1) - cytnx::linalg_internal::iMul_internal_i16tf(out, Lin, Rin, len, shape, invmapper_L, - invmapper_R); - else if (type == 2) - cytnx::linalg_internal::iSub_internal_i16tf(out, Lin, Rin, len, shape, invmapper_L, - invmapper_R); - else if (type == 3) - cytnx::linalg_internal::iDiv_internal_i16tf(out, Lin, Rin, len, shape, invmapper_L, - invmapper_R); - } - void iArithmetic_internal_i16tu64( - boost::intrusive_ptr &out, boost::intrusive_ptr &Lin, - boost::intrusive_ptr &Rin, const unsigned long long &len, - const std::vector &shape, const std::vector &invmapper_L, - const std::vector &invmapper_R, const char &type) { - if (type == 0) - cytnx::linalg_internal::iAdd_internal_i16tu64(out, Lin, Rin, len, shape, invmapper_L, - invmapper_R); - else if (type == 1) - cytnx::linalg_internal::iMul_internal_i16tu64(out, Lin, Rin, len, shape, invmapper_L, - invmapper_R); - else if (type == 2) - cytnx::linalg_internal::iSub_internal_i16tu64(out, Lin, Rin, len, shape, invmapper_L, - invmapper_R); - else if (type == 3) - cytnx::linalg_internal::iDiv_internal_i16tu64(out, Lin, Rin, len, shape, invmapper_L, - invmapper_R); - } - void iArithmetic_internal_i16ti64( - 
boost::intrusive_ptr &out, boost::intrusive_ptr &Lin, - boost::intrusive_ptr &Rin, const unsigned long long &len, - const std::vector &shape, const std::vector &invmapper_L, - const std::vector &invmapper_R, const char &type) { - if (type == 0) - cytnx::linalg_internal::iAdd_internal_i16ti64(out, Lin, Rin, len, shape, invmapper_L, - invmapper_R); - else if (type == 1) - cytnx::linalg_internal::iMul_internal_i16ti64(out, Lin, Rin, len, shape, invmapper_L, - invmapper_R); - else if (type == 2) - cytnx::linalg_internal::iSub_internal_i16ti64(out, Lin, Rin, len, shape, invmapper_L, - invmapper_R); - else if (type == 3) - cytnx::linalg_internal::iDiv_internal_i16ti64(out, Lin, Rin, len, shape, invmapper_L, - invmapper_R); - } - void iArithmetic_internal_i16tu32( - boost::intrusive_ptr &out, boost::intrusive_ptr &Lin, - boost::intrusive_ptr &Rin, const unsigned long long &len, - const std::vector &shape, const std::vector &invmapper_L, - const std::vector &invmapper_R, const char &type) { - if (type == 0) - cytnx::linalg_internal::iAdd_internal_i16tu32(out, Lin, Rin, len, shape, invmapper_L, - invmapper_R); - else if (type == 1) - cytnx::linalg_internal::iMul_internal_i16tu32(out, Lin, Rin, len, shape, invmapper_L, - invmapper_R); - else if (type == 2) - cytnx::linalg_internal::iSub_internal_i16tu32(out, Lin, Rin, len, shape, invmapper_L, - invmapper_R); - else if (type == 3) - cytnx::linalg_internal::iDiv_internal_i16tu32(out, Lin, Rin, len, shape, invmapper_L, - invmapper_R); - } - void iArithmetic_internal_i16ti32( - boost::intrusive_ptr &out, boost::intrusive_ptr &Lin, - boost::intrusive_ptr &Rin, const unsigned long long &len, - const std::vector &shape, const std::vector &invmapper_L, - const std::vector &invmapper_R, const char &type) { - if (type == 0) - cytnx::linalg_internal::iAdd_internal_i16ti32(out, Lin, Rin, len, shape, invmapper_L, - invmapper_R); - else if (type == 1) - cytnx::linalg_internal::iMul_internal_i16ti32(out, Lin, Rin, len, shape, invmapper_L, 
- invmapper_R); - else if (type == 2) - cytnx::linalg_internal::iSub_internal_i16ti32(out, Lin, Rin, len, shape, invmapper_L, - invmapper_R); - else if (type == 3) - cytnx::linalg_internal::iDiv_internal_i16ti32(out, Lin, Rin, len, shape, invmapper_L, - invmapper_R); - } - void iArithmetic_internal_i16ti16( - boost::intrusive_ptr &out, boost::intrusive_ptr &Lin, - boost::intrusive_ptr &Rin, const unsigned long long &len, - const std::vector &shape, const std::vector &invmapper_L, - const std::vector &invmapper_R, const char &type) { - if (type == 0) - cytnx::linalg_internal::iAdd_internal_i16ti16(out, Lin, Rin, len, shape, invmapper_L, - invmapper_R); - else if (type == 1) - cytnx::linalg_internal::iMul_internal_i16ti16(out, Lin, Rin, len, shape, invmapper_L, - invmapper_R); - else if (type == 2) - cytnx::linalg_internal::iSub_internal_i16ti16(out, Lin, Rin, len, shape, invmapper_L, - invmapper_R); - else if (type == 3) - cytnx::linalg_internal::iDiv_internal_i16ti16(out, Lin, Rin, len, shape, invmapper_L, - invmapper_R); - } - void iArithmetic_internal_i16tu16( - boost::intrusive_ptr &out, boost::intrusive_ptr &Lin, - boost::intrusive_ptr &Rin, const unsigned long long &len, - const std::vector &shape, const std::vector &invmapper_L, - const std::vector &invmapper_R, const char &type) { - if (type == 0) - cytnx::linalg_internal::iAdd_internal_i16tu16(out, Lin, Rin, len, shape, invmapper_L, - invmapper_R); - else if (type == 1) - cytnx::linalg_internal::iMul_internal_i16tu16(out, Lin, Rin, len, shape, invmapper_L, - invmapper_R); - else if (type == 2) - cytnx::linalg_internal::iSub_internal_i16tu16(out, Lin, Rin, len, shape, invmapper_L, - invmapper_R); - else if (type == 3) - cytnx::linalg_internal::iDiv_internal_i16tu16(out, Lin, Rin, len, shape, invmapper_L, - invmapper_R); - } - void iArithmetic_internal_i16tb( - boost::intrusive_ptr &out, boost::intrusive_ptr &Lin, - boost::intrusive_ptr &Rin, const unsigned long long &len, - const std::vector &shape, const 
std::vector &invmapper_L, - const std::vector &invmapper_R, const char &type) { - if (type == 0) - cytnx::linalg_internal::iAdd_internal_i16tb(out, Lin, Rin, len, shape, invmapper_L, - invmapper_R); - else if (type == 1) - cytnx::linalg_internal::iMul_internal_i16tb(out, Lin, Rin, len, shape, invmapper_L, - invmapper_R); - else if (type == 2) - cytnx::linalg_internal::iSub_internal_i16tb(out, Lin, Rin, len, shape, invmapper_L, - invmapper_R); - else if (type == 3) - cytnx::linalg_internal::iDiv_internal_i16tb(out, Lin, Rin, len, shape, invmapper_L, - invmapper_R); - } - - //---------------------- - void iArithmetic_internal_u16tcd( - boost::intrusive_ptr &out, boost::intrusive_ptr &Lin, - boost::intrusive_ptr &Rin, const unsigned long long &len, - const std::vector &shape, const std::vector &invmapper_L, - const std::vector &invmapper_R, const char &type) { - if (type == 0) - cytnx::linalg_internal::iAdd_internal_u16tcd(out, Lin, Rin, len, shape, invmapper_L, - invmapper_R); - else if (type == 1) - cytnx::linalg_internal::iMul_internal_u16tcd(out, Lin, Rin, len, shape, invmapper_L, - invmapper_R); - else if (type == 2) - cytnx::linalg_internal::iSub_internal_u16tcd(out, Lin, Rin, len, shape, invmapper_L, - invmapper_R); - else if (type == 3) - cytnx::linalg_internal::iDiv_internal_u16tcd(out, Lin, Rin, len, shape, invmapper_L, - invmapper_R); - } - void iArithmetic_internal_u16tcf( - boost::intrusive_ptr &out, boost::intrusive_ptr &Lin, - boost::intrusive_ptr &Rin, const unsigned long long &len, - const std::vector &shape, const std::vector &invmapper_L, - const std::vector &invmapper_R, const char &type) { - if (type == 0) - cytnx::linalg_internal::iAdd_internal_u16tcf(out, Lin, Rin, len, shape, invmapper_L, - invmapper_R); - else if (type == 1) - cytnx::linalg_internal::iMul_internal_u16tcf(out, Lin, Rin, len, shape, invmapper_L, - invmapper_R); - else if (type == 2) - cytnx::linalg_internal::iSub_internal_u16tcf(out, Lin, Rin, len, shape, invmapper_L, - 
invmapper_R); - else if (type == 3) - cytnx::linalg_internal::iDiv_internal_u16tcf(out, Lin, Rin, len, shape, invmapper_L, - invmapper_R); - } - void iArithmetic_internal_u16td( - boost::intrusive_ptr &out, boost::intrusive_ptr &Lin, - boost::intrusive_ptr &Rin, const unsigned long long &len, - const std::vector &shape, const std::vector &invmapper_L, - const std::vector &invmapper_R, const char &type) { - if (type == 0) - cytnx::linalg_internal::iAdd_internal_u16td(out, Lin, Rin, len, shape, invmapper_L, - invmapper_R); - else if (type == 1) - cytnx::linalg_internal::iMul_internal_u16td(out, Lin, Rin, len, shape, invmapper_L, - invmapper_R); - else if (type == 2) - cytnx::linalg_internal::iSub_internal_u16td(out, Lin, Rin, len, shape, invmapper_L, - invmapper_R); - else if (type == 3) - cytnx::linalg_internal::iDiv_internal_u16td(out, Lin, Rin, len, shape, invmapper_L, - invmapper_R); - } - void iArithmetic_internal_u16tf( - boost::intrusive_ptr &out, boost::intrusive_ptr &Lin, - boost::intrusive_ptr &Rin, const unsigned long long &len, - const std::vector &shape, const std::vector &invmapper_L, - const std::vector &invmapper_R, const char &type) { - if (type == 0) - cytnx::linalg_internal::iAdd_internal_u16tf(out, Lin, Rin, len, shape, invmapper_L, - invmapper_R); - else if (type == 1) - cytnx::linalg_internal::iMul_internal_u16tf(out, Lin, Rin, len, shape, invmapper_L, - invmapper_R); - else if (type == 2) - cytnx::linalg_internal::iSub_internal_u16tf(out, Lin, Rin, len, shape, invmapper_L, - invmapper_R); - else if (type == 3) - cytnx::linalg_internal::iDiv_internal_u16tf(out, Lin, Rin, len, shape, invmapper_L, - invmapper_R); - } - void iArithmetic_internal_u16tu64( - boost::intrusive_ptr &out, boost::intrusive_ptr &Lin, - boost::intrusive_ptr &Rin, const unsigned long long &len, - const std::vector &shape, const std::vector &invmapper_L, - const std::vector &invmapper_R, const char &type) { - if (type == 0) - cytnx::linalg_internal::iAdd_internal_u16tu64(out, 
Lin, Rin, len, shape, invmapper_L, - invmapper_R); - else if (type == 1) - cytnx::linalg_internal::iMul_internal_u16tu64(out, Lin, Rin, len, shape, invmapper_L, - invmapper_R); - else if (type == 2) - cytnx::linalg_internal::iSub_internal_u16tu64(out, Lin, Rin, len, shape, invmapper_L, - invmapper_R); - else if (type == 3) - cytnx::linalg_internal::iDiv_internal_u16tu64(out, Lin, Rin, len, shape, invmapper_L, - invmapper_R); - } - void iArithmetic_internal_u16ti64( - boost::intrusive_ptr &out, boost::intrusive_ptr &Lin, - boost::intrusive_ptr &Rin, const unsigned long long &len, - const std::vector &shape, const std::vector &invmapper_L, - const std::vector &invmapper_R, const char &type) { - if (type == 0) - cytnx::linalg_internal::iAdd_internal_u16ti64(out, Lin, Rin, len, shape, invmapper_L, - invmapper_R); - else if (type == 1) - cytnx::linalg_internal::iMul_internal_u16ti64(out, Lin, Rin, len, shape, invmapper_L, - invmapper_R); - else if (type == 2) - cytnx::linalg_internal::iSub_internal_u16ti64(out, Lin, Rin, len, shape, invmapper_L, - invmapper_R); - else if (type == 3) - cytnx::linalg_internal::iDiv_internal_u16ti64(out, Lin, Rin, len, shape, invmapper_L, - invmapper_R); - } - void iArithmetic_internal_u16tu32( - boost::intrusive_ptr &out, boost::intrusive_ptr &Lin, - boost::intrusive_ptr &Rin, const unsigned long long &len, - const std::vector &shape, const std::vector &invmapper_L, - const std::vector &invmapper_R, const char &type) { - if (type == 0) - cytnx::linalg_internal::iAdd_internal_u16tu32(out, Lin, Rin, len, shape, invmapper_L, - invmapper_R); - else if (type == 1) - cytnx::linalg_internal::iMul_internal_u16tu32(out, Lin, Rin, len, shape, invmapper_L, - invmapper_R); - else if (type == 2) - cytnx::linalg_internal::iSub_internal_u16tu32(out, Lin, Rin, len, shape, invmapper_L, - invmapper_R); - else if (type == 3) - cytnx::linalg_internal::iDiv_internal_u16tu32(out, Lin, Rin, len, shape, invmapper_L, - invmapper_R); - } - void 
iArithmetic_internal_u16ti32( - boost::intrusive_ptr &out, boost::intrusive_ptr &Lin, - boost::intrusive_ptr &Rin, const unsigned long long &len, - const std::vector &shape, const std::vector &invmapper_L, - const std::vector &invmapper_R, const char &type) { - if (type == 0) - cytnx::linalg_internal::iAdd_internal_u16ti32(out, Lin, Rin, len, shape, invmapper_L, - invmapper_R); - else if (type == 1) - cytnx::linalg_internal::iMul_internal_u16ti32(out, Lin, Rin, len, shape, invmapper_L, - invmapper_R); - else if (type == 2) - cytnx::linalg_internal::iSub_internal_u16ti32(out, Lin, Rin, len, shape, invmapper_L, - invmapper_R); - else if (type == 3) - cytnx::linalg_internal::iDiv_internal_u16ti32(out, Lin, Rin, len, shape, invmapper_L, - invmapper_R); - } - void iArithmetic_internal_u16ti16( - boost::intrusive_ptr &out, boost::intrusive_ptr &Lin, - boost::intrusive_ptr &Rin, const unsigned long long &len, - const std::vector &shape, const std::vector &invmapper_L, - const std::vector &invmapper_R, const char &type) { - if (type == 0) - cytnx::linalg_internal::iAdd_internal_u16ti16(out, Lin, Rin, len, shape, invmapper_L, - invmapper_R); - else if (type == 1) - cytnx::linalg_internal::iMul_internal_u16ti16(out, Lin, Rin, len, shape, invmapper_L, - invmapper_R); - else if (type == 2) - cytnx::linalg_internal::iSub_internal_u16ti16(out, Lin, Rin, len, shape, invmapper_L, - invmapper_R); - else if (type == 3) - cytnx::linalg_internal::iDiv_internal_u16ti16(out, Lin, Rin, len, shape, invmapper_L, - invmapper_R); - } - void iArithmetic_internal_u16tu16( - boost::intrusive_ptr &out, boost::intrusive_ptr &Lin, - boost::intrusive_ptr &Rin, const unsigned long long &len, - const std::vector &shape, const std::vector &invmapper_L, - const std::vector &invmapper_R, const char &type) { - if (type == 0) - cytnx::linalg_internal::iAdd_internal_u16tu16(out, Lin, Rin, len, shape, invmapper_L, - invmapper_R); - else if (type == 1) - cytnx::linalg_internal::iMul_internal_u16tu16(out, 
Lin, Rin, len, shape, invmapper_L, - invmapper_R); - else if (type == 2) - cytnx::linalg_internal::iSub_internal_u16tu16(out, Lin, Rin, len, shape, invmapper_L, - invmapper_R); - else if (type == 3) - cytnx::linalg_internal::iDiv_internal_u16tu16(out, Lin, Rin, len, shape, invmapper_L, - invmapper_R); - } - void iArithmetic_internal_u16tb( - boost::intrusive_ptr &out, boost::intrusive_ptr &Lin, - boost::intrusive_ptr &Rin, const unsigned long long &len, - const std::vector &shape, const std::vector &invmapper_L, - const std::vector &invmapper_R, const char &type) { - if (type == 0) - cytnx::linalg_internal::iAdd_internal_u16tb(out, Lin, Rin, len, shape, invmapper_L, - invmapper_R); - else if (type == 1) - cytnx::linalg_internal::iMul_internal_u16tb(out, Lin, Rin, len, shape, invmapper_L, - invmapper_R); - else if (type == 2) - cytnx::linalg_internal::iSub_internal_u16tb(out, Lin, Rin, len, shape, invmapper_L, - invmapper_R); - else if (type == 3) - cytnx::linalg_internal::iDiv_internal_u16tb(out, Lin, Rin, len, shape, invmapper_L, - invmapper_R); - } - //---------------------- - void iArithmetic_internal_btcd(boost::intrusive_ptr &out, - boost::intrusive_ptr &Lin, - boost::intrusive_ptr &Rin, - const unsigned long long &len, - const std::vector &shape, - const std::vector &invmapper_L, - const std::vector &invmapper_R, const char &type) { - if (type == 0) - cytnx::linalg_internal::iAdd_internal_btcd(out, Lin, Rin, len, shape, invmapper_L, - invmapper_R); - else if (type == 1) - cytnx::linalg_internal::iMul_internal_btcd(out, Lin, Rin, len, shape, invmapper_L, - invmapper_R); - else if (type == 2) - cytnx::linalg_internal::iSub_internal_btcd(out, Lin, Rin, len, shape, invmapper_L, - invmapper_R); - else if (type == 3) - cytnx::linalg_internal::iDiv_internal_btcd(out, Lin, Rin, len, shape, invmapper_L, - invmapper_R); - } - void iArithmetic_internal_btcf(boost::intrusive_ptr &out, - boost::intrusive_ptr &Lin, - boost::intrusive_ptr &Rin, - const unsigned long long 
&len, - const std::vector &shape, - const std::vector &invmapper_L, - const std::vector &invmapper_R, const char &type) { - if (type == 0) - cytnx::linalg_internal::iAdd_internal_btcf(out, Lin, Rin, len, shape, invmapper_L, - invmapper_R); - else if (type == 1) - cytnx::linalg_internal::iMul_internal_btcf(out, Lin, Rin, len, shape, invmapper_L, - invmapper_R); - else if (type == 2) - cytnx::linalg_internal::iSub_internal_btcf(out, Lin, Rin, len, shape, invmapper_L, - invmapper_R); - else if (type == 3) - cytnx::linalg_internal::iDiv_internal_btcf(out, Lin, Rin, len, shape, invmapper_L, - invmapper_R); - } - void iArithmetic_internal_btd(boost::intrusive_ptr &out, - boost::intrusive_ptr &Lin, - boost::intrusive_ptr &Rin, - const unsigned long long &len, - const std::vector &shape, - const std::vector &invmapper_L, - const std::vector &invmapper_R, const char &type) { - if (type == 0) - cytnx::linalg_internal::iAdd_internal_btd(out, Lin, Rin, len, shape, invmapper_L, - invmapper_R); - else if (type == 1) - cytnx::linalg_internal::iMul_internal_btd(out, Lin, Rin, len, shape, invmapper_L, - invmapper_R); - else if (type == 2) - cytnx::linalg_internal::iSub_internal_btd(out, Lin, Rin, len, shape, invmapper_L, - invmapper_R); - else if (type == 3) - cytnx::linalg_internal::iDiv_internal_btd(out, Lin, Rin, len, shape, invmapper_L, - invmapper_R); - } - void iArithmetic_internal_btf(boost::intrusive_ptr &out, - boost::intrusive_ptr &Lin, - boost::intrusive_ptr &Rin, - const unsigned long long &len, - const std::vector &shape, - const std::vector &invmapper_L, - const std::vector &invmapper_R, const char &type) { - if (type == 0) - cytnx::linalg_internal::iAdd_internal_btf(out, Lin, Rin, len, shape, invmapper_L, - invmapper_R); - else if (type == 1) - cytnx::linalg_internal::iMul_internal_btf(out, Lin, Rin, len, shape, invmapper_L, - invmapper_R); - else if (type == 2) - cytnx::linalg_internal::iSub_internal_btf(out, Lin, Rin, len, shape, invmapper_L, - invmapper_R); - else 
if (type == 3) - cytnx::linalg_internal::iDiv_internal_btf(out, Lin, Rin, len, shape, invmapper_L, - invmapper_R); - } - void iArithmetic_internal_btu64( - boost::intrusive_ptr &out, boost::intrusive_ptr &Lin, - boost::intrusive_ptr &Rin, const unsigned long long &len, - const std::vector &shape, const std::vector &invmapper_L, - const std::vector &invmapper_R, const char &type) { - if (type == 0) - cytnx::linalg_internal::iAdd_internal_btu64(out, Lin, Rin, len, shape, invmapper_L, - invmapper_R); - else if (type == 1) - cytnx::linalg_internal::iMul_internal_btu64(out, Lin, Rin, len, shape, invmapper_L, - invmapper_R); - else if (type == 2) - cytnx::linalg_internal::iSub_internal_btu64(out, Lin, Rin, len, shape, invmapper_L, - invmapper_R); - else if (type == 3) - cytnx::linalg_internal::iDiv_internal_btu64(out, Lin, Rin, len, shape, invmapper_L, - invmapper_R); - } - void iArithmetic_internal_bti64( - boost::intrusive_ptr &out, boost::intrusive_ptr &Lin, - boost::intrusive_ptr &Rin, const unsigned long long &len, - const std::vector &shape, const std::vector &invmapper_L, - const std::vector &invmapper_R, const char &type) { - if (type == 0) - cytnx::linalg_internal::iAdd_internal_bti64(out, Lin, Rin, len, shape, invmapper_L, - invmapper_R); - else if (type == 1) - cytnx::linalg_internal::iMul_internal_bti64(out, Lin, Rin, len, shape, invmapper_L, - invmapper_R); - else if (type == 2) - cytnx::linalg_internal::iSub_internal_bti64(out, Lin, Rin, len, shape, invmapper_L, - invmapper_R); - else if (type == 3) - cytnx::linalg_internal::iDiv_internal_bti64(out, Lin, Rin, len, shape, invmapper_L, - invmapper_R); - } - void iArithmetic_internal_btu32( - boost::intrusive_ptr &out, boost::intrusive_ptr &Lin, - boost::intrusive_ptr &Rin, const unsigned long long &len, - const std::vector &shape, const std::vector &invmapper_L, - const std::vector &invmapper_R, const char &type) { - if (type == 0) - cytnx::linalg_internal::iAdd_internal_btu32(out, Lin, Rin, len, shape, 
invmapper_L, - invmapper_R); - else if (type == 1) - cytnx::linalg_internal::iMul_internal_btu32(out, Lin, Rin, len, shape, invmapper_L, - invmapper_R); - else if (type == 2) - cytnx::linalg_internal::iSub_internal_btu32(out, Lin, Rin, len, shape, invmapper_L, - invmapper_R); - else if (type == 3) - cytnx::linalg_internal::iDiv_internal_btu32(out, Lin, Rin, len, shape, invmapper_L, - invmapper_R); - } - void iArithmetic_internal_bti32( - boost::intrusive_ptr &out, boost::intrusive_ptr &Lin, - boost::intrusive_ptr &Rin, const unsigned long long &len, - const std::vector &shape, const std::vector &invmapper_L, - const std::vector &invmapper_R, const char &type) { - if (type == 0) - cytnx::linalg_internal::iAdd_internal_bti32(out, Lin, Rin, len, shape, invmapper_L, - invmapper_R); - else if (type == 1) - cytnx::linalg_internal::iMul_internal_bti32(out, Lin, Rin, len, shape, invmapper_L, - invmapper_R); - else if (type == 2) - cytnx::linalg_internal::iSub_internal_bti32(out, Lin, Rin, len, shape, invmapper_L, - invmapper_R); - else if (type == 3) - cytnx::linalg_internal::iDiv_internal_bti32(out, Lin, Rin, len, shape, invmapper_L, - invmapper_R); - } - void iArithmetic_internal_bti16( - boost::intrusive_ptr &out, boost::intrusive_ptr &Lin, - boost::intrusive_ptr &Rin, const unsigned long long &len, - const std::vector &shape, const std::vector &invmapper_L, - const std::vector &invmapper_R, const char &type) { - if (type == 0) - cytnx::linalg_internal::iAdd_internal_bti16(out, Lin, Rin, len, shape, invmapper_L, - invmapper_R); - else if (type == 1) - cytnx::linalg_internal::iMul_internal_bti16(out, Lin, Rin, len, shape, invmapper_L, - invmapper_R); - else if (type == 2) - cytnx::linalg_internal::iSub_internal_bti16(out, Lin, Rin, len, shape, invmapper_L, - invmapper_R); - else if (type == 3) - cytnx::linalg_internal::iDiv_internal_bti16(out, Lin, Rin, len, shape, invmapper_L, - invmapper_R); - } - void iArithmetic_internal_btu16( - boost::intrusive_ptr &out, 
boost::intrusive_ptr &Lin, - boost::intrusive_ptr &Rin, const unsigned long long &len, - const std::vector &shape, const std::vector &invmapper_L, - const std::vector &invmapper_R, const char &type) { - if (type == 0) - cytnx::linalg_internal::iAdd_internal_btu16(out, Lin, Rin, len, shape, invmapper_L, - invmapper_R); - else if (type == 1) - cytnx::linalg_internal::iMul_internal_btu16(out, Lin, Rin, len, shape, invmapper_L, - invmapper_R); - else if (type == 2) - cytnx::linalg_internal::iSub_internal_btu16(out, Lin, Rin, len, shape, invmapper_L, - invmapper_R); - else if (type == 3) - cytnx::linalg_internal::iDiv_internal_btu16(out, Lin, Rin, len, shape, invmapper_L, - invmapper_R); - } - void iArithmetic_internal_btb(boost::intrusive_ptr &out, - boost::intrusive_ptr &Lin, - boost::intrusive_ptr &Rin, - const unsigned long long &len, - const std::vector &shape, - const std::vector &invmapper_L, - const std::vector &invmapper_R, const char &type) { - if (type == 0) - cytnx::linalg_internal::iAdd_internal_btb(out, Lin, Rin, len, shape, invmapper_L, - invmapper_R); - else if (type == 1) - cytnx::linalg_internal::iMul_internal_btb(out, Lin, Rin, len, shape, invmapper_L, - invmapper_R); - else if (type == 2) - cytnx::linalg_internal::iSub_internal_btb(out, Lin, Rin, len, shape, invmapper_L, - invmapper_R); - else if (type == 3) - cytnx::linalg_internal::iDiv_internal_btb(out, Lin, Rin, len, shape, invmapper_L, - invmapper_R); - } - - } // namespace linalg_internal -} // namespace cytnx diff --git a/src/backend/linalg_internal_cpu/iArithmetic_internal.hpp b/src/backend/linalg_internal_cpu/iArithmetic_internal.hpp deleted file mode 100644 index 1460c172c..000000000 --- a/src/backend/linalg_internal_cpu/iArithmetic_internal.hpp +++ /dev/null @@ -1,755 +0,0 @@ -#ifndef CYTNX_BACKEND_LINALG_INTERNAL_CPU_IARITHMETIC_INTERNAL_H_ -#define CYTNX_BACKEND_LINALG_INTERNAL_CPU_IARITHMETIC_INTERNAL_H_ - -#include "Type.hpp" -#include "backend/Storage.hpp" -#include 
"iAdd_internal.hpp" -#include "iMul_internal.hpp" -#include "iSub_internal.hpp" -#include "iDiv_internal.hpp" -namespace cytnx { - namespace linalg_internal { - - /// iArithmetic - void iArithmetic_internal_cdtcd(boost::intrusive_ptr &out, - boost::intrusive_ptr &Lin, - boost::intrusive_ptr &Rin, - const unsigned long long &len, - const std::vector &shape, - const std::vector &invmapper_L, - const std::vector &invmapper_R, const char &type); - void iArithmetic_internal_cdtcf(boost::intrusive_ptr &out, - boost::intrusive_ptr &Lin, - boost::intrusive_ptr &Rin, - const unsigned long long &len, - const std::vector &shape, - const std::vector &invmapper_L, - const std::vector &invmapper_R, const char &type); - void iArithmetic_internal_cdtd(boost::intrusive_ptr &out, - boost::intrusive_ptr &Lin, - boost::intrusive_ptr &Rin, - const unsigned long long &len, - const std::vector &shape, - const std::vector &invmapper_L, - const std::vector &invmapper_R, const char &type); - void iArithmetic_internal_cdtf(boost::intrusive_ptr &out, - boost::intrusive_ptr &Lin, - boost::intrusive_ptr &Rin, - const unsigned long long &len, - const std::vector &shape, - const std::vector &invmapper_L, - const std::vector &invmapper_R, const char &type); - void iArithmetic_internal_cdti64( - boost::intrusive_ptr &out, boost::intrusive_ptr &Lin, - boost::intrusive_ptr &Rin, const unsigned long long &len, - const std::vector &shape, const std::vector &invmapper_L, - const std::vector &invmapper_R, const char &type); - void iArithmetic_internal_cdtu64( - boost::intrusive_ptr &out, boost::intrusive_ptr &Lin, - boost::intrusive_ptr &Rin, const unsigned long long &len, - const std::vector &shape, const std::vector &invmapper_L, - const std::vector &invmapper_R, const char &type); - void iArithmetic_internal_cdti32( - boost::intrusive_ptr &out, boost::intrusive_ptr &Lin, - boost::intrusive_ptr &Rin, const unsigned long long &len, - const std::vector &shape, const std::vector &invmapper_L, - const 
std::vector &invmapper_R, const char &type); - void iArithmetic_internal_cdtu32( - boost::intrusive_ptr &out, boost::intrusive_ptr &Lin, - boost::intrusive_ptr &Rin, const unsigned long long &len, - const std::vector &shape, const std::vector &invmapper_L, - const std::vector &invmapper_R, const char &type); - void iArithmetic_internal_cdtu16( - boost::intrusive_ptr &out, boost::intrusive_ptr &Lin, - boost::intrusive_ptr &Rin, const unsigned long long &len, - const std::vector &shape, const std::vector &invmapper_L, - const std::vector &invmapper_R, const char &type); - void iArithmetic_internal_cdti16( - boost::intrusive_ptr &out, boost::intrusive_ptr &Lin, - boost::intrusive_ptr &Rin, const unsigned long long &len, - const std::vector &shape, const std::vector &invmapper_L, - const std::vector &invmapper_R, const char &type); - void iArithmetic_internal_cdtb(boost::intrusive_ptr &out, - boost::intrusive_ptr &Lin, - boost::intrusive_ptr &Rin, - const unsigned long long &len, - const std::vector &shape, - const std::vector &invmapper_L, - const std::vector &invmapper_R, const char &type); - - void iArithmetic_internal_cftcd(boost::intrusive_ptr &out, - boost::intrusive_ptr &Lin, - boost::intrusive_ptr &Rin, - const unsigned long long &len, - const std::vector &shape, - const std::vector &invmapper_L, - const std::vector &invmapper_R, const char &type); - void iArithmetic_internal_cftcf(boost::intrusive_ptr &out, - boost::intrusive_ptr &Lin, - boost::intrusive_ptr &Rin, - const unsigned long long &len, - const std::vector &shape, - const std::vector &invmapper_L, - const std::vector &invmapper_R, const char &type); - void iArithmetic_internal_cftd(boost::intrusive_ptr &out, - boost::intrusive_ptr &Lin, - boost::intrusive_ptr &Rin, - const unsigned long long &len, - const std::vector &shape, - const std::vector &invmapper_L, - const std::vector &invmapper_R, const char &type); - void iArithmetic_internal_cftf(boost::intrusive_ptr &out, - boost::intrusive_ptr &Lin, - 
boost::intrusive_ptr &Rin, - const unsigned long long &len, - const std::vector &shape, - const std::vector &invmapper_L, - const std::vector &invmapper_R, const char &type); - void iArithmetic_internal_cfti64( - boost::intrusive_ptr &out, boost::intrusive_ptr &Lin, - boost::intrusive_ptr &Rin, const unsigned long long &len, - const std::vector &shape, const std::vector &invmapper_L, - const std::vector &invmapper_R, const char &type); - void iArithmetic_internal_cftu64( - boost::intrusive_ptr &out, boost::intrusive_ptr &Lin, - boost::intrusive_ptr &Rin, const unsigned long long &len, - const std::vector &shape, const std::vector &invmapper_L, - const std::vector &invmapper_R, const char &type); - void iArithmetic_internal_cfti32( - boost::intrusive_ptr &out, boost::intrusive_ptr &Lin, - boost::intrusive_ptr &Rin, const unsigned long long &len, - const std::vector &shape, const std::vector &invmapper_L, - const std::vector &invmapper_R, const char &type); - void iArithmetic_internal_cftu32( - boost::intrusive_ptr &out, boost::intrusive_ptr &Lin, - boost::intrusive_ptr &Rin, const unsigned long long &len, - const std::vector &shape, const std::vector &invmapper_L, - const std::vector &invmapper_R, const char &type); - void iArithmetic_internal_cftu16( - boost::intrusive_ptr &out, boost::intrusive_ptr &Lin, - boost::intrusive_ptr &Rin, const unsigned long long &len, - const std::vector &shape, const std::vector &invmapper_L, - const std::vector &invmapper_R, const char &type); - void iArithmetic_internal_cfti16( - boost::intrusive_ptr &out, boost::intrusive_ptr &Lin, - boost::intrusive_ptr &Rin, const unsigned long long &len, - const std::vector &shape, const std::vector &invmapper_L, - const std::vector &invmapper_R, const char &type); - void iArithmetic_internal_cftb(boost::intrusive_ptr &out, - boost::intrusive_ptr &Lin, - boost::intrusive_ptr &Rin, - const unsigned long long &len, - const std::vector &shape, - const std::vector &invmapper_L, - const std::vector 
&invmapper_R, const char &type); - - void iArithmetic_internal_dtcd(boost::intrusive_ptr &out, - boost::intrusive_ptr &Lin, - boost::intrusive_ptr &Rin, - const unsigned long long &len, - const std::vector &shape, - const std::vector &invmapper_L, - const std::vector &invmapper_R, const char &type); - void iArithmetic_internal_dtcf(boost::intrusive_ptr &out, - boost::intrusive_ptr &Lin, - boost::intrusive_ptr &Rin, - const unsigned long long &len, - const std::vector &shape, - const std::vector &invmapper_L, - const std::vector &invmapper_R, const char &type); - void iArithmetic_internal_dtd(boost::intrusive_ptr &out, - boost::intrusive_ptr &Lin, - boost::intrusive_ptr &Rin, - const unsigned long long &len, - const std::vector &shape, - const std::vector &invmapper_L, - const std::vector &invmapper_R, const char &type); - void iArithmetic_internal_dtf(boost::intrusive_ptr &out, - boost::intrusive_ptr &Lin, - boost::intrusive_ptr &Rin, - const unsigned long long &len, - const std::vector &shape, - const std::vector &invmapper_L, - const std::vector &invmapper_R, const char &type); - void iArithmetic_internal_dti64(boost::intrusive_ptr &out, - boost::intrusive_ptr &Lin, - boost::intrusive_ptr &Rin, - const unsigned long long &len, - const std::vector &shape, - const std::vector &invmapper_L, - const std::vector &invmapper_R, const char &type); - void iArithmetic_internal_dtu64(boost::intrusive_ptr &out, - boost::intrusive_ptr &Lin, - boost::intrusive_ptr &Rin, - const unsigned long long &len, - const std::vector &shape, - const std::vector &invmapper_L, - const std::vector &invmapper_R, const char &type); - void iArithmetic_internal_dti32(boost::intrusive_ptr &out, - boost::intrusive_ptr &Lin, - boost::intrusive_ptr &Rin, - const unsigned long long &len, - const std::vector &shape, - const std::vector &invmapper_L, - const std::vector &invmapper_R, const char &type); - void iArithmetic_internal_dtu32(boost::intrusive_ptr &out, - boost::intrusive_ptr &Lin, - 
boost::intrusive_ptr &Rin, - const unsigned long long &len, - const std::vector &shape, - const std::vector &invmapper_L, - const std::vector &invmapper_R, const char &type); - void iArithmetic_internal_dtu16(boost::intrusive_ptr &out, - boost::intrusive_ptr &Lin, - boost::intrusive_ptr &Rin, - const unsigned long long &len, - const std::vector &shape, - const std::vector &invmapper_L, - const std::vector &invmapper_R, const char &type); - void iArithmetic_internal_dti16(boost::intrusive_ptr &out, - boost::intrusive_ptr &Lin, - boost::intrusive_ptr &Rin, - const unsigned long long &len, - const std::vector &shape, - const std::vector &invmapper_L, - const std::vector &invmapper_R, const char &type); - void iArithmetic_internal_dtb(boost::intrusive_ptr &out, - boost::intrusive_ptr &Lin, - boost::intrusive_ptr &Rin, - const unsigned long long &len, - const std::vector &shape, - const std::vector &invmapper_L, - const std::vector &invmapper_R, const char &type); - - void iArithmetic_internal_ftcd(boost::intrusive_ptr &out, - boost::intrusive_ptr &Lin, - boost::intrusive_ptr &Rin, - const unsigned long long &len, - const std::vector &shape, - const std::vector &invmapper_L, - const std::vector &invmapper_R, const char &type); - void iArithmetic_internal_ftcf(boost::intrusive_ptr &out, - boost::intrusive_ptr &Lin, - boost::intrusive_ptr &Rin, - const unsigned long long &len, - const std::vector &shape, - const std::vector &invmapper_L, - const std::vector &invmapper_R, const char &type); - void iArithmetic_internal_ftd(boost::intrusive_ptr &out, - boost::intrusive_ptr &Lin, - boost::intrusive_ptr &Rin, - const unsigned long long &len, - const std::vector &shape, - const std::vector &invmapper_L, - const std::vector &invmapper_R, const char &type); - void iArithmetic_internal_ftf(boost::intrusive_ptr &out, - boost::intrusive_ptr &Lin, - boost::intrusive_ptr &Rin, - const unsigned long long &len, - const std::vector &shape, - const std::vector &invmapper_L, - const 
std::vector &invmapper_R, const char &type); - void iArithmetic_internal_fti64(boost::intrusive_ptr &out, - boost::intrusive_ptr &Lin, - boost::intrusive_ptr &Rin, - const unsigned long long &len, - const std::vector &shape, - const std::vector &invmapper_L, - const std::vector &invmapper_R, const char &type); - void iArithmetic_internal_ftu64(boost::intrusive_ptr &out, - boost::intrusive_ptr &Lin, - boost::intrusive_ptr &Rin, - const unsigned long long &len, - const std::vector &shape, - const std::vector &invmapper_L, - const std::vector &invmapper_R, const char &type); - void iArithmetic_internal_fti32(boost::intrusive_ptr &out, - boost::intrusive_ptr &Lin, - boost::intrusive_ptr &Rin, - const unsigned long long &len, - const std::vector &shape, - const std::vector &invmapper_L, - const std::vector &invmapper_R, const char &type); - void iArithmetic_internal_ftu32(boost::intrusive_ptr &out, - boost::intrusive_ptr &Lin, - boost::intrusive_ptr &Rin, - const unsigned long long &len, - const std::vector &shape, - const std::vector &invmapper_L, - const std::vector &invmapper_R, const char &type); - void iArithmetic_internal_ftu16(boost::intrusive_ptr &out, - boost::intrusive_ptr &Lin, - boost::intrusive_ptr &Rin, - const unsigned long long &len, - const std::vector &shape, - const std::vector &invmapper_L, - const std::vector &invmapper_R, const char &type); - void iArithmetic_internal_fti16(boost::intrusive_ptr &out, - boost::intrusive_ptr &Lin, - boost::intrusive_ptr &Rin, - const unsigned long long &len, - const std::vector &shape, - const std::vector &invmapper_L, - const std::vector &invmapper_R, const char &type); - void iArithmetic_internal_ftb(boost::intrusive_ptr &out, - boost::intrusive_ptr &Lin, - boost::intrusive_ptr &Rin, - const unsigned long long &len, - const std::vector &shape, - const std::vector &invmapper_L, - const std::vector &invmapper_R, const char &type); - - void iArithmetic_internal_i64tcd( - boost::intrusive_ptr &out, boost::intrusive_ptr 
&Lin, - boost::intrusive_ptr &Rin, const unsigned long long &len, - const std::vector &shape, const std::vector &invmapper_L, - const std::vector &invmapper_R, const char &type); - void iArithmetic_internal_i64tcf( - boost::intrusive_ptr &out, boost::intrusive_ptr &Lin, - boost::intrusive_ptr &Rin, const unsigned long long &len, - const std::vector &shape, const std::vector &invmapper_L, - const std::vector &invmapper_R, const char &type); - void iArithmetic_internal_i64td(boost::intrusive_ptr &out, - boost::intrusive_ptr &Lin, - boost::intrusive_ptr &Rin, - const unsigned long long &len, - const std::vector &shape, - const std::vector &invmapper_L, - const std::vector &invmapper_R, const char &type); - void iArithmetic_internal_i64tf(boost::intrusive_ptr &out, - boost::intrusive_ptr &Lin, - boost::intrusive_ptr &Rin, - const unsigned long long &len, - const std::vector &shape, - const std::vector &invmapper_L, - const std::vector &invmapper_R, const char &type); - void iArithmetic_internal_i64ti64( - boost::intrusive_ptr &out, boost::intrusive_ptr &Lin, - boost::intrusive_ptr &Rin, const unsigned long long &len, - const std::vector &shape, const std::vector &invmapper_L, - const std::vector &invmapper_R, const char &type); - void iArithmetic_internal_i64tu64( - boost::intrusive_ptr &out, boost::intrusive_ptr &Lin, - boost::intrusive_ptr &Rin, const unsigned long long &len, - const std::vector &shape, const std::vector &invmapper_L, - const std::vector &invmapper_R, const char &type); - void iArithmetic_internal_i64ti32( - boost::intrusive_ptr &out, boost::intrusive_ptr &Lin, - boost::intrusive_ptr &Rin, const unsigned long long &len, - const std::vector &shape, const std::vector &invmapper_L, - const std::vector &invmapper_R, const char &type); - void iArithmetic_internal_i64tu32( - boost::intrusive_ptr &out, boost::intrusive_ptr &Lin, - boost::intrusive_ptr &Rin, const unsigned long long &len, - const std::vector &shape, const std::vector &invmapper_L, - const 
std::vector &invmapper_R, const char &type); - void iArithmetic_internal_i64tu16( - boost::intrusive_ptr &out, boost::intrusive_ptr &Lin, - boost::intrusive_ptr &Rin, const unsigned long long &len, - const std::vector &shape, const std::vector &invmapper_L, - const std::vector &invmapper_R, const char &type); - void iArithmetic_internal_i64ti16( - boost::intrusive_ptr &out, boost::intrusive_ptr &Lin, - boost::intrusive_ptr &Rin, const unsigned long long &len, - const std::vector &shape, const std::vector &invmapper_L, - const std::vector &invmapper_R, const char &type); - void iArithmetic_internal_i64tb(boost::intrusive_ptr &out, - boost::intrusive_ptr &Lin, - boost::intrusive_ptr &Rin, - const unsigned long long &len, - const std::vector &shape, - const std::vector &invmapper_L, - const std::vector &invmapper_R, const char &type); - - void iArithmetic_internal_u64tcd( - boost::intrusive_ptr &out, boost::intrusive_ptr &Lin, - boost::intrusive_ptr &Rin, const unsigned long long &len, - const std::vector &shape, const std::vector &invmapper_L, - const std::vector &invmapper_R, const char &type); - void iArithmetic_internal_u64tcf( - boost::intrusive_ptr &out, boost::intrusive_ptr &Lin, - boost::intrusive_ptr &Rin, const unsigned long long &len, - const std::vector &shape, const std::vector &invmapper_L, - const std::vector &invmapper_R, const char &type); - void iArithmetic_internal_u64td(boost::intrusive_ptr &out, - boost::intrusive_ptr &Lin, - boost::intrusive_ptr &Rin, - const unsigned long long &len, - const std::vector &shape, - const std::vector &invmapper_L, - const std::vector &invmapper_R, const char &type); - void iArithmetic_internal_u64tf(boost::intrusive_ptr &out, - boost::intrusive_ptr &Lin, - boost::intrusive_ptr &Rin, - const unsigned long long &len, - const std::vector &shape, - const std::vector &invmapper_L, - const std::vector &invmapper_R, const char &type); - void iArithmetic_internal_u64ti64( - boost::intrusive_ptr &out, boost::intrusive_ptr 
&Lin, - boost::intrusive_ptr &Rin, const unsigned long long &len, - const std::vector &shape, const std::vector &invmapper_L, - const std::vector &invmapper_R, const char &type); - void iArithmetic_internal_u64tu64( - boost::intrusive_ptr &out, boost::intrusive_ptr &Lin, - boost::intrusive_ptr &Rin, const unsigned long long &len, - const std::vector &shape, const std::vector &invmapper_L, - const std::vector &invmapper_R, const char &type); - void iArithmetic_internal_u64ti32( - boost::intrusive_ptr &out, boost::intrusive_ptr &Lin, - boost::intrusive_ptr &Rin, const unsigned long long &len, - const std::vector &shape, const std::vector &invmapper_L, - const std::vector &invmapper_R, const char &type); - void iArithmetic_internal_u64tu32( - boost::intrusive_ptr &out, boost::intrusive_ptr &Lin, - boost::intrusive_ptr &Rin, const unsigned long long &len, - const std::vector &shape, const std::vector &invmapper_L, - const std::vector &invmapper_R, const char &type); - void iArithmetic_internal_u64ti16( - boost::intrusive_ptr &out, boost::intrusive_ptr &Lin, - boost::intrusive_ptr &Rin, const unsigned long long &len, - const std::vector &shape, const std::vector &invmapper_L, - const std::vector &invmapper_R, const char &type); - void iArithmetic_internal_u64tu16( - boost::intrusive_ptr &out, boost::intrusive_ptr &Lin, - boost::intrusive_ptr &Rin, const unsigned long long &len, - const std::vector &shape, const std::vector &invmapper_L, - const std::vector &invmapper_R, const char &type); - void iArithmetic_internal_u64tb(boost::intrusive_ptr &out, - boost::intrusive_ptr &Lin, - boost::intrusive_ptr &Rin, - const unsigned long long &len, - const std::vector &shape, - const std::vector &invmapper_L, - const std::vector &invmapper_R, const char &type); - - void iArithmetic_internal_i32tcd( - boost::intrusive_ptr &out, boost::intrusive_ptr &Lin, - boost::intrusive_ptr &Rin, const unsigned long long &len, - const std::vector &shape, const std::vector &invmapper_L, - const 
std::vector &invmapper_R, const char &type); - void iArithmetic_internal_i32tcf( - boost::intrusive_ptr &out, boost::intrusive_ptr &Lin, - boost::intrusive_ptr &Rin, const unsigned long long &len, - const std::vector &shape, const std::vector &invmapper_L, - const std::vector &invmapper_R, const char &type); - void iArithmetic_internal_i32td(boost::intrusive_ptr &out, - boost::intrusive_ptr &Lin, - boost::intrusive_ptr &Rin, - const unsigned long long &len, - const std::vector &shape, - const std::vector &invmapper_L, - const std::vector &invmapper_R, const char &type); - void iArithmetic_internal_i32tf(boost::intrusive_ptr &out, - boost::intrusive_ptr &Lin, - boost::intrusive_ptr &Rin, - const unsigned long long &len, - const std::vector &shape, - const std::vector &invmapper_L, - const std::vector &invmapper_R, const char &type); - void iArithmetic_internal_i32ti64( - boost::intrusive_ptr &out, boost::intrusive_ptr &Lin, - boost::intrusive_ptr &Rin, const unsigned long long &len, - const std::vector &shape, const std::vector &invmapper_L, - const std::vector &invmapper_R, const char &type); - void iArithmetic_internal_i32tu64( - boost::intrusive_ptr &out, boost::intrusive_ptr &Lin, - boost::intrusive_ptr &Rin, const unsigned long long &len, - const std::vector &shape, const std::vector &invmapper_L, - const std::vector &invmapper_R, const char &type); - void iArithmetic_internal_i32ti32( - boost::intrusive_ptr &out, boost::intrusive_ptr &Lin, - boost::intrusive_ptr &Rin, const unsigned long long &len, - const std::vector &shape, const std::vector &invmapper_L, - const std::vector &invmapper_R, const char &type); - void iArithmetic_internal_i32tu32( - boost::intrusive_ptr &out, boost::intrusive_ptr &Lin, - boost::intrusive_ptr &Rin, const unsigned long long &len, - const std::vector &shape, const std::vector &invmapper_L, - const std::vector &invmapper_R, const char &type); - void iArithmetic_internal_i32tu16( - boost::intrusive_ptr &out, boost::intrusive_ptr 
&Lin, - boost::intrusive_ptr &Rin, const unsigned long long &len, - const std::vector &shape, const std::vector &invmapper_L, - const std::vector &invmapper_R, const char &type); - void iArithmetic_internal_i32ti16( - boost::intrusive_ptr &out, boost::intrusive_ptr &Lin, - boost::intrusive_ptr &Rin, const unsigned long long &len, - const std::vector &shape, const std::vector &invmapper_L, - const std::vector &invmapper_R, const char &type); - void iArithmetic_internal_i32tb(boost::intrusive_ptr &out, - boost::intrusive_ptr &Lin, - boost::intrusive_ptr &Rin, - const unsigned long long &len, - const std::vector &shape, - const std::vector &invmapper_L, - const std::vector &invmapper_R, const char &type); - - void iArithmetic_internal_u32tcd( - boost::intrusive_ptr &out, boost::intrusive_ptr &Lin, - boost::intrusive_ptr &Rin, const unsigned long long &len, - const std::vector &shape, const std::vector &invmapper_L, - const std::vector &invmapper_R, const char &type); - void iArithmetic_internal_u32tcf( - boost::intrusive_ptr &out, boost::intrusive_ptr &Lin, - boost::intrusive_ptr &Rin, const unsigned long long &len, - const std::vector &shape, const std::vector &invmapper_L, - const std::vector &invmapper_R, const char &type); - void iArithmetic_internal_u32td(boost::intrusive_ptr &out, - boost::intrusive_ptr &Lin, - boost::intrusive_ptr &Rin, - const unsigned long long &len, - const std::vector &shape, - const std::vector &invmapper_L, - const std::vector &invmapper_R, const char &type); - void iArithmetic_internal_u32tf(boost::intrusive_ptr &out, - boost::intrusive_ptr &Lin, - boost::intrusive_ptr &Rin, - const unsigned long long &len, - const std::vector &shape, - const std::vector &invmapper_L, - const std::vector &invmapper_R, const char &type); - void iArithmetic_internal_u32ti64( - boost::intrusive_ptr &out, boost::intrusive_ptr &Lin, - boost::intrusive_ptr &Rin, const unsigned long long &len, - const std::vector &shape, const std::vector &invmapper_L, - const 
std::vector &invmapper_R, const char &type); - void iArithmetic_internal_u32tu64( - boost::intrusive_ptr &out, boost::intrusive_ptr &Lin, - boost::intrusive_ptr &Rin, const unsigned long long &len, - const std::vector &shape, const std::vector &invmapper_L, - const std::vector &invmapper_R, const char &type); - void iArithmetic_internal_u32ti32( - boost::intrusive_ptr &out, boost::intrusive_ptr &Lin, - boost::intrusive_ptr &Rin, const unsigned long long &len, - const std::vector &shape, const std::vector &invmapper_L, - const std::vector &invmapper_R, const char &type); - void iArithmetic_internal_u32tu32( - boost::intrusive_ptr &out, boost::intrusive_ptr &Lin, - boost::intrusive_ptr &Rin, const unsigned long long &len, - const std::vector &shape, const std::vector &invmapper_L, - const std::vector &invmapper_R, const char &type); - void iArithmetic_internal_u32tu16( - boost::intrusive_ptr &out, boost::intrusive_ptr &Lin, - boost::intrusive_ptr &Rin, const unsigned long long &len, - const std::vector &shape, const std::vector &invmapper_L, - const std::vector &invmapper_R, const char &type); - void iArithmetic_internal_u32ti16( - boost::intrusive_ptr &out, boost::intrusive_ptr &Lin, - boost::intrusive_ptr &Rin, const unsigned long long &len, - const std::vector &shape, const std::vector &invmapper_L, - const std::vector &invmapper_R, const char &type); - void iArithmetic_internal_u32tb(boost::intrusive_ptr &out, - boost::intrusive_ptr &Lin, - boost::intrusive_ptr &Rin, - const unsigned long long &len, - const std::vector &shape, - const std::vector &invmapper_L, - const std::vector &invmapper_R, const char &type); - - void iArithmetic_internal_i16tcd( - boost::intrusive_ptr &out, boost::intrusive_ptr &Lin, - boost::intrusive_ptr &Rin, const unsigned long long &len, - const std::vector &shape, const std::vector &invmapper_L, - const std::vector &invmapper_R, const char &type); - void iArithmetic_internal_i16tcf( - boost::intrusive_ptr &out, boost::intrusive_ptr 
&Lin, - boost::intrusive_ptr &Rin, const unsigned long long &len, - const std::vector &shape, const std::vector &invmapper_L, - const std::vector &invmapper_R, const char &type); - void iArithmetic_internal_i16td(boost::intrusive_ptr &out, - boost::intrusive_ptr &Lin, - boost::intrusive_ptr &Rin, - const unsigned long long &len, - const std::vector &shape, - const std::vector &invmapper_L, - const std::vector &invmapper_R, const char &type); - void iArithmetic_internal_i16tf(boost::intrusive_ptr &out, - boost::intrusive_ptr &Lin, - boost::intrusive_ptr &Rin, - const unsigned long long &len, - const std::vector &shape, - const std::vector &invmapper_L, - const std::vector &invmapper_R, const char &type); - void iArithmetic_internal_i16ti64( - boost::intrusive_ptr &out, boost::intrusive_ptr &Lin, - boost::intrusive_ptr &Rin, const unsigned long long &len, - const std::vector &shape, const std::vector &invmapper_L, - const std::vector &invmapper_R, const char &type); - void iArithmetic_internal_i16tu64( - boost::intrusive_ptr &out, boost::intrusive_ptr &Lin, - boost::intrusive_ptr &Rin, const unsigned long long &len, - const std::vector &shape, const std::vector &invmapper_L, - const std::vector &invmapper_R, const char &type); - void iArithmetic_internal_i16ti32( - boost::intrusive_ptr &out, boost::intrusive_ptr &Lin, - boost::intrusive_ptr &Rin, const unsigned long long &len, - const std::vector &shape, const std::vector &invmapper_L, - const std::vector &invmapper_R, const char &type); - void iArithmetic_internal_i16tu32( - boost::intrusive_ptr &out, boost::intrusive_ptr &Lin, - boost::intrusive_ptr &Rin, const unsigned long long &len, - const std::vector &shape, const std::vector &invmapper_L, - const std::vector &invmapper_R, const char &type); - void iArithmetic_internal_i16tu16( - boost::intrusive_ptr &out, boost::intrusive_ptr &Lin, - boost::intrusive_ptr &Rin, const unsigned long long &len, - const std::vector &shape, const std::vector &invmapper_L, - const 
std::vector &invmapper_R, const char &type); - void iArithmetic_internal_i16ti16( - boost::intrusive_ptr &out, boost::intrusive_ptr &Lin, - boost::intrusive_ptr &Rin, const unsigned long long &len, - const std::vector &shape, const std::vector &invmapper_L, - const std::vector &invmapper_R, const char &type); - void iArithmetic_internal_i16tb(boost::intrusive_ptr &out, - boost::intrusive_ptr &Lin, - boost::intrusive_ptr &Rin, - const unsigned long long &len, - const std::vector &shape, - const std::vector &invmapper_L, - const std::vector &invmapper_R, const char &type); - - void iArithmetic_internal_u16tcd( - boost::intrusive_ptr &out, boost::intrusive_ptr &Lin, - boost::intrusive_ptr &Rin, const unsigned long long &len, - const std::vector &shape, const std::vector &invmapper_L, - const std::vector &invmapper_R, const char &type); - void iArithmetic_internal_u16tcf( - boost::intrusive_ptr &out, boost::intrusive_ptr &Lin, - boost::intrusive_ptr &Rin, const unsigned long long &len, - const std::vector &shape, const std::vector &invmapper_L, - const std::vector &invmapper_R, const char &type); - void iArithmetic_internal_u16td(boost::intrusive_ptr &out, - boost::intrusive_ptr &Lin, - boost::intrusive_ptr &Rin, - const unsigned long long &len, - const std::vector &shape, - const std::vector &invmapper_L, - const std::vector &invmapper_R, const char &type); - void iArithmetic_internal_u16tf(boost::intrusive_ptr &out, - boost::intrusive_ptr &Lin, - boost::intrusive_ptr &Rin, - const unsigned long long &len, - const std::vector &shape, - const std::vector &invmapper_L, - const std::vector &invmapper_R, const char &type); - void iArithmetic_internal_u16ti64( - boost::intrusive_ptr &out, boost::intrusive_ptr &Lin, - boost::intrusive_ptr &Rin, const unsigned long long &len, - const std::vector &shape, const std::vector &invmapper_L, - const std::vector &invmapper_R, const char &type); - void iArithmetic_internal_u16tu64( - boost::intrusive_ptr &out, boost::intrusive_ptr 
&Lin, - boost::intrusive_ptr &Rin, const unsigned long long &len, - const std::vector &shape, const std::vector &invmapper_L, - const std::vector &invmapper_R, const char &type); - void iArithmetic_internal_u16ti32( - boost::intrusive_ptr &out, boost::intrusive_ptr &Lin, - boost::intrusive_ptr &Rin, const unsigned long long &len, - const std::vector &shape, const std::vector &invmapper_L, - const std::vector &invmapper_R, const char &type); - void iArithmetic_internal_u16tu32( - boost::intrusive_ptr &out, boost::intrusive_ptr &Lin, - boost::intrusive_ptr &Rin, const unsigned long long &len, - const std::vector &shape, const std::vector &invmapper_L, - const std::vector &invmapper_R, const char &type); - void iArithmetic_internal_u16tu16( - boost::intrusive_ptr &out, boost::intrusive_ptr &Lin, - boost::intrusive_ptr &Rin, const unsigned long long &len, - const std::vector &shape, const std::vector &invmapper_L, - const std::vector &invmapper_R, const char &type); - void iArithmetic_internal_u16ti16( - boost::intrusive_ptr &out, boost::intrusive_ptr &Lin, - boost::intrusive_ptr &Rin, const unsigned long long &len, - const std::vector &shape, const std::vector &invmapper_L, - const std::vector &invmapper_R, const char &type); - void iArithmetic_internal_u16tb(boost::intrusive_ptr &out, - boost::intrusive_ptr &Lin, - boost::intrusive_ptr &Rin, - const unsigned long long &len, - const std::vector &shape, - const std::vector &invmapper_L, - const std::vector &invmapper_R, const char &type); - - void iArithmetic_internal_btcd(boost::intrusive_ptr &out, - boost::intrusive_ptr &Lin, - boost::intrusive_ptr &Rin, - const unsigned long long &len, - const std::vector &shape, - const std::vector &invmapper_L, - const std::vector &invmapper_R, const char &type); - void iArithmetic_internal_btcf(boost::intrusive_ptr &out, - boost::intrusive_ptr &Lin, - boost::intrusive_ptr &Rin, - const unsigned long long &len, - const std::vector &shape, - const std::vector &invmapper_L, - const 
std::vector &invmapper_R, const char &type); - void iArithmetic_internal_btd(boost::intrusive_ptr &out, - boost::intrusive_ptr &Lin, - boost::intrusive_ptr &Rin, - const unsigned long long &len, - const std::vector &shape, - const std::vector &invmapper_L, - const std::vector &invmapper_R, const char &type); - void iArithmetic_internal_btf(boost::intrusive_ptr &out, - boost::intrusive_ptr &Lin, - boost::intrusive_ptr &Rin, - const unsigned long long &len, - const std::vector &shape, - const std::vector &invmapper_L, - const std::vector &invmapper_R, const char &type); - void iArithmetic_internal_bti64(boost::intrusive_ptr &out, - boost::intrusive_ptr &Lin, - boost::intrusive_ptr &Rin, - const unsigned long long &len, - const std::vector &shape, - const std::vector &invmapper_L, - const std::vector &invmapper_R, const char &type); - void iArithmetic_internal_btu64(boost::intrusive_ptr &out, - boost::intrusive_ptr &Lin, - boost::intrusive_ptr &Rin, - const unsigned long long &len, - const std::vector &shape, - const std::vector &invmapper_L, - const std::vector &invmapper_R, const char &type); - void iArithmetic_internal_bti32(boost::intrusive_ptr &out, - boost::intrusive_ptr &Lin, - boost::intrusive_ptr &Rin, - const unsigned long long &len, - const std::vector &shape, - const std::vector &invmapper_L, - const std::vector &invmapper_R, const char &type); - void iArithmetic_internal_btu32(boost::intrusive_ptr &out, - boost::intrusive_ptr &Lin, - boost::intrusive_ptr &Rin, - const unsigned long long &len, - const std::vector &shape, - const std::vector &invmapper_L, - const std::vector &invmapper_R, const char &type); - void iArithmetic_internal_btu16(boost::intrusive_ptr &out, - boost::intrusive_ptr &Lin, - boost::intrusive_ptr &Rin, - const unsigned long long &len, - const std::vector &shape, - const std::vector &invmapper_L, - const std::vector &invmapper_R, const char &type); - void iArithmetic_internal_bti16(boost::intrusive_ptr &out, - boost::intrusive_ptr &Lin, 
- boost::intrusive_ptr &Rin, - const unsigned long long &len, - const std::vector &shape, - const std::vector &invmapper_L, - const std::vector &invmapper_R, const char &type); - void iArithmetic_internal_btb(boost::intrusive_ptr &out, - boost::intrusive_ptr &Lin, - boost::intrusive_ptr &Rin, - const unsigned long long &len, - const std::vector &shape, - const std::vector &invmapper_L, - const std::vector &invmapper_R, const char &type); - - } // namespace linalg_internal -} // namespace cytnx - -#endif // CYTNX_BACKEND_LINALG_INTERNAL_CPU_IARITHMETIC_INTERNAL_H_ diff --git a/src/backend/linalg_internal_interface.cpp b/src/backend/linalg_internal_interface.cpp index fbb800d98..18925dea8 100644 --- a/src/backend/linalg_internal_interface.cpp +++ b/src/backend/linalg_internal_interface.cpp @@ -13,141 +13,6 @@ namespace cytnx { linalg_internal_interface::~linalg_internal_interface() {} linalg_internal_interface::linalg_internal_interface() { - iAri_ii = - vector>(N_Type, vector(N_Type, NULL)); - - iAri_ii[Type.ComplexDouble][Type.ComplexDouble] = iArithmetic_internal_cdtcd; - iAri_ii[Type.ComplexDouble][Type.ComplexFloat] = iArithmetic_internal_cdtcf; - iAri_ii[Type.ComplexDouble][Type.Double] = iArithmetic_internal_cdtd; - iAri_ii[Type.ComplexDouble][Type.Float] = iArithmetic_internal_cdtf; - iAri_ii[Type.ComplexDouble][Type.Int64] = iArithmetic_internal_cdti64; - iAri_ii[Type.ComplexDouble][Type.Uint64] = iArithmetic_internal_cdtu64; - iAri_ii[Type.ComplexDouble][Type.Int32] = iArithmetic_internal_cdti32; - iAri_ii[Type.ComplexDouble][Type.Uint32] = iArithmetic_internal_cdtu32; - iAri_ii[Type.ComplexDouble][Type.Uint16] = iArithmetic_internal_cdtu16; - iAri_ii[Type.ComplexDouble][Type.Int16] = iArithmetic_internal_cdti16; - iAri_ii[Type.ComplexDouble][Type.Bool] = iArithmetic_internal_cdtb; - - iAri_ii[Type.ComplexFloat][Type.ComplexDouble] = iArithmetic_internal_cftcd; - iAri_ii[Type.ComplexFloat][Type.ComplexFloat] = iArithmetic_internal_cftcf; - 
iAri_ii[Type.ComplexFloat][Type.Double] = iArithmetic_internal_cftd; - iAri_ii[Type.ComplexFloat][Type.Float] = iArithmetic_internal_cftf; - iAri_ii[Type.ComplexFloat][Type.Int64] = iArithmetic_internal_cfti64; - iAri_ii[Type.ComplexFloat][Type.Uint64] = iArithmetic_internal_cftu64; - iAri_ii[Type.ComplexFloat][Type.Int32] = iArithmetic_internal_cfti32; - iAri_ii[Type.ComplexFloat][Type.Uint32] = iArithmetic_internal_cftu32; - iAri_ii[Type.ComplexFloat][Type.Uint16] = iArithmetic_internal_cftu16; - iAri_ii[Type.ComplexFloat][Type.Int16] = iArithmetic_internal_cfti16; - iAri_ii[Type.ComplexFloat][Type.Bool] = iArithmetic_internal_cftb; - - iAri_ii[Type.Double][Type.ComplexDouble] = iArithmetic_internal_dtcd; - iAri_ii[Type.Double][Type.ComplexFloat] = iArithmetic_internal_dtcf; - iAri_ii[Type.Double][Type.Double] = iArithmetic_internal_dtd; - iAri_ii[Type.Double][Type.Float] = iArithmetic_internal_dtf; - iAri_ii[Type.Double][Type.Int64] = iArithmetic_internal_dti64; - iAri_ii[Type.Double][Type.Uint64] = iArithmetic_internal_dtu64; - iAri_ii[Type.Double][Type.Int32] = iArithmetic_internal_dti32; - iAri_ii[Type.Double][Type.Uint32] = iArithmetic_internal_dtu32; - iAri_ii[Type.Double][Type.Uint16] = iArithmetic_internal_dtu16; - iAri_ii[Type.Double][Type.Int16] = iArithmetic_internal_dti16; - iAri_ii[Type.Double][Type.Bool] = iArithmetic_internal_dtb; - - iAri_ii[Type.Float][Type.ComplexDouble] = iArithmetic_internal_ftcd; - iAri_ii[Type.Float][Type.ComplexFloat] = iArithmetic_internal_ftcf; - iAri_ii[Type.Float][Type.Double] = iArithmetic_internal_ftd; - iAri_ii[Type.Float][Type.Float] = iArithmetic_internal_ftf; - iAri_ii[Type.Float][Type.Int64] = iArithmetic_internal_fti64; - iAri_ii[Type.Float][Type.Uint64] = iArithmetic_internal_ftu64; - iAri_ii[Type.Float][Type.Int32] = iArithmetic_internal_fti32; - iAri_ii[Type.Float][Type.Uint32] = iArithmetic_internal_ftu32; - iAri_ii[Type.Float][Type.Uint16] = iArithmetic_internal_ftu16; - iAri_ii[Type.Float][Type.Int16] = 
iArithmetic_internal_fti16; - iAri_ii[Type.Float][Type.Bool] = iArithmetic_internal_ftb; - - iAri_ii[Type.Int64][Type.ComplexDouble] = iArithmetic_internal_i64tcd; - iAri_ii[Type.Int64][Type.ComplexFloat] = iArithmetic_internal_i64tcf; - iAri_ii[Type.Int64][Type.Double] = iArithmetic_internal_i64td; - iAri_ii[Type.Int64][Type.Float] = iArithmetic_internal_i64tf; - iAri_ii[Type.Int64][Type.Int64] = iArithmetic_internal_i64ti64; - iAri_ii[Type.Int64][Type.Uint64] = iArithmetic_internal_i64tu64; - iAri_ii[Type.Int64][Type.Int32] = iArithmetic_internal_i64ti32; - iAri_ii[Type.Int64][Type.Uint32] = iArithmetic_internal_i64tu32; - iAri_ii[Type.Int64][Type.Uint16] = iArithmetic_internal_i64tu16; - iAri_ii[Type.Int64][Type.Int16] = iArithmetic_internal_i64ti16; - iAri_ii[Type.Int64][Type.Bool] = iArithmetic_internal_i64tb; - - iAri_ii[Type.Uint64][Type.ComplexDouble] = iArithmetic_internal_u64tcd; - iAri_ii[Type.Uint64][Type.ComplexFloat] = iArithmetic_internal_u64tcf; - iAri_ii[Type.Uint64][Type.Double] = iArithmetic_internal_u64td; - iAri_ii[Type.Uint64][Type.Float] = iArithmetic_internal_u64tf; - iAri_ii[Type.Uint64][Type.Int64] = iArithmetic_internal_u64ti64; - iAri_ii[Type.Uint64][Type.Uint64] = iArithmetic_internal_u64tu64; - iAri_ii[Type.Uint64][Type.Int32] = iArithmetic_internal_u64ti32; - iAri_ii[Type.Uint64][Type.Uint32] = iArithmetic_internal_u64tu32; - iAri_ii[Type.Uint64][Type.Uint16] = iArithmetic_internal_u64tu16; - iAri_ii[Type.Uint64][Type.Int16] = iArithmetic_internal_u64ti16; - iAri_ii[Type.Uint64][Type.Bool] = iArithmetic_internal_u64tb; - - iAri_ii[Type.Int32][Type.ComplexDouble] = iArithmetic_internal_i32tcd; - iAri_ii[Type.Int32][Type.ComplexFloat] = iArithmetic_internal_i32tcf; - iAri_ii[Type.Int32][Type.Double] = iArithmetic_internal_i32td; - iAri_ii[Type.Int32][Type.Float] = iArithmetic_internal_i32tf; - iAri_ii[Type.Int32][Type.Int64] = iArithmetic_internal_i32ti64; - iAri_ii[Type.Int32][Type.Uint64] = iArithmetic_internal_i32tu64; - 
iAri_ii[Type.Int32][Type.Int32] = iArithmetic_internal_i32ti32; - iAri_ii[Type.Int32][Type.Uint32] = iArithmetic_internal_i32tu32; - iAri_ii[Type.Int32][Type.Uint16] = iArithmetic_internal_i32tu16; - iAri_ii[Type.Int32][Type.Int16] = iArithmetic_internal_i32ti16; - iAri_ii[Type.Int32][Type.Bool] = iArithmetic_internal_i32tb; - - iAri_ii[Type.Uint32][Type.ComplexDouble] = iArithmetic_internal_u32tcd; - iAri_ii[Type.Uint32][Type.ComplexFloat] = iArithmetic_internal_u32tcf; - iAri_ii[Type.Uint32][Type.Double] = iArithmetic_internal_u32td; - iAri_ii[Type.Uint32][Type.Float] = iArithmetic_internal_u32tf; - iAri_ii[Type.Uint32][Type.Int64] = iArithmetic_internal_u32ti64; - iAri_ii[Type.Uint32][Type.Uint64] = iArithmetic_internal_u32tu64; - iAri_ii[Type.Uint32][Type.Int32] = iArithmetic_internal_u32ti32; - iAri_ii[Type.Uint32][Type.Uint32] = iArithmetic_internal_u32tu32; - iAri_ii[Type.Uint32][Type.Uint16] = iArithmetic_internal_u32tu16; - iAri_ii[Type.Uint32][Type.Int16] = iArithmetic_internal_u32ti16; - iAri_ii[Type.Uint32][Type.Bool] = iArithmetic_internal_u32tb; - - iAri_ii[Type.Int16][Type.ComplexDouble] = iArithmetic_internal_i16tcd; - iAri_ii[Type.Int16][Type.ComplexFloat] = iArithmetic_internal_i16tcf; - iAri_ii[Type.Int16][Type.Double] = iArithmetic_internal_i16td; - iAri_ii[Type.Int16][Type.Float] = iArithmetic_internal_i16tf; - iAri_ii[Type.Int16][Type.Int64] = iArithmetic_internal_i16ti64; - iAri_ii[Type.Int16][Type.Uint64] = iArithmetic_internal_i16tu64; - iAri_ii[Type.Int16][Type.Int32] = iArithmetic_internal_i16ti32; - iAri_ii[Type.Int16][Type.Uint32] = iArithmetic_internal_i16tu32; - iAri_ii[Type.Int16][Type.Uint16] = iArithmetic_internal_i16tu16; - iAri_ii[Type.Int16][Type.Int16] = iArithmetic_internal_i16ti16; - iAri_ii[Type.Int16][Type.Bool] = iArithmetic_internal_i16tb; - - iAri_ii[Type.Uint16][Type.ComplexDouble] = iArithmetic_internal_u16tcd; - iAri_ii[Type.Uint16][Type.ComplexFloat] = iArithmetic_internal_u16tcf; - iAri_ii[Type.Uint16][Type.Double] 
= iArithmetic_internal_u16td; - iAri_ii[Type.Uint16][Type.Float] = iArithmetic_internal_u16tf; - iAri_ii[Type.Uint16][Type.Int64] = iArithmetic_internal_u16ti64; - iAri_ii[Type.Uint16][Type.Uint64] = iArithmetic_internal_u16tu64; - iAri_ii[Type.Uint16][Type.Int32] = iArithmetic_internal_u16ti32; - iAri_ii[Type.Uint16][Type.Uint32] = iArithmetic_internal_u16tu32; - iAri_ii[Type.Uint16][Type.Uint16] = iArithmetic_internal_u16tu16; - iAri_ii[Type.Uint16][Type.Int16] = iArithmetic_internal_u16ti16; - iAri_ii[Type.Uint16][Type.Bool] = iArithmetic_internal_u16tb; - - iAri_ii[Type.Bool][Type.ComplexDouble] = iArithmetic_internal_btcd; - iAri_ii[Type.Bool][Type.ComplexFloat] = iArithmetic_internal_btcf; - iAri_ii[Type.Bool][Type.Double] = iArithmetic_internal_btd; - iAri_ii[Type.Bool][Type.Float] = iArithmetic_internal_btf; - iAri_ii[Type.Bool][Type.Int64] = iArithmetic_internal_bti64; - iAri_ii[Type.Bool][Type.Uint64] = iArithmetic_internal_btu64; - iAri_ii[Type.Bool][Type.Int32] = iArithmetic_internal_bti32; - iAri_ii[Type.Bool][Type.Uint32] = iArithmetic_internal_btu32; - iAri_ii[Type.Bool][Type.Uint16] = iArithmetic_internal_btu16; - iAri_ii[Type.Bool][Type.Int16] = iArithmetic_internal_bti16; - iAri_ii[Type.Bool][Type.Bool] = iArithmetic_internal_btb; - //===================== QR_ii = vector(5); diff --git a/src/backend/linalg_internal_interface.hpp b/src/backend/linalg_internal_interface.hpp index df53fa504..91916f60c 100644 --- a/src/backend/linalg_internal_interface.hpp +++ b/src/backend/linalg_internal_interface.hpp @@ -41,7 +41,6 @@ #include "linalg_internal_cpu/Trace_internal.hpp" #include "linalg_internal_cpu/Tridiag_internal.hpp" #include "linalg_internal_cpu/Vectordot_internal.hpp" -#include "linalg_internal_cpu/iArithmetic_internal.hpp" #include "linalg_internal_cpu/memcpyTruncation.hpp" #ifdef UNI_GPU @@ -217,7 +216,6 @@ namespace cytnx { #endif class linalg_internal_interface { public: - std::vector> iAri_ii; std::vector Sdd_ii; std::vector Gesvd_ii; 
std::vector Eigh_ii; From 965a109ebb631d88c83845a6ef6b05dddeb9111b Mon Sep 17 00:00:00 2001 From: Ying-Jer Kao Date: Mon, 30 Mar 2026 16:31:58 +0800 Subject: [PATCH 4/7] style: clang-format inplace arithmetic visit helper --- src/linalg/iArithmetic_visit.hpp | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/src/linalg/iArithmetic_visit.hpp b/src/linalg/iArithmetic_visit.hpp index 33225edb3..a456dfc20 100644 --- a/src/linalg/iArithmetic_visit.hpp +++ b/src/linalg/iArithmetic_visit.hpp @@ -37,10 +37,11 @@ namespace cytnx { } template - inline void ApplyInplaceArithmeticKernel( - TLin *lhs, const TRin *rhs, const cytnx_uint64 &len, const bool &rhs_is_scalar, - const std::vector &shape, const std::vector &invmapper_L, - const std::vector &invmapper_R) { + inline void ApplyInplaceArithmeticKernel(TLin *lhs, const TRin *rhs, const cytnx_uint64 &len, + const bool &rhs_is_scalar, + const std::vector &shape, + const std::vector &invmapper_L, + const std::vector &invmapper_R) { if (rhs_is_scalar) { for (cytnx_uint64 i = 0; i < len; i++) { ApplyInplaceArithmeticOp(lhs[i], rhs[0]); @@ -97,7 +98,7 @@ namespace cytnx { using TR = std::remove_pointer_t; static_assert(!std::is_same_v); ApplyInplaceArithmeticKernel(lptr, rptr, len, rhs_is_scalar, shape, - invmapper_L, invmapper_R); + invmapper_L, invmapper_R); }, Rt.ptr()); }, From bc6f6d92e9f92951a54dae49e5ffadd4aed953aa Mon Sep 17 00:00:00 2001 From: Ying-Jer Kao Date: Mon, 30 Mar 2026 18:45:54 +0800 Subject: [PATCH 5/7] Refactor Add scalar dispatch and enforce CPU type promotion --- src/linalg/Add.cpp | 526 ++++++------------------------ tests/linalg_test/linalg_test.cpp | 23 ++ 2 files changed, 116 insertions(+), 433 deletions(-) diff --git a/src/linalg/Add.cpp b/src/linalg/Add.cpp index 4ee3bad05..15a0553bb 100644 --- a/src/linalg/Add.cpp +++ b/src/linalg/Add.cpp @@ -8,25 +8,75 @@ #include "backend/linalg_internal_interface.hpp" namespace cytnx { namespace linalg { + namespace detail { + inline 
unsigned int SelectAddOutputType(const unsigned int lhs_dtype, + const unsigned int rhs_dtype, const int device) { + #ifdef UNI_GPU + // Current GPU arithmetic tables still assume legacy min(lhs, rhs) promotion. + if (device != Device.cpu) { + return lhs_dtype < rhs_dtype ? lhs_dtype : rhs_dtype; + } + #else + (void)device; + #endif + return Type.type_promote(lhs_dtype, rhs_dtype); + } + + template + Tensor AddScalarTensorImpl(const TL &lc, const Tensor &Rt) { + const unsigned int lhs_dtype = Type.cy_typeid_v; + Storage Cnst(1, lhs_dtype); + Cnst.at(0) = lc; + + Tensor out; + out._impl = Rt._impl->_clone_meta_only(); + out._impl->storage() = + Storage(Rt._impl->storage().size(), + SelectAddOutputType(lhs_dtype, Rt.dtype(), Rt.device()), Rt.device()); + + if (Rt.device() == Device.cpu) { + std::visit( + [&](auto *rptr) { + using TR = std::remove_pointer_t; + cytnx::linalg_internal::AddInternalImpl( + out._impl->storage()._impl, Cnst._impl, Rt._impl->storage()._impl, + Rt._impl->storage()._impl->size(), {}, {}, {}); + }, + Rt.ptr()); + } else { + #ifdef UNI_GPU + checkCudaErrors(cudaSetDevice(Rt.device())); + linalg_internal::lii.cuAri_ii[lhs_dtype][Rt.dtype()]( + out._impl->storage()._impl, Cnst._impl, Rt._impl->storage()._impl, + Rt._impl->storage()._impl->size(), {}, {}, {}, 0); + #else + cytnx_error_msg(true, "[Add] fatal error, the tensor is on GPU without CUDA support.%s", + "\n"); + #endif + } + + return out; + } + } // namespace detail + Tensor Add(const Tensor &Lt, const Tensor &Rt) { cytnx_error_msg(Lt.device() != Rt.device(), "[Add] The two tensors cannot be on different devices.%s", "\n"); + const unsigned int out_dtype = + detail::SelectAddOutputType(Lt.dtype(), Rt.dtype(), Lt.device()); + Tensor out; bool icnst = false; if (Lt.shape().size() == 1 && Lt.shape()[0] == 1) { out._impl = Rt._impl->_clone_meta_only(); - out._impl->storage() = Storage( - Rt.storage().size(), Lt.dtype() < Rt.dtype() ? 
Lt.dtype() : Rt.dtype(), Rt.device()); - // out.Init(Rt.shape(),Lt.dtype() < Rt.dtype()?Lt.dtype():Rt.dtype(),Lt.device()); + out._impl->storage() = Storage(Rt.storage().size(), out_dtype, Rt.device()); icnst = true; } else if (Rt.shape().size() == 1 && Rt.shape()[0] == 1) { - // out.Init(Lt.shape(),Lt.dtype() < Rt.dtype()?Lt.dtype():Rt.dtype(),Lt.device()); out._impl = Lt._impl->_clone_meta_only(); - out._impl->storage() = Storage( - Lt.storage().size(), Lt.dtype() < Rt.dtype() ? Lt.dtype() : Rt.dtype(), Lt.device()); + out._impl->storage() = Storage(Lt.storage().size(), out_dtype, Lt.device()); icnst = true; } else { @@ -34,7 +84,7 @@ namespace cytnx { "[Add] The two tensors do not have the same shape. Lt rank: [%d] " "Rt rank: [%d] %s", Lt.shape().size(), Rt.shape().size(), "\n"); - out.Init(Lt.shape(), Lt.dtype() < Rt.dtype() ? Lt.dtype() : Rt.dtype(), Lt.device()); + out.Init(Lt.shape(), out_dtype, Lt.device()); } // if contiguous, then no need to calculate the mappers @@ -98,383 +148,11 @@ namespace cytnx { } //----------------------------------------------------------------------------------- - template <> - Tensor Add(const cytnx_complex128 &lc, const Tensor &Rt) { - Storage Cnst(1, Type.ComplexDouble); - Cnst.at(0) = lc; - Tensor out; - out._impl = Rt._impl->_clone_meta_only(); - out._impl->storage() = Storage(Rt._impl->storage().size(), Type.ComplexDouble, Rt.device()); - // Tensor out(Rt.shape(),Type.ComplexDouble,Rt.device()); - - if (Rt.device() == Device.cpu) { - std::visit( - [&](auto *rptr) { - using TR = std::remove_pointer_t; - cytnx::linalg_internal::AddInternalImpl( - out._impl->storage()._impl, Cnst._impl, Rt._impl->storage()._impl, - Rt._impl->storage()._impl->size(), {}, {}, {}); - }, - Rt.ptr()); - } else { - #ifdef UNI_GPU - checkCudaErrors(cudaSetDevice(Rt.device())); - linalg_internal::lii.cuAri_ii[Type.ComplexDouble][Rt.dtype()]( - out._impl->storage()._impl, Cnst._impl, Rt._impl->storage()._impl, - Rt._impl->storage()._impl->size(), {}, 
{}, {}, 0); - #else - cytnx_error_msg(true, "[Add] fatal error, the tensor is on GPU without CUDA support.%s", - "\n"); - #endif - } - - return out; - } - - template <> - Tensor Add(const cytnx_complex64 &lc, const Tensor &Rt) { - Storage Cnst(1, Type.ComplexFloat); - Cnst.at(0) = lc; - Tensor out; - out._impl = Rt._impl->_clone_meta_only(); - out._impl->storage() = - Storage(Rt._impl->storage().size(), - Type.ComplexFloat < Rt.dtype() ? Type.ComplexFloat : Rt.dtype(), Rt.device()); - // Tensor out(Rt.shape(),Type.ComplexFloat < - // Rt.dtype()?Type.ComplexFloat:Rt.dtype(),Rt.device()); - - if (Rt.device() == Device.cpu) { - std::visit( - [&](auto *rptr) { - using TR = std::remove_pointer_t; - cytnx::linalg_internal::AddInternalImpl( - out._impl->storage()._impl, Cnst._impl, Rt._impl->storage()._impl, - Rt._impl->storage()._impl->size(), {}, {}, {}); - }, - Rt.ptr()); - } else { - #ifdef UNI_GPU - checkCudaErrors(cudaSetDevice(Rt.device())); - linalg_internal::lii.cuAri_ii[Type.ComplexFloat][Rt.dtype()]( - out._impl->storage()._impl, Cnst._impl, Rt._impl->storage()._impl, - Rt._impl->storage()._impl->size(), {}, {}, {}, 0); - #else - cytnx_error_msg(true, "[Add] fatal error, the tensor is on GPU without CUDA support.%s", - "\n"); - #endif - } - - return out; - } - - template <> - Tensor Add(const cytnx_double &lc, const Tensor &Rt) { - Storage Cnst(1, Type.Double); - Cnst.at(0) = lc; - Tensor out; - out._impl = Rt._impl->_clone_meta_only(); - out._impl->storage() = - Storage(Rt._impl->storage().size(), Type.Double < Rt.dtype() ? 
Type.Double : Rt.dtype(), - Rt.device()); - // Tensor out(Rt.shape(),Type.Double < Rt.dtype()?Type.Double:Rt.dtype(),Rt.device()); - - if (Rt.device() == Device.cpu) { - std::visit( - [&](auto *rptr) { - using TR = std::remove_pointer_t; - cytnx::linalg_internal::AddInternalImpl( - out._impl->storage()._impl, Cnst._impl, Rt._impl->storage()._impl, - Rt._impl->storage()._impl->size(), {}, {}, {}); - }, - Rt.ptr()); - } else { - #ifdef UNI_GPU - checkCudaErrors(cudaSetDevice(Rt.device())); - linalg_internal::lii.cuAri_ii[Type.Double][Rt.dtype()]( - out._impl->storage()._impl, Cnst._impl, Rt._impl->storage()._impl, - Rt._impl->storage()._impl->size(), {}, {}, {}, 0); - #else - cytnx_error_msg(true, "[Add] fatal error, the tensor is on GPU without CUDA support.%s", - "\n"); - #endif - } - - return out; - } - - template <> - Tensor Add(const cytnx_float &lc, const Tensor &Rt) { - Storage Cnst(1, Type.Float); - Cnst.at(0) = lc; - Tensor out; - out._impl = Rt._impl->_clone_meta_only(); - out._impl->storage() = Storage( - Rt._impl->storage().size(), Type.Float < Rt.dtype() ? 
Type.Float : Rt.dtype(), Rt.device()); - // Tensor out(Rt.shape(),Type.Float < Rt.dtype()?Type.Float:Rt.dtype(),Rt.device()); - - if (Rt.device() == Device.cpu) { - std::visit( - [&](auto *rptr) { - using TR = std::remove_pointer_t; - cytnx::linalg_internal::AddInternalImpl( - out._impl->storage()._impl, Cnst._impl, Rt._impl->storage()._impl, - Rt._impl->storage()._impl->size(), {}, {}, {}); - }, - Rt.ptr()); - } else { - #ifdef UNI_GPU - checkCudaErrors(cudaSetDevice(Rt.device())); - linalg_internal::lii.cuAri_ii[Type.Float][Rt.dtype()]( - out._impl->storage()._impl, Cnst._impl, Rt._impl->storage()._impl, - Rt._impl->storage()._impl->size(), {}, {}, {}, 0); - #else - cytnx_error_msg(true, "[Add] fatal error, the tensor is on GPU without CUDA support.%s", - "\n"); - #endif - } - - return out; - } - - template <> - Tensor Add(const cytnx_int64 &lc, const Tensor &Rt) { - Storage Cnst(1, Type.Int64); - Cnst.at(0) = lc; - Tensor out; - out._impl = Rt._impl->_clone_meta_only(); - out._impl->storage() = Storage( - Rt._impl->storage().size(), Type.Int64 < Rt.dtype() ? 
Type.Int64 : Rt.dtype(), Rt.device()); - // Tensor out(Rt.shape(),Type.Int64 < Rt.dtype()?Type.Int64:Rt.dtype(),Rt.device()); - - if (Rt.device() == Device.cpu) { - std::visit( - [&](auto *rptr) { - using TR = std::remove_pointer_t; - cytnx::linalg_internal::AddInternalImpl( - out._impl->storage()._impl, Cnst._impl, Rt._impl->storage()._impl, - Rt._impl->storage()._impl->size(), {}, {}, {}); - }, - Rt.ptr()); - } else { - #ifdef UNI_GPU - checkCudaErrors(cudaSetDevice(Rt.device())); - linalg_internal::lii.cuAri_ii[Type.Int64][Rt.dtype()]( - out._impl->storage()._impl, Cnst._impl, Rt._impl->storage()._impl, - Rt._impl->storage()._impl->size(), {}, {}, {}, 0); - #else - cytnx_error_msg(true, "[Add] fatal error, the tensor is on GPU without CUDA support.%s", - "\n"); - #endif - } - - return out; - } - - template <> - Tensor Add(const cytnx_uint64 &lc, const Tensor &Rt) { - Storage Cnst(1, Type.Uint64); - Cnst.at(0) = lc; - Tensor out; - out._impl = Rt._impl->_clone_meta_only(); - out._impl->storage() = - Storage(Rt._impl->storage().size(), Type.Uint64 < Rt.dtype() ? 
Type.Uint64 : Rt.dtype(), - Rt.device()); - // Tensor out(Rt.shape(),Type.Uint64 < Rt.dtype()?Type.Uint64:Rt.dtype(),Rt.device()); - - if (Rt.device() == Device.cpu) { - std::visit( - [&](auto *rptr) { - using TR = std::remove_pointer_t; - cytnx::linalg_internal::AddInternalImpl( - out._impl->storage()._impl, Cnst._impl, Rt._impl->storage()._impl, - Rt._impl->storage()._impl->size(), {}, {}, {}); - }, - Rt.ptr()); - } else { - #ifdef UNI_GPU - checkCudaErrors(cudaSetDevice(Rt.device())); - linalg_internal::lii.cuAri_ii[Type.Uint64][Rt.dtype()]( - out._impl->storage()._impl, Cnst._impl, Rt._impl->storage()._impl, - Rt._impl->storage()._impl->size(), {}, {}, {}, 0); - #else - cytnx_error_msg(true, "[Add] fatal error, the tensor is on GPU without CUDA support.%s", - "\n"); - #endif - } - - return out; - } - - template <> - Tensor Add(const cytnx_int32 &lc, const Tensor &Rt) { - Storage Cnst(1, Type.Int32); - Cnst.at(0) = lc; - Tensor out; - out._impl = Rt._impl->_clone_meta_only(); - out._impl->storage() = Storage( - Rt._impl->storage().size(), Type.Int32 < Rt.dtype() ? 
Type.Int32 : Rt.dtype(), Rt.device()); - // Tensor out(Rt.shape(),Type.Int32 < Rt.dtype()?Type.Int32:Rt.dtype(),Rt.device()); - - if (Rt.device() == Device.cpu) { - std::visit( - [&](auto *rptr) { - using TR = std::remove_pointer_t; - cytnx::linalg_internal::AddInternalImpl( - out._impl->storage()._impl, Cnst._impl, Rt._impl->storage()._impl, - Rt._impl->storage()._impl->size(), {}, {}, {}); - }, - Rt.ptr()); - } else { - #ifdef UNI_GPU - checkCudaErrors(cudaSetDevice(Rt.device())); - linalg_internal::lii.cuAri_ii[Type.Int32][Rt.dtype()]( - out._impl->storage()._impl, Cnst._impl, Rt._impl->storage()._impl, - Rt._impl->storage()._impl->size(), {}, {}, {}, 0); - #else - cytnx_error_msg(true, "[Add] fatal error, the tensor is on GPU without CUDA support.%s", - "\n"); - #endif - } - - return out; - } - - template <> - Tensor Add(const cytnx_uint32 &lc, const Tensor &Rt) { - Storage Cnst(1, Type.Uint32); - Cnst.at(0) = lc; - Tensor out; - out._impl = Rt._impl->_clone_meta_only(); - out._impl->storage() = - Storage(Rt._impl->storage().size(), Type.Uint32 < Rt.dtype() ? 
Type.Uint32 : Rt.dtype(), - Rt.device()); - // Tensor out(Rt.shape(),Type.Uint32 < Rt.dtype()?Type.Uint32:Rt.dtype(),Rt.device()); - - if (Rt.device() == Device.cpu) { - std::visit( - [&](auto *rptr) { - using TR = std::remove_pointer_t; - cytnx::linalg_internal::AddInternalImpl( - out._impl->storage()._impl, Cnst._impl, Rt._impl->storage()._impl, - Rt._impl->storage()._impl->size(), {}, {}, {}); - }, - Rt.ptr()); - } else { - #ifdef UNI_GPU - checkCudaErrors(cudaSetDevice(Rt.device())); - linalg_internal::lii.cuAri_ii[Type.Uint32][Rt.dtype()]( - out._impl->storage()._impl, Cnst._impl, Rt._impl->storage()._impl, - Rt._impl->storage()._impl->size(), {}, {}, {}, 0); - #else - cytnx_error_msg(true, "[Add] fatal error, the tensor is on GPU without CUDA support.%s", - "\n"); - #endif - } - - return out; - } - - template <> - Tensor Add(const cytnx_int16 &lc, const Tensor &Rt) { - Storage Cnst(1, Type.Int16); - Cnst.at(0) = lc; - Tensor out; - out._impl = Rt._impl->_clone_meta_only(); - out._impl->storage() = Storage( - Rt._impl->storage().size(), Type.Int16 < Rt.dtype() ? 
Type.Int16 : Rt.dtype(), Rt.device()); - // Tensor out(Rt.shape(),Type.Int16 < Rt.dtype()?Type.Int16:Rt.dtype(),Rt.device()); - - if (Rt.device() == Device.cpu) { - std::visit( - [&](auto *rptr) { - using TR = std::remove_pointer_t; - cytnx::linalg_internal::AddInternalImpl( - out._impl->storage()._impl, Cnst._impl, Rt._impl->storage()._impl, - Rt._impl->storage()._impl->size(), {}, {}, {}); - }, - Rt.ptr()); - } else { - #ifdef UNI_GPU - checkCudaErrors(cudaSetDevice(Rt.device())); - linalg_internal::lii.cuAri_ii[Type.Int16][Rt.dtype()]( - out._impl->storage()._impl, Cnst._impl, Rt._impl->storage()._impl, - Rt._impl->storage()._impl->size(), {}, {}, {}, 0); - #else - cytnx_error_msg(true, "[Add] fatal error, the tensor is on GPU without CUDA support.%s", - "\n"); - #endif - } - - return out; - } - - template <> - Tensor Add(const cytnx_uint16 &lc, const Tensor &Rt) { - Storage Cnst(1, Type.Uint16); - Cnst.at(0) = lc; - Tensor out; - out._impl = Rt._impl->_clone_meta_only(); - out._impl->storage() = - Storage(Rt._impl->storage().size(), Type.Uint16 < Rt.dtype() ? 
Type.Uint16 : Rt.dtype(), - Rt.device()); - // Tensor out(Rt.shape(),Type.Uint16 < Rt.dtype()?Type.Uint16:Rt.dtype(),Rt.device()); - - if (Rt.device() == Device.cpu) { - std::visit( - [&](auto *rptr) { - using TR = std::remove_pointer_t; - cytnx::linalg_internal::AddInternalImpl( - out._impl->storage()._impl, Cnst._impl, Rt._impl->storage()._impl, - Rt._impl->storage()._impl->size(), {}, {}, {}); - }, - Rt.ptr()); - } else { - #ifdef UNI_GPU - checkCudaErrors(cudaSetDevice(Rt.device())); - linalg_internal::lii.cuAri_ii[Type.Uint16][Rt.dtype()]( - out._impl->storage()._impl, Cnst._impl, Rt._impl->storage()._impl, - Rt._impl->storage()._impl->size(), {}, {}, {}, 0); - #else - cytnx_error_msg(true, "[Add] fatal error, the tensor is on GPU without CUDA support.%s", - "\n"); - #endif - } - - return out; - } - - template <> - Tensor Add(const cytnx_bool &lc, const Tensor &Rt) { - Storage Cnst(1, Type.Bool); - Cnst.at(0) = lc; - Tensor out; - out._impl = Rt._impl->_clone_meta_only(); - out._impl->storage() = Storage(Rt._impl->storage().size(), - Type.Bool < Rt.dtype() ? 
Type.Bool : Rt.dtype(), Rt.device()); - // Tensor out(Rt.shape(),Type.Bool < Rt.dtype()?Type.Bool:Rt.dtype(),Rt.device()); - - if (Rt.device() == Device.cpu) { - std::visit( - [&](auto *rptr) { - using TR = std::remove_pointer_t; - cytnx::linalg_internal::AddInternalImpl( - out._impl->storage()._impl, Cnst._impl, Rt._impl->storage()._impl, - Rt._impl->storage()._impl->size(), {}, {}, {}); - }, - Rt.ptr()); - } else { - #ifdef UNI_GPU - checkCudaErrors(cudaSetDevice(Rt.device())); - linalg_internal::lii.cuAri_ii[Type.Bool][Rt.dtype()]( - out._impl->storage()._impl, Cnst._impl, Rt._impl->storage()._impl, - Rt._impl->storage()._impl->size(), {}, {}, {}, 0); - #else - cytnx_error_msg(true, "[Add] fatal error, the tensor is on GPU without CUDA support.%s", - "\n"); - #endif - } - - return out; + template + Tensor Add(const T &lc, const Tensor &Rt) { + static_assert(!std::is_same_v, + "Add(const Scalar&, const Tensor&) is specialized separately."); + return detail::AddScalarTensorImpl(lc, Rt); } template <> @@ -484,8 +162,9 @@ namespace cytnx { Tensor out; out._impl = Rt._impl->_clone_meta_only(); - out._impl->storage() = Storage( - Rt._impl->storage().size(), lc.dtype() < Rt.dtype() ? 
lc.dtype() : Rt.dtype(), Rt.device()); + out._impl->storage() = + Storage(Rt._impl->storage().size(), + detail::SelectAddOutputType(lc.dtype(), Rt.dtype(), Rt.device()), Rt.device()); if (Rt.device() == Device.cpu) { std::visit( @@ -516,57 +195,38 @@ namespace cytnx { } //----------------------------------------------------------------------------------- - template <> - Tensor Add(const Tensor &Lt, const cytnx_complex128 &rc) { - return Add(rc, Lt); - } - template <> - Tensor Add(const Tensor &Lt, const cytnx_complex64 &rc) { - return Add(rc, Lt); - } - template <> - Tensor Add(const Tensor &Lt, const cytnx_double &rc) { - return Add(rc, Lt); - } - template <> - Tensor Add(const Tensor &Lt, const cytnx_float &rc) { - return Add(rc, Lt); - } - template <> - Tensor Add(const Tensor &Lt, const cytnx_int64 &rc) { - return Add(rc, Lt); - } - template <> - Tensor Add(const Tensor &Lt, const cytnx_uint64 &rc) { - return Add(rc, Lt); - } - template <> - Tensor Add(const Tensor &Lt, const cytnx_int32 &rc) { - return Add(rc, Lt); - } - template <> - Tensor Add(const Tensor &Lt, const cytnx_uint32 &rc) { - return Add(rc, Lt); - } - template <> - Tensor Add(const Tensor &Lt, const cytnx_int16 &rc) { - return Add(rc, Lt); - } - template <> - Tensor Add(const Tensor &Lt, const cytnx_uint16 &rc) { - return Add(rc, Lt); - } - template <> - Tensor Add(const Tensor &Lt, const cytnx_bool &rc) { - return Add(rc, Lt); - } - template <> - Tensor Add(const Tensor &Lt, const Scalar &rc) { + template + Tensor Add(const Tensor &Lt, const T &rc) { return Add(rc, Lt); } - } // namespace linalg + template Tensor Add(const cytnx_complex128 &lc, const Tensor &Rt); + template Tensor Add(const cytnx_complex64 &lc, const Tensor &Rt); + template Tensor Add(const cytnx_double &lc, const Tensor &Rt); + template Tensor Add(const cytnx_float &lc, const Tensor &Rt); + template Tensor Add(const cytnx_int64 &lc, const Tensor &Rt); + template Tensor Add(const cytnx_uint64 &lc, const Tensor &Rt); + template 
Tensor Add(const cytnx_int32 &lc, const Tensor &Rt); + template Tensor Add(const cytnx_uint32 &lc, const Tensor &Rt); + template Tensor Add(const cytnx_int16 &lc, const Tensor &Rt); + template Tensor Add(const cytnx_uint16 &lc, const Tensor &Rt); + template Tensor Add(const cytnx_bool &lc, const Tensor &Rt); + template Tensor Add(const Scalar &lc, const Tensor &Rt); + + template Tensor Add(const Tensor &Lt, const cytnx_complex128 &rc); + template Tensor Add(const Tensor &Lt, const cytnx_complex64 &rc); + template Tensor Add(const Tensor &Lt, const cytnx_double &rc); + template Tensor Add(const Tensor &Lt, const cytnx_float &rc); + template Tensor Add(const Tensor &Lt, const cytnx_int64 &rc); + template Tensor Add(const Tensor &Lt, const cytnx_uint64 &rc); + template Tensor Add(const Tensor &Lt, const cytnx_int32 &rc); + template Tensor Add(const Tensor &Lt, const cytnx_uint32 &rc); + template Tensor Add(const Tensor &Lt, const cytnx_int16 &rc); + template Tensor Add(const Tensor &Lt, const cytnx_uint16 &rc); + template Tensor Add(const Tensor &Lt, const cytnx_bool &rc); + template Tensor Add(const Tensor &Lt, const Scalar &rc); + } // namespace linalg //=================== // operators: //=================== diff --git a/tests/linalg_test/linalg_test.cpp b/tests/linalg_test/linalg_test.cpp index e4deeaf49..a01f42bc3 100644 --- a/tests/linalg_test/linalg_test.cpp +++ b/tests/linalg_test/linalg_test.cpp @@ -318,6 +318,29 @@ TEST_F(linalg_Test, Tensor_Norm) { EXPECT_EQ(linalg::Norm(arange3x3cd).item(), ans); } +TEST_F(linalg_Test, Tensor_Add_mixed_dtype_type_promote_cpu) { + Tensor lhs = arange(0, 4, 1, Type.Uint32).reshape(2, 2); + Tensor rhs = arange(0, 4, 1, Type.Int16).reshape(2, 2); + + Tensor out = linalg::Add(lhs, rhs); + EXPECT_EQ(out.dtype(), Type.type_promote(lhs.dtype(), rhs.dtype())); + EXPECT_EQ((cytnx_int64)out(1, 1).item().real(), 6); +} + +TEST_F(linalg_Test, Tensor_Add_scalar_mixed_dtype_type_promote_cpu) { + Tensor rhs = arange(0, 4, 1, 
Type.Int16).reshape(2, 2); + const cytnx_uint32 lhs_scalar = 5; + + Tensor out_lhs = linalg::Add(lhs_scalar, rhs); + Tensor out_rhs = linalg::Add(rhs, lhs_scalar); + const unsigned int promoted = Type.type_promote(Type.Uint32, rhs.dtype()); + + EXPECT_EQ(out_lhs.dtype(), promoted); + EXPECT_EQ(out_rhs.dtype(), promoted); + EXPECT_EQ((cytnx_int64)out_lhs(1, 1).item().real(), 8); + EXPECT_EQ((cytnx_int64)out_rhs(1, 1).item().real(), 8); +} + TEST_F(linalg_Test, DenseUt_Norm) { cytnx_double ans = 0; for (cytnx_uint64 i = 0; i < 9; i++) { From d2c38818ca6a6be79fffb3222bb5f2954ff0ae52 Mon Sep 17 00:00:00 2001 From: Ying-Jer Kao Date: Tue, 31 Mar 2026 08:44:51 +0800 Subject: [PATCH 6/7] CUDA: typed Add dispatch + CUDA13 CCCL include detection --- CytnxBKNDCMakeLists.cmake | 27 ++ .../linalg_internal_gpu/cuAdd_internal.cu | 274 ++++++++++++++++++ .../linalg_internal_gpu/cuAdd_internal.hpp | 7 + src/linalg/Add.cpp | 34 +-- tests/gpu/linalg_test/Add_test.cpp | 35 +++ 5 files changed, 357 insertions(+), 20 deletions(-) diff --git a/CytnxBKNDCMakeLists.cmake b/CytnxBKNDCMakeLists.cmake index 1beaeada1..3cda74010 100644 --- a/CytnxBKNDCMakeLists.cmake +++ b/CytnxBKNDCMakeLists.cmake @@ -126,6 +126,33 @@ if(USE_CUDA) # -gencode=arch=compute_75,code=compute_75 ") target_compile_definitions(cytnx PUBLIC UNI_GPU) target_include_directories(cytnx PRIVATE ${CUDAToolkit_INCLUDE_DIRS}) + # CUDA 12+/13 may place Thrust/CUB headers under include/cccl. 
+ set(_cytnx_cccl_candidates) + if(DEFINED CUDAToolkit_TARGET_DIR AND NOT "${CUDAToolkit_TARGET_DIR}" STREQUAL "") + list(APPEND _cytnx_cccl_candidates "${CUDAToolkit_TARGET_DIR}/include/cccl") + endif() + foreach(_cuda_inc IN LISTS CUDAToolkit_INCLUDE_DIRS) + list(APPEND _cytnx_cccl_candidates + "${_cuda_inc}/cccl" + "${_cuda_inc}/../include/cccl" + "${_cuda_inc}/../../include/cccl" + "${_cuda_inc}/../../../include/cccl") + endforeach() + list(REMOVE_DUPLICATES _cytnx_cccl_candidates) + + set(_cytnx_cccl_dir "") + foreach(_cccl_candidate IN LISTS _cytnx_cccl_candidates) + get_filename_component(_cccl_candidate_abs "${_cccl_candidate}" ABSOLUTE) + if(EXISTS "${_cccl_candidate_abs}") + set(_cytnx_cccl_dir "${_cccl_candidate_abs}") + break() + endif() + endforeach() + if(NOT "${_cytnx_cccl_dir}" STREQUAL "") + target_include_directories(cytnx PRIVATE "${_cytnx_cccl_dir}") + message(STATUS "Detected CCCL headers at: ${_cytnx_cccl_dir}") + endif() + target_link_libraries(cytnx PUBLIC CUDA::toolkit) target_link_libraries(cytnx PUBLIC CUDA::cudart CUDA::cublas CUDA::cusparse CUDA::curand CUDA::cusolver) target_link_libraries(cytnx PUBLIC -lcudadevrt) diff --git a/src/backend/linalg_internal_gpu/cuAdd_internal.cu b/src/backend/linalg_internal_gpu/cuAdd_internal.cu index 57cc32631..e97f0121b 100644 --- a/src/backend/linalg_internal_gpu/cuAdd_internal.cu +++ b/src/backend/linalg_internal_gpu/cuAdd_internal.cu @@ -5,6 +5,280 @@ namespace cytnx { namespace linalg_internal { + namespace { + + template + __device__ inline cuDoubleComplex CuToComplexDouble(const T &v) { + return make_cuDoubleComplex(static_cast(v), 0.0); + } + + __device__ inline cuDoubleComplex CuToComplexDouble(const cuDoubleComplex &v) { return v; } + + __device__ inline cuDoubleComplex CuToComplexDouble(const cuComplex &v) { + return cuComplexFloatToDouble(v); + } + + template + __device__ inline cuComplex CuToComplexFloat(const T &v) { + return make_cuFloatComplex(static_cast(v), 0.0f); + } + + __device__ 
inline cuComplex CuToComplexFloat(const cuComplex &v) { return v; } + + __device__ inline cuComplex CuToComplexFloat(const cuDoubleComplex &v) { + return make_cuFloatComplex(static_cast(cuCreal(v)), + static_cast(cuCimag(v))); + } + + template + __device__ inline TO CuAddDispatchOp(const TL &lhs, const TR &rhs) { + if constexpr (std::is_same_v) { + return cuCadd(CuToComplexDouble(lhs), CuToComplexDouble(rhs)); + } else if constexpr (std::is_same_v) { + return cuCaddf(CuToComplexFloat(lhs), CuToComplexFloat(rhs)); + } else { + return static_cast(lhs) + static_cast(rhs); + } + } + + template + __global__ void cuAdd_dispatch_constconst_kernel(TO *out, const TL lhs, const cytnx_uint64 n, + const TR rhs) { + const cytnx_uint64 idx = blockIdx.x * blockDim.x + threadIdx.x; + if (idx < n) out[idx] = CuAddDispatchOp(lhs, rhs); + } + + template + __global__ void cuAdd_dispatch_lconst_kernel(TO *out, const TL lhs, const cytnx_uint64 n, + const TR *rhs) { + const cytnx_uint64 idx = blockIdx.x * blockDim.x + threadIdx.x; + if (idx < n) out[idx] = CuAddDispatchOp(lhs, rhs[idx]); + } + + template + __global__ void cuAdd_dispatch_rconst_kernel(TO *out, const TL *lhs, const cytnx_uint64 n, + const TR rhs) { + const cytnx_uint64 idx = blockIdx.x * blockDim.x + threadIdx.x; + if (idx < n) out[idx] = CuAddDispatchOp(lhs[idx], rhs); + } + + template + __global__ void cuAdd_dispatch_tn_kernel(TO *out, const TL *lhs, const cytnx_uint64 n, + const TR *rhs) { + const cytnx_uint64 idx = blockIdx.x * blockDim.x + threadIdx.x; + if (idx < n) out[idx] = CuAddDispatchOp(lhs[idx], rhs[idx]); + } + + template + __global__ void cuAdd_dispatch_tn_kernel_nonconti( + TO *out, const TL *lhs, const cytnx_uint64 n, const TR *rhs, const cytnx_uint64 *accu_shape, + const cytnx_uint64 *old_accu_shapeL, const cytnx_uint64 *old_accu_shapeR, + const cytnx_uint64 *invmapper_L, const cytnx_uint64 *invmapper_R, + const cytnx_uint64 shapesize) { + extern __shared__ cytnx_uint64 tmpv[]; + + const cytnx_uint64 idx 
= blockIdx.x * blockDim.x + threadIdx.x; + if (idx < n) { + cytnx_uint64 tmp = idx; + const cytnx_uint64 offset = threadIdx.x * shapesize; + cytnx_uint64 Lidx = 0, Ridx = 0; + + for (cytnx_uint64 j = 0; j < shapesize; j++) { + tmpv[offset + j] = tmp / accu_shape[j]; + tmp = tmp % accu_shape[j]; + } + for (cytnx_uint64 j = 0; j < shapesize; j++) { + Lidx += tmpv[offset + invmapper_L[j]] * old_accu_shapeL[j]; + Ridx += tmpv[offset + invmapper_R[j]] * old_accu_shapeR[j]; + } + out[idx] = CuAddDispatchOp(lhs[Lidx], rhs[Ridx]); + } + } + + template + void cuAdd_dispatch_typed(boost::intrusive_ptr &out, + boost::intrusive_ptr &Lin, + boost::intrusive_ptr &Rin, + const unsigned long long &len, + const std::vector &shape, + const std::vector &invmapper_L, + const std::vector &invmapper_R) { + using TO = Type_class::type_promote_gpu_t; + cytnx_error_msg(out->dtype() != Type_class::cy_typeid_gpu_v, + "[cuAdd_dispatch] output dtype mismatch. got=%d expected=%d%s", + out->dtype(), Type_class::cy_typeid_gpu_v, "\n"); + + TO *_out = reinterpret_cast(out->data()); + const TL *_Lin = reinterpret_cast(Lin->data()); + const TR *_Rin = reinterpret_cast(Rin->data()); + + cytnx_uint32 NBlocks = len / 512; + if (len % 512) NBlocks += 1; + + if (Lin->size() == 1 and Rin->size() == 1) { + cuAdd_dispatch_constconst_kernel<<>>(_out, _Lin[0], len, _Rin[0]); + } else if (Lin->size() == 1) { + cuAdd_dispatch_lconst_kernel<<>>(_out, _Lin[0], len, _Rin); + } else if (Rin->size() == 1) { + cuAdd_dispatch_rconst_kernel<<>>(_out, _Lin, len, _Rin[0]); + } else { + if (shape.size() == 0) { + cuAdd_dispatch_tn_kernel<<>>(_out, _Lin, len, _Rin); + } else { + cytnx_uint64 *m_accu_shape = reinterpret_cast( + utils_internal::cuCalloc_gpu(shape.size(), sizeof(cytnx_uint64))); + cytnx_uint64 *m_old_accu_shapeL = reinterpret_cast( + utils_internal::cuCalloc_gpu(shape.size(), sizeof(cytnx_uint64))); + cytnx_uint64 *m_old_accu_shapeR = reinterpret_cast( + utils_internal::cuCalloc_gpu(shape.size(), 
sizeof(cytnx_uint64))); + cytnx_uint64 *m_invmapper_L = reinterpret_cast( + utils_internal::cuMalloc_gpu(invmapper_L.size() * sizeof(cytnx_uint64))); + cytnx_uint64 *m_invmapper_R = reinterpret_cast( + utils_internal::cuMalloc_gpu(invmapper_R.size() * sizeof(cytnx_uint64))); + + checkCudaErrors(cudaMemcpy(m_invmapper_L, &invmapper_L[0], + sizeof(cytnx_uint64) * invmapper_L.size(), + cudaMemcpyHostToDevice)); + checkCudaErrors(cudaMemcpy(m_invmapper_R, &invmapper_R[0], + sizeof(cytnx_uint64) * invmapper_R.size(), + cudaMemcpyHostToDevice)); + + cytnx_uint64 tmp1 = 1, tmp2 = 1, tmp3 = 1; + for (cytnx_uint64 i = 0; i < shape.size(); i++) { + m_accu_shape[shape.size() - 1 - i] = tmp1; + tmp1 *= shape[shape.size() - 1 - i]; + + m_old_accu_shapeL[shape.size() - 1 - i] = tmp2; + tmp2 *= shape[invmapper_L[shape.size() - 1 - i]]; + + m_old_accu_shapeR[shape.size() - 1 - i] = tmp3; + tmp3 *= shape[invmapper_R[shape.size() - 1 - i]]; + } + + cuAdd_dispatch_tn_kernel_nonconti<<>>( + _out, _Lin, len, _Rin, m_accu_shape, m_old_accu_shapeL, m_old_accu_shapeR, + m_invmapper_L, m_invmapper_R, shape.size()); + + checkCudaErrors(cudaFree(m_accu_shape)); + checkCudaErrors(cudaFree(m_old_accu_shapeL)); + checkCudaErrors(cudaFree(m_old_accu_shapeR)); + checkCudaErrors(cudaFree(m_invmapper_L)); + checkCudaErrors(cudaFree(m_invmapper_R)); + } + } + } + + template + void cuAdd_dispatch_rhs(boost::intrusive_ptr &out, + boost::intrusive_ptr &Lin, + boost::intrusive_ptr &Rin, + const unsigned long long &len, const std::vector &shape, + const std::vector &invmapper_L, + const std::vector &invmapper_R) { + switch (Rin->dtype()) { + case Type.ComplexDouble: + cuAdd_dispatch_typed(out, Lin, Rin, len, shape, invmapper_L, + invmapper_R); + break; + case Type.ComplexFloat: + cuAdd_dispatch_typed(out, Lin, Rin, len, shape, invmapper_L, + invmapper_R); + break; + case Type.Double: + cuAdd_dispatch_typed(out, Lin, Rin, len, shape, invmapper_L, + invmapper_R); + break; + case Type.Float: + 
cuAdd_dispatch_typed(out, Lin, Rin, len, shape, invmapper_L, + invmapper_R); + break; + case Type.Int64: + cuAdd_dispatch_typed(out, Lin, Rin, len, shape, invmapper_L, + invmapper_R); + break; + case Type.Uint64: + cuAdd_dispatch_typed(out, Lin, Rin, len, shape, invmapper_L, + invmapper_R); + break; + case Type.Int32: + cuAdd_dispatch_typed(out, Lin, Rin, len, shape, invmapper_L, + invmapper_R); + break; + case Type.Uint32: + cuAdd_dispatch_typed(out, Lin, Rin, len, shape, invmapper_L, + invmapper_R); + break; + case Type.Int16: + cuAdd_dispatch_typed(out, Lin, Rin, len, shape, invmapper_L, + invmapper_R); + break; + case Type.Uint16: + cuAdd_dispatch_typed(out, Lin, Rin, len, shape, invmapper_L, + invmapper_R); + break; + case Type.Bool: + cuAdd_dispatch_typed(out, Lin, Rin, len, shape, invmapper_L, + invmapper_R); + break; + default: + cytnx_error_msg(true, "[cuAdd_dispatch] unsupported rhs dtype: %d%s", Rin->dtype(), + "\n"); + } + } + + } // namespace + + void cuAdd_dispatch(boost::intrusive_ptr &out, + boost::intrusive_ptr &Lin, + boost::intrusive_ptr &Rin, const unsigned long long &len, + const std::vector &shape, + const std::vector &invmapper_L, + const std::vector &invmapper_R) { + const unsigned int expected_dtype = Type.type_promote(Lin->dtype(), Rin->dtype()); + cytnx_error_msg(out->dtype() != expected_dtype, + "[cuAdd_dispatch] output dtype mismatch. 
got=%d expected=%d%s", out->dtype(), + expected_dtype, "\n"); + + switch (Lin->dtype()) { + case Type.ComplexDouble: + cuAdd_dispatch_rhs(out, Lin, Rin, len, shape, invmapper_L, invmapper_R); + break; + case Type.ComplexFloat: + cuAdd_dispatch_rhs(out, Lin, Rin, len, shape, invmapper_L, invmapper_R); + break; + case Type.Double: + cuAdd_dispatch_rhs(out, Lin, Rin, len, shape, invmapper_L, invmapper_R); + break; + case Type.Float: + cuAdd_dispatch_rhs(out, Lin, Rin, len, shape, invmapper_L, invmapper_R); + break; + case Type.Int64: + cuAdd_dispatch_rhs(out, Lin, Rin, len, shape, invmapper_L, invmapper_R); + break; + case Type.Uint64: + cuAdd_dispatch_rhs(out, Lin, Rin, len, shape, invmapper_L, invmapper_R); + break; + case Type.Int32: + cuAdd_dispatch_rhs(out, Lin, Rin, len, shape, invmapper_L, invmapper_R); + break; + case Type.Uint32: + cuAdd_dispatch_rhs(out, Lin, Rin, len, shape, invmapper_L, invmapper_R); + break; + case Type.Int16: + cuAdd_dispatch_rhs(out, Lin, Rin, len, shape, invmapper_L, invmapper_R); + break; + case Type.Uint16: + cuAdd_dispatch_rhs(out, Lin, Rin, len, shape, invmapper_L, invmapper_R); + break; + case Type.Bool: + cuAdd_dispatch_rhs(out, Lin, Rin, len, shape, invmapper_L, invmapper_R); + break; + default: + cytnx_error_msg(true, "[cuAdd_dispatch] unsupported lhs dtype: %d%s", Lin->dtype(), "\n"); + } + } + //==================================================================== // generic R+R kernel diff --git a/src/backend/linalg_internal_gpu/cuAdd_internal.hpp b/src/backend/linalg_internal_gpu/cuAdd_internal.hpp index ca8997145..860bae9ea 100644 --- a/src/backend/linalg_internal_gpu/cuAdd_internal.hpp +++ b/src/backend/linalg_internal_gpu/cuAdd_internal.hpp @@ -13,6 +13,13 @@ namespace cytnx { namespace linalg_internal { /// cuAdd + void cuAdd_dispatch(boost::intrusive_ptr &out, + boost::intrusive_ptr &Lin, + boost::intrusive_ptr &Rin, const unsigned long long &len, + const std::vector &shape, + const std::vector &invmapper_L, + const 
std::vector &invmapper_R); + void cuAdd_internal_cdtcd(boost::intrusive_ptr &out, boost::intrusive_ptr &Lin, boost::intrusive_ptr &Rin, diff --git a/src/linalg/Add.cpp b/src/linalg/Add.cpp index 15a0553bb..4bb555a4d 100644 --- a/src/linalg/Add.cpp +++ b/src/linalg/Add.cpp @@ -11,14 +11,7 @@ namespace cytnx { namespace detail { inline unsigned int SelectAddOutputType(const unsigned int lhs_dtype, const unsigned int rhs_dtype, const int device) { - #ifdef UNI_GPU - // Current GPU arithmetic tables still assume legacy min(lhs, rhs) promotion. - if (device != Device.cpu) { - return lhs_dtype < rhs_dtype ? lhs_dtype : rhs_dtype; - } - #else (void)device; - #endif return Type.type_promote(lhs_dtype, rhs_dtype); } @@ -46,9 +39,9 @@ namespace cytnx { } else { #ifdef UNI_GPU checkCudaErrors(cudaSetDevice(Rt.device())); - linalg_internal::lii.cuAri_ii[lhs_dtype][Rt.dtype()]( - out._impl->storage()._impl, Cnst._impl, Rt._impl->storage()._impl, - Rt._impl->storage()._impl->size(), {}, {}, {}, 0); + linalg_internal::cuAdd_dispatch(out._impl->storage()._impl, Cnst._impl, + Rt._impl->storage()._impl, + Rt._impl->storage()._impl->size(), {}, {}, {}); #else cytnx_error_msg(true, "[Add] fatal error, the tensor is on GPU without CUDA support.%s", "\n"); @@ -107,9 +100,9 @@ namespace cytnx { } else { #ifdef UNI_GPU checkCudaErrors(cudaSetDevice(Rt.device())); - linalg_internal::lii.cuAri_ii[Lt.dtype()][Rt.dtype()]( - out._impl->storage()._impl, Lt._impl->storage()._impl, Rt._impl->storage()._impl, - out._impl->storage()._impl->size(), {}, {}, {}, 0); + linalg_internal::cuAdd_dispatch(out._impl->storage()._impl, Lt._impl->storage()._impl, + Rt._impl->storage()._impl, + out._impl->storage()._impl->size(), {}, {}, {}); #else cytnx_error_msg(true, "[Add] fatal error, the tensor is on GPU without CUDA support.%s", "\n"); @@ -134,10 +127,11 @@ namespace cytnx { Lt.ptr()); } else { #ifdef UNI_GPU - linalg_internal::lii.cuAri_ii[Lt.dtype()][Rt.dtype()]( - out._impl->storage()._impl, 
Lt._impl->storage()._impl, Rt._impl->storage()._impl, - Lt._impl->storage()._impl->size(), Lt._impl->shape(), Lt._impl->invmapper(), - Rt._impl->invmapper(), 0); + checkCudaErrors(cudaSetDevice(Rt.device())); + linalg_internal::cuAdd_dispatch(out._impl->storage()._impl, Lt._impl->storage()._impl, + Rt._impl->storage()._impl, + Lt._impl->storage()._impl->size(), Lt._impl->shape(), + Lt._impl->invmapper(), Rt._impl->invmapper()); #else cytnx_error_msg(true, "[Add] fatal error, the tensor is on GPU without CUDA support.%s", "\n"); @@ -183,9 +177,9 @@ namespace cytnx { } else { #ifdef UNI_GPU checkCudaErrors(cudaSetDevice(Rt.device())); - linalg_internal::lii.cuAri_ii[lc.dtype()][Rt.dtype()]( - out._impl->storage()._impl, Cnst._impl, Rt._impl->storage()._impl, - Rt._impl->storage()._impl->size(), {}, {}, {}, 0); + linalg_internal::cuAdd_dispatch(out._impl->storage()._impl, Cnst._impl, + Rt._impl->storage()._impl, + Rt._impl->storage()._impl->size(), {}, {}, {}); #else cytnx_error_msg(true, "[Add] fatal error, the tensor is on GPU without CUDA support.%s", "\n"); diff --git a/tests/gpu/linalg_test/Add_test.cpp b/tests/gpu/linalg_test/Add_test.cpp index f125d8012..de5baa853 100644 --- a/tests/gpu/linalg_test/Add_test.cpp +++ b/tests/gpu/linalg_test/Add_test.cpp @@ -126,6 +126,41 @@ namespace AddTest { } } + TEST(AddMixedDtypeTest, gpu_tensor_add_tensor_mixed_unsigned_signed_type_promote) { + cytnx::Tensor lhs = cytnx::arange(0, 6, 1, cytnx::Type.Uint32).reshape({2, 3}); + cytnx::Tensor rhs = cytnx::arange(0, 6, 1, cytnx::Type.Int16).reshape({2, 3}); + lhs = lhs.to(cytnx::Device.cuda); + rhs = rhs.to(cytnx::Device.cuda); + + cytnx::Tensor gpu_result = cytnx::linalg::Add(lhs, rhs); + cytnx::Tensor expected_cpu = + cytnx::linalg::Add(lhs.to(cytnx::Device.cpu), rhs.to(cytnx::Device.cpu)); + cytnx::Tensor gpu_result_cpu = gpu_result.to(cytnx::Device.cpu); + + EXPECT_EQ(gpu_result.dtype(), expected_cpu.dtype()); + EXPECT_TRUE(cytnx::TestTools::AreNearlyEqTensor(gpu_result_cpu, 
expected_cpu, 1e-6)); + } + + TEST(AddMixedDtypeTest, gpu_scalar_add_tensor_mixed_unsigned_signed_type_promote) { + const cytnx::cytnx_uint32 scalar = 5; + cytnx::Tensor rhs = + cytnx::arange(0, 6, 1, cytnx::Type.Int16).reshape({2, 3}).to(cytnx::Device.cuda); + + cytnx::Tensor gpu_result_l = cytnx::linalg::Add(scalar, rhs); + cytnx::Tensor gpu_result_r = cytnx::linalg::Add(rhs, scalar); + + cytnx::Tensor rhs_cpu = rhs.to(cytnx::Device.cpu); + cytnx::Tensor expected_l = cytnx::linalg::Add(scalar, rhs_cpu); + cytnx::Tensor expected_r = cytnx::linalg::Add(rhs_cpu, scalar); + cytnx::Tensor gpu_result_l_cpu = gpu_result_l.to(cytnx::Device.cpu); + cytnx::Tensor gpu_result_r_cpu = gpu_result_r.to(cytnx::Device.cpu); + + EXPECT_EQ(gpu_result_l.dtype(), expected_l.dtype()); + EXPECT_EQ(gpu_result_r.dtype(), expected_r.dtype()); + EXPECT_TRUE(cytnx::TestTools::AreNearlyEqTensor(gpu_result_l_cpu, expected_l, 1e-6)); + EXPECT_TRUE(cytnx::TestTools::AreNearlyEqTensor(gpu_result_r_cpu, expected_r, 1e-6)); + } + INSTANTIATE_TEST_SUITE_P(AddTests, AddTestAllShapes, ::testing::ValuesIn(GetTestShapes())); ::testing::AssertionResult CheckAddResult(const cytnx::Tensor& gpu_result, From d60ea7ffebaa6576e3ebba0b552e33e33934b4d6 Mon Sep 17 00:00:00 2001 From: Ying-Jer Kao Date: Tue, 31 Mar 2026 09:01:26 +0800 Subject: [PATCH 7/7] CUDA: typed dispatch for Sub/Mul/Div with mixed-dtype tests --- .../linalg_internal_gpu/CMakeLists.txt | 3 + .../linalg_internal_gpu/cuDiv_dispatch.cu | 283 +++++++++++++++++ .../linalg_internal_gpu/cuDiv_internal.hpp | 7 + .../linalg_internal_gpu/cuMul_dispatch.cu | 283 +++++++++++++++++ .../linalg_internal_gpu/cuMul_internal.hpp | 7 + .../linalg_internal_gpu/cuSub_dispatch.cu | 283 +++++++++++++++++ .../linalg_internal_gpu/cuSub_internal.hpp | 7 + src/linalg/Div.cpp | 286 ++++++++--------- src/linalg/Mul.cpp | 161 +++++----- src/linalg/Sub.cpp | 298 ++++++++---------- tests/gpu/linalg_test/Div_test.cpp | 38 +++ tests/gpu/linalg_test/Mul_test.cpp | 35 ++ 
tests/gpu/linalg_test/Sub_test.cpp | 35 ++ 13 files changed, 1329 insertions(+), 397 deletions(-) create mode 100644 src/backend/linalg_internal_gpu/cuDiv_dispatch.cu create mode 100644 src/backend/linalg_internal_gpu/cuMul_dispatch.cu create mode 100644 src/backend/linalg_internal_gpu/cuSub_dispatch.cu diff --git a/src/backend/linalg_internal_gpu/CMakeLists.txt b/src/backend/linalg_internal_gpu/CMakeLists.txt index c3ca5f0ca..ef8eb9ef2 100644 --- a/src/backend/linalg_internal_gpu/CMakeLists.txt +++ b/src/backend/linalg_internal_gpu/CMakeLists.txt @@ -36,6 +36,9 @@ target_sources_local(cytnx cuAbs_internal.cu cuAdd_internal.cu + cuSub_dispatch.cu + cuMul_dispatch.cu + cuDiv_dispatch.cu cuGer_internal.cu cuArithmetic_internal.cu cuConj_inplace_internal.cu diff --git a/src/backend/linalg_internal_gpu/cuDiv_dispatch.cu b/src/backend/linalg_internal_gpu/cuDiv_dispatch.cu new file mode 100644 index 000000000..5eb9b6731 --- /dev/null +++ b/src/backend/linalg_internal_gpu/cuDiv_dispatch.cu @@ -0,0 +1,283 @@ +#include "cuDiv_internal.hpp" +#include "backend/utils_internal_interface.hpp" + +namespace cytnx { + + namespace linalg_internal { + + namespace { + + template + __device__ inline cuDoubleComplex CuToComplexDouble(const T &v) { + return make_cuDoubleComplex(static_cast(v), 0.0); + } + + __device__ inline cuDoubleComplex CuToComplexDouble(const cuDoubleComplex &v) { return v; } + + __device__ inline cuDoubleComplex CuToComplexDouble(const cuComplex &v) { + return cuComplexFloatToDouble(v); + } + + template + __device__ inline cuComplex CuToComplexFloat(const T &v) { + return make_cuFloatComplex(static_cast(v), 0.0f); + } + + __device__ inline cuComplex CuToComplexFloat(const cuComplex &v) { return v; } + + __device__ inline cuComplex CuToComplexFloat(const cuDoubleComplex &v) { + return make_cuFloatComplex(static_cast(cuCreal(v)), + static_cast(cuCimag(v))); + } + + template + __device__ inline TO CuDivDispatchOp(const TL &lhs, const TR &rhs) { + if constexpr 
(std::is_same_v) { + return cuCdiv(CuToComplexDouble(lhs), CuToComplexDouble(rhs)); + } else if constexpr (std::is_same_v) { + return cuCdivf(CuToComplexFloat(lhs), CuToComplexFloat(rhs)); + } else { + return static_cast(lhs) / static_cast(rhs); + } + } + + template + __global__ void cuDiv_dispatch_constconst_kernel(TO *out, const TL lhs, const cytnx_uint64 n, + const TR rhs) { + const cytnx_uint64 idx = blockIdx.x * blockDim.x + threadIdx.x; + if (idx < n) out[idx] = CuDivDispatchOp(lhs, rhs); + } + + template + __global__ void cuDiv_dispatch_lconst_kernel(TO *out, const TL lhs, const cytnx_uint64 n, + const TR *rhs) { + const cytnx_uint64 idx = blockIdx.x * blockDim.x + threadIdx.x; + if (idx < n) out[idx] = CuDivDispatchOp(lhs, rhs[idx]); + } + + template + __global__ void cuDiv_dispatch_rconst_kernel(TO *out, const TL *lhs, const cytnx_uint64 n, + const TR rhs) { + const cytnx_uint64 idx = blockIdx.x * blockDim.x + threadIdx.x; + if (idx < n) out[idx] = CuDivDispatchOp(lhs[idx], rhs); + } + + template + __global__ void cuDiv_dispatch_tn_kernel(TO *out, const TL *lhs, const cytnx_uint64 n, + const TR *rhs) { + const cytnx_uint64 idx = blockIdx.x * blockDim.x + threadIdx.x; + if (idx < n) out[idx] = CuDivDispatchOp(lhs[idx], rhs[idx]); + } + + template + __global__ void cuDiv_dispatch_tn_kernel_nonconti( + TO *out, const TL *lhs, const cytnx_uint64 n, const TR *rhs, const cytnx_uint64 *accu_shape, + const cytnx_uint64 *old_accu_shapeL, const cytnx_uint64 *old_accu_shapeR, + const cytnx_uint64 *invmapper_L, const cytnx_uint64 *invmapper_R, + const cytnx_uint64 shapesize) { + extern __shared__ cytnx_uint64 tmpv[]; + + const cytnx_uint64 idx = blockIdx.x * blockDim.x + threadIdx.x; + if (idx < n) { + cytnx_uint64 tmp = idx; + const cytnx_uint64 offset = threadIdx.x * shapesize; + cytnx_uint64 Lidx = 0, Ridx = 0; + + for (cytnx_uint64 j = 0; j < shapesize; j++) { + tmpv[offset + j] = tmp / accu_shape[j]; + tmp = tmp % accu_shape[j]; + } + for (cytnx_uint64 j = 0; j < 
shapesize; j++) { + Lidx += tmpv[offset + invmapper_L[j]] * old_accu_shapeL[j]; + Ridx += tmpv[offset + invmapper_R[j]] * old_accu_shapeR[j]; + } + out[idx] = CuDivDispatchOp(lhs[Lidx], rhs[Ridx]); + } + } + + template + void cuDiv_dispatch_typed(boost::intrusive_ptr &out, + boost::intrusive_ptr &Lin, + boost::intrusive_ptr &Rin, + const unsigned long long &len, + const std::vector &shape, + const std::vector &invmapper_L, + const std::vector &invmapper_R) { + using TO = Type_class::type_promote_gpu_t; + cytnx_error_msg(out->dtype() != Type_class::cy_typeid_gpu_v, + "[cuDiv_dispatch] output dtype mismatch. got=%d expected=%d%s", + out->dtype(), Type_class::cy_typeid_gpu_v, "\n"); + + TO *_out = reinterpret_cast(out->data()); + const TL *_Lin = reinterpret_cast(Lin->data()); + const TR *_Rin = reinterpret_cast(Rin->data()); + + cytnx_uint32 NBlocks = len / 512; + if (len % 512) NBlocks += 1; + + if (Lin->size() == 1 and Rin->size() == 1) { + cuDiv_dispatch_constconst_kernel<<>>(_out, _Lin[0], len, _Rin[0]); + } else if (Lin->size() == 1) { + cuDiv_dispatch_lconst_kernel<<>>(_out, _Lin[0], len, _Rin); + } else if (Rin->size() == 1) { + cuDiv_dispatch_rconst_kernel<<>>(_out, _Lin, len, _Rin[0]); + } else { + if (shape.size() == 0) { + cuDiv_dispatch_tn_kernel<<>>(_out, _Lin, len, _Rin); + } else { + cytnx_uint64 *m_accu_shape = reinterpret_cast( + utils_internal::cuCalloc_gpu(shape.size(), sizeof(cytnx_uint64))); + cytnx_uint64 *m_old_accu_shapeL = reinterpret_cast( + utils_internal::cuCalloc_gpu(shape.size(), sizeof(cytnx_uint64))); + cytnx_uint64 *m_old_accu_shapeR = reinterpret_cast( + utils_internal::cuCalloc_gpu(shape.size(), sizeof(cytnx_uint64))); + cytnx_uint64 *m_invmapper_L = reinterpret_cast( + utils_internal::cuMalloc_gpu(invmapper_L.size() * sizeof(cytnx_uint64))); + cytnx_uint64 *m_invmapper_R = reinterpret_cast( + utils_internal::cuMalloc_gpu(invmapper_R.size() * sizeof(cytnx_uint64))); + + checkCudaErrors(cudaMemcpy(m_invmapper_L, &invmapper_L[0], + 
sizeof(cytnx_uint64) * invmapper_L.size(), + cudaMemcpyHostToDevice)); + checkCudaErrors(cudaMemcpy(m_invmapper_R, &invmapper_R[0], + sizeof(cytnx_uint64) * invmapper_R.size(), + cudaMemcpyHostToDevice)); + + cytnx_uint64 tmp1 = 1, tmp2 = 1, tmp3 = 1; + for (cytnx_uint64 i = 0; i < shape.size(); i++) { + m_accu_shape[shape.size() - 1 - i] = tmp1; + tmp1 *= shape[shape.size() - 1 - i]; + + m_old_accu_shapeL[shape.size() - 1 - i] = tmp2; + tmp2 *= shape[invmapper_L[shape.size() - 1 - i]]; + + m_old_accu_shapeR[shape.size() - 1 - i] = tmp3; + tmp3 *= shape[invmapper_R[shape.size() - 1 - i]]; + } + + cuDiv_dispatch_tn_kernel_nonconti<<>>( + _out, _Lin, len, _Rin, m_accu_shape, m_old_accu_shapeL, m_old_accu_shapeR, + m_invmapper_L, m_invmapper_R, shape.size()); + + checkCudaErrors(cudaFree(m_accu_shape)); + checkCudaErrors(cudaFree(m_old_accu_shapeL)); + checkCudaErrors(cudaFree(m_old_accu_shapeR)); + checkCudaErrors(cudaFree(m_invmapper_L)); + checkCudaErrors(cudaFree(m_invmapper_R)); + } + } + } + + template + void cuDiv_dispatch_rhs(boost::intrusive_ptr &out, + boost::intrusive_ptr &Lin, + boost::intrusive_ptr &Rin, + const unsigned long long &len, const std::vector &shape, + const std::vector &invmapper_L, + const std::vector &invmapper_R) { + switch (Rin->dtype()) { + case Type.ComplexDouble: + cuDiv_dispatch_typed(out, Lin, Rin, len, shape, invmapper_L, + invmapper_R); + break; + case Type.ComplexFloat: + cuDiv_dispatch_typed(out, Lin, Rin, len, shape, invmapper_L, + invmapper_R); + break; + case Type.Double: + cuDiv_dispatch_typed(out, Lin, Rin, len, shape, invmapper_L, + invmapper_R); + break; + case Type.Float: + cuDiv_dispatch_typed(out, Lin, Rin, len, shape, invmapper_L, + invmapper_R); + break; + case Type.Int64: + cuDiv_dispatch_typed(out, Lin, Rin, len, shape, invmapper_L, + invmapper_R); + break; + case Type.Uint64: + cuDiv_dispatch_typed(out, Lin, Rin, len, shape, invmapper_L, + invmapper_R); + break; + case Type.Int32: + cuDiv_dispatch_typed(out, Lin, 
Rin, len, shape, invmapper_L, + invmapper_R); + break; + case Type.Uint32: + cuDiv_dispatch_typed(out, Lin, Rin, len, shape, invmapper_L, + invmapper_R); + break; + case Type.Int16: + cuDiv_dispatch_typed(out, Lin, Rin, len, shape, invmapper_L, + invmapper_R); + break; + case Type.Uint16: + cuDiv_dispatch_typed(out, Lin, Rin, len, shape, invmapper_L, + invmapper_R); + break; + case Type.Bool: + cuDiv_dispatch_typed(out, Lin, Rin, len, shape, invmapper_L, + invmapper_R); + break; + default: + cytnx_error_msg(true, "[cuDiv_dispatch] unsupported rhs dtype: %d%s", Rin->dtype(), + "\n"); + } + } + + } // namespace + + void cuDiv_dispatch(boost::intrusive_ptr &out, + boost::intrusive_ptr &Lin, + boost::intrusive_ptr &Rin, const unsigned long long &len, + const std::vector &shape, + const std::vector &invmapper_L, + const std::vector &invmapper_R) { + const unsigned int expected_dtype = Type.type_promote(Lin->dtype(), Rin->dtype()); + cytnx_error_msg(out->dtype() != expected_dtype, + "[cuDiv_dispatch] output dtype mismatch. 
got=%d expected=%d%s", out->dtype(), + expected_dtype, "\n"); + + switch (Lin->dtype()) { + case Type.ComplexDouble: + cuDiv_dispatch_rhs(out, Lin, Rin, len, shape, invmapper_L, invmapper_R); + break; + case Type.ComplexFloat: + cuDiv_dispatch_rhs(out, Lin, Rin, len, shape, invmapper_L, invmapper_R); + break; + case Type.Double: + cuDiv_dispatch_rhs(out, Lin, Rin, len, shape, invmapper_L, invmapper_R); + break; + case Type.Float: + cuDiv_dispatch_rhs(out, Lin, Rin, len, shape, invmapper_L, invmapper_R); + break; + case Type.Int64: + cuDiv_dispatch_rhs(out, Lin, Rin, len, shape, invmapper_L, invmapper_R); + break; + case Type.Uint64: + cuDiv_dispatch_rhs(out, Lin, Rin, len, shape, invmapper_L, invmapper_R); + break; + case Type.Int32: + cuDiv_dispatch_rhs(out, Lin, Rin, len, shape, invmapper_L, invmapper_R); + break; + case Type.Uint32: + cuDiv_dispatch_rhs(out, Lin, Rin, len, shape, invmapper_L, invmapper_R); + break; + case Type.Int16: + cuDiv_dispatch_rhs(out, Lin, Rin, len, shape, invmapper_L, invmapper_R); + break; + case Type.Uint16: + cuDiv_dispatch_rhs(out, Lin, Rin, len, shape, invmapper_L, invmapper_R); + break; + case Type.Bool: + cuDiv_dispatch_rhs(out, Lin, Rin, len, shape, invmapper_L, invmapper_R); + break; + default: + cytnx_error_msg(true, "[cuDiv_dispatch] unsupported lhs dtype: %d%s", Lin->dtype(), "\n"); + } + } + + } // namespace linalg_internal +} // namespace cytnx diff --git a/src/backend/linalg_internal_gpu/cuDiv_internal.hpp b/src/backend/linalg_internal_gpu/cuDiv_internal.hpp index 2b6c220b3..a4bee2e61 100644 --- a/src/backend/linalg_internal_gpu/cuDiv_internal.hpp +++ b/src/backend/linalg_internal_gpu/cuDiv_internal.hpp @@ -13,6 +13,13 @@ namespace cytnx { namespace linalg_internal { /// cuDiv + void cuDiv_dispatch(boost::intrusive_ptr &out, + boost::intrusive_ptr &Lin, + boost::intrusive_ptr &Rin, const unsigned long long &len, + const std::vector &shape, + const std::vector &invmapper_L, + const std::vector &invmapper_R); + void 
cuDiv_internal_cdtcd(boost::intrusive_ptr &out, boost::intrusive_ptr &Lin, boost::intrusive_ptr &Rin, diff --git a/src/backend/linalg_internal_gpu/cuMul_dispatch.cu b/src/backend/linalg_internal_gpu/cuMul_dispatch.cu new file mode 100644 index 000000000..e2bba1362 --- /dev/null +++ b/src/backend/linalg_internal_gpu/cuMul_dispatch.cu @@ -0,0 +1,283 @@ +#include "cuMul_internal.hpp" +#include "backend/utils_internal_interface.hpp" + +namespace cytnx { + + namespace linalg_internal { + + namespace { + + template + __device__ inline cuDoubleComplex CuToComplexDouble(const T &v) { + return make_cuDoubleComplex(static_cast(v), 0.0); + } + + __device__ inline cuDoubleComplex CuToComplexDouble(const cuDoubleComplex &v) { return v; } + + __device__ inline cuDoubleComplex CuToComplexDouble(const cuComplex &v) { + return cuComplexFloatToDouble(v); + } + + template + __device__ inline cuComplex CuToComplexFloat(const T &v) { + return make_cuFloatComplex(static_cast(v), 0.0f); + } + + __device__ inline cuComplex CuToComplexFloat(const cuComplex &v) { return v; } + + __device__ inline cuComplex CuToComplexFloat(const cuDoubleComplex &v) { + return make_cuFloatComplex(static_cast(cuCreal(v)), + static_cast(cuCimag(v))); + } + + template + __device__ inline TO CuMulDispatchOp(const TL &lhs, const TR &rhs) { + if constexpr (std::is_same_v) { + return cuCmul(CuToComplexDouble(lhs), CuToComplexDouble(rhs)); + } else if constexpr (std::is_same_v) { + return cuCmulf(CuToComplexFloat(lhs), CuToComplexFloat(rhs)); + } else { + return static_cast(lhs) * static_cast(rhs); + } + } + + template + __global__ void cuMul_dispatch_constconst_kernel(TO *out, const TL lhs, const cytnx_uint64 n, + const TR rhs) { + const cytnx_uint64 idx = blockIdx.x * blockDim.x + threadIdx.x; + if (idx < n) out[idx] = CuMulDispatchOp(lhs, rhs); + } + + template + __global__ void cuMul_dispatch_lconst_kernel(TO *out, const TL lhs, const cytnx_uint64 n, + const TR *rhs) { + const cytnx_uint64 idx = blockIdx.x * 
blockDim.x + threadIdx.x; + if (idx < n) out[idx] = CuMulDispatchOp(lhs, rhs[idx]); + } + + template + __global__ void cuMul_dispatch_rconst_kernel(TO *out, const TL *lhs, const cytnx_uint64 n, + const TR rhs) { + const cytnx_uint64 idx = blockIdx.x * blockDim.x + threadIdx.x; + if (idx < n) out[idx] = CuMulDispatchOp(lhs[idx], rhs); + } + + template + __global__ void cuMul_dispatch_tn_kernel(TO *out, const TL *lhs, const cytnx_uint64 n, + const TR *rhs) { + const cytnx_uint64 idx = blockIdx.x * blockDim.x + threadIdx.x; + if (idx < n) out[idx] = CuMulDispatchOp(lhs[idx], rhs[idx]); + } + + template + __global__ void cuMul_dispatch_tn_kernel_nonconti( + TO *out, const TL *lhs, const cytnx_uint64 n, const TR *rhs, const cytnx_uint64 *accu_shape, + const cytnx_uint64 *old_accu_shapeL, const cytnx_uint64 *old_accu_shapeR, + const cytnx_uint64 *invmapper_L, const cytnx_uint64 *invmapper_R, + const cytnx_uint64 shapesize) { + extern __shared__ cytnx_uint64 tmpv[]; + + const cytnx_uint64 idx = blockIdx.x * blockDim.x + threadIdx.x; + if (idx < n) { + cytnx_uint64 tmp = idx; + const cytnx_uint64 offset = threadIdx.x * shapesize; + cytnx_uint64 Lidx = 0, Ridx = 0; + + for (cytnx_uint64 j = 0; j < shapesize; j++) { + tmpv[offset + j] = tmp / accu_shape[j]; + tmp = tmp % accu_shape[j]; + } + for (cytnx_uint64 j = 0; j < shapesize; j++) { + Lidx += tmpv[offset + invmapper_L[j]] * old_accu_shapeL[j]; + Ridx += tmpv[offset + invmapper_R[j]] * old_accu_shapeR[j]; + } + out[idx] = CuMulDispatchOp(lhs[Lidx], rhs[Ridx]); + } + } + + template + void cuMul_dispatch_typed(boost::intrusive_ptr &out, + boost::intrusive_ptr &Lin, + boost::intrusive_ptr &Rin, + const unsigned long long &len, + const std::vector &shape, + const std::vector &invmapper_L, + const std::vector &invmapper_R) { + using TO = Type_class::type_promote_gpu_t; + cytnx_error_msg(out->dtype() != Type_class::cy_typeid_gpu_v, + "[cuMul_dispatch] output dtype mismatch. 
got=%d expected=%d%s", + out->dtype(), Type_class::cy_typeid_gpu_v, "\n"); + + TO *_out = reinterpret_cast(out->data()); + const TL *_Lin = reinterpret_cast(Lin->data()); + const TR *_Rin = reinterpret_cast(Rin->data()); + + cytnx_uint32 NBlocks = len / 512; + if (len % 512) NBlocks += 1; + + if (Lin->size() == 1 and Rin->size() == 1) { + cuMul_dispatch_constconst_kernel<<>>(_out, _Lin[0], len, _Rin[0]); + } else if (Lin->size() == 1) { + cuMul_dispatch_lconst_kernel<<>>(_out, _Lin[0], len, _Rin); + } else if (Rin->size() == 1) { + cuMul_dispatch_rconst_kernel<<>>(_out, _Lin, len, _Rin[0]); + } else { + if (shape.size() == 0) { + cuMul_dispatch_tn_kernel<<>>(_out, _Lin, len, _Rin); + } else { + cytnx_uint64 *m_accu_shape = reinterpret_cast( + utils_internal::cuCalloc_gpu(shape.size(), sizeof(cytnx_uint64))); + cytnx_uint64 *m_old_accu_shapeL = reinterpret_cast( + utils_internal::cuCalloc_gpu(shape.size(), sizeof(cytnx_uint64))); + cytnx_uint64 *m_old_accu_shapeR = reinterpret_cast( + utils_internal::cuCalloc_gpu(shape.size(), sizeof(cytnx_uint64))); + cytnx_uint64 *m_invmapper_L = reinterpret_cast( + utils_internal::cuMalloc_gpu(invmapper_L.size() * sizeof(cytnx_uint64))); + cytnx_uint64 *m_invmapper_R = reinterpret_cast( + utils_internal::cuMalloc_gpu(invmapper_R.size() * sizeof(cytnx_uint64))); + + checkCudaErrors(cudaMemcpy(m_invmapper_L, &invmapper_L[0], + sizeof(cytnx_uint64) * invmapper_L.size(), + cudaMemcpyHostToDevice)); + checkCudaErrors(cudaMemcpy(m_invmapper_R, &invmapper_R[0], + sizeof(cytnx_uint64) * invmapper_R.size(), + cudaMemcpyHostToDevice)); + + cytnx_uint64 tmp1 = 1, tmp2 = 1, tmp3 = 1; + for (cytnx_uint64 i = 0; i < shape.size(); i++) { + m_accu_shape[shape.size() - 1 - i] = tmp1; + tmp1 *= shape[shape.size() - 1 - i]; + + m_old_accu_shapeL[shape.size() - 1 - i] = tmp2; + tmp2 *= shape[invmapper_L[shape.size() - 1 - i]]; + + m_old_accu_shapeR[shape.size() - 1 - i] = tmp3; + tmp3 *= shape[invmapper_R[shape.size() - 1 - i]]; + } + + 
cuMul_dispatch_tn_kernel_nonconti<<>>( + _out, _Lin, len, _Rin, m_accu_shape, m_old_accu_shapeL, m_old_accu_shapeR, + m_invmapper_L, m_invmapper_R, shape.size()); + + checkCudaErrors(cudaFree(m_accu_shape)); + checkCudaErrors(cudaFree(m_old_accu_shapeL)); + checkCudaErrors(cudaFree(m_old_accu_shapeR)); + checkCudaErrors(cudaFree(m_invmapper_L)); + checkCudaErrors(cudaFree(m_invmapper_R)); + } + } + } + + template + void cuMul_dispatch_rhs(boost::intrusive_ptr &out, + boost::intrusive_ptr &Lin, + boost::intrusive_ptr &Rin, + const unsigned long long &len, const std::vector &shape, + const std::vector &invmapper_L, + const std::vector &invmapper_R) { + switch (Rin->dtype()) { + case Type.ComplexDouble: + cuMul_dispatch_typed(out, Lin, Rin, len, shape, invmapper_L, + invmapper_R); + break; + case Type.ComplexFloat: + cuMul_dispatch_typed(out, Lin, Rin, len, shape, invmapper_L, + invmapper_R); + break; + case Type.Double: + cuMul_dispatch_typed(out, Lin, Rin, len, shape, invmapper_L, + invmapper_R); + break; + case Type.Float: + cuMul_dispatch_typed(out, Lin, Rin, len, shape, invmapper_L, + invmapper_R); + break; + case Type.Int64: + cuMul_dispatch_typed(out, Lin, Rin, len, shape, invmapper_L, + invmapper_R); + break; + case Type.Uint64: + cuMul_dispatch_typed(out, Lin, Rin, len, shape, invmapper_L, + invmapper_R); + break; + case Type.Int32: + cuMul_dispatch_typed(out, Lin, Rin, len, shape, invmapper_L, + invmapper_R); + break; + case Type.Uint32: + cuMul_dispatch_typed(out, Lin, Rin, len, shape, invmapper_L, + invmapper_R); + break; + case Type.Int16: + cuMul_dispatch_typed(out, Lin, Rin, len, shape, invmapper_L, + invmapper_R); + break; + case Type.Uint16: + cuMul_dispatch_typed(out, Lin, Rin, len, shape, invmapper_L, + invmapper_R); + break; + case Type.Bool: + cuMul_dispatch_typed(out, Lin, Rin, len, shape, invmapper_L, + invmapper_R); + break; + default: + cytnx_error_msg(true, "[cuMul_dispatch] unsupported rhs dtype: %d%s", Rin->dtype(), + "\n"); + } + } + + } 
// namespace + + void cuMul_dispatch(boost::intrusive_ptr &out, + boost::intrusive_ptr &Lin, + boost::intrusive_ptr &Rin, const unsigned long long &len, + const std::vector &shape, + const std::vector &invmapper_L, + const std::vector &invmapper_R) { + const unsigned int expected_dtype = Type.type_promote(Lin->dtype(), Rin->dtype()); + cytnx_error_msg(out->dtype() != expected_dtype, + "[cuMul_dispatch] output dtype mismatch. got=%d expected=%d%s", out->dtype(), + expected_dtype, "\n"); + + switch (Lin->dtype()) { + case Type.ComplexDouble: + cuMul_dispatch_rhs(out, Lin, Rin, len, shape, invmapper_L, invmapper_R); + break; + case Type.ComplexFloat: + cuMul_dispatch_rhs(out, Lin, Rin, len, shape, invmapper_L, invmapper_R); + break; + case Type.Double: + cuMul_dispatch_rhs(out, Lin, Rin, len, shape, invmapper_L, invmapper_R); + break; + case Type.Float: + cuMul_dispatch_rhs(out, Lin, Rin, len, shape, invmapper_L, invmapper_R); + break; + case Type.Int64: + cuMul_dispatch_rhs(out, Lin, Rin, len, shape, invmapper_L, invmapper_R); + break; + case Type.Uint64: + cuMul_dispatch_rhs(out, Lin, Rin, len, shape, invmapper_L, invmapper_R); + break; + case Type.Int32: + cuMul_dispatch_rhs(out, Lin, Rin, len, shape, invmapper_L, invmapper_R); + break; + case Type.Uint32: + cuMul_dispatch_rhs(out, Lin, Rin, len, shape, invmapper_L, invmapper_R); + break; + case Type.Int16: + cuMul_dispatch_rhs(out, Lin, Rin, len, shape, invmapper_L, invmapper_R); + break; + case Type.Uint16: + cuMul_dispatch_rhs(out, Lin, Rin, len, shape, invmapper_L, invmapper_R); + break; + case Type.Bool: + cuMul_dispatch_rhs(out, Lin, Rin, len, shape, invmapper_L, invmapper_R); + break; + default: + cytnx_error_msg(true, "[cuMul_dispatch] unsupported lhs dtype: %d%s", Lin->dtype(), "\n"); + } + } + + } // namespace linalg_internal +} // namespace cytnx diff --git a/src/backend/linalg_internal_gpu/cuMul_internal.hpp b/src/backend/linalg_internal_gpu/cuMul_internal.hpp index 6837e6f43..8f393e4d7 100644 --- 
a/src/backend/linalg_internal_gpu/cuMul_internal.hpp +++ b/src/backend/linalg_internal_gpu/cuMul_internal.hpp @@ -13,6 +13,13 @@ namespace cytnx { namespace linalg_internal { /// cuMul + void cuMul_dispatch(boost::intrusive_ptr &out, + boost::intrusive_ptr &Lin, + boost::intrusive_ptr &Rin, const unsigned long long &len, + const std::vector &shape, + const std::vector &invmapper_L, + const std::vector &invmapper_R); + void cuMul_internal_cdtcd(boost::intrusive_ptr &out, boost::intrusive_ptr &Lin, boost::intrusive_ptr &Rin, diff --git a/src/backend/linalg_internal_gpu/cuSub_dispatch.cu b/src/backend/linalg_internal_gpu/cuSub_dispatch.cu new file mode 100644 index 000000000..b0b593299 --- /dev/null +++ b/src/backend/linalg_internal_gpu/cuSub_dispatch.cu @@ -0,0 +1,283 @@ +#include "cuSub_internal.hpp" +#include "backend/utils_internal_interface.hpp" + +namespace cytnx { + + namespace linalg_internal { + + namespace { + + template + __device__ inline cuDoubleComplex CuToComplexDouble(const T &v) { + return make_cuDoubleComplex(static_cast(v), 0.0); + } + + __device__ inline cuDoubleComplex CuToComplexDouble(const cuDoubleComplex &v) { return v; } + + __device__ inline cuDoubleComplex CuToComplexDouble(const cuComplex &v) { + return cuComplexFloatToDouble(v); + } + + template + __device__ inline cuComplex CuToComplexFloat(const T &v) { + return make_cuFloatComplex(static_cast(v), 0.0f); + } + + __device__ inline cuComplex CuToComplexFloat(const cuComplex &v) { return v; } + + __device__ inline cuComplex CuToComplexFloat(const cuDoubleComplex &v) { + return make_cuFloatComplex(static_cast(cuCreal(v)), + static_cast(cuCimag(v))); + } + + template + __device__ inline TO CuSubDispatchOp(const TL &lhs, const TR &rhs) { + if constexpr (std::is_same_v) { + return cuCsub(CuToComplexDouble(lhs), CuToComplexDouble(rhs)); + } else if constexpr (std::is_same_v) { + return cuCsubf(CuToComplexFloat(lhs), CuToComplexFloat(rhs)); + } else { + return static_cast(lhs) - 
static_cast(rhs); + } + } + + template + __global__ void cuSub_dispatch_constconst_kernel(TO *out, const TL lhs, const cytnx_uint64 n, + const TR rhs) { + const cytnx_uint64 idx = blockIdx.x * blockDim.x + threadIdx.x; + if (idx < n) out[idx] = CuSubDispatchOp(lhs, rhs); + } + + template + __global__ void cuSub_dispatch_lconst_kernel(TO *out, const TL lhs, const cytnx_uint64 n, + const TR *rhs) { + const cytnx_uint64 idx = blockIdx.x * blockDim.x + threadIdx.x; + if (idx < n) out[idx] = CuSubDispatchOp(lhs, rhs[idx]); + } + + template + __global__ void cuSub_dispatch_rconst_kernel(TO *out, const TL *lhs, const cytnx_uint64 n, + const TR rhs) { + const cytnx_uint64 idx = blockIdx.x * blockDim.x + threadIdx.x; + if (idx < n) out[idx] = CuSubDispatchOp(lhs[idx], rhs); + } + + template + __global__ void cuSub_dispatch_tn_kernel(TO *out, const TL *lhs, const cytnx_uint64 n, + const TR *rhs) { + const cytnx_uint64 idx = blockIdx.x * blockDim.x + threadIdx.x; + if (idx < n) out[idx] = CuSubDispatchOp(lhs[idx], rhs[idx]); + } + + template + __global__ void cuSub_dispatch_tn_kernel_nonconti( + TO *out, const TL *lhs, const cytnx_uint64 n, const TR *rhs, const cytnx_uint64 *accu_shape, + const cytnx_uint64 *old_accu_shapeL, const cytnx_uint64 *old_accu_shapeR, + const cytnx_uint64 *invmapper_L, const cytnx_uint64 *invmapper_R, + const cytnx_uint64 shapesize) { + extern __shared__ cytnx_uint64 tmpv[]; + + const cytnx_uint64 idx = blockIdx.x * blockDim.x + threadIdx.x; + if (idx < n) { + cytnx_uint64 tmp = idx; + const cytnx_uint64 offset = threadIdx.x * shapesize; + cytnx_uint64 Lidx = 0, Ridx = 0; + + for (cytnx_uint64 j = 0; j < shapesize; j++) { + tmpv[offset + j] = tmp / accu_shape[j]; + tmp = tmp % accu_shape[j]; + } + for (cytnx_uint64 j = 0; j < shapesize; j++) { + Lidx += tmpv[offset + invmapper_L[j]] * old_accu_shapeL[j]; + Ridx += tmpv[offset + invmapper_R[j]] * old_accu_shapeR[j]; + } + out[idx] = CuSubDispatchOp(lhs[Lidx], rhs[Ridx]); + } + } + + template + void 
cuSub_dispatch_typed(boost::intrusive_ptr &out, + boost::intrusive_ptr &Lin, + boost::intrusive_ptr &Rin, + const unsigned long long &len, + const std::vector &shape, + const std::vector &invmapper_L, + const std::vector &invmapper_R) { + using TO = Type_class::type_promote_gpu_t; + cytnx_error_msg(out->dtype() != Type_class::cy_typeid_gpu_v, + "[cuSub_dispatch] output dtype mismatch. got=%d expected=%d%s", + out->dtype(), Type_class::cy_typeid_gpu_v, "\n"); + + TO *_out = reinterpret_cast(out->data()); + const TL *_Lin = reinterpret_cast(Lin->data()); + const TR *_Rin = reinterpret_cast(Rin->data()); + + cytnx_uint32 NBlocks = len / 512; + if (len % 512) NBlocks += 1; + + if (Lin->size() == 1 and Rin->size() == 1) { + cuSub_dispatch_constconst_kernel<<>>(_out, _Lin[0], len, _Rin[0]); + } else if (Lin->size() == 1) { + cuSub_dispatch_lconst_kernel<<>>(_out, _Lin[0], len, _Rin); + } else if (Rin->size() == 1) { + cuSub_dispatch_rconst_kernel<<>>(_out, _Lin, len, _Rin[0]); + } else { + if (shape.size() == 0) { + cuSub_dispatch_tn_kernel<<>>(_out, _Lin, len, _Rin); + } else { + cytnx_uint64 *m_accu_shape = reinterpret_cast( + utils_internal::cuCalloc_gpu(shape.size(), sizeof(cytnx_uint64))); + cytnx_uint64 *m_old_accu_shapeL = reinterpret_cast( + utils_internal::cuCalloc_gpu(shape.size(), sizeof(cytnx_uint64))); + cytnx_uint64 *m_old_accu_shapeR = reinterpret_cast( + utils_internal::cuCalloc_gpu(shape.size(), sizeof(cytnx_uint64))); + cytnx_uint64 *m_invmapper_L = reinterpret_cast( + utils_internal::cuMalloc_gpu(invmapper_L.size() * sizeof(cytnx_uint64))); + cytnx_uint64 *m_invmapper_R = reinterpret_cast( + utils_internal::cuMalloc_gpu(invmapper_R.size() * sizeof(cytnx_uint64))); + + checkCudaErrors(cudaMemcpy(m_invmapper_L, &invmapper_L[0], + sizeof(cytnx_uint64) * invmapper_L.size(), + cudaMemcpyHostToDevice)); + checkCudaErrors(cudaMemcpy(m_invmapper_R, &invmapper_R[0], + sizeof(cytnx_uint64) * invmapper_R.size(), + cudaMemcpyHostToDevice)); + + cytnx_uint64 tmp1 = 
1, tmp2 = 1, tmp3 = 1; + for (cytnx_uint64 i = 0; i < shape.size(); i++) { + m_accu_shape[shape.size() - 1 - i] = tmp1; + tmp1 *= shape[shape.size() - 1 - i]; + + m_old_accu_shapeL[shape.size() - 1 - i] = tmp2; + tmp2 *= shape[invmapper_L[shape.size() - 1 - i]]; + + m_old_accu_shapeR[shape.size() - 1 - i] = tmp3; + tmp3 *= shape[invmapper_R[shape.size() - 1 - i]]; + } + + cuSub_dispatch_tn_kernel_nonconti<<>>( + _out, _Lin, len, _Rin, m_accu_shape, m_old_accu_shapeL, m_old_accu_shapeR, + m_invmapper_L, m_invmapper_R, shape.size()); + + checkCudaErrors(cudaFree(m_accu_shape)); + checkCudaErrors(cudaFree(m_old_accu_shapeL)); + checkCudaErrors(cudaFree(m_old_accu_shapeR)); + checkCudaErrors(cudaFree(m_invmapper_L)); + checkCudaErrors(cudaFree(m_invmapper_R)); + } + } + } + + template + void cuSub_dispatch_rhs(boost::intrusive_ptr &out, + boost::intrusive_ptr &Lin, + boost::intrusive_ptr &Rin, + const unsigned long long &len, const std::vector &shape, + const std::vector &invmapper_L, + const std::vector &invmapper_R) { + switch (Rin->dtype()) { + case Type.ComplexDouble: + cuSub_dispatch_typed(out, Lin, Rin, len, shape, invmapper_L, + invmapper_R); + break; + case Type.ComplexFloat: + cuSub_dispatch_typed(out, Lin, Rin, len, shape, invmapper_L, + invmapper_R); + break; + case Type.Double: + cuSub_dispatch_typed(out, Lin, Rin, len, shape, invmapper_L, + invmapper_R); + break; + case Type.Float: + cuSub_dispatch_typed(out, Lin, Rin, len, shape, invmapper_L, + invmapper_R); + break; + case Type.Int64: + cuSub_dispatch_typed(out, Lin, Rin, len, shape, invmapper_L, + invmapper_R); + break; + case Type.Uint64: + cuSub_dispatch_typed(out, Lin, Rin, len, shape, invmapper_L, + invmapper_R); + break; + case Type.Int32: + cuSub_dispatch_typed(out, Lin, Rin, len, shape, invmapper_L, + invmapper_R); + break; + case Type.Uint32: + cuSub_dispatch_typed(out, Lin, Rin, len, shape, invmapper_L, + invmapper_R); + break; + case Type.Int16: + cuSub_dispatch_typed(out, Lin, Rin, len, 
shape, invmapper_L, + invmapper_R); + break; + case Type.Uint16: + cuSub_dispatch_typed(out, Lin, Rin, len, shape, invmapper_L, + invmapper_R); + break; + case Type.Bool: + cuSub_dispatch_typed(out, Lin, Rin, len, shape, invmapper_L, + invmapper_R); + break; + default: + cytnx_error_msg(true, "[cuSub_dispatch] unsupported rhs dtype: %d%s", Rin->dtype(), + "\n"); + } + } + + } // namespace + + void cuSub_dispatch(boost::intrusive_ptr &out, + boost::intrusive_ptr &Lin, + boost::intrusive_ptr &Rin, const unsigned long long &len, + const std::vector &shape, + const std::vector &invmapper_L, + const std::vector &invmapper_R) { + const unsigned int expected_dtype = Type.type_promote(Lin->dtype(), Rin->dtype()); + cytnx_error_msg(out->dtype() != expected_dtype, + "[cuSub_dispatch] output dtype mismatch. got=%d expected=%d%s", out->dtype(), + expected_dtype, "\n"); + + switch (Lin->dtype()) { + case Type.ComplexDouble: + cuSub_dispatch_rhs(out, Lin, Rin, len, shape, invmapper_L, invmapper_R); + break; + case Type.ComplexFloat: + cuSub_dispatch_rhs(out, Lin, Rin, len, shape, invmapper_L, invmapper_R); + break; + case Type.Double: + cuSub_dispatch_rhs(out, Lin, Rin, len, shape, invmapper_L, invmapper_R); + break; + case Type.Float: + cuSub_dispatch_rhs(out, Lin, Rin, len, shape, invmapper_L, invmapper_R); + break; + case Type.Int64: + cuSub_dispatch_rhs(out, Lin, Rin, len, shape, invmapper_L, invmapper_R); + break; + case Type.Uint64: + cuSub_dispatch_rhs(out, Lin, Rin, len, shape, invmapper_L, invmapper_R); + break; + case Type.Int32: + cuSub_dispatch_rhs(out, Lin, Rin, len, shape, invmapper_L, invmapper_R); + break; + case Type.Uint32: + cuSub_dispatch_rhs(out, Lin, Rin, len, shape, invmapper_L, invmapper_R); + break; + case Type.Int16: + cuSub_dispatch_rhs(out, Lin, Rin, len, shape, invmapper_L, invmapper_R); + break; + case Type.Uint16: + cuSub_dispatch_rhs(out, Lin, Rin, len, shape, invmapper_L, invmapper_R); + break; + case Type.Bool: + cuSub_dispatch_rhs(out, Lin, 
Rin, len, shape, invmapper_L, invmapper_R); + break; + default: + cytnx_error_msg(true, "[cuSub_dispatch] unsupported lhs dtype: %d%s", Lin->dtype(), "\n"); + } + } + + } // namespace linalg_internal +} // namespace cytnx diff --git a/src/backend/linalg_internal_gpu/cuSub_internal.hpp b/src/backend/linalg_internal_gpu/cuSub_internal.hpp index 5073e3ea6..8dc83d66d 100644 --- a/src/backend/linalg_internal_gpu/cuSub_internal.hpp +++ b/src/backend/linalg_internal_gpu/cuSub_internal.hpp @@ -13,6 +13,13 @@ namespace cytnx { namespace linalg_internal { /// cuSub + void cuSub_dispatch(boost::intrusive_ptr &out, + boost::intrusive_ptr &Lin, + boost::intrusive_ptr &Rin, const unsigned long long &len, + const std::vector &shape, + const std::vector &invmapper_L, + const std::vector &invmapper_R); + void cuSub_internal_cdtcd(boost::intrusive_ptr &out, boost::intrusive_ptr &Lin, boost::intrusive_ptr &Rin, diff --git a/src/linalg/Div.cpp b/src/linalg/Div.cpp index 80d7fa0af..d31b0a606 100644 --- a/src/linalg/Div.cpp +++ b/src/linalg/Div.cpp @@ -16,22 +16,22 @@ namespace cytnx { bool icnst = false; if (Lt.shape().size() == 1 && Lt.shape()[0] == 1) { out._impl = Rt._impl->_clone_meta_only(); - out._impl->storage() = Storage( - Rt.storage().size(), Lt.dtype() < Rt.dtype() ? Lt.dtype() : Rt.dtype(), Rt.device()); - // out.Init(Rt.shape(),Lt.dtype() < Rt.dtype()?Lt.dtype():Rt.dtype(),Lt.device()); + out._impl->storage() = + Storage(Rt.storage().size(), Type.type_promote(Lt.dtype(), Rt.dtype()), Rt.device()); + // out.Init(Rt.shape(),Type.type_promote(Lt.dtype(), Rt.dtype()),Lt.device()); icnst = true; } else if (Rt.shape().size() == 1 && Rt.shape()[0] == 1) { - // out.Init(Lt.shape(),Lt.dtype() < Rt.dtype()?Lt.dtype():Rt.dtype(),Lt.device()); + // out.Init(Lt.shape(),Type.type_promote(Lt.dtype(), Rt.dtype()),Lt.device()); out._impl = Lt._impl->_clone_meta_only(); - out._impl->storage() = Storage( - Lt.storage().size(), Lt.dtype() < Rt.dtype() ? 
Lt.dtype() : Rt.dtype(), Lt.device()); + out._impl->storage() = + Storage(Lt.storage().size(), Type.type_promote(Lt.dtype(), Rt.dtype()), Lt.device()); icnst = true; } else { cytnx_error_msg(Lt.shape() != Rt.shape(), "[Div] The two tensors do not have the same shape.%s", "\n"); - out.Init(Lt.shape(), Lt.dtype() < Rt.dtype() ? Lt.dtype() : Rt.dtype(), Lt.device()); + out.Init(Lt.shape(), Type.type_promote(Lt.dtype(), Rt.dtype()), Lt.device()); } if ((Lt.is_contiguous() && Rt.is_contiguous()) || icnst) { @@ -53,9 +53,9 @@ namespace cytnx { } else { #ifdef UNI_GPU checkCudaErrors(cudaSetDevice(Rt.device())); - cytnx::linalg_internal::lii.cuAri_ii[Lt.dtype()][Rt.dtype()]( - out._impl->storage()._impl, Lt._impl->storage()._impl, Rt._impl->storage()._impl, - out._impl->storage()._impl->size(), {}, {}, {}, 3); + linalg_internal::cuDiv_dispatch(out._impl->storage()._impl, Lt._impl->storage()._impl, + Rt._impl->storage()._impl, + out._impl->storage()._impl->size(), {}, {}, {}); #else cytnx_error_msg(true, "[Div] fatal error, the tensor is on GPU without CUDA support.%s", "\n"); @@ -115,9 +115,9 @@ namespace cytnx { } else { #ifdef UNI_GPU checkCudaErrors(cudaSetDevice(Rt.device())); - cytnx::linalg_internal::lii.cuAri_ii[Type.ComplexDouble][Rt.dtype()]( - out._impl->storage()._impl, Cnst._impl, Rt._impl->storage()._impl, - Rt._impl->storage()._impl->size(), {}, {}, {}, 3); + linalg_internal::cuDiv_dispatch(out._impl->storage()._impl, Cnst._impl, + Rt._impl->storage()._impl, + Rt._impl->storage()._impl->size(), {}, {}, {}); #else cytnx_error_msg(true, "[Div] fatal error, the tensor is on GPU without CUDA support.%s", "\n"); @@ -134,9 +134,8 @@ namespace cytnx { Tensor out; out._impl = Rt._impl->_clone_meta_only(); //(Rt.shape(),Type.ComplexDouble,Rt.device()); - out._impl->storage() = - Storage(Rt._impl->storage().size(), - Type.ComplexFloat < Rt.dtype() ? 
Type.ComplexFloat : Rt.dtype(), Rt.device()); + out._impl->storage() = Storage(Rt._impl->storage().size(), + Type.type_promote(Type.ComplexFloat, Rt.dtype()), Rt.device()); // Tensor out(Rt.shape(),Type.ComplexFloat < // Rt.dtype()?Type.ComplexFloat:Rt.dtype(),Rt.device()); @@ -152,9 +151,9 @@ namespace cytnx { } else { #ifdef UNI_GPU checkCudaErrors(cudaSetDevice(Rt.device())); - cytnx::linalg_internal::lii.cuAri_ii[Type.ComplexFloat][Rt.dtype()]( - out._impl->storage()._impl, Cnst._impl, Rt._impl->storage()._impl, - Rt._impl->storage()._impl->size(), {}, {}, {}, 3); + linalg_internal::cuDiv_dispatch(out._impl->storage()._impl, Cnst._impl, + Rt._impl->storage()._impl, + Rt._impl->storage()._impl->size(), {}, {}, {}); #else cytnx_error_msg(true, "[Div] fatal error, the tensor is on GPU without CUDA support.%s", "\n"); @@ -170,10 +169,9 @@ namespace cytnx { Cnst.at(0) = lc; Tensor out; out._impl = Rt._impl->_clone_meta_only(); //(Rt.shape(),Type.ComplexDouble,Rt.device()); - out._impl->storage() = - Storage(Rt._impl->storage().size(), Type.Double < Rt.dtype() ? 
Type.Double : Rt.dtype(), - Rt.device()); - // Tensor out(Rt.shape(),Type.Double < Rt.dtype()?Type.Double:Rt.dtype(),Rt.device()); + out._impl->storage() = Storage(Rt._impl->storage().size(), + Type.type_promote(Type.Double, Rt.dtype()), Rt.device()); + // Tensor out(Rt.shape(),Type.type_promote(Type.Double, Rt.dtype()),Rt.device()); if (Rt.device() == Device.cpu) { std::visit( @@ -186,9 +184,9 @@ namespace cytnx { Rt.ptr()); } else { #ifdef UNI_GPU - cytnx::linalg_internal::lii.cuAri_ii[Type.Double][Rt.dtype()]( - out._impl->storage()._impl, Cnst._impl, Rt._impl->storage()._impl, - Rt._impl->storage()._impl->size(), {}, {}, {}, 3); + linalg_internal::cuDiv_dispatch(out._impl->storage()._impl, Cnst._impl, + Rt._impl->storage()._impl, + Rt._impl->storage()._impl->size(), {}, {}, {}); #else cytnx_error_msg(true, "[Div] fatal error, the tensor is on GPU without CUDA support.%s", "\n"); @@ -204,9 +202,9 @@ namespace cytnx { Cnst.at(0) = lc; Tensor out; out._impl = Rt._impl->_clone_meta_only(); //(Rt.shape(),Type.ComplexDouble,Rt.device()); - out._impl->storage() = Storage( - Rt._impl->storage().size(), Type.Float < Rt.dtype() ? 
Type.Float : Rt.dtype(), Rt.device()); - // Tensor out(Rt.shape(),Type.Float < Rt.dtype()?Type.Float:Rt.dtype(),Rt.device()); + out._impl->storage() = + Storage(Rt._impl->storage().size(), Type.type_promote(Type.Float, Rt.dtype()), Rt.device()); + // Tensor out(Rt.shape(),Type.type_promote(Type.Float, Rt.dtype()),Rt.device()); if (Rt.device() == Device.cpu) { std::visit( @@ -220,9 +218,9 @@ namespace cytnx { } else { #ifdef UNI_GPU checkCudaErrors(cudaSetDevice(Rt.device())); - cytnx::linalg_internal::lii.cuAri_ii[Type.Float][Rt.dtype()]( - out._impl->storage()._impl, Cnst._impl, Rt._impl->storage()._impl, - Rt._impl->storage()._impl->size(), {}, {}, {}, 3); + linalg_internal::cuDiv_dispatch(out._impl->storage()._impl, Cnst._impl, + Rt._impl->storage()._impl, + Rt._impl->storage()._impl->size(), {}, {}, {}); #else cytnx_error_msg(true, "[Div] fatal error, the tensor is on GPU without CUDA support.%s", "\n"); @@ -238,9 +236,9 @@ namespace cytnx { Cnst.at(0) = lc; Tensor out; out._impl = Rt._impl->_clone_meta_only(); //(Rt.shape(),Type.ComplexDouble,Rt.device()); - out._impl->storage() = Storage( - Rt._impl->storage().size(), Type.Int64 < Rt.dtype() ? 
Type.Int64 : Rt.dtype(), Rt.device()); - // Tensor out(Rt.shape(),Type.Int64 < Rt.dtype()?Type.Int64:Rt.dtype(),Rt.device()); + out._impl->storage() = + Storage(Rt._impl->storage().size(), Type.type_promote(Type.Int64, Rt.dtype()), Rt.device()); + // Tensor out(Rt.shape(),Type.type_promote(Type.Int64, Rt.dtype()),Rt.device()); if (Rt.device() == Device.cpu) { std::visit( @@ -253,9 +251,9 @@ namespace cytnx { Rt.ptr()); } else { #ifdef UNI_GPU - cytnx::linalg_internal::lii.cuAri_ii[Type.Int64][Rt.dtype()]( - out._impl->storage()._impl, Cnst._impl, Rt._impl->storage()._impl, - Rt._impl->storage()._impl->size(), {}, {}, {}, 3); + linalg_internal::cuDiv_dispatch(out._impl->storage()._impl, Cnst._impl, + Rt._impl->storage()._impl, + Rt._impl->storage()._impl->size(), {}, {}, {}); #else cytnx_error_msg(true, "[Div] fatal error, the tensor is on GPU without CUDA support.%s", "\n"); @@ -271,10 +269,9 @@ namespace cytnx { Cnst.at(0) = lc; Tensor out; out._impl = Rt._impl->_clone_meta_only(); //(Rt.shape(),Type.ComplexDouble,Rt.device()); - out._impl->storage() = - Storage(Rt._impl->storage().size(), Type.Uint64 < Rt.dtype() ? 
Type.Uint64 : Rt.dtype(), - Rt.device()); - // Tensor out(Rt.shape(),Type.Uint64 < Rt.dtype()?Type.Uint64:Rt.dtype(),Rt.device()); + out._impl->storage() = Storage(Rt._impl->storage().size(), + Type.type_promote(Type.Uint64, Rt.dtype()), Rt.device()); + // Tensor out(Rt.shape(),Type.type_promote(Type.Uint64, Rt.dtype()),Rt.device()); if (Rt.device() == Device.cpu) { std::visit( @@ -288,9 +285,9 @@ namespace cytnx { } else { #ifdef UNI_GPU checkCudaErrors(cudaSetDevice(Rt.device())); - cytnx::linalg_internal::lii.cuAri_ii[Type.Uint64][Rt.dtype()]( - out._impl->storage()._impl, Cnst._impl, Rt._impl->storage()._impl, - Rt._impl->storage()._impl->size(), {}, {}, {}, 3); + linalg_internal::cuDiv_dispatch(out._impl->storage()._impl, Cnst._impl, + Rt._impl->storage()._impl, + Rt._impl->storage()._impl->size(), {}, {}, {}); #else cytnx_error_msg(true, "[Div] fatal error, the tensor is on GPU without CUDA support.%s", "\n"); @@ -306,9 +303,9 @@ namespace cytnx { Cnst.at(0) = lc; Tensor out; out._impl = Rt._impl->_clone_meta_only(); //(Rt.shape(),Type.ComplexDouble,Rt.device()); - out._impl->storage() = Storage( - Rt._impl->storage().size(), Type.Int32 < Rt.dtype() ? 
Type.Int32 : Rt.dtype(), Rt.device()); - // Tensor out(Rt.shape(),Type.Int32 < Rt.dtype()?Type.Int32:Rt.dtype(),Rt.device()); + out._impl->storage() = + Storage(Rt._impl->storage().size(), Type.type_promote(Type.Int32, Rt.dtype()), Rt.device()); + // Tensor out(Rt.shape(),Type.type_promote(Type.Int32, Rt.dtype()),Rt.device()); if (Rt.device() == Device.cpu) { std::visit( @@ -322,9 +319,9 @@ namespace cytnx { } else { #ifdef UNI_GPU checkCudaErrors(cudaSetDevice(Rt.device())); - cytnx::linalg_internal::lii.cuAri_ii[Type.Int32][Rt.dtype()]( - out._impl->storage()._impl, Cnst._impl, Rt._impl->storage()._impl, - Rt._impl->storage()._impl->size(), {}, {}, {}, 3); + linalg_internal::cuDiv_dispatch(out._impl->storage()._impl, Cnst._impl, + Rt._impl->storage()._impl, + Rt._impl->storage()._impl->size(), {}, {}, {}); #else cytnx_error_msg(true, "[Div] fatal error, the tensor is on GPU without CUDA support.%s", "\n"); @@ -340,10 +337,9 @@ namespace cytnx { Cnst.at(0) = lc; Tensor out; out._impl = Rt._impl->_clone_meta_only(); //(Rt.shape(),Type.ComplexDouble,Rt.device()); - out._impl->storage() = - Storage(Rt._impl->storage().size(), Type.Uint32 < Rt.dtype() ? 
Type.Uint32 : Rt.dtype(), - Rt.device()); - // Tensor out(Rt.shape(),Type.Uint32 < Rt.dtype()?Type.Uint32:Rt.dtype(),Rt.device()); + out._impl->storage() = Storage(Rt._impl->storage().size(), + Type.type_promote(Type.Uint32, Rt.dtype()), Rt.device()); + // Tensor out(Rt.shape(),Type.type_promote(Type.Uint32, Rt.dtype()),Rt.device()); if (Rt.device() == Device.cpu) { std::visit( @@ -357,9 +353,9 @@ namespace cytnx { } else { #ifdef UNI_GPU checkCudaErrors(cudaSetDevice(Rt.device())); - cytnx::linalg_internal::lii.cuAri_ii[Type.Uint32][Rt.dtype()]( - out._impl->storage()._impl, Cnst._impl, Rt._impl->storage()._impl, - Rt._impl->storage()._impl->size(), {}, {}, {}, 3); + linalg_internal::cuDiv_dispatch(out._impl->storage()._impl, Cnst._impl, + Rt._impl->storage()._impl, + Rt._impl->storage()._impl->size(), {}, {}, {}); #else cytnx_error_msg(true, "[Div] fatal error, the tensor is on GPU without CUDA support.%s", "\n"); @@ -375,9 +371,9 @@ namespace cytnx { Cnst.at(0) = lc; Tensor out; out._impl = Rt._impl->_clone_meta_only(); //(Rt.shape(),Type.ComplexDouble,Rt.device()); - out._impl->storage() = Storage( - Rt._impl->storage().size(), Type.Int16 < Rt.dtype() ? 
Type.Int16 : Rt.dtype(), Rt.device()); - // Tensor out(Rt.shape(),Type.Int16 < Rt.dtype()?Type.Int16:Rt.dtype(),Rt.device()); + out._impl->storage() = + Storage(Rt._impl->storage().size(), Type.type_promote(Type.Int16, Rt.dtype()), Rt.device()); + // Tensor out(Rt.shape(),Type.type_promote(Type.Int16, Rt.dtype()),Rt.device()); if (Rt.device() == Device.cpu) { std::visit( @@ -391,9 +387,9 @@ namespace cytnx { } else { #ifdef UNI_GPU checkCudaErrors(cudaSetDevice(Rt.device())); - cytnx::linalg_internal::lii.cuAri_ii[Type.Int16][Rt.dtype()]( - out._impl->storage()._impl, Cnst._impl, Rt._impl->storage()._impl, - Rt._impl->storage()._impl->size(), {}, {}, {}, 3); + linalg_internal::cuDiv_dispatch(out._impl->storage()._impl, Cnst._impl, + Rt._impl->storage()._impl, + Rt._impl->storage()._impl->size(), {}, {}, {}); #else cytnx_error_msg(true, "[Div] fatal error, the tensor is on GPU without CUDA support.%s", "\n"); @@ -409,10 +405,9 @@ namespace cytnx { Cnst.at(0) = lc; Tensor out; out._impl = Rt._impl->_clone_meta_only(); //(Rt.shape(),Type.ComplexDouble,Rt.device()); - out._impl->storage() = - Storage(Rt._impl->storage().size(), Type.Uint16 < Rt.dtype() ? 
Type.Uint16 : Rt.dtype(), - Rt.device()); - // Tensor out(Rt.shape(),Type.Uint16 < Rt.dtype()?Type.Uint16:Rt.dtype(),Rt.device()); + out._impl->storage() = Storage(Rt._impl->storage().size(), + Type.type_promote(Type.Uint16, Rt.dtype()), Rt.device()); + // Tensor out(Rt.shape(),Type.type_promote(Type.Uint16, Rt.dtype()),Rt.device()); if (Rt.device() == Device.cpu) { std::visit( @@ -426,9 +421,9 @@ namespace cytnx { } else { #ifdef UNI_GPU checkCudaErrors(cudaSetDevice(Rt.device())); - cytnx::linalg_internal::lii.cuAri_ii[Type.Uint16][Rt.dtype()]( - out._impl->storage()._impl, Cnst._impl, Rt._impl->storage()._impl, - Rt._impl->storage()._impl->size(), {}, {}, {}, 3); + linalg_internal::cuDiv_dispatch(out._impl->storage()._impl, Cnst._impl, + Rt._impl->storage()._impl, + Rt._impl->storage()._impl->size(), {}, {}, {}); #else cytnx_error_msg(true, "[Div] fatal error, the tensor is on GPU without CUDA support.%s", "\n"); @@ -444,9 +439,9 @@ namespace cytnx { Cnst.at(0) = lc; Tensor out; out._impl = Rt._impl->_clone_meta_only(); //(Rt.shape(),Type.ComplexDouble,Rt.device()); - out._impl->storage() = Storage(Rt._impl->storage().size(), - Type.Bool < Rt.dtype() ? 
Type.Bool : Rt.dtype(), Rt.device()); - // Tensor out(Rt.shape(),Type.Bool < Rt.dtype()?Type.Bool:Rt.dtype(),Rt.device()); + out._impl->storage() = + Storage(Rt._impl->storage().size(), Type.type_promote(Type.Bool, Rt.dtype()), Rt.device()); + // Tensor out(Rt.shape(),Type.type_promote(Type.Bool, Rt.dtype()),Rt.device()); if (Rt.device() == Device.cpu) { std::visit( @@ -460,9 +455,9 @@ namespace cytnx { } else { #ifdef UNI_GPU checkCudaErrors(cudaSetDevice(Rt.device())); - cytnx::linalg_internal::lii.cuAri_ii[Type.Bool][Rt.dtype()]( - out._impl->storage()._impl, Cnst._impl, Rt._impl->storage()._impl, - Rt._impl->storage()._impl->size(), {}, {}, {}, 3); + linalg_internal::cuDiv_dispatch(out._impl->storage()._impl, Cnst._impl, + Rt._impl->storage()._impl, + Rt._impl->storage()._impl->size(), {}, {}, {}); #else cytnx_error_msg(true, "[Div] fatal error, the tensor is on GPU without CUDA support.%s", "\n"); @@ -479,8 +474,8 @@ namespace cytnx { Tensor out; out._impl = Rt._impl->_clone_meta_only(); - out._impl->storage() = Storage( - Rt._impl->storage().size(), lc.dtype() < Rt.dtype() ? 
lc.dtype() : Rt.dtype(), Rt.device()); + out._impl->storage() = + Storage(Rt._impl->storage().size(), Type.type_promote(lc.dtype(), Rt.dtype()), Rt.device()); if (Rt.device() == Device.cpu) { std::visit( @@ -499,9 +494,9 @@ namespace cytnx { } else { #ifdef UNI_GPU checkCudaErrors(cudaSetDevice(Rt.device())); - linalg_internal::lii.cuAri_ii[lc.dtype()][Rt.dtype()]( - out._impl->storage()._impl, Cnst._impl, Rt._impl->storage()._impl, - Rt._impl->storage()._impl->size(), {}, {}, {}, 3); + linalg_internal::cuDiv_dispatch(out._impl->storage()._impl, Cnst._impl, + Rt._impl->storage()._impl, + Rt._impl->storage()._impl->size(), {}, {}, {}); #else cytnx_error_msg(true, "[Div] fatal error, the tensor is on GPU without CUDA support.%s", "\n"); @@ -532,9 +527,8 @@ namespace cytnx { } else { #ifdef UNI_GPU checkCudaErrors(cudaSetDevice(Lt.device())); - cytnx::linalg_internal::lii.cuAri_ii[Lt.dtype()][Type.ComplexDouble]( - out._impl->storage()._impl, Lt._impl->storage()._impl, Cnst._impl, - Lt._impl->storage()._impl->size(), {}, {}, {}, 3); + linalg_internal::cuDiv_dispatch(out._impl->storage()._impl, Lt._impl->storage()._impl, + Cnst._impl, Lt._impl->storage()._impl->size(), {}, {}, {}); #else cytnx_error_msg(true, "[Div] fatal error, the tensor is on GPU without CUDA support.%s", "\n"); @@ -549,9 +543,8 @@ namespace cytnx { Cnst.at(0) = rc; Tensor out; out._impl = Lt._impl->_clone_meta_only(); - out._impl->storage() = - Storage(Lt._impl->storage().size(), - Type.ComplexFloat < Lt.dtype() ? 
Type.ComplexFloat : Lt.dtype(), Lt.device()); + out._impl->storage() = Storage(Lt._impl->storage().size(), + Type.type_promote(Type.ComplexFloat, Lt.dtype()), Lt.device()); // Tensor out(Lt.shape(),Type.ComplexFloat < // Lt.dtype()?Type.ComplexFloat:Lt.dtype(),Lt.device()); @@ -567,9 +560,8 @@ namespace cytnx { } else { #ifdef UNI_GPU checkCudaErrors(cudaSetDevice(Lt.device())); - cytnx::linalg_internal::lii.cuAri_ii[Lt.dtype()][Type.ComplexFloat]( - out._impl->storage()._impl, Lt._impl->storage()._impl, Cnst._impl, - Lt._impl->storage()._impl->size(), {}, {}, {}, 3); + linalg_internal::cuDiv_dispatch(out._impl->storage()._impl, Lt._impl->storage()._impl, + Cnst._impl, Lt._impl->storage()._impl->size(), {}, {}, {}); #else cytnx_error_msg(true, "[Div] fatal error, the tensor is on GPU without CUDA support.%s", "\n"); @@ -585,10 +577,9 @@ namespace cytnx { Tensor out; out._impl = Lt._impl->_clone_meta_only(); //(Rt.shape(),Type.ComplexDouble,Rt.device()); - out._impl->storage() = - Storage(Lt._impl->storage().size(), Type.Double < Lt.dtype() ? 
Type.Double : Lt.dtype(), - Lt.device()); - // Tensor out(Lt.shape(),Type.Double < Lt.dtype()?Type.Double:Lt.dtype(),Lt.device()); + out._impl->storage() = Storage(Lt._impl->storage().size(), + Type.type_promote(Type.Double, Lt.dtype()), Lt.device()); + // Tensor out(Lt.shape(),Type.type_promote(Type.Double, Lt.dtype()),Lt.device()); if (Lt.device() == Device.cpu) { std::visit( @@ -602,9 +593,8 @@ namespace cytnx { } else { #ifdef UNI_GPU checkCudaErrors(cudaSetDevice(Lt.device())); - cytnx::linalg_internal::lii.cuAri_ii[Lt.dtype()][Type.Double]( - out._impl->storage()._impl, Lt._impl->storage()._impl, Cnst._impl, - Lt._impl->storage()._impl->size(), {}, {}, {}, 3); + linalg_internal::cuDiv_dispatch(out._impl->storage()._impl, Lt._impl->storage()._impl, + Cnst._impl, Lt._impl->storage()._impl->size(), {}, {}, {}); #else cytnx_error_msg(true, "[Div] fatal error, the tensor is on GPU without CUDA support.%s", "\n"); @@ -619,9 +609,9 @@ namespace cytnx { Cnst.at(0) = rc; Tensor out; out._impl = Lt._impl->_clone_meta_only(); - out._impl->storage() = Storage( - Lt._impl->storage().size(), Type.Float < Lt.dtype() ? 
Type.Float : Lt.dtype(), Lt.device()); - // Tensor out(Lt.shape(),Type.Float < Lt.dtype()?Type.Float:Lt.dtype(),Lt.device()); + out._impl->storage() = + Storage(Lt._impl->storage().size(), Type.type_promote(Type.Float, Lt.dtype()), Lt.device()); + // Tensor out(Lt.shape(),Type.type_promote(Type.Float, Lt.dtype()),Lt.device()); if (Lt.device() == Device.cpu) { std::visit( @@ -635,9 +625,8 @@ namespace cytnx { } else { #ifdef UNI_GPU checkCudaErrors(cudaSetDevice(Lt.device())); - cytnx::linalg_internal::lii.cuAri_ii[Lt.dtype()][Type.Float]( - out._impl->storage()._impl, Lt._impl->storage()._impl, Cnst._impl, - Lt._impl->storage()._impl->size(), {}, {}, {}, 3); + linalg_internal::cuDiv_dispatch(out._impl->storage()._impl, Lt._impl->storage()._impl, + Cnst._impl, Lt._impl->storage()._impl->size(), {}, {}, {}); #else cytnx_error_msg(true, "[Div] fatal error, the tensor is on GPU without CUDA support.%s", "\n"); @@ -652,9 +641,9 @@ namespace cytnx { Cnst.at(0) = rc; Tensor out; out._impl = Lt._impl->_clone_meta_only(); - out._impl->storage() = Storage( - Lt._impl->storage().size(), Type.Int64 < Lt.dtype() ? 
Type.Int64 : Lt.dtype(), Lt.device()); - // Tensor out(Lt.shape(),Type.Int64 < Lt.dtype()?Type.Int64:Lt.dtype(),Lt.device()); + out._impl->storage() = + Storage(Lt._impl->storage().size(), Type.type_promote(Type.Int64, Lt.dtype()), Lt.device()); + // Tensor out(Lt.shape(),Type.type_promote(Type.Int64, Lt.dtype()),Lt.device()); if (Lt.device() == Device.cpu) { std::visit( @@ -668,9 +657,8 @@ namespace cytnx { } else { #ifdef UNI_GPU checkCudaErrors(cudaSetDevice(Lt.device())); - cytnx::linalg_internal::lii.cuAri_ii[Lt.dtype()][Type.Int64]( - out._impl->storage()._impl, Lt._impl->storage()._impl, Cnst._impl, - Lt._impl->storage()._impl->size(), {}, {}, {}, 3); + linalg_internal::cuDiv_dispatch(out._impl->storage()._impl, Lt._impl->storage()._impl, + Cnst._impl, Lt._impl->storage()._impl->size(), {}, {}, {}); #else cytnx_error_msg(true, "[Div] fatal error, the tensor is on GPU without CUDA support.%s", "\n"); @@ -685,10 +673,9 @@ namespace cytnx { Cnst.at(0) = rc; Tensor out; out._impl = Lt._impl->_clone_meta_only(); - out._impl->storage() = - Storage(Lt._impl->storage().size(), Type.Uint64 < Lt.dtype() ? 
Type.Uint64 : Lt.dtype(), - Lt.device()); - // Tensor out(Lt.shape(),Type.Uint64 < Lt.dtype()?Type.Uint64:Lt.dtype(),Lt.device()); + out._impl->storage() = Storage(Lt._impl->storage().size(), + Type.type_promote(Type.Uint64, Lt.dtype()), Lt.device()); + // Tensor out(Lt.shape(),Type.type_promote(Type.Uint64, Lt.dtype()),Lt.device()); if (Lt.device() == Device.cpu) { std::visit( @@ -702,9 +689,8 @@ namespace cytnx { } else { #ifdef UNI_GPU checkCudaErrors(cudaSetDevice(Lt.device())); - cytnx::linalg_internal::lii.cuAri_ii[Lt.dtype()][Type.Uint64]( - out._impl->storage()._impl, Lt._impl->storage()._impl, Cnst._impl, - Lt._impl->storage()._impl->size(), {}, {}, {}, 3); + linalg_internal::cuDiv_dispatch(out._impl->storage()._impl, Lt._impl->storage()._impl, + Cnst._impl, Lt._impl->storage()._impl->size(), {}, {}, {}); #else cytnx_error_msg(true, "[Div] fatal error, the tensor is on GPU without CUDA support.%s", "\n"); @@ -719,9 +705,9 @@ namespace cytnx { Cnst.at(0) = rc; Tensor out; out._impl = Lt._impl->_clone_meta_only(); - out._impl->storage() = Storage( - Lt._impl->storage().size(), Type.Int32 < Lt.dtype() ? 
Type.Int32 : Lt.dtype(), Lt.device()); - // Tensor out(Lt.shape(),Type.Int32 < Lt.dtype()?Type.Int32:Lt.dtype(),Lt.device()); + out._impl->storage() = + Storage(Lt._impl->storage().size(), Type.type_promote(Type.Int32, Lt.dtype()), Lt.device()); + // Tensor out(Lt.shape(),Type.type_promote(Type.Int32, Lt.dtype()),Lt.device()); if (Lt.device() == Device.cpu) { // std::cout << "chk" << std::endl; @@ -736,9 +722,8 @@ namespace cytnx { } else { #ifdef UNI_GPU checkCudaErrors(cudaSetDevice(Lt.device())); - cytnx::linalg_internal::lii.cuAri_ii[Lt.dtype()][Type.Int32]( - out._impl->storage()._impl, Lt._impl->storage()._impl, Cnst._impl, - Lt._impl->storage()._impl->size(), {}, {}, {}, 3); + linalg_internal::cuDiv_dispatch(out._impl->storage()._impl, Lt._impl->storage()._impl, + Cnst._impl, Lt._impl->storage()._impl->size(), {}, {}, {}); #else cytnx_error_msg(true, "[Div] fatal error, the tensor is on GPU without CUDA support.%s", "\n"); @@ -753,10 +738,9 @@ namespace cytnx { Cnst.at(0) = rc; Tensor out; out._impl = Lt._impl->_clone_meta_only(); - out._impl->storage() = - Storage(Lt._impl->storage().size(), Type.Uint32 < Lt.dtype() ? 
Type.Uint32 : Lt.dtype(), - Lt.device()); - // Tensor out(Lt.shape(),Type.Uint32 < Lt.dtype()?Type.Uint32:Lt.dtype(),Lt.device()); + out._impl->storage() = Storage(Lt._impl->storage().size(), + Type.type_promote(Type.Uint32, Lt.dtype()), Lt.device()); + // Tensor out(Lt.shape(),Type.type_promote(Type.Uint32, Lt.dtype()),Lt.device()); if (Lt.device() == Device.cpu) { std::visit( @@ -770,9 +754,8 @@ namespace cytnx { } else { #ifdef UNI_GPU checkCudaErrors(cudaSetDevice(Lt.device())); - cytnx::linalg_internal::lii.cuAri_ii[Lt.dtype()][Type.Uint32]( - out._impl->storage()._impl, Lt._impl->storage()._impl, Cnst._impl, - Lt._impl->storage()._impl->size(), {}, {}, {}, 3); + linalg_internal::cuDiv_dispatch(out._impl->storage()._impl, Lt._impl->storage()._impl, + Cnst._impl, Lt._impl->storage()._impl->size(), {}, {}, {}); #else cytnx_error_msg(true, "[Div] fatal error, the tensor is on GPU without CUDA support.%s", "\n"); @@ -788,9 +771,9 @@ namespace cytnx { Cnst.at(0) = rc; Tensor out; out._impl = Lt._impl->_clone_meta_only(); - out._impl->storage() = Storage( - Lt._impl->storage().size(), Type.Int16 < Lt.dtype() ? 
Type.Int16 : Lt.dtype(), Lt.device()); - // Tensor out(Lt.shape(),Type.Int16 < Lt.dtype()?Type.Int16:Lt.dtype(),Lt.device()); + out._impl->storage() = + Storage(Lt._impl->storage().size(), Type.type_promote(Type.Int16, Lt.dtype()), Lt.device()); + // Tensor out(Lt.shape(),Type.type_promote(Type.Int16, Lt.dtype()),Lt.device()); if (Lt.device() == Device.cpu) { std::visit( @@ -804,9 +787,8 @@ namespace cytnx { } else { #ifdef UNI_GPU checkCudaErrors(cudaSetDevice(Lt.device())); - cytnx::linalg_internal::lii.cuAri_ii[Lt.dtype()][Type.Int16]( - out._impl->storage()._impl, Lt._impl->storage()._impl, Cnst._impl, - Lt._impl->storage()._impl->size(), {}, {}, {}, 3); + linalg_internal::cuDiv_dispatch(out._impl->storage()._impl, Lt._impl->storage()._impl, + Cnst._impl, Lt._impl->storage()._impl->size(), {}, {}, {}); #else cytnx_error_msg(true, "[Div] fatal error, the tensor is on GPU without CUDA support.%s", "\n"); @@ -821,10 +803,9 @@ namespace cytnx { Cnst.at(0) = rc; Tensor out; out._impl = Lt._impl->_clone_meta_only(); - out._impl->storage() = - Storage(Lt._impl->storage().size(), Type.Uint16 < Lt.dtype() ? 
Type.Uint16 : Lt.dtype(), - Lt.device()); - // Tensor out(Lt.shape(),Type.Uint16 < Lt.dtype()?Type.Uint16:Lt.dtype(),Lt.device()); + out._impl->storage() = Storage(Lt._impl->storage().size(), + Type.type_promote(Type.Uint16, Lt.dtype()), Lt.device()); + // Tensor out(Lt.shape(),Type.type_promote(Type.Uint16, Lt.dtype()),Lt.device()); if (Lt.device() == Device.cpu) { std::visit( @@ -838,9 +819,8 @@ namespace cytnx { } else { #ifdef UNI_GPU checkCudaErrors(cudaSetDevice(Lt.device())); - cytnx::linalg_internal::lii.cuAri_ii[Lt.dtype()][Type.Uint16]( - out._impl->storage()._impl, Lt._impl->storage()._impl, Cnst._impl, - Lt._impl->storage()._impl->size(), {}, {}, {}, 3); + linalg_internal::cuDiv_dispatch(out._impl->storage()._impl, Lt._impl->storage()._impl, + Cnst._impl, Lt._impl->storage()._impl->size(), {}, {}, {}); #else cytnx_error_msg(true, "[Div] fatal error, the tensor is on GPU without CUDA support.%s", "\n"); @@ -855,9 +835,9 @@ namespace cytnx { Cnst.at(0) = rc; Tensor out; out._impl = Lt._impl->_clone_meta_only(); - out._impl->storage() = Storage(Lt._impl->storage().size(), - Type.Bool < Lt.dtype() ? 
Type.Bool : Lt.dtype(), Lt.device()); - // Tensor out(Lt.shape(),Type.Bool < Lt.dtype()?Type.Bool:Lt.dtype(),Lt.device()); + out._impl->storage() = + Storage(Lt._impl->storage().size(), Type.type_promote(Type.Bool, Lt.dtype()), Lt.device()); + // Tensor out(Lt.shape(),Type.type_promote(Type.Bool, Lt.dtype()),Lt.device()); if (Lt.device() == Device.cpu) { std::visit( @@ -871,9 +851,8 @@ namespace cytnx { } else { #ifdef UNI_GPU checkCudaErrors(cudaSetDevice(Lt.device())); - cytnx::linalg_internal::lii.cuAri_ii[Lt.dtype()][Type.Bool]( - out._impl->storage()._impl, Lt._impl->storage()._impl, Cnst._impl, - Lt._impl->storage()._impl->size(), {}, {}, {}, 3); + linalg_internal::cuDiv_dispatch(out._impl->storage()._impl, Lt._impl->storage()._impl, + Cnst._impl, Lt._impl->storage()._impl->size(), {}, {}, {}); #else cytnx_error_msg(true, "[Div] fatal error, the tensor is on GPU without CUDA support.%s", "\n"); @@ -890,8 +869,8 @@ namespace cytnx { Tensor out; out._impl = Lt._impl->_clone_meta_only(); - out._impl->storage() = Storage( - Lt._impl->storage().size(), Lt.dtype() < rc.dtype() ? 
Lt.dtype() : rc.dtype(), Lt.device()); + out._impl->storage() = + Storage(Lt._impl->storage().size(), Type.type_promote(Lt.dtype(), rc.dtype()), Lt.device()); if (Lt.device() == Device.cpu) { std::visit( @@ -910,9 +889,8 @@ namespace cytnx { } else { #ifdef UNI_GPU checkCudaErrors(cudaSetDevice(Lt.device())); - linalg_internal::lii.cuAri_ii[Lt.dtype()][rc.dtype()]( - out._impl->storage()._impl, Lt._impl->storage()._impl, Cnst._impl, - Lt._impl->storage()._impl->size(), {}, {}, {}, 3); + linalg_internal::cuDiv_dispatch(out._impl->storage()._impl, Lt._impl->storage()._impl, + Cnst._impl, Lt._impl->storage()._impl->size(), {}, {}, {}); #else cytnx_error_msg(true, "[Div] fatal error, the tensor is on GPU without CUDA support.%s", "\n"); diff --git a/src/linalg/Mul.cpp b/src/linalg/Mul.cpp index 76555a127..daf88a94f 100644 --- a/src/linalg/Mul.cpp +++ b/src/linalg/Mul.cpp @@ -16,22 +16,22 @@ namespace cytnx { bool icnst = false; if (Lt.shape().size() == 1 && Lt.shape()[0] == 1) { out._impl = Rt._impl->_clone_meta_only(); - out._impl->storage() = Storage( - Rt.storage().size(), Lt.dtype() < Rt.dtype() ? Lt.dtype() : Rt.dtype(), Rt.device()); - // out.Init(Rt.shape(),Lt.dtype() < Rt.dtype()?Lt.dtype():Rt.dtype(),Lt.device()); + out._impl->storage() = + Storage(Rt.storage().size(), Type.type_promote(Lt.dtype(), Rt.dtype()), Rt.device()); + // out.Init(Rt.shape(),Type.type_promote(Lt.dtype(), Rt.dtype()),Lt.device()); icnst = true; } else if (Rt.shape().size() == 1 && Rt.shape()[0] == 1) { - // out.Init(Lt.shape(),Lt.dtype() < Rt.dtype()?Lt.dtype():Rt.dtype(),Lt.device()); + // out.Init(Lt.shape(),Type.type_promote(Lt.dtype(), Rt.dtype()),Lt.device()); out._impl = Lt._impl->_clone_meta_only(); - out._impl->storage() = Storage( - Lt.storage().size(), Lt.dtype() < Rt.dtype() ? 
Lt.dtype() : Rt.dtype(), Lt.device()); + out._impl->storage() = + Storage(Lt.storage().size(), Type.type_promote(Lt.dtype(), Rt.dtype()), Lt.device()); icnst = true; } else { cytnx_error_msg(Lt.shape() != Rt.shape(), "[Mul] The two tensors do not have the same shape.%s", "\n"); - out.Init(Lt.shape(), Lt.dtype() < Rt.dtype() ? Lt.dtype() : Rt.dtype(), Lt.device()); + out.Init(Lt.shape(), Type.type_promote(Lt.dtype(), Rt.dtype()), Lt.device()); } if ((Lt.is_contiguous() && Rt.is_contiguous()) || icnst) { @@ -53,9 +53,9 @@ namespace cytnx { } else { #ifdef UNI_GPU checkCudaErrors(cudaSetDevice(Rt.device())); - cytnx::linalg_internal::lii.cuAri_ii[Lt.dtype()][Rt.dtype()]( - out._impl->storage()._impl, Lt._impl->storage()._impl, Rt._impl->storage()._impl, - out._impl->storage()._impl->size(), {}, {}, {}, 1); + linalg_internal::cuMul_dispatch(out._impl->storage()._impl, Lt._impl->storage()._impl, + Rt._impl->storage()._impl, + out._impl->storage()._impl->size(), {}, {}, {}); #else cytnx_error_msg(true, "[Mul] fatal error, the tensor is on GPU without CUDA support.%s", "\n"); @@ -116,9 +116,9 @@ namespace cytnx { } else { #ifdef UNI_GPU checkCudaErrors(cudaSetDevice(Rt.device())); - cytnx::linalg_internal::lii.cuAri_ii[Type.ComplexDouble][Rt.dtype()]( - out._impl->storage()._impl, Cnst._impl, Rt._impl->storage()._impl, - Rt._impl->storage()._impl->size(), {}, {}, {}, 1); + linalg_internal::cuMul_dispatch(out._impl->storage()._impl, Cnst._impl, + Rt._impl->storage()._impl, + Rt._impl->storage()._impl->size(), {}, {}, {}); #else cytnx_error_msg(true, "[Mul] fatal error, the tensor is on GPU without CUDA support.%s", "\n"); @@ -134,10 +134,9 @@ namespace cytnx { Cnst.at(0) = lc; Tensor out; out._impl = Rt._impl->_clone_meta_only(); - out._impl->storage() = - Storage(Rt._impl->storage().size(), - Type.ComplexFloat < Rt.dtype() ? Type.ComplexFloat : Rt.dtype(), Rt.device()); - // Tensor out(Rt.shape(), Type.ComplexFloat < Rt.dtype() ? 
Type.ComplexFloat : Rt.dtype(), + out._impl->storage() = Storage(Rt._impl->storage().size(), + Type.type_promote(Type.ComplexFloat, Rt.dtype()), Rt.device()); + // Tensor out(Rt.shape(), Type.type_promote(Type.ComplexFloat, Rt.dtype()), // Rt.device()); if (Rt.device() == Device.cpu) { @@ -152,9 +151,9 @@ namespace cytnx { } else { #ifdef UNI_GPU checkCudaErrors(cudaSetDevice(Rt.device())); - cytnx::linalg_internal::lii.cuAri_ii[Type.ComplexFloat][Rt.dtype()]( - out._impl->storage()._impl, Cnst._impl, Rt._impl->storage()._impl, - Rt._impl->storage()._impl->size(), {}, {}, {}, 1); + linalg_internal::cuMul_dispatch(out._impl->storage()._impl, Cnst._impl, + Rt._impl->storage()._impl, + Rt._impl->storage()._impl->size(), {}, {}, {}); #else cytnx_error_msg(true, "[Mul] fatal error, the tensor is on GPU without CUDA support.%s", "\n"); @@ -170,10 +169,9 @@ namespace cytnx { Cnst.at(0) = lc; Tensor out; out._impl = Rt._impl->_clone_meta_only(); - out._impl->storage() = - Storage(Rt._impl->storage().size(), Type.Double < Rt.dtype() ? Type.Double : Rt.dtype(), - Rt.device()); - // Tensor out(Rt.shape(), Type.Double < Rt.dtype() ? 
Type.Double : Rt.dtype(), Rt.device()); + out._impl->storage() = Storage(Rt._impl->storage().size(), + Type.type_promote(Type.Double, Rt.dtype()), Rt.device()); + // Tensor out(Rt.shape(), Type.type_promote(Type.Double, Rt.dtype()), Rt.device()); if (Rt.device() == Device.cpu) { std::visit( @@ -187,9 +185,9 @@ namespace cytnx { } else { #ifdef UNI_GPU checkCudaErrors(cudaSetDevice(Rt.device())); - cytnx::linalg_internal::lii.cuAri_ii[Type.Double][Rt.dtype()]( - out._impl->storage()._impl, Cnst._impl, Rt._impl->storage()._impl, - Rt._impl->storage()._impl->size(), {}, {}, {}, 1); + linalg_internal::cuMul_dispatch(out._impl->storage()._impl, Cnst._impl, + Rt._impl->storage()._impl, + Rt._impl->storage()._impl->size(), {}, {}, {}); #else cytnx_error_msg(true, "[Mul] fatal error, the tensor is on GPU without CUDA support.%s", "\n"); @@ -205,9 +203,9 @@ namespace cytnx { Cnst.at(0) = lc; Tensor out; out._impl = Rt._impl->_clone_meta_only(); - out._impl->storage() = Storage( - Rt._impl->storage().size(), Type.Float < Rt.dtype() ? Type.Float : Rt.dtype(), Rt.device()); - // Tensor out(Rt.shape(), Type.Float < Rt.dtype() ? 
Type.Float : Rt.dtype(), Rt.device()); + out._impl->storage() = + Storage(Rt._impl->storage().size(), Type.type_promote(Type.Float, Rt.dtype()), Rt.device()); + // Tensor out(Rt.shape(), Type.type_promote(Type.Float, Rt.dtype()), Rt.device()); if (Rt.device() == Device.cpu) { std::visit( @@ -221,9 +219,9 @@ namespace cytnx { } else { #ifdef UNI_GPU checkCudaErrors(cudaSetDevice(Rt.device())); - cytnx::linalg_internal::lii.cuAri_ii[Type.Float][Rt.dtype()]( - out._impl->storage()._impl, Cnst._impl, Rt._impl->storage()._impl, - Rt._impl->storage()._impl->size(), {}, {}, {}, 1); + linalg_internal::cuMul_dispatch(out._impl->storage()._impl, Cnst._impl, + Rt._impl->storage()._impl, + Rt._impl->storage()._impl->size(), {}, {}, {}); #else cytnx_error_msg(true, "[Mul] fatal error, the tensor is on GPU without CUDA support.%s", "\n"); @@ -239,9 +237,9 @@ namespace cytnx { Cnst.at(0) = lc; Tensor out; out._impl = Rt._impl->_clone_meta_only(); - out._impl->storage() = Storage( - Rt._impl->storage().size(), Type.Int64 < Rt.dtype() ? Type.Int64 : Rt.dtype(), Rt.device()); - // Tensor out(Rt.shape(), Type.Int64 < Rt.dtype() ? 
Type.Int64 : Rt.dtype(), Rt.device()); + out._impl->storage() = + Storage(Rt._impl->storage().size(), Type.type_promote(Type.Int64, Rt.dtype()), Rt.device()); + // Tensor out(Rt.shape(), Type.type_promote(Type.Int64, Rt.dtype()), Rt.device()); if (Rt.device() == Device.cpu) { std::visit( @@ -255,9 +253,9 @@ namespace cytnx { } else { #ifdef UNI_GPU checkCudaErrors(cudaSetDevice(Rt.device())); - cytnx::linalg_internal::lii.cuAri_ii[Type.Int64][Rt.dtype()]( - out._impl->storage()._impl, Cnst._impl, Rt._impl->storage()._impl, - Rt._impl->storage()._impl->size(), {}, {}, {}, 1); + linalg_internal::cuMul_dispatch(out._impl->storage()._impl, Cnst._impl, + Rt._impl->storage()._impl, + Rt._impl->storage()._impl->size(), {}, {}, {}); #else cytnx_error_msg(true, "[Mul] fatal error, the tensor is on GPU without CUDA support.%s", "\n"); @@ -273,10 +271,9 @@ namespace cytnx { Cnst.at(0) = lc; Tensor out; out._impl = Rt._impl->_clone_meta_only(); - out._impl->storage() = - Storage(Rt._impl->storage().size(), Type.Uint64 < Rt.dtype() ? Type.Uint64 : Rt.dtype(), - Rt.device()); - // Tensor out(Rt.shape(), Type.Uint64 < Rt.dtype() ? 
Type.Uint64 : Rt.dtype(), Rt.device()); + out._impl->storage() = Storage(Rt._impl->storage().size(), + Type.type_promote(Type.Uint64, Rt.dtype()), Rt.device()); + // Tensor out(Rt.shape(), Type.type_promote(Type.Uint64, Rt.dtype()), Rt.device()); if (Rt.device() == Device.cpu) { std::visit( @@ -290,9 +287,9 @@ namespace cytnx { } else { #ifdef UNI_GPU checkCudaErrors(cudaSetDevice(Rt.device())); - cytnx::linalg_internal::lii.cuAri_ii[Type.Uint64][Rt.dtype()]( - out._impl->storage()._impl, Cnst._impl, Rt._impl->storage()._impl, - Rt._impl->storage()._impl->size(), {}, {}, {}, 1); + linalg_internal::cuMul_dispatch(out._impl->storage()._impl, Cnst._impl, + Rt._impl->storage()._impl, + Rt._impl->storage()._impl->size(), {}, {}, {}); #else cytnx_error_msg(true, "[Mul] fatal error, the tensor is on GPU without CUDA support.%s", "\n"); @@ -308,9 +305,9 @@ namespace cytnx { Cnst.at(0) = lc; Tensor out; out._impl = Rt._impl->_clone_meta_only(); - out._impl->storage() = Storage( - Rt._impl->storage().size(), Type.Int32 < Rt.dtype() ? Type.Int32 : Rt.dtype(), Rt.device()); - // Tensor out(Rt.shape(), Type.Int32 < Rt.dtype() ? 
Type.Int32 : Rt.dtype(), Rt.device()); + out._impl->storage() = + Storage(Rt._impl->storage().size(), Type.type_promote(Type.Int32, Rt.dtype()), Rt.device()); + // Tensor out(Rt.shape(), Type.type_promote(Type.Int32, Rt.dtype()), Rt.device()); if (Rt.device() == Device.cpu) { std::visit( @@ -324,9 +321,9 @@ namespace cytnx { } else { #ifdef UNI_GPU checkCudaErrors(cudaSetDevice(Rt.device())); - cytnx::linalg_internal::lii.cuAri_ii[Type.Int32][Rt.dtype()]( - out._impl->storage()._impl, Cnst._impl, Rt._impl->storage()._impl, - Rt._impl->storage()._impl->size(), {}, {}, {}, 1); + linalg_internal::cuMul_dispatch(out._impl->storage()._impl, Cnst._impl, + Rt._impl->storage()._impl, + Rt._impl->storage()._impl->size(), {}, {}, {}); #else cytnx_error_msg(true, "[Mul] fatal error, the tensor is on GPU without CUDA support.%s", "\n"); @@ -342,10 +339,9 @@ namespace cytnx { Cnst.at(0) = lc; Tensor out; out._impl = Rt._impl->_clone_meta_only(); - out._impl->storage() = - Storage(Rt._impl->storage().size(), Type.Uint32 < Rt.dtype() ? Type.Uint32 : Rt.dtype(), - Rt.device()); - // Tensor out(Rt.shape(), Type.Uint32 < Rt.dtype() ? 
Type.Uint32 : Rt.dtype(), Rt.device()); + out._impl->storage() = Storage(Rt._impl->storage().size(), + Type.type_promote(Type.Uint32, Rt.dtype()), Rt.device()); + // Tensor out(Rt.shape(), Type.type_promote(Type.Uint32, Rt.dtype()), Rt.device()); if (Rt.device() == Device.cpu) { std::visit( @@ -359,9 +355,9 @@ namespace cytnx { } else { #ifdef UNI_GPU checkCudaErrors(cudaSetDevice(Rt.device())); - cytnx::linalg_internal::lii.cuAri_ii[Type.Uint32][Rt.dtype()]( - out._impl->storage()._impl, Cnst._impl, Rt._impl->storage()._impl, - Rt._impl->storage()._impl->size(), {}, {}, {}, 1); + linalg_internal::cuMul_dispatch(out._impl->storage()._impl, Cnst._impl, + Rt._impl->storage()._impl, + Rt._impl->storage()._impl->size(), {}, {}, {}); #else cytnx_error_msg(true, "[Mul] fatal error, the tensor is on GPU without CUDA support.%s", "\n"); @@ -377,9 +373,9 @@ namespace cytnx { Cnst.at(0) = lc; Tensor out; out._impl = Rt._impl->_clone_meta_only(); - out._impl->storage() = Storage( - Rt._impl->storage().size(), Type.Int16 < Rt.dtype() ? Type.Int16 : Rt.dtype(), Rt.device()); - // Tensor out(Rt.shape(), Type.Int16 < Rt.dtype() ? 
Type.Int16 : Rt.dtype(), Rt.device()); + out._impl->storage() = + Storage(Rt._impl->storage().size(), Type.type_promote(Type.Int16, Rt.dtype()), Rt.device()); + // Tensor out(Rt.shape(), Type.type_promote(Type.Int16, Rt.dtype()), Rt.device()); if (Rt.device() == Device.cpu) { std::visit( @@ -393,9 +389,9 @@ namespace cytnx { } else { #ifdef UNI_GPU checkCudaErrors(cudaSetDevice(Rt.device())); - cytnx::linalg_internal::lii.cuAri_ii[Type.Int16][Rt.dtype()]( - out._impl->storage()._impl, Cnst._impl, Rt._impl->storage()._impl, - Rt._impl->storage()._impl->size(), {}, {}, {}, 1); + linalg_internal::cuMul_dispatch(out._impl->storage()._impl, Cnst._impl, + Rt._impl->storage()._impl, + Rt._impl->storage()._impl->size(), {}, {}, {}); #else cytnx_error_msg(true, "[Mul] fatal error, the tensor is on GPU without CUDA support.%s", "\n"); @@ -411,10 +407,9 @@ namespace cytnx { Cnst.at(0) = lc; Tensor out; out._impl = Rt._impl->_clone_meta_only(); - out._impl->storage() = - Storage(Rt._impl->storage().size(), Type.Uint16 < Rt.dtype() ? Type.Uint16 : Rt.dtype(), - Rt.device()); - // Tensor out(Rt.shape(), Type.Uint16 < Rt.dtype() ? 
Type.Uint16 : Rt.dtype(), Rt.device()); + out._impl->storage() = Storage(Rt._impl->storage().size(), + Type.type_promote(Type.Uint16, Rt.dtype()), Rt.device()); + // Tensor out(Rt.shape(), Type.type_promote(Type.Uint16, Rt.dtype()), Rt.device()); if (Rt.device() == Device.cpu) { std::visit( @@ -428,9 +423,9 @@ namespace cytnx { } else { #ifdef UNI_GPU checkCudaErrors(cudaSetDevice(Rt.device())); - cytnx::linalg_internal::lii.cuAri_ii[Type.Uint16][Rt.dtype()]( - out._impl->storage()._impl, Cnst._impl, Rt._impl->storage()._impl, - Rt._impl->storage()._impl->size(), {}, {}, {}, 1); + linalg_internal::cuMul_dispatch(out._impl->storage()._impl, Cnst._impl, + Rt._impl->storage()._impl, + Rt._impl->storage()._impl->size(), {}, {}, {}); #else cytnx_error_msg(true, "[Mul] fatal error, the tensor is on GPU without CUDA support.%s", "\n"); @@ -446,9 +441,9 @@ namespace cytnx { Cnst.at(0) = lc; Tensor out; out._impl = Rt._impl->_clone_meta_only(); - out._impl->storage() = Storage(Rt._impl->storage().size(), - Type.Bool < Rt.dtype() ? Type.Bool : Rt.dtype(), Rt.device()); - // Tensor out(Rt.shape(), Type.Bool < Rt.dtype() ? 
Type.Bool : Rt.dtype(), Rt.device()); + out._impl->storage() = + Storage(Rt._impl->storage().size(), Type.type_promote(Type.Bool, Rt.dtype()), Rt.device()); + // Tensor out(Rt.shape(), Type.type_promote(Type.Bool, Rt.dtype()), Rt.device()); if (Rt.device() == Device.cpu) { std::visit( @@ -462,9 +457,9 @@ namespace cytnx { } else { #ifdef UNI_GPU checkCudaErrors(cudaSetDevice(Rt.device())); - cytnx::linalg_internal::lii.cuAri_ii[Type.Bool][Rt.dtype()]( - out._impl->storage()._impl, Cnst._impl, Rt._impl->storage()._impl, - Rt._impl->storage()._impl->size(), {}, {}, {}, 1); + linalg_internal::cuMul_dispatch(out._impl->storage()._impl, Cnst._impl, + Rt._impl->storage()._impl, + Rt._impl->storage()._impl->size(), {}, {}, {}); #else cytnx_error_msg(true, "[Mul] fatal error, the tensor is on GPU without CUDA support.%s", "\n"); @@ -481,8 +476,8 @@ namespace cytnx { Tensor out; out._impl = Rt._impl->_clone_meta_only(); - out._impl->storage() = Storage( - Rt._impl->storage().size(), lc.dtype() < Rt.dtype() ? 
lc.dtype() : Rt.dtype(), Rt.device()); + out._impl->storage() = + Storage(Rt._impl->storage().size(), Type.type_promote(lc.dtype(), Rt.dtype()), Rt.device()); if (Rt.device() == Device.cpu) { std::visit( @@ -501,9 +496,9 @@ namespace cytnx { } else { #ifdef UNI_GPU checkCudaErrors(cudaSetDevice(Rt.device())); - linalg_internal::lii.cuAri_ii[lc.dtype()][Rt.dtype()]( - out._impl->storage()._impl, Cnst._impl, Rt._impl->storage()._impl, - Rt._impl->storage()._impl->size(), {}, {}, {}, 1); + linalg_internal::cuMul_dispatch(out._impl->storage()._impl, Cnst._impl, + Rt._impl->storage()._impl, + Rt._impl->storage()._impl->size(), {}, {}, {}); #else cytnx_error_msg(true, "[Mul] fatal error, the tensor is on GPU without CUDA support.%s", "\n"); diff --git a/src/linalg/Sub.cpp b/src/linalg/Sub.cpp index 0947733b6..807aab17f 100644 --- a/src/linalg/Sub.cpp +++ b/src/linalg/Sub.cpp @@ -16,20 +16,20 @@ namespace cytnx { bool icnst = false; if (Lt.shape().size() == 1 && Lt.shape()[0] == 1) { out._impl = Rt._impl->_clone_meta_only(); - out._impl->storage() = Storage( - Rt.storage().size(), Lt.dtype() < Rt.dtype() ? Lt.dtype() : Rt.dtype(), Rt.device()); - // out.Init(Rt.shape(),Lt.dtype() < Rt.dtype()?Lt.dtype():Rt.dtype(),Lt.device()); + out._impl->storage() = + Storage(Rt.storage().size(), Type.type_promote(Lt.dtype(), Rt.dtype()), Rt.device()); + // out.Init(Rt.shape(),Type.type_promote(Lt.dtype(), Rt.dtype()),Lt.device()); icnst = true; } else if (Rt.shape().size() == 1 && Rt.shape()[0] == 1) { out._impl = Lt._impl->_clone_meta_only(); - out._impl->storage() = Storage( - Lt.storage().size(), Lt.dtype() < Rt.dtype() ? 
Lt.dtype() : Rt.dtype(), Lt.device()); - // out.Init(Lt.shape(),Lt.dtype() < Rt.dtype()?Lt.dtype():Rt.dtype(),Lt.device()); + out._impl->storage() = + Storage(Lt.storage().size(), Type.type_promote(Lt.dtype(), Rt.dtype()), Lt.device()); + // out.Init(Lt.shape(),Type.type_promote(Lt.dtype(), Rt.dtype()),Lt.device()); icnst = true; } else { cytnx_error_msg(Lt.shape() != Rt.shape(), "[Sub] The two tensors do not have the same shape.%s", "\n"); - out.Init(Lt.shape(), Lt.dtype() < Rt.dtype() ? Lt.dtype() : Rt.dtype(), Lt.device()); + out.Init(Lt.shape(), Type.type_promote(Lt.dtype(), Rt.dtype()), Lt.device()); } if ((Lt.is_contiguous() && Rt.is_contiguous()) || icnst) { @@ -51,9 +51,9 @@ namespace cytnx { } else { #ifdef UNI_GPU checkCudaErrors(cudaSetDevice(Rt.device())); - cytnx::linalg_internal::lii.cuAri_ii[Lt.dtype()][Rt.dtype()]( - out._impl->storage()._impl, Lt._impl->storage()._impl, Rt._impl->storage()._impl, - out._impl->storage()._impl->size(), {}, {}, {}, 2); + linalg_internal::cuSub_dispatch(out._impl->storage()._impl, Lt._impl->storage()._impl, + Rt._impl->storage()._impl, + out._impl->storage()._impl->size(), {}, {}, {}); #else cytnx_error_msg(true, "[Sub] fatal error, the tensor is on GPU without CUDA support.%s", "\n"); @@ -79,10 +79,10 @@ namespace cytnx { } else { #ifdef UNI_GPU checkCudaErrors(cudaSetDevice(Rt.device())); - cytnx::linalg_internal::lii.cuAri_ii[Lt.dtype()][Rt.dtype()]( - out._impl->storage()._impl, Lt._impl->storage()._impl, Rt._impl->storage()._impl, - out._impl->storage()._impl->size(), Lt._impl->shape(), Lt._impl->invmapper(), - Rt._impl->invmapper(), 2); + linalg_internal::cuSub_dispatch(out._impl->storage()._impl, Lt._impl->storage()._impl, + Rt._impl->storage()._impl, + out._impl->storage()._impl->size(), Lt._impl->shape(), + Lt._impl->invmapper(), Rt._impl->invmapper()); #else cytnx_error_msg(true, "[Sub] fatal error, the tensor is on GPU without CUDA support.%s", "\n"); @@ -114,9 +114,9 @@ namespace cytnx { } else { #ifdef 
UNI_GPU checkCudaErrors(cudaSetDevice(Rt.device())); - cytnx::linalg_internal::lii.cuAri_ii[Type.ComplexDouble][Rt.dtype()]( - out._impl->storage()._impl, Cnst._impl, Rt._impl->storage()._impl, - Rt._impl->storage()._impl->size(), {}, {}, {}, 2); + linalg_internal::cuSub_dispatch(out._impl->storage()._impl, Cnst._impl, + Rt._impl->storage()._impl, + Rt._impl->storage()._impl->size(), {}, {}, {}); #else cytnx_error_msg(true, "[Sub] fatal error, the tensor is on GPU without CUDA support.%s", "\n"); @@ -132,10 +132,9 @@ namespace cytnx { Cnst.at(0) = lc; Tensor out; out._impl = Rt._impl->_clone_meta_only(); - out._impl->storage() = - Storage(Rt._impl->storage().size(), - Type.ComplexFloat < Rt.dtype() ? Type.ComplexFloat : Rt.dtype(), Rt.device()); - // Tensor out(Rt.shape(), Type.ComplexFloat < Rt.dtype() ? Type.ComplexFloat : Rt.dtype(), + out._impl->storage() = Storage(Rt._impl->storage().size(), + Type.type_promote(Type.ComplexFloat, Rt.dtype()), Rt.device()); + // Tensor out(Rt.shape(), Type.type_promote(Type.ComplexFloat, Rt.dtype()), // Rt.device()); if (Rt.device() == Device.cpu) { @@ -150,9 +149,9 @@ namespace cytnx { } else { #ifdef UNI_GPU checkCudaErrors(cudaSetDevice(Rt.device())); - cytnx::linalg_internal::lii.cuAri_ii[Type.ComplexFloat][Rt.dtype()]( - out._impl->storage()._impl, Cnst._impl, Rt._impl->storage()._impl, - Rt._impl->storage()._impl->size(), {}, {}, {}, 2); + linalg_internal::cuSub_dispatch(out._impl->storage()._impl, Cnst._impl, + Rt._impl->storage()._impl, + Rt._impl->storage()._impl->size(), {}, {}, {}); #else cytnx_error_msg(true, "[Sub] fatal error, the tensor is on GPU without CUDA support.%s", "\n"); @@ -168,10 +167,9 @@ namespace cytnx { Cnst.at(0) = lc; Tensor out; out._impl = Rt._impl->_clone_meta_only(); - out._impl->storage() = - Storage(Rt._impl->storage().size(), Type.Double < Rt.dtype() ? Type.Double : Rt.dtype(), - Rt.device()); - // Tensor out(Rt.shape(), Type.Double < Rt.dtype() ? 
Type.Double : Rt.dtype(), Rt.device()); + out._impl->storage() = Storage(Rt._impl->storage().size(), + Type.type_promote(Type.Double, Rt.dtype()), Rt.device()); + // Tensor out(Rt.shape(), Type.type_promote(Type.Double, Rt.dtype()), Rt.device()); if (Rt.device() == Device.cpu) { std::visit( @@ -185,9 +183,9 @@ namespace cytnx { } else { #ifdef UNI_GPU checkCudaErrors(cudaSetDevice(Rt.device())); - cytnx::linalg_internal::lii.cuAri_ii[Type.Double][Rt.dtype()]( - out._impl->storage()._impl, Cnst._impl, Rt._impl->storage()._impl, - Rt._impl->storage()._impl->size(), {}, {}, {}, 2); + linalg_internal::cuSub_dispatch(out._impl->storage()._impl, Cnst._impl, + Rt._impl->storage()._impl, + Rt._impl->storage()._impl->size(), {}, {}, {}); #else cytnx_error_msg(true, "[Sub] fatal error, the tensor is on GPU without CUDA support.%s", "\n"); @@ -203,9 +201,9 @@ namespace cytnx { Cnst.at(0) = lc; Tensor out; out._impl = Rt._impl->_clone_meta_only(); - out._impl->storage() = Storage( - Rt._impl->storage().size(), Type.Float < Rt.dtype() ? Type.Float : Rt.dtype(), Rt.device()); - // Tensor out(Rt.shape(), Type.Float < Rt.dtype() ? 
Type.Float : Rt.dtype(), Rt.device()); + out._impl->storage() = + Storage(Rt._impl->storage().size(), Type.type_promote(Type.Float, Rt.dtype()), Rt.device()); + // Tensor out(Rt.shape(), Type.type_promote(Type.Float, Rt.dtype()), Rt.device()); if (Rt.device() == Device.cpu) { std::visit( @@ -219,9 +217,9 @@ namespace cytnx { } else { #ifdef UNI_GPU checkCudaErrors(cudaSetDevice(Rt.device())); - cytnx::linalg_internal::lii.cuAri_ii[Type.Float][Rt.dtype()]( - out._impl->storage()._impl, Cnst._impl, Rt._impl->storage()._impl, - Rt._impl->storage()._impl->size(), {}, {}, {}, 2); + linalg_internal::cuSub_dispatch(out._impl->storage()._impl, Cnst._impl, + Rt._impl->storage()._impl, + Rt._impl->storage()._impl->size(), {}, {}, {}); #else cytnx_error_msg(true, "[Sub] fatal error, the tensor is on GPU without CUDA support.%s", "\n"); @@ -237,9 +235,9 @@ namespace cytnx { Cnst.at(0) = lc; Tensor out; out._impl = Rt._impl->_clone_meta_only(); - out._impl->storage() = Storage( - Rt._impl->storage().size(), Type.Int64 < Rt.dtype() ? Type.Int64 : Rt.dtype(), Rt.device()); - // Tensor out(Rt.shape(), Type.Int64 < Rt.dtype() ? 
Type.Int64 : Rt.dtype(), Rt.device()); + out._impl->storage() = + Storage(Rt._impl->storage().size(), Type.type_promote(Type.Int64, Rt.dtype()), Rt.device()); + // Tensor out(Rt.shape(), Type.type_promote(Type.Int64, Rt.dtype()), Rt.device()); if (Rt.device() == Device.cpu) { std::visit( @@ -253,9 +251,9 @@ namespace cytnx { } else { #ifdef UNI_GPU checkCudaErrors(cudaSetDevice(Rt.device())); - cytnx::linalg_internal::lii.cuAri_ii[Type.Int64][Rt.dtype()]( - out._impl->storage()._impl, Cnst._impl, Rt._impl->storage()._impl, - Rt._impl->storage()._impl->size(), {}, {}, {}, 2); + linalg_internal::cuSub_dispatch(out._impl->storage()._impl, Cnst._impl, + Rt._impl->storage()._impl, + Rt._impl->storage()._impl->size(), {}, {}, {}); #else cytnx_error_msg(true, "[Sub] fatal error, the tensor is on GPU without CUDA support.%s", "\n"); @@ -271,10 +269,9 @@ namespace cytnx { Cnst.at(0) = lc; Tensor out; out._impl = Rt._impl->_clone_meta_only(); - out._impl->storage() = - Storage(Rt._impl->storage().size(), Type.Uint64 < Rt.dtype() ? Type.Uint64 : Rt.dtype(), - Rt.device()); - // Tensor out(Rt.shape(), Type.Uint64 < Rt.dtype() ? 
Type.Uint64 : Rt.dtype(), Rt.device()); + out._impl->storage() = Storage(Rt._impl->storage().size(), + Type.type_promote(Type.Uint64, Rt.dtype()), Rt.device()); + // Tensor out(Rt.shape(), Type.type_promote(Type.Uint64, Rt.dtype()), Rt.device()); if (Rt.device() == Device.cpu) { std::visit( @@ -288,9 +285,9 @@ namespace cytnx { } else { #ifdef UNI_GPU checkCudaErrors(cudaSetDevice(Rt.device())); - cytnx::linalg_internal::lii.cuAri_ii[Type.Uint64][Rt.dtype()]( - out._impl->storage()._impl, Cnst._impl, Rt._impl->storage()._impl, - Rt._impl->storage()._impl->size(), {}, {}, {}, 2); + linalg_internal::cuSub_dispatch(out._impl->storage()._impl, Cnst._impl, + Rt._impl->storage()._impl, + Rt._impl->storage()._impl->size(), {}, {}, {}); #else cytnx_error_msg(true, "[Sub] fatal error, the tensor is on GPU without CUDA support.%s", "\n"); @@ -306,9 +303,9 @@ namespace cytnx { Cnst.at(0) = lc; Tensor out; out._impl = Rt._impl->_clone_meta_only(); - out._impl->storage() = Storage( - Rt._impl->storage().size(), Type.Int32 < Rt.dtype() ? Type.Int32 : Rt.dtype(), Rt.device()); - // Tensor out(Rt.shape(), Type.Int32 < Rt.dtype() ? 
Type.Int32 : Rt.dtype(), Rt.device()); + out._impl->storage() = + Storage(Rt._impl->storage().size(), Type.type_promote(Type.Int32, Rt.dtype()), Rt.device()); + // Tensor out(Rt.shape(), Type.type_promote(Type.Int32, Rt.dtype()), Rt.device()); if (Rt.device() == Device.cpu) { std::visit( @@ -322,9 +319,9 @@ namespace cytnx { } else { #ifdef UNI_GPU checkCudaErrors(cudaSetDevice(Rt.device())); - cytnx::linalg_internal::lii.cuAri_ii[Type.Int32][Rt.dtype()]( - out._impl->storage()._impl, Cnst._impl, Rt._impl->storage()._impl, - Rt._impl->storage()._impl->size(), {}, {}, {}, 2); + linalg_internal::cuSub_dispatch(out._impl->storage()._impl, Cnst._impl, + Rt._impl->storage()._impl, + Rt._impl->storage()._impl->size(), {}, {}, {}); #else cytnx_error_msg(true, "[Sub] fatal error, the tensor is on GPU without CUDA support.%s", "\n"); @@ -340,10 +337,9 @@ namespace cytnx { Cnst.at(0) = lc; Tensor out; out._impl = Rt._impl->_clone_meta_only(); - out._impl->storage() = - Storage(Rt._impl->storage().size(), Type.Uint32 < Rt.dtype() ? Type.Uint32 : Rt.dtype(), - Rt.device()); - // Tensor out(Rt.shape(), Type.Uint32 < Rt.dtype() ? 
Type.Uint32 : Rt.dtype(), Rt.device()); + out._impl->storage() = Storage(Rt._impl->storage().size(), + Type.type_promote(Type.Uint32, Rt.dtype()), Rt.device()); + // Tensor out(Rt.shape(), Type.type_promote(Type.Uint32, Rt.dtype()), Rt.device()); if (Rt.device() == Device.cpu) { std::visit( @@ -357,9 +353,9 @@ namespace cytnx { } else { #ifdef UNI_GPU checkCudaErrors(cudaSetDevice(Rt.device())); - cytnx::linalg_internal::lii.cuAri_ii[Type.Uint32][Rt.dtype()]( - out._impl->storage()._impl, Cnst._impl, Rt._impl->storage()._impl, - Rt._impl->storage()._impl->size(), {}, {}, {}, 2); + linalg_internal::cuSub_dispatch(out._impl->storage()._impl, Cnst._impl, + Rt._impl->storage()._impl, + Rt._impl->storage()._impl->size(), {}, {}, {}); #else cytnx_error_msg(true, "[Sub] fatal error, the tensor is on GPU without CUDA support.%s", "\n"); @@ -375,9 +371,9 @@ namespace cytnx { Cnst.at(0) = lc; Tensor out; out._impl = Rt._impl->_clone_meta_only(); - out._impl->storage() = Storage( - Rt._impl->storage().size(), Type.Int16 < Rt.dtype() ? Type.Int16 : Rt.dtype(), Rt.device()); - // Tensor out(Rt.shape(), Type.Int16 < Rt.dtype() ? 
Type.Int16 : Rt.dtype(), Rt.device()); + out._impl->storage() = + Storage(Rt._impl->storage().size(), Type.type_promote(Type.Int16, Rt.dtype()), Rt.device()); + // Tensor out(Rt.shape(), Type.type_promote(Type.Int16, Rt.dtype()), Rt.device()); if (Rt.device() == Device.cpu) { std::visit( @@ -391,9 +387,9 @@ namespace cytnx { } else { #ifdef UNI_GPU checkCudaErrors(cudaSetDevice(Rt.device())); - cytnx::linalg_internal::lii.cuAri_ii[Type.Int16][Rt.dtype()]( - out._impl->storage()._impl, Cnst._impl, Rt._impl->storage()._impl, - Rt._impl->storage()._impl->size(), {}, {}, {}, 2); + linalg_internal::cuSub_dispatch(out._impl->storage()._impl, Cnst._impl, + Rt._impl->storage()._impl, + Rt._impl->storage()._impl->size(), {}, {}, {}); #else cytnx_error_msg(true, "[Sub] fatal error, the tensor is on GPU without CUDA support.%s", "\n"); @@ -409,10 +405,9 @@ namespace cytnx { Cnst.at(0) = lc; Tensor out; out._impl = Rt._impl->_clone_meta_only(); - out._impl->storage() = - Storage(Rt._impl->storage().size(), Type.Uint16 < Rt.dtype() ? Type.Uint16 : Rt.dtype(), - Rt.device()); - // Tensor out(Rt.shape(), Type.Uint16 < Rt.dtype() ? 
Type.Uint16 : Rt.dtype(), Rt.device()); + out._impl->storage() = Storage(Rt._impl->storage().size(), + Type.type_promote(Type.Uint16, Rt.dtype()), Rt.device()); + // Tensor out(Rt.shape(), Type.type_promote(Type.Uint16, Rt.dtype()), Rt.device()); if (Rt.device() == Device.cpu) { std::visit( @@ -426,9 +421,9 @@ namespace cytnx { } else { #ifdef UNI_GPU checkCudaErrors(cudaSetDevice(Rt.device())); - cytnx::linalg_internal::lii.cuAri_ii[Type.Uint16][Rt.dtype()]( - out._impl->storage()._impl, Cnst._impl, Rt._impl->storage()._impl, - Rt._impl->storage()._impl->size(), {}, {}, {}, 2); + linalg_internal::cuSub_dispatch(out._impl->storage()._impl, Cnst._impl, + Rt._impl->storage()._impl, + Rt._impl->storage()._impl->size(), {}, {}, {}); #else cytnx_error_msg(true, "[Sub] fatal error, the tensor is on GPU without CUDA support.%s", "\n"); @@ -444,9 +439,9 @@ namespace cytnx { Cnst.at(0) = lc; Tensor out; out._impl = Rt._impl->_clone_meta_only(); - out._impl->storage() = Storage(Rt._impl->storage().size(), - Type.Bool < Rt.dtype() ? Type.Bool : Rt.dtype(), Rt.device()); - // Tensor out(Rt.shape(), Type.Bool < Rt.dtype() ? 
Type.Bool : Rt.dtype(), Rt.device()); + out._impl->storage() = + Storage(Rt._impl->storage().size(), Type.type_promote(Type.Bool, Rt.dtype()), Rt.device()); + // Tensor out(Rt.shape(), Type.type_promote(Type.Bool, Rt.dtype()), Rt.device()); if (Rt.device() == Device.cpu) { std::visit( @@ -460,9 +455,9 @@ namespace cytnx { } else { #ifdef UNI_GPU checkCudaErrors(cudaSetDevice(Rt.device())); - cytnx::linalg_internal::lii.cuAri_ii[Type.Bool][Rt.dtype()]( - out._impl->storage()._impl, Cnst._impl, Rt._impl->storage()._impl, - Rt._impl->storage()._impl->size(), {}, {}, {}, 2); + linalg_internal::cuSub_dispatch(out._impl->storage()._impl, Cnst._impl, + Rt._impl->storage()._impl, + Rt._impl->storage()._impl->size(), {}, {}, {}); #else cytnx_error_msg(true, "[Sub] fatal error, the tensor is on GPU without CUDA support.%s", "\n"); @@ -479,8 +474,8 @@ namespace cytnx { Tensor out; out._impl = Rt._impl->_clone_meta_only(); - out._impl->storage() = Storage( - Rt._impl->storage().size(), lc.dtype() < Rt.dtype() ? 
lc.dtype() : Rt.dtype(), Rt.device()); + out._impl->storage() = + Storage(Rt._impl->storage().size(), Type.type_promote(lc.dtype(), Rt.dtype()), Rt.device()); if (Rt.device() == Device.cpu) { std::visit( @@ -499,9 +494,9 @@ namespace cytnx { } else { #ifdef UNI_GPU checkCudaErrors(cudaSetDevice(Rt.device())); - linalg_internal::lii.cuAri_ii[lc.dtype()][Rt.dtype()]( - out._impl->storage()._impl, Cnst._impl, Rt._impl->storage()._impl, - Rt._impl->storage()._impl->size(), {}, {}, {}, 2); + linalg_internal::cuSub_dispatch(out._impl->storage()._impl, Cnst._impl, + Rt._impl->storage()._impl, + Rt._impl->storage()._impl->size(), {}, {}, {}); #else cytnx_error_msg(true, "[Sub] fatal error, the tensor is on GPU without CUDA support.%s", "\n"); @@ -533,9 +528,8 @@ namespace cytnx { } else { #ifdef UNI_GPU checkCudaErrors(cudaSetDevice(Lt.device())); - cytnx::linalg_internal::lii.cuAri_ii[Lt.dtype()][Type.ComplexDouble]( - out._impl->storage()._impl, Lt._impl->storage()._impl, Cnst._impl, - Lt._impl->storage()._impl->size(), {}, {}, {}, 2); + linalg_internal::cuSub_dispatch(out._impl->storage()._impl, Lt._impl->storage()._impl, + Cnst._impl, Lt._impl->storage()._impl->size(), {}, {}, {}); #else cytnx_error_msg(true, "[Sub] fatal error, the tensor is on GPU without CUDA support.%s", "\n"); @@ -551,10 +545,9 @@ namespace cytnx { Cnst.at(0) = rc; Tensor out; out._impl = Lt._impl->_clone_meta_only(); - out._impl->storage() = - Storage(Lt._impl->storage().size(), - Type.ComplexFloat < Lt.dtype() ? Type.ComplexFloat : Lt.dtype(), Lt.device()); - // Tensor out(Lt.shape(), Type.ComplexFloat < Lt.dtype() ? 
Type.ComplexFloat : Lt.dtype(), + out._impl->storage() = Storage(Lt._impl->storage().size(), + Type.type_promote(Type.ComplexFloat, Lt.dtype()), Lt.device()); + // Tensor out(Lt.shape(), Type.type_promote(Type.ComplexFloat, Lt.dtype()), // Lt.device()); if (Lt.device() == Device.cpu) { @@ -569,9 +562,8 @@ namespace cytnx { } else { #ifdef UNI_GPU checkCudaErrors(cudaSetDevice(Lt.device())); - cytnx::linalg_internal::lii.cuAri_ii[Lt.dtype()][Type.ComplexFloat]( - out._impl->storage()._impl, Lt._impl->storage()._impl, Cnst._impl, - Lt._impl->storage()._impl->size(), {}, {}, {}, 2); + linalg_internal::cuSub_dispatch(out._impl->storage()._impl, Lt._impl->storage()._impl, + Cnst._impl, Lt._impl->storage()._impl->size(), {}, {}, {}); #else cytnx_error_msg(true, "[Sub] fatal error, the tensor is on GPU without CUDA support.%s", "\n"); @@ -587,10 +579,9 @@ namespace cytnx { Cnst.at(0) = rc; Tensor out; out._impl = Lt._impl->_clone_meta_only(); - out._impl->storage() = - Storage(Lt._impl->storage().size(), Type.Double < Lt.dtype() ? Type.Double : Lt.dtype(), - Lt.device()); - // Tensor out(Lt.shape(), Type.Double < Lt.dtype() ? 
Type.Double : Lt.dtype(), Lt.device()); + out._impl->storage() = Storage(Lt._impl->storage().size(), + Type.type_promote(Type.Double, Lt.dtype()), Lt.device()); + // Tensor out(Lt.shape(), Type.type_promote(Type.Double, Lt.dtype()), Lt.device()); if (Lt.device() == Device.cpu) { std::visit( @@ -604,9 +595,8 @@ namespace cytnx { } else { #ifdef UNI_GPU checkCudaErrors(cudaSetDevice(Lt.device())); - cytnx::linalg_internal::lii.cuAri_ii[Lt.dtype()][Type.Double]( - out._impl->storage()._impl, Lt._impl->storage()._impl, Cnst._impl, - Lt._impl->storage()._impl->size(), {}, {}, {}, 2); + linalg_internal::cuSub_dispatch(out._impl->storage()._impl, Lt._impl->storage()._impl, + Cnst._impl, Lt._impl->storage()._impl->size(), {}, {}, {}); #else cytnx_error_msg(true, "[Sub] fatal error, the tensor is on GPU without CUDA support.%s", "\n"); @@ -622,9 +612,9 @@ namespace cytnx { Cnst.at(0) = rc; Tensor out; out._impl = Lt._impl->_clone_meta_only(); - out._impl->storage() = Storage( - Lt._impl->storage().size(), Type.Float < Lt.dtype() ? Type.Float : Lt.dtype(), Lt.device()); - // Tensor out(Lt.shape(), Type.Float < Lt.dtype() ? 
Type.Float : Lt.dtype(), Lt.device()); + out._impl->storage() = + Storage(Lt._impl->storage().size(), Type.type_promote(Type.Float, Lt.dtype()), Lt.device()); + // Tensor out(Lt.shape(), Type.type_promote(Type.Float, Lt.dtype()), Lt.device()); if (Lt.device() == Device.cpu) { std::visit( @@ -638,9 +628,8 @@ namespace cytnx { } else { #ifdef UNI_GPU checkCudaErrors(cudaSetDevice(Lt.device())); - cytnx::linalg_internal::lii.cuAri_ii[Lt.dtype()][Type.Float]( - out._impl->storage()._impl, Lt._impl->storage()._impl, Cnst._impl, - Lt._impl->storage()._impl->size(), {}, {}, {}, 2); + linalg_internal::cuSub_dispatch(out._impl->storage()._impl, Lt._impl->storage()._impl, + Cnst._impl, Lt._impl->storage()._impl->size(), {}, {}, {}); #else cytnx_error_msg(true, "[Sub] fatal error, the tensor is on GPU without CUDA support.%s", "\n"); @@ -656,9 +645,9 @@ namespace cytnx { Cnst.at(0) = rc; Tensor out; out._impl = Lt._impl->_clone_meta_only(); - out._impl->storage() = Storage( - Lt._impl->storage().size(), Type.Int64 < Lt.dtype() ? Type.Int64 : Lt.dtype(), Lt.device()); - // Tensor out(Lt.shape(), Type.Int64 < Lt.dtype() ? 
Type.Int64 : Lt.dtype(), Lt.device()); + out._impl->storage() = + Storage(Lt._impl->storage().size(), Type.type_promote(Type.Int64, Lt.dtype()), Lt.device()); + // Tensor out(Lt.shape(), Type.type_promote(Type.Int64, Lt.dtype()), Lt.device()); if (Lt.device() == Device.cpu) { std::visit( @@ -672,9 +661,8 @@ namespace cytnx { } else { #ifdef UNI_GPU checkCudaErrors(cudaSetDevice(Lt.device())); - cytnx::linalg_internal::lii.cuAri_ii[Lt.dtype()][Type.Int64]( - out._impl->storage()._impl, Lt._impl->storage()._impl, Cnst._impl, - Lt._impl->storage()._impl->size(), {}, {}, {}, 2); + linalg_internal::cuSub_dispatch(out._impl->storage()._impl, Lt._impl->storage()._impl, + Cnst._impl, Lt._impl->storage()._impl->size(), {}, {}, {}); #else cytnx_error_msg(true, "[Sub] fatal error, the tensor is on GPU without CUDA support.%s", "\n"); @@ -690,10 +678,9 @@ namespace cytnx { Cnst.at(0) = rc; Tensor out; out._impl = Lt._impl->_clone_meta_only(); - out._impl->storage() = - Storage(Lt._impl->storage().size(), Type.Uint64 < Lt.dtype() ? Type.Uint64 : Lt.dtype(), - Lt.device()); - // Tensor out(Lt.shape(), Type.Uint64 < Lt.dtype() ? 
Type.Uint64 : Lt.dtype(), Lt.device()); + out._impl->storage() = Storage(Lt._impl->storage().size(), + Type.type_promote(Type.Uint64, Lt.dtype()), Lt.device()); + // Tensor out(Lt.shape(), Type.type_promote(Type.Uint64, Lt.dtype()), Lt.device()); if (Lt.device() == Device.cpu) { std::visit( @@ -707,9 +694,8 @@ namespace cytnx { } else { #ifdef UNI_GPU checkCudaErrors(cudaSetDevice(Lt.device())); - cytnx::linalg_internal::lii.cuAri_ii[Lt.dtype()][Type.Uint64]( - out._impl->storage()._impl, Lt._impl->storage()._impl, Cnst._impl, - Lt._impl->storage()._impl->size(), {}, {}, {}, 2); + linalg_internal::cuSub_dispatch(out._impl->storage()._impl, Lt._impl->storage()._impl, + Cnst._impl, Lt._impl->storage()._impl->size(), {}, {}, {}); #else cytnx_error_msg(true, "[Sub] fatal error, the tensor is on GPU without CUDA support.%s", "\n"); @@ -725,9 +711,9 @@ namespace cytnx { Cnst.at(0) = rc; Tensor out; out._impl = Lt._impl->_clone_meta_only(); - out._impl->storage() = Storage( - Lt._impl->storage().size(), Type.Int32 < Lt.dtype() ? Type.Int32 : Lt.dtype(), Lt.device()); - // Tensor out(Lt.shape(), Type.Int32 < Lt.dtype() ? 
Type.Int32 : Lt.dtype(), Lt.device()); + out._impl->storage() = + Storage(Lt._impl->storage().size(), Type.type_promote(Type.Int32, Lt.dtype()), Lt.device()); + // Tensor out(Lt.shape(), Type.type_promote(Type.Int32, Lt.dtype()), Lt.device()); if (Lt.device() == Device.cpu) { std::visit( @@ -741,9 +727,8 @@ namespace cytnx { } else { #ifdef UNI_GPU checkCudaErrors(cudaSetDevice(Lt.device())); - cytnx::linalg_internal::lii.cuAri_ii[Lt.dtype()][Type.Int32]( - out._impl->storage()._impl, Lt._impl->storage()._impl, Cnst._impl, - Lt._impl->storage()._impl->size(), {}, {}, {}, 2); + linalg_internal::cuSub_dispatch(out._impl->storage()._impl, Lt._impl->storage()._impl, + Cnst._impl, Lt._impl->storage()._impl->size(), {}, {}, {}); #else cytnx_error_msg(true, "[Sub] fatal error, the tensor is on GPU without CUDA support.%s", "\n"); @@ -759,10 +744,9 @@ namespace cytnx { Cnst.at(0) = rc; Tensor out; out._impl = Lt._impl->_clone_meta_only(); - out._impl->storage() = - Storage(Lt._impl->storage().size(), Type.Uint32 < Lt.dtype() ? Type.Uint32 : Lt.dtype(), - Lt.device()); - // Tensor out(Lt.shape(), Type.Uint32 < Lt.dtype() ? 
Type.Uint32 : Lt.dtype(), Lt.device()); + out._impl->storage() = Storage(Lt._impl->storage().size(), + Type.type_promote(Type.Uint32, Lt.dtype()), Lt.device()); + // Tensor out(Lt.shape(), Type.type_promote(Type.Uint32, Lt.dtype()), Lt.device()); if (Lt.device() == Device.cpu) { std::visit( @@ -776,9 +760,8 @@ namespace cytnx { } else { #ifdef UNI_GPU checkCudaErrors(cudaSetDevice(Lt.device())); - cytnx::linalg_internal::lii.cuAri_ii[Lt.dtype()][Type.Uint32]( - out._impl->storage()._impl, Lt._impl->storage()._impl, Cnst._impl, - Lt._impl->storage()._impl->size(), {}, {}, {}, 2); + linalg_internal::cuSub_dispatch(out._impl->storage()._impl, Lt._impl->storage()._impl, + Cnst._impl, Lt._impl->storage()._impl->size(), {}, {}, {}); #else cytnx_error_msg(true, "[Sub] fatal error, the tensor is on GPU without CUDA support.%s", "\n"); @@ -794,9 +777,9 @@ namespace cytnx { Cnst.at(0) = rc; Tensor out; out._impl = Lt._impl->_clone_meta_only(); - out._impl->storage() = Storage( - Lt._impl->storage().size(), Type.Int16 < Lt.dtype() ? Type.Int16 : Lt.dtype(), Lt.device()); - // Tensor out(Lt.shape(), Type.Int16 < Lt.dtype() ? 
Type.Int16 : Lt.dtype(), Lt.device()); + out._impl->storage() = + Storage(Lt._impl->storage().size(), Type.type_promote(Type.Int16, Lt.dtype()), Lt.device()); + // Tensor out(Lt.shape(), Type.type_promote(Type.Int16, Lt.dtype()), Lt.device()); if (Lt.device() == Device.cpu) { std::visit( @@ -810,9 +793,8 @@ namespace cytnx { } else { #ifdef UNI_GPU checkCudaErrors(cudaSetDevice(Lt.device())); - cytnx::linalg_internal::lii.cuAri_ii[Lt.dtype()][Type.Int16]( - out._impl->storage()._impl, Lt._impl->storage()._impl, Cnst._impl, - Lt._impl->storage()._impl->size(), {}, {}, {}, 2); + linalg_internal::cuSub_dispatch(out._impl->storage()._impl, Lt._impl->storage()._impl, + Cnst._impl, Lt._impl->storage()._impl->size(), {}, {}, {}); #else cytnx_error_msg(true, "[Sub] fatal error, the tensor is on GPU without CUDA support.%s", "\n"); @@ -828,10 +810,9 @@ namespace cytnx { Cnst.at(0) = rc; Tensor out; out._impl = Lt._impl->_clone_meta_only(); - out._impl->storage() = - Storage(Lt._impl->storage().size(), Type.Uint16 < Lt.dtype() ? Type.Uint16 : Lt.dtype(), - Lt.device()); - // Tensor out(Lt.shape(), Type.Uint16 < Lt.dtype() ? 
Type.Uint16 : Lt.dtype(), Lt.device()); + out._impl->storage() = Storage(Lt._impl->storage().size(), + Type.type_promote(Type.Uint16, Lt.dtype()), Lt.device()); + // Tensor out(Lt.shape(), Type.type_promote(Type.Uint16, Lt.dtype()), Lt.device()); if (Lt.device() == Device.cpu) { std::visit( @@ -845,9 +826,8 @@ namespace cytnx { } else { #ifdef UNI_GPU checkCudaErrors(cudaSetDevice(Lt.device())); - cytnx::linalg_internal::lii.cuAri_ii[Lt.dtype()][Type.Uint16]( - out._impl->storage()._impl, Lt._impl->storage()._impl, Cnst._impl, - Lt._impl->storage()._impl->size(), {}, {}, {}, 2); + linalg_internal::cuSub_dispatch(out._impl->storage()._impl, Lt._impl->storage()._impl, + Cnst._impl, Lt._impl->storage()._impl->size(), {}, {}, {}); #else cytnx_error_msg(true, "[Sub] fatal error, the tensor is on GPU without CUDA support.%s", "\n"); @@ -863,9 +843,9 @@ namespace cytnx { Cnst.at(0) = rc; Tensor out; out._impl = Lt._impl->_clone_meta_only(); - out._impl->storage() = Storage(Lt._impl->storage().size(), - Type.Bool < Lt.dtype() ? Type.Bool : Lt.dtype(), Lt.device()); - // Tensor out(Lt.shape(), Type.Bool < Lt.dtype() ? 
Type.Bool : Lt.dtype(), Lt.device()); + out._impl->storage() = + Storage(Lt._impl->storage().size(), Type.type_promote(Type.Bool, Lt.dtype()), Lt.device()); + // Tensor out(Lt.shape(), Type.type_promote(Type.Bool, Lt.dtype()), Lt.device()); if (Lt.device() == Device.cpu) { std::visit( @@ -879,9 +859,8 @@ namespace cytnx { } else { #ifdef UNI_GPU checkCudaErrors(cudaSetDevice(Lt.device())); - cytnx::linalg_internal::lii.cuAri_ii[Lt.dtype()][Type.Bool]( - out._impl->storage()._impl, Lt._impl->storage()._impl, Cnst._impl, - Lt._impl->storage()._impl->size(), {}, {}, {}, 2); + linalg_internal::cuSub_dispatch(out._impl->storage()._impl, Lt._impl->storage()._impl, + Cnst._impl, Lt._impl->storage()._impl->size(), {}, {}, {}); #else cytnx_error_msg(true, "[Sub] fatal error, the tensor is on GPU without CUDA support.%s", "\n"); @@ -898,8 +877,8 @@ namespace cytnx { Tensor out; out._impl = Lt._impl->_clone_meta_only(); - out._impl->storage() = Storage( - Lt._impl->storage().size(), Lt.dtype() < rc.dtype() ? 
Lt.dtype() : rc.dtype(), Lt.device()); + out._impl->storage() = + Storage(Lt._impl->storage().size(), Type.type_promote(Lt.dtype(), rc.dtype()), Lt.device()); if (Lt.device() == Device.cpu) { std::visit( @@ -918,9 +897,8 @@ namespace cytnx { } else { #ifdef UNI_GPU checkCudaErrors(cudaSetDevice(Lt.device())); - linalg_internal::lii.cuAri_ii[Lt.dtype()][rc.dtype()]( - out._impl->storage()._impl, Lt._impl->storage()._impl, Cnst._impl, - Lt._impl->storage()._impl->size(), {}, {}, {}, 2); + linalg_internal::cuSub_dispatch(out._impl->storage()._impl, Lt._impl->storage()._impl, + Cnst._impl, Lt._impl->storage()._impl->size(), {}, {}, {}); #else cytnx_error_msg(true, "[Sub] fatal error, the tensor is on GPU without CUDA support.%s", "\n"); diff --git a/tests/gpu/linalg_test/Div_test.cpp b/tests/gpu/linalg_test/Div_test.cpp index 7e712a0b2..9f73063b4 100644 --- a/tests/gpu/linalg_test/Div_test.cpp +++ b/tests/gpu/linalg_test/Div_test.cpp @@ -199,6 +199,44 @@ namespace DivTest { } } + TEST(DivMixedDtypeTest, gpu_tensor_div_tensor_mixed_unsigned_signed_type_promote) { + cytnx::Tensor lhs = cytnx::arange(1, 7, 1, cytnx::Type.Uint32).reshape({2, 3}); + cytnx::Tensor rhs = cytnx::arange(1, 7, 1, cytnx::Type.Int16).reshape({2, 3}); + lhs = lhs.to(cytnx::Device.cuda); + rhs = rhs.to(cytnx::Device.cuda); + + cytnx::Tensor gpu_result = cytnx::linalg::Div(lhs, rhs); + cytnx::Tensor expected_cpu = + cytnx::linalg::Div(lhs.to(cytnx::Device.cpu), rhs.to(cytnx::Device.cpu)); + cytnx::Tensor gpu_result_cpu = gpu_result.to(cytnx::Device.cpu); + + EXPECT_EQ(gpu_result.dtype(), expected_cpu.dtype()); + EXPECT_TRUE(cytnx::TestTools::AreNearlyEqTensor(gpu_result_cpu, expected_cpu, + GetTolerance(gpu_result.dtype()))); + } + + TEST(DivMixedDtypeTest, gpu_scalar_div_tensor_mixed_unsigned_signed_type_promote) { + const cytnx::cytnx_uint32 scalar = 12; + cytnx::Tensor rhs = + cytnx::arange(1, 7, 1, cytnx::Type.Int16).reshape({2, 3}).to(cytnx::Device.cuda); + + cytnx::Tensor gpu_result_l = 
cytnx::linalg::Div(scalar, rhs); + cytnx::Tensor gpu_result_r = cytnx::linalg::Div(rhs, scalar); + + cytnx::Tensor rhs_cpu = rhs.to(cytnx::Device.cpu); + cytnx::Tensor expected_l = cytnx::linalg::Div(scalar, rhs_cpu); + cytnx::Tensor expected_r = cytnx::linalg::Div(rhs_cpu, scalar); + cytnx::Tensor gpu_result_l_cpu = gpu_result_l.to(cytnx::Device.cpu); + cytnx::Tensor gpu_result_r_cpu = gpu_result_r.to(cytnx::Device.cpu); + + EXPECT_EQ(gpu_result_l.dtype(), expected_l.dtype()); + EXPECT_EQ(gpu_result_r.dtype(), expected_r.dtype()); + EXPECT_TRUE(cytnx::TestTools::AreNearlyEqTensor(gpu_result_l_cpu, expected_l, + GetTolerance(gpu_result_l.dtype()))); + EXPECT_TRUE(cytnx::TestTools::AreNearlyEqTensor(gpu_result_r_cpu, expected_r, + GetTolerance(gpu_result_r.dtype()))); + } + INSTANTIATE_TEST_SUITE_P(DivTests, DivTestAllShapes, ::testing::ValuesIn(GetTestShapes())); } // namespace DivTest diff --git a/tests/gpu/linalg_test/Mul_test.cpp b/tests/gpu/linalg_test/Mul_test.cpp index be7e75a35..e8207dbc4 100644 --- a/tests/gpu/linalg_test/Mul_test.cpp +++ b/tests/gpu/linalg_test/Mul_test.cpp @@ -128,6 +128,41 @@ namespace MulTest { } } + TEST(MulMixedDtypeTest, gpu_tensor_mul_tensor_mixed_unsigned_signed_type_promote) { + cytnx::Tensor lhs = cytnx::arange(0, 6, 1, cytnx::Type.Uint32).reshape({2, 3}); + cytnx::Tensor rhs = cytnx::arange(0, 6, 1, cytnx::Type.Int16).reshape({2, 3}); + lhs = lhs.to(cytnx::Device.cuda); + rhs = rhs.to(cytnx::Device.cuda); + + cytnx::Tensor gpu_result = cytnx::linalg::Mul(lhs, rhs); + cytnx::Tensor expected_cpu = + cytnx::linalg::Mul(lhs.to(cytnx::Device.cpu), rhs.to(cytnx::Device.cpu)); + cytnx::Tensor gpu_result_cpu = gpu_result.to(cytnx::Device.cpu); + + EXPECT_EQ(gpu_result.dtype(), expected_cpu.dtype()); + EXPECT_TRUE(cytnx::TestTools::AreNearlyEqTensor(gpu_result_cpu, expected_cpu, 1e-6)); + } + + TEST(MulMixedDtypeTest, gpu_scalar_mul_tensor_mixed_unsigned_signed_type_promote) { + const cytnx::cytnx_uint32 scalar = 5; + cytnx::Tensor rhs = 
+ cytnx::arange(0, 6, 1, cytnx::Type.Int16).reshape({2, 3}).to(cytnx::Device.cuda); + + cytnx::Tensor gpu_result_l = cytnx::linalg::Mul(scalar, rhs); + cytnx::Tensor gpu_result_r = cytnx::linalg::Mul(rhs, scalar); + + cytnx::Tensor rhs_cpu = rhs.to(cytnx::Device.cpu); + cytnx::Tensor expected_l = cytnx::linalg::Mul(scalar, rhs_cpu); + cytnx::Tensor expected_r = cytnx::linalg::Mul(rhs_cpu, scalar); + cytnx::Tensor gpu_result_l_cpu = gpu_result_l.to(cytnx::Device.cpu); + cytnx::Tensor gpu_result_r_cpu = gpu_result_r.to(cytnx::Device.cpu); + + EXPECT_EQ(gpu_result_l.dtype(), expected_l.dtype()); + EXPECT_EQ(gpu_result_r.dtype(), expected_r.dtype()); + EXPECT_TRUE(cytnx::TestTools::AreNearlyEqTensor(gpu_result_l_cpu, expected_l, 1e-6)); + EXPECT_TRUE(cytnx::TestTools::AreNearlyEqTensor(gpu_result_r_cpu, expected_r, 1e-6)); + } + INSTANTIATE_TEST_SUITE_P(MulTests, MulTestAllShapes, ::testing::ValuesIn(GetTestShapes())); ::testing::AssertionResult CheckMulResult(const cytnx::Tensor& gpu_result, diff --git a/tests/gpu/linalg_test/Sub_test.cpp b/tests/gpu/linalg_test/Sub_test.cpp index a1a97261a..3e7ad3d61 100644 --- a/tests/gpu/linalg_test/Sub_test.cpp +++ b/tests/gpu/linalg_test/Sub_test.cpp @@ -128,6 +128,41 @@ namespace SubTest { } } + TEST(SubMixedDtypeTest, gpu_tensor_sub_tensor_mixed_unsigned_signed_type_promote) { + cytnx::Tensor lhs = cytnx::arange(0, 6, 1, cytnx::Type.Uint32).reshape({2, 3}); + cytnx::Tensor rhs = cytnx::arange(0, 6, 1, cytnx::Type.Int16).reshape({2, 3}); + lhs = lhs.to(cytnx::Device.cuda); + rhs = rhs.to(cytnx::Device.cuda); + + cytnx::Tensor gpu_result = cytnx::linalg::Sub(lhs, rhs); + cytnx::Tensor expected_cpu = + cytnx::linalg::Sub(lhs.to(cytnx::Device.cpu), rhs.to(cytnx::Device.cpu)); + cytnx::Tensor gpu_result_cpu = gpu_result.to(cytnx::Device.cpu); + + EXPECT_EQ(gpu_result.dtype(), expected_cpu.dtype()); + EXPECT_TRUE(cytnx::TestTools::AreNearlyEqTensor(gpu_result_cpu, expected_cpu, 1e-6)); + } + + TEST(SubMixedDtypeTest, 
gpu_scalar_sub_tensor_mixed_unsigned_signed_type_promote) { + const cytnx::cytnx_uint32 scalar = 5; + cytnx::Tensor rhs = + cytnx::arange(0, 6, 1, cytnx::Type.Int16).reshape({2, 3}).to(cytnx::Device.cuda); + + cytnx::Tensor gpu_result_l = cytnx::linalg::Sub(scalar, rhs); + cytnx::Tensor gpu_result_r = cytnx::linalg::Sub(rhs, scalar); + + cytnx::Tensor rhs_cpu = rhs.to(cytnx::Device.cpu); + cytnx::Tensor expected_l = cytnx::linalg::Sub(scalar, rhs_cpu); + cytnx::Tensor expected_r = cytnx::linalg::Sub(rhs_cpu, scalar); + cytnx::Tensor gpu_result_l_cpu = gpu_result_l.to(cytnx::Device.cpu); + cytnx::Tensor gpu_result_r_cpu = gpu_result_r.to(cytnx::Device.cpu); + + EXPECT_EQ(gpu_result_l.dtype(), expected_l.dtype()); + EXPECT_EQ(gpu_result_r.dtype(), expected_r.dtype()); + EXPECT_TRUE(cytnx::TestTools::AreNearlyEqTensor(gpu_result_l_cpu, expected_l, 1e-6)); + EXPECT_TRUE(cytnx::TestTools::AreNearlyEqTensor(gpu_result_r_cpu, expected_r, 1e-6)); + } + INSTANTIATE_TEST_SUITE_P(SubTests, SubTestAllShapes, ::testing::ValuesIn(GetTestShapes())); ::testing::AssertionResult CheckSubResult(const cytnx::Tensor& gpu_result,