array2d
diff --git a/‎.github/ISSUE_TEMPLATE/operator.md‎
Lines changed: 29 additions & 0 deletions b/‎.github/ISSUE_TEMPLATE/operator.md‎
Lines changed: 29 additions & 0 deletions
diff --git a/‎doc/excuter/op-mem-cuda/list.md‎
Lines changed: 1 addition & 1 deletion b/‎doc/excuter/op-mem-cuda/list.md‎
Lines changed: 1 addition & 1 deletion
diff --git a/‎doc/excuter/op-mem-ompsimd/list.md‎
Lines changed: 4 additions & 1 deletion b/‎doc/excuter/op-mem-ompsimd/list.md‎
Lines changed: 4 additions & 1 deletion
diff --git a/‎excuter/op-mem-cuda/src/client/tfs.cpp‎
Lines changed: 2 additions & 2 deletions b/‎excuter/op-mem-cuda/src/client/tfs.cpp‎
Lines changed: 2 additions & 2 deletions
diff --git a/‎excuter/op-mem-cuda/src/deepx/tensorfunc/cuda.hpp‎
Lines changed: 6 additions & 0 deletions b/‎excuter/op-mem-cuda/src/deepx/tensorfunc/cuda.hpp‎
Lines changed: 6 additions & 0 deletions
diff --git a/‎excuter/op-mem-cuda/src/deepx/tensorfunc/elementwise_miaobyte_basic.cu‎
Lines changed: 1 addition & 0 deletions b/‎excuter/op-mem-cuda/src/deepx/tensorfunc/elementwise_miaobyte_basic.cu‎
Lines changed: 1 addition & 0 deletions
diff --git a/‎excuter/op-mem-cuda/src/deepx/tensorfunc/tensorlife_miaobyte.hpp‎
Lines changed: 2 additions & 2 deletions b/‎excuter/op-mem-cuda/src/deepx/tensorfunc/tensorlife_miaobyte.hpp‎
Lines changed: 2 additions & 2 deletions
diff --git a/‎excuter/op-mem-cuda/src/deepx/tf/elementwise_basic.hpp‎
Lines changed: 3 additions & 0 deletions b/‎excuter/op-mem-cuda/src/deepx/tf/elementwise_basic.hpp‎
Lines changed: 3 additions & 0 deletions
diff --git a/‎excuter/op-mem-cuda/src/deepx/tf/elementwise_compare.hpp‎
Lines changed: 3 additions & 3 deletions b/‎excuter/op-mem-cuda/src/deepx/tf/elementwise_compare.hpp‎
Lines changed: 3 additions & 3 deletions
diff --git a/‎excuter/op-mem-ompsimd/src/client/tfs.cpp‎
Lines changed: 29 additions & 2 deletions b/‎excuter/op-mem-ompsimd/src/client/tfs.cpp‎
Lines changed: 29 additions & 2 deletions
@@ -0,0 +1,29 @@
+---
+name: 算子新增
+about: 用于提交新的算子实现请求
+title: '[算子] '
+labels: enhancement, operator
+assignees: ''
+---
+
+## 算子新增
+该算子数学表达为
+
+## 影响组件
+
+### front
+1. 
+2.
+
+### 引擎
+1. 
+2. 
+
+## 其他叙述
+
+<!-- 请在此处添加其他相关信息，如：
+- 参考实现（如PyTorch中的实现）
+- 性能要求
+- 测试用例
+- 其他注意事项
+-->
@@ -80,7 +80,7 @@
 | equal | miaobyte | T1==T2->mask | equal(tensor<any> A, tensor<any> B, var<float32> epsilon)->(tensor<bool> mask) |
 | mulscalar | miaobyte | T3=T1*scalar | mulscalar(tensor<any> A, var<any> b)->(tensor<any> C) |
 | div | miaobyte | T3=T1/T2 | div(tensor<any> A, tensor<any> B)->(tensor<any> C) |
-| invert | miaobyte | T3=~T1 | invert(tensor<int64|int32|int16|int8> A)->(tensor<int64|int32|int16|int8> C) |
+| invert | miaobyte | T3=~T1 | invert(tensor<int64|int32|int16|int8|bool> A)->(tensor<int64|int32|int16|int8|bool> C) |
 | max | miaobyte | T3=max(T1, T2) | max(tensor<any> A, tensor<any> B)->(tensor<any> C) |
 | pow | miaobyte | T3=pow(T1, T2) | pow(tensor<float64|float32> A, tensor<float64|float32> B)->(tensor<float64|float32> C) |
 
 
@@ -56,11 +56,14 @@
 | equalscalar | miaobyte | mask=equal(T1,scalar) | equalscalar(tensor<any> A, var<any> scalar, var<float32> eposilon)->(tensor<bool> mask) |
 | min | miaobyte | T3=min(T1,T2) | min(tensor<any> A, tensor<any> B)->(tensor<any> C) |
 | maxscalar | miaobyte | T3=max(T1,scalar) | maxscalar(tensor<any> A, var<any> scalar)->(tensor<any> C) |
+| tan | miaobyte | T3=tan(T1) | tan(tensor<any> A)->(tensor<any> C) |
+| sin | miaobyte | T3=sin(T1) | sin(tensor<any> A)->(tensor<any> C) |
 | divscalar | miaobyte | T3=T1/scalar | divscalar(tensor<any> A, var<any> scalar)->(tensor<any> C) |
 | log | miaobyte | T3=log(T1) | log(tensor<any> A)->(tensor<any> C) |
 | addscalar | miaobyte | T3=T1+scalar | addscalar(tensor<any> a, var<any> scalar)->(tensor<any> c) |
 | greater | miaobyte | mask=greater(T1,T2) | greater(tensor<any> A, tensor<any> B)->(tensor<bool> mask) |
 | lessscalar | miaobyte | mask=less(T1,scalar) | lessscalar(tensor<any> A, var<any> scalar)->(tensor<bool> mask) |
+| cos | miaobyte | T3=cos(T1) | cos(tensor<any> A)->(tensor<any> C) |
 | notequalscalar | miaobyte | mask=notequal(T1,scalar) | notequalscalar(tensor<any> A, var<any> scalar, var<float32> epsilon)->(tensor<bool> mask) |
 | minscalar | miaobyte | T3=min(T1,scalar) | minscalar(tensor<any> A, var<any> scalar)->(tensor<any> C) |
 | rpowscalar | miaobyte | T3=scalar^T1 | rpowscalar(var<float32> scalar, tensor<any> A)->(tensor<any> C) |
@@ -78,7 +81,7 @@
 | equal | miaobyte | equal(T1,T2)->mask | equal(tensor<any> A, tensor<any> B, var<float32> eposilon)->(tensor<bool> mask) |
 | mulscalar | miaobyte | T3=T1*scalar | mulscalar(tensor<any> A, var<any> b)->(tensor<any> C) |
 | div | miaobyte | T3=T1/T2 | div(tensor<any> A, tensor<any> B)->(tensor<any> C) |
-| invert | miaobyte | T3=~T1 | invert(tensor<int64|int32|int16|int8> A)->(tensor<int64|int32|int16|int8> C) |
+| invert | miaobyte | T3=~T1 | invert(tensor<int64|int32|int16|int8|bool> A)->(tensor<int64|int32|int16|int8|bool> C) |
 | max | miaobyte | T3=max(T1,T2) | max(tensor<any> A, tensor<any> B)->(tensor<any> C) |
 | pow | miaobyte | T3=T1^T2 | pow(tensor<any> A, tensor<any> B)->(tensor<any> C) |
 
 
@@ -280,11 +280,11 @@ namespace deepx::tf
         // invert
         tffactory.add_tf(std::make_shared<Invert<miaobyte>>(vector<Param>(
                                                                 {
-                                                                    Param("A", DataCategory::Tensor, Precision::Int64 | Precision::Int32 | Precision::Int16 | Precision::Int8),
+                                                                    Param("A", DataCategory::Tensor, Precision::Int64 | Precision::Int32 | Precision::Int16 | Precision::Int8|Precision::Bool),
                                                                 }),
                                                             vector<Param>(
                                                                 {
-                                                                    Param("C", DataCategory::Tensor, Precision::Int64 | Precision::Int32 | Precision::Int16 | Precision::Int8),
+                                                                    Param("C", DataCategory::Tensor, Precision::Int64 | Precision::Int32 | Precision::Int16 | Precision::Int8|Precision::Bool),
                                                                 })));
 
         tffactory.add_tf(std::make_shared<Sqrt<miaobyte>>(vector<Param>(
 
@@ -80,6 +80,12 @@ namespace deepx::tensorfunc
         return {size, host_data};
     }
 
+    inline void throwcudaerror(const std::string& msg,cudaError_t err){ // Throws std::runtime_error when err is not cudaSuccess; does nothing otherwise.
+       if (err != cudaSuccess)
+        {
+            throw std::runtime_error(msg + "\n" + std::string(cudaGetErrorString(err))); // message = msg, a newline, then the CUDA error string for err
+        }
+    }
 }
 
 #endif
@@ -406,6 +406,7 @@ namespace deepx::tensorfunc
     template void launch_invert<int32_t>(const int32_t *a, int32_t *c, const int size);
     template void launch_invert<int16_t>(const int16_t *a, int16_t *c, const int size);
     template void launch_invert<int8_t>(const int8_t *a, int8_t *c, const int size);
+    template void launch_invert<bool>(const bool *a, bool *c, const int size);
 
 }
 
 
@@ -19,8 +19,8 @@ namespace deepx::tensorfunc
         T *data;
         cudaError_t err = cudaMalloc(&data, size * sizeof(T));
         if (err != cudaSuccess)
-        {
-            throw std::runtime_error("Failed to allocate Unified Memory");
+        {   
+            throwcudaerror("Failed to cudaMalloc "+std::to_string(size) +" "+ precision_str(precision<T>()),err);
         }
         return data;
     }
 
@@ -1026,6 +1026,9 @@ namespace deepx::tf
             case Precision::Int8:
                 tensorfunc::invert<Author>(*mem->gettensor<int8_t>(this->args[0].textvalue), *mem->gettensor<int8_t>(this->returns[0].textvalue));
                 break;
+            case Precision::Bool:
+                tensorfunc::invert<Author>(*mem->gettensor<bool>(this->args[0].textvalue), *mem->gettensor<bool>(this->returns[0].textvalue));
+                break;
             default:
                 error = "Unsupported dtype: " + precision_str(a_type);
                 return 1;
 
@@ -636,7 +636,7 @@ namespace deepx::tf
         {
             Precision a_type = mem->gettensor(this->args[0].textvalue).get()->shape.dtype;
             Precision mask_type = mem->gettensor(this->returns[0].textvalue).get()->shape.dtype;
-            if (a_type != mask_type || mask_type != Precision::Bool)
+            if (mask_type != Precision::Bool)
             {
                 error = "Type mismatch: " + precision_str(a_type) + " != " + precision_str(mask_type);
                 return 1;
@@ -769,7 +769,7 @@ namespace deepx::tf
         {
             Precision a_type = mem->gettensor(this->args[0].textvalue).get()->shape.dtype;
             Precision mask_type = mem->gettensor(this->returns[0].textvalue).get()->shape.dtype;
-            if (a_type != mask_type || mask_type != Precision::Bool)
+            if (mask_type != Precision::Bool)
             {
                 error = "Type mismatch: " + precision_str(a_type) + " != " + precision_str(mask_type);
                 return 1;
@@ -916,7 +916,7 @@ namespace deepx::tf
                 }
                 else
                 {
-                    tensorfunc::Switch<Author, int8_t,int32_t>(mem->gettensors<int8_t>(this->getvector<string>(0)), *mem->gettensor<int32_t>(this->args[1].textvalue), *mem->gettensor<int8_t>(this->returns[0].textvalue));
+                    tensorfunc::Switch<Author, int8_t,int32_t>(mem->gettensors<int8_t>(this->getvector<string>(0)),     *mem->gettensor<int32_t>(this->args[1].textvalue), *mem->gettensor<int8_t>(this->returns[0].textvalue));
                 }
                 break;
             case Precision::Bool:
 
@@ -299,11 +299,11 @@ namespace deepx::tf
         // invert author=miaobyte
         tffactory.add_tf(std::make_shared<Invert<miaobyte>>(vector<Param>(
                                                                 {
-                                                                    Param("A", DataCategory::Tensor, Precision::Int64 | Precision::Int32 | Precision::Int16 | Precision::Int8),
+                                                                    Param("A", DataCategory::Tensor, Precision::Int64 | Precision::Int32 | Precision::Int16 | Precision::Int8|Precision::Bool),
                                                                 }),
                                                             vector<Param>(
                                                                 {
-                                                                    Param("C", DataCategory::Tensor, Precision::Int64 | Precision::Int32 | Precision::Int16 | Precision::Int8),
+                                                                    Param("C", DataCategory::Tensor, Precision::Int64 | Precision::Int32 | Precision::Int16 | Precision::Int8|Precision::Bool),
                                                                 })));
         // sqrt author=miaobyte
         tffactory.add_tf(std::make_shared<Sqrt<miaobyte>>(vector<Param>(
@@ -364,6 +364,33 @@ namespace deepx::tf
                                                              {
                                                                  Param("C", DataCategory::Tensor, Precision::Any),
                                                              })));
+        // sin author=miaobyte
+        tffactory.add_tf(std::make_shared<Sin<miaobyte>>(vector<Param>(
+                                                             {
+                                                                 Param("A", DataCategory::Tensor, Precision::Any),
+                                                             }),
+                                                         vector<Param>(
+                                                             {
+                                                                 Param("C", DataCategory::Tensor, Precision::Any),
+                                                             })));
+        // cos author=miaobyte
+        tffactory.add_tf(std::make_shared<Cos<miaobyte>>(vector<Param>(
+                                                             {
+                                                                 Param("A", DataCategory::Tensor, Precision::Any),
+                                                             }),
+                                                            vector<Param>(
+                                                             {
+                                                                 Param("C", DataCategory::Tensor, Precision::Any),
+                                                             })));
+        // tan author=miaobyte
+        tffactory.add_tf(std::make_shared<Tan<miaobyte>>(vector<Param>(
+                                                             {
+                                                                 Param("A", DataCategory::Tensor, Precision::Any),
+                                                             }),
+                                                         vector<Param>(
+                                                             {
+                                                                 Param("C", DataCategory::Tensor, Precision::Any),
+                                                             })));
         // max author=miaobyte
         tffactory.add_tf(std::make_shared<Max<miaobyte>>(vector<Param>(
                                                              {
Original file line number	Diff line number	Diff line change
`@@ -280,11 +280,11 @@ namespace deepx::tf`
`280`	`280`	`// invert`
`281`	`281`	`tffactory.add_tf(std::make_shared<Invert<miaobyte>>(vector<Param>(`
`282`	`282`	`{`
`283`		`- Param("A", DataCategory::Tensor, Precision::Int64 \| Precision::Int32 \| Precision::Int16 \| Precision::Int8),`
	`283`	`+ Param("A", DataCategory::Tensor, Precision::Int64 \| Precision::Int32 \| Precision::Int16 \| Precision::Int8\|Precision::Bool),`
`284`	`284`	`}),`
`285`	`285`	`vector<Param>(`
`286`	`286`	`{`
`287`		`- Param("C", DataCategory::Tensor, Precision::Int64 \| Precision::Int32 \| Precision::Int16 \| Precision::Int8),`
	`287`	`+ Param("C", DataCategory::Tensor, Precision::Int64 \| Precision::Int32 \| Precision::Int16 \| Precision::Int8\|Precision::Bool),`
`288`	`288`	`})));`
`289`	`289`
`290`	`290`	`tffactory.add_tf(std::make_shared<Sqrt<miaobyte>>(vector<Param>(`
Original file line number	Diff line number	Diff line change
`@@ -80,6 +80,12 @@ namespace deepx::tensorfunc`
`80`	`80`	`return {size, host_data};`
`81`	`81`	`}`
`82`	`82`
	`83`	`+ inline void throwcudaerror(const std::string& msg,cudaError_t err){`
	`84`	`+ if (err != cudaSuccess)`
	`85`	`+ {`
	`86`	`+ throw std::runtime_error(msg + "\n" + std::string(cudaGetErrorString(err)));`
	`87`	`+ }`
	`88`	`+ }`
`83`	`89`	`}`
`84`	`90`
`85`	`91`	`#endif`
Original file line number	Diff line number	Diff line change
`@@ -406,6 +406,7 @@ namespace deepx::tensorfunc`
`406`	`406`	`template void launch_invert<int32_t>(const int32_t *a, int32_t *c, const int size);`
`407`	`407`	`template void launch_invert<int16_t>(const int16_t *a, int16_t *c, const int size);`
`408`	`408`	`template void launch_invert<int8_t>(const int8_t *a, int8_t *c, const int size);`
	`409`	`+ template void launch_invert<bool>(const bool *a, bool *c, const int size);`
`409`	`410`
`410`	`411`	`}`
`411`	`412`
Original file line number	Diff line number	Diff line change
`@@ -19,8 +19,8 @@ namespace deepx::tensorfunc`
`19`	`19`	`T *data;`
`20`	`20`	`cudaError_t err = cudaMalloc(&data, size * sizeof(T));`
`21`	`21`	`if (err != cudaSuccess)`
`22`		`- {`
`23`		`- throw std::runtime_error("Failed to allocate Unified Memory");`
	`22`	`+ {`
	`23`	`+ throwcudaerror("Failed to cudaMalloc "+std::to_string(size) +" "+ precision_str(precision<T>()),err);`
`24`	`24`	`}`
`25`	`25`	`return data;`
`26`	`26`	`}`
Original file line number	Diff line number	Diff line change
`@@ -636,7 +636,7 @@ namespace deepx::tf`
`636`	`636`	`{`
`637`	`637`	`Precision a_type = mem->gettensor(this->args[0].textvalue).get()->shape.dtype;`
`638`	`638`	`Precision mask_type = mem->gettensor(this->returns[0].textvalue).get()->shape.dtype;`
`639`		`- if (a_type != mask_type \|\| mask_type != Precision::Bool)`
	`639`	`+ if (mask_type != Precision::Bool)`
`640`	`640`	`{`
`641`	`641`	`error = "Type mismatch: " + precision_str(a_type) + " != " + precision_str(mask_type);`
`642`	`642`	`return 1;`
`@@ -769,7 +769,7 @@ namespace deepx::tf`
`769`	`769`	`{`
`770`	`770`	`Precision a_type = mem->gettensor(this->args[0].textvalue).get()->shape.dtype;`
`771`	`771`	`Precision mask_type = mem->gettensor(this->returns[0].textvalue).get()->shape.dtype;`
`772`		`- if (a_type != mask_type \|\| mask_type != Precision::Bool)`
	`772`	`+ if (mask_type != Precision::Bool)`
`773`	`773`	`{`
`774`	`774`	`error = "Type mismatch: " + precision_str(a_type) + " != " + precision_str(mask_type);`
`775`	`775`	`return 1;`
`@@ -916,7 +916,7 @@ namespace deepx::tf`
`916`	`916`	`}`
`917`	`917`	`else`
`918`	`918`	`{`
`919`		`- tensorfunc::Switch<Author, int8_t,int32_t>(mem->gettensors<int8_t>(this->getvector<string>(0)), *mem->gettensor<int32_t>(this->args[1].textvalue), *mem->gettensor<int8_t>(this->returns[0].textvalue));`
	`919`	`+ tensorfunc::Switch<Author, int8_t,int32_t>(mem->gettensors<int8_t>(this->getvector<string>(0)),     *mem->gettensor<int32_t>(this->args[1].textvalue), *mem->gettensor<int8_t>(this->returns[0].textvalue));`
`920`	`920`	`}`
`921`	`921`	`break;`
`922`	`922`	`case Precision::Bool:`