Skip to content

Commit 4ef769f

Browse files
authored
rope,embedding验证 (#59)
* LlamaRotaryEmbedding: 验证中,目前和torch还不太一致 * rope,embedding验证:
1 parent 5409286 commit 4ef769f

24 files changed

Lines changed: 252 additions & 175 deletions

File tree

.github/ISSUE_TEMPLATE/operator.md

Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,29 @@
1+
---
2+
name: 算子新增
3+
about: 用于提交新的算子实现请求
4+
title: '[算子] '
5+
labels: enhancement, operator
6+
assignees: ''
7+
---
8+
9+
## 算子新增
10+
该算子数学表达为
11+
12+
## 影响组件
13+
14+
### front
15+
1.
16+
2.
17+
18+
### 引擎
19+
1.
20+
2.
21+
22+
## 其他叙述
23+
24+
<!-- 请在此处添加其他相关信息,如:
25+
- 参考实现(如PyTorch中的实现)
26+
- 性能要求
27+
- 测试用例
28+
- 其他注意事项
29+
-->

doc/excuter/op-mem-cuda/list.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -80,7 +80,7 @@
8080
| equal | miaobyte | T1==T2->mask | equal(tensor<any> A, tensor<any> B, var<float32> epsilon)->(tensor<bool> mask) |
8181
| mulscalar | miaobyte | T3=T1*scalar | mulscalar(tensor<any> A, var<any> b)->(tensor<any> C) |
8282
| div | miaobyte | T3=T1/T2 | div(tensor<any> A, tensor<any> B)->(tensor<any> C) |
83-
| invert | miaobyte | T3=~T1 | invert(tensor<int64|int32|int16|int8> A)->(tensor<int64|int32|int16|int8> C) |
83+
| invert | miaobyte | T3=~T1 | invert(tensor<int64|int32|int16|int8|bool> A)->(tensor<int64|int32|int16|int8|bool> C) |
8484
| max | miaobyte | T3=max(T1, T2) | max(tensor<any> A, tensor<any> B)->(tensor<any> C) |
8585
| pow | miaobyte | T3=pow(T1, T2) | pow(tensor<float64|float32> A, tensor<float64|float32> B)->(tensor<float64|float32> C) |
8686

doc/excuter/op-mem-ompsimd/list.md

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -56,11 +56,14 @@
5656
| equalscalar | miaobyte | mask=equal(T1,scalar) | equalscalar(tensor<any> A, var<any> scalar, var<float32> eposilon)->(tensor<bool> mask) |
5757
| min | miaobyte | T3=min(T1,T2) | min(tensor<any> A, tensor<any> B)->(tensor<any> C) |
5858
| maxscalar | miaobyte | T3=max(T1,scalar) | maxscalar(tensor<any> A, var<any> scalar)->(tensor<any> C) |
59+
| tan | miaobyte | T3=tan(T1) | tan(tensor<any> A)->(tensor<any> C) |
60+
| sin | miaobyte | T3=sin(T1) | sin(tensor<any> A)->(tensor<any> C) |
5961
| divscalar | miaobyte | T3=T1/scalar | divscalar(tensor<any> A, var<any> scalar)->(tensor<any> C) |
6062
| log | miaobyte | T3=log(T1) | log(tensor<any> A)->(tensor<any> C) |
6163
| addscalar | miaobyte | T3=T1+scalar | addscalar(tensor<any> a, var<any> scalar)->(tensor<any> c) |
6264
| greater | miaobyte | mask=greater(T1,T2) | greater(tensor<any> A, tensor<any> B)->(tensor<bool> mask) |
6365
| lessscalar | miaobyte | mask=less(T1,scalar) | lessscalar(tensor<any> A, var<any> scalar)->(tensor<bool> mask) |
66+
| cos | miaobyte | T3=cos(T1) | cos(tensor<any> A)->(tensor<any> C) |
6467
| notequalscalar | miaobyte | mask=notequal(T1,scalar) | notequalscalar(tensor<any> A, var<any> scalar, var<float32> epsilon)->(tensor<bool> mask) |
6568
| minscalar | miaobyte | T3=min(T1,scalar) | minscalar(tensor<any> A, var<any> scalar)->(tensor<any> C) |
6669
| rpowscalar | miaobyte | T3=scalar^T1 | rpowscalar(var<float32> scalar, tensor<any> A)->(tensor<any> C) |
@@ -78,7 +81,7 @@
7881
| equal | miaobyte | equal(T1,T2)->mask | equal(tensor<any> A, tensor<any> B, var<float32> eposilon)->(tensor<bool> mask) |
7982
| mulscalar | miaobyte | T3=T1*scalar | mulscalar(tensor<any> A, var<any> b)->(tensor<any> C) |
8083
| div | miaobyte | T3=T1/T2 | div(tensor<any> A, tensor<any> B)->(tensor<any> C) |
81-
| invert | miaobyte | T3=~T1 | invert(tensor<int64|int32|int16|int8> A)->(tensor<int64|int32|int16|int8> C) |
84+
| invert | miaobyte | T3=~T1 | invert(tensor<int64|int32|int16|int8|bool> A)->(tensor<int64|int32|int16|int8|bool> C) |
8285
| max | miaobyte | T3=max(T1,T2) | max(tensor<any> A, tensor<any> B)->(tensor<any> C) |
8386
| pow | miaobyte | T3=T1^T2 | pow(tensor<any> A, tensor<any> B)->(tensor<any> C) |
8487

excuter/op-mem-cuda/src/client/tfs.cpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -280,11 +280,11 @@ namespace deepx::tf
280280
// invert
281281
tffactory.add_tf(std::make_shared<Invert<miaobyte>>(vector<Param>(
282282
{
283-
Param("A", DataCategory::Tensor, Precision::Int64 | Precision::Int32 | Precision::Int16 | Precision::Int8),
283+
Param("A", DataCategory::Tensor, Precision::Int64 | Precision::Int32 | Precision::Int16 | Precision::Int8|Precision::Bool),
284284
}),
285285
vector<Param>(
286286
{
287-
Param("C", DataCategory::Tensor, Precision::Int64 | Precision::Int32 | Precision::Int16 | Precision::Int8),
287+
Param("C", DataCategory::Tensor, Precision::Int64 | Precision::Int32 | Precision::Int16 | Precision::Int8|Precision::Bool),
288288
})));
289289

290290
tffactory.add_tf(std::make_shared<Sqrt<miaobyte>>(vector<Param>(

excuter/op-mem-cuda/src/deepx/tensorfunc/cuda.hpp

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -80,6 +80,12 @@ namespace deepx::tensorfunc
8080
return {size, host_data};
8181
}
8282

83+
// Converts a failed CUDA status into a C++ exception.
// If `err` is not cudaSuccess, throws std::runtime_error whose message is
// `msg`, a newline, and the text returned by cudaGetErrorString(err).
// If `err` is cudaSuccess, the function returns without doing anything,
// so callers may invoke it unconditionally after a CUDA call.
inline void throwcudaerror(const std::string& msg,cudaError_t err){
84+
if (err != cudaSuccess)
85+
{
86+
throw std::runtime_error(msg + "\n" + std::string(cudaGetErrorString(err)));
87+
}
88+
}
8389
}
8490

8591
#endif

excuter/op-mem-cuda/src/deepx/tensorfunc/elementwise_miaobyte_basic.cu

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -406,6 +406,7 @@ namespace deepx::tensorfunc
406406
template void launch_invert<int32_t>(const int32_t *a, int32_t *c, const int size);
407407
template void launch_invert<int16_t>(const int16_t *a, int16_t *c, const int size);
408408
template void launch_invert<int8_t>(const int8_t *a, int8_t *c, const int size);
409+
template void launch_invert<bool>(const bool *a, bool *c, const int size);
409410

410411
}
411412

excuter/op-mem-cuda/src/deepx/tensorfunc/tensorlife_miaobyte.hpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -19,8 +19,8 @@ namespace deepx::tensorfunc
1919
T *data;
2020
cudaError_t err = cudaMalloc(&data, size * sizeof(T));
2121
if (err != cudaSuccess)
22-
{
23-
throw std::runtime_error("Failed to allocate Unified Memory");
22+
{
23+
throwcudaerror("Failed to cudaMalloc "+std::to_string(size) +" "+ precision_str(precision<T>()),err);
2424
}
2525
return data;
2626
}

excuter/op-mem-cuda/src/deepx/tf/elementwise_basic.hpp

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1026,6 +1026,9 @@ namespace deepx::tf
10261026
case Precision::Int8:
10271027
tensorfunc::invert<Author>(*mem->gettensor<int8_t>(this->args[0].textvalue), *mem->gettensor<int8_t>(this->returns[0].textvalue));
10281028
break;
1029+
case Precision::Bool:
1030+
tensorfunc::invert<Author>(*mem->gettensor<bool>(this->args[0].textvalue), *mem->gettensor<bool>(this->returns[0].textvalue));
1031+
break;
10291032
default:
10301033
error = "Unsupported dtype: " + precision_str(a_type);
10311034
return 1;

excuter/op-mem-cuda/src/deepx/tf/elementwise_compare.hpp

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -636,7 +636,7 @@ namespace deepx::tf
636636
{
637637
Precision a_type = mem->gettensor(this->args[0].textvalue).get()->shape.dtype;
638638
Precision mask_type = mem->gettensor(this->returns[0].textvalue).get()->shape.dtype;
639-
if (a_type != mask_type || mask_type != Precision::Bool)
639+
if (mask_type != Precision::Bool)
640640
{
641641
error = "Type mismatch: " + precision_str(a_type) + " != " + precision_str(mask_type);
642642
return 1;
@@ -769,7 +769,7 @@ namespace deepx::tf
769769
{
770770
Precision a_type = mem->gettensor(this->args[0].textvalue).get()->shape.dtype;
771771
Precision mask_type = mem->gettensor(this->returns[0].textvalue).get()->shape.dtype;
772-
if (a_type != mask_type || mask_type != Precision::Bool)
772+
if (mask_type != Precision::Bool)
773773
{
774774
error = "Type mismatch: " + precision_str(a_type) + " != " + precision_str(mask_type);
775775
return 1;
@@ -916,7 +916,7 @@ namespace deepx::tf
916916
}
917917
else
918918
{
919-
tensorfunc::Switch<Author, int8_t,int32_t>(mem->gettensors<int8_t>(this->getvector<string>(0)), *mem->gettensor<int32_t>(this->args[1].textvalue), *mem->gettensor<int8_t>(this->returns[0].textvalue));
919+
tensorfunc::Switch<Author, int8_t,int32_t>(mem->gettensors<int8_t>(this->getvector<string>(0)), *mem->gettensor<int32_t>(this->args[1].textvalue), *mem->gettensor<int8_t>(this->returns[0].textvalue));
920920
}
921921
break;
922922
case Precision::Bool:

excuter/op-mem-ompsimd/src/client/tfs.cpp

Lines changed: 29 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -299,11 +299,11 @@ namespace deepx::tf
299299
// invert author=miaobyte
300300
tffactory.add_tf(std::make_shared<Invert<miaobyte>>(vector<Param>(
301301
{
302-
Param("A", DataCategory::Tensor, Precision::Int64 | Precision::Int32 | Precision::Int16 | Precision::Int8),
302+
Param("A", DataCategory::Tensor, Precision::Int64 | Precision::Int32 | Precision::Int16 | Precision::Int8|Precision::Bool),
303303
}),
304304
vector<Param>(
305305
{
306-
Param("C", DataCategory::Tensor, Precision::Int64 | Precision::Int32 | Precision::Int16 | Precision::Int8),
306+
Param("C", DataCategory::Tensor, Precision::Int64 | Precision::Int32 | Precision::Int16 | Precision::Int8|Precision::Bool),
307307
})));
308308
// sqrt author=miaobyte
309309
tffactory.add_tf(std::make_shared<Sqrt<miaobyte>>(vector<Param>(
@@ -364,6 +364,33 @@ namespace deepx::tf
364364
{
365365
Param("C", DataCategory::Tensor, Precision::Any),
366366
})));
367+
// sin author=miaobyte
368+
tffactory.add_tf(std::make_shared<Sin<miaobyte>>(vector<Param>(
369+
{
370+
Param("A", DataCategory::Tensor, Precision::Any),
371+
}),
372+
vector<Param>(
373+
{
374+
Param("C", DataCategory::Tensor, Precision::Any),
375+
})));
376+
// cos author=miaobyte
377+
tffactory.add_tf(std::make_shared<Cos<miaobyte>>(vector<Param>(
378+
{
379+
Param("A", DataCategory::Tensor, Precision::Any),
380+
}),
381+
vector<Param>(
382+
{
383+
Param("C", DataCategory::Tensor, Precision::Any),
384+
})));
385+
// tan author=miaobyte
386+
tffactory.add_tf(std::make_shared<Tan<miaobyte>>(vector<Param>(
387+
{
388+
Param("A", DataCategory::Tensor, Precision::Any),
389+
}),
390+
vector<Param>(
391+
{
392+
Param("C", DataCategory::Tensor, Precision::Any),
393+
})));
367394
// max author=miaobyte
368395
tffactory.add_tf(std::make_shared<Max<miaobyte>>(vector<Param>(
369396
{

0 commit comments

Comments
 (0)