Skip to content

Commit fc6c377

Browse files
authored
front&excuter:联合调试mul,mulscalar,div,divscalar,rdivscalar (#10)
1 parent 510a09f commit fc6c377

13 files changed

Lines changed: 1414 additions & 68 deletions

File tree

doc/excuter/op-mem-cuda/list.md

Lines changed: 17 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -4,18 +4,23 @@
44

55
| Operation | Author | Func Def | Math Formula | IR Instruction |
66
|-----------|--------|------------|--------------|----------------|
7-
| addscalar | miaobyte | addscalar(tensor<any> A, var<any> b)->(tensor<any> C) | T3=T1+scalar | addscalar(tensor<any> A, var<any> b)->(tensor<any> C) |
8-
| add | cublas | add(tensor<any> a, tensor<any> b)->(tensor<any> c) | T3=T1+T2 | add(tensor<any> a, tensor<any> b)->(tensor<any> c) |
9-
| add | miaobyte | add(tensor<any> a, tensor<any> b)->(tensor<any> c) | T3=T1+T2 | add(tensor<any> a, tensor<any> b)->(tensor<any> c) |
10-
| uniform | miaobyte | uniform(tensor<any> t, var<any> low, var<any> high, var<int32> seed)->() | uniform(T1,low,high,seed) | uniform(tensor<any> t, var<any> low, var<any> high, var<int32> seed)->() |
11-
| subscalar | miaobyte | subscalar(tensor<any> A, var<any> b)->(tensor<any> C) | T3=T1-scalar | subscalar(tensor<any> A, var<any> b)->(tensor<any> C) |
12-
| arange | miaobyte | arange(tensor<any> t, var<any> start, var<any> step)->() | arange(T1,start,step) | arange(tensor<any> t, var<any> start, var<any> step)->() |
13-
| constant | miaobyte | constant(tensor<any> t, var<any> value)->() | constant(T1) | constant(tensor<any> t, var<any> value)->() |
14-
| print | miaobyte | print(tensor<any> )->() | print(T1) | print(tensor<any> )->() |
15-
| print | miaobyte | print(tensor<any> , var<string> )->() | print(T1) | print(tensor<any> , var<string> )->() |
16-
| newtensor | none | newtensor(vector<int32> shape)->(tensor<any> tensor1) | T1 = zeros(shape) | newtensor(vector<int32> shape)->(tensor<any> tensor1) |
17-
| newtensor | none | newtensor(var<string> shape)->(tensor<any> tensor1) | T1 = zeros(shape) | newtensor(var<string> shape)->(tensor<any> tensor1) |
18-
| vecset | none | vecset(vector<any> value)->(vector<any> name) | shape = [3 4 5] | vecset(vector<any> value)->(vector<any> name) |
197
| matmul | cublas | matmul(tensor<any> A, tensor<any> B)->(tensor<any> C) | T3=T1 @ T2 | matmul(tensor<any> A, tensor<any> B)->(tensor<any> C) |
8+
| rdivscalar | miaobyte | rdivscalar(var<any> scalar, tensor<any> A)->(tensor<any> C) | T3=scalar/T1 | rdivscalar(var<any> scalar, tensor<any> A)->(tensor<any> C) |
9+
| div | miaobyte | div(tensor<any> A, tensor<any> B)->(tensor<any> C) | T3=T1/T2 | div(tensor<any> A, tensor<any> B)->(tensor<any> C) |
2010
| sub | miaobyte | sub(tensor<any> A, tensor<any> B)->(tensor<any> C) | T3=T1-T2 | sub(tensor<any> A, tensor<any> B)->(tensor<any> C) |
2111
| argset | none | argset(var<any> value)->(var<any> name) | var argname = argvalue | argset(var<any> value)->(var<any> name) |
12+
| mulscalar | miaobyte | mulscalar(tensor<any> A, var<any> b)->(tensor<any> C) | T3=T1*scalar | mulscalar(tensor<any> A, var<any> b)->(tensor<any> C) |
13+
| vecset | none | vecset(vector<any> value)->(vector<any> name) | shape = [3 4 5] | vecset(vector<any> value)->(vector<any> name) |
14+
| newtensor | none | newtensor(vector<int32> shape)->(tensor<any> tensor1) | T1 = zeros(shape) | newtensor(vector<int32> shape)->(tensor<any> tensor1) |
15+
| newtensor | none | newtensor(var<string> shape)->(tensor<any> tensor1) | T1 = zeros(shape) | newtensor(var<string> shape)->(tensor<any> tensor1) |
16+
| print | miaobyte | print(tensor<any> )->() | print(T1) | print(tensor<any> )->() |
17+
| print | miaobyte | print(tensor<any> , var<string> )->() | print(T1) | print(tensor<any> , var<string> )->() |
18+
| divscalar | miaobyte | divscalar(tensor<any> A, var<any> scalar)->(tensor<any> C) | T3=T1/scalar | divscalar(tensor<any> A, var<any> scalar)->(tensor<any> C) |
19+
| constant | miaobyte | constant(tensor<any> t, var<any> value)->() | constant(T1,value) | constant(tensor<any> t, var<any> value)->() |
20+
| arange | miaobyte | arange(tensor<any> t, var<any> start, var<any> step)->() | arange(T1,start,step) | arange(tensor<any> t, var<any> start, var<any> step)->() |
21+
| subscalar | miaobyte | subscalar(tensor<any> A, var<any> b)->(tensor<any> C) | T3=T1-scalar | subscalar(tensor<any> A, var<any> b)->(tensor<any> C) |
22+
| uniform | miaobyte | uniform(tensor<any> t, var<any> low, var<any> high, var<int32> seed)->() | uniform(T1,low,high,seed) | uniform(tensor<any> t, var<any> low, var<any> high, var<int32> seed)->() |
23+
| add | cublas | add(tensor<any> a, tensor<any> b)->(tensor<any> c) | T3=T1+T2 | add(tensor<any> a, tensor<any> b)->(tensor<any> c) |
24+
| add | miaobyte | add(tensor<any> a, tensor<any> b)->(tensor<any> c) | T3=T1+T2 | add(tensor<any> a, tensor<any> b)->(tensor<any> c) |
25+
| addscalar | miaobyte | addscalar(tensor<any> A, var<any> b)->(tensor<any> C) | T3=T1+scalar | addscalar(tensor<any> A, var<any> b)->(tensor<any> C) |
26+
| mul | miaobyte | mul(tensor<any> A, tensor<any> B)->(tensor<any> C) | T3=T1*T2 | mul(tensor<any> A, tensor<any> B)->(tensor<any> C) |

doc/excuter/op-mem-ompsimd/list.md

Lines changed: 17 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -5,19 +5,24 @@
55
| Operation | Author | Func Def | Math Formula | IR Instruction |
66
|-----------|--------|------------|--------------|----------------|
77
| concat | none | concat()->() | Tresult = concat([T1, T2...], axis=3) | concat()->() |
8-
| addscalar | miaobyte | addscalar(tensor<any> a, var<any> scalar)->(tensor<any> c) | T3=T1+scalar | addscalar(tensor<any> a, var<any> scalar)->(tensor<any> c) |
9-
| add | cblas | add(tensor<float64|float32> a, tensor<float64|float32> b)->(tensor<float64|float32> c) | T3=T1+T2 | add(tensor<float64|float32> a, tensor<float64|float32> b)->(tensor<float64|float32> c) |
10-
| add | miaobyte | add(tensor<any> a, tensor<any> b)->(tensor<any> c) | T3=T1+T2 | add(tensor<any> a, tensor<any> b)->(tensor<any> c) |
11-
| uniform | miaobyte | uniform(tensor<any> t, var<any> low, var<any> high, var<int32> seed)->() | uniform(T1,low,high,seed) | uniform(tensor<any> t, var<any> low, var<any> high, var<int32> seed)->() |
12-
| subscalar | miaobyte | subscalar(tensor<any> a, var<any> scalar)->(tensor<any> c) | T3=T1-scalar | subscalar(tensor<any> a, var<any> scalar)->(tensor<any> c) |
13-
| arange | miaobyte | arange(tensor<any> t, var<any> start, var<any> step)->() | arange(T1,start,step) | arange(tensor<any> t, var<any> start, var<any> step)->() |
14-
| constant | miaobyte | constant(tensor<any> t, var<any> value)->() | constant(T1,value) | constant(tensor<any> t, var<any> value)->() |
15-
| print | miaobyte | print(tensor<any> )->() | print(T1) | print(tensor<any> )->() |
16-
| print | miaobyte | print(tensor<any> , var<string> )->() | print(T1) | print(tensor<any> , var<string> )->() |
17-
| newtensor | none | newtensor(vector<int32> shape)->(tensor<any> tensor1) | T1 =Tensor(shape=[...]) | newtensor(vector<int32> shape)->(tensor<any> tensor1) |
18-
| newtensor | none | newtensor(var<string> shape)->(tensor<any> tensor1) | T1 =Tensor(shape=[...]) | newtensor(var<string> shape)->(tensor<any> tensor1) |
19-
| vecset | none | vecset(vector<any> value)->(vector<any> name) | shape = [3 4 5] | vecset(vector<any> value)->(vector<any> name) |
208
| matmul | cblas | matmul(tensor<float64|float32> A, tensor<float64|float32> B)->(tensor<float64|float32> C) | T3=T1 @ T2 | matmul(tensor<float64|float32> A, tensor<float64|float32> B)->(tensor<float64|float32> C) |
219
| matmul | miaobyte | matmul(tensor<any> A, tensor<any> B)->(tensor<any> C) | T3=T1 @ T2 | matmul(tensor<any> A, tensor<any> B)->(tensor<any> C) |
10+
| rdivscalar | miaobyte | rdivscalar(var<any> scalar, tensor<any> A)->(tensor<any> C) | T3=scalar/T1 | rdivscalar(var<any> scalar, tensor<any> A)->(tensor<any> C) |
11+
| div | miaobyte | div(tensor<any> A, tensor<any> B)->(tensor<any> C) | T3=T1/T2 | div(tensor<any> A, tensor<any> B)->(tensor<any> C) |
2212
| sub | miaobyte | sub(tensor<any> a, tensor<any> b)->(tensor<any> c) | T3=T1-T2 | sub(tensor<any> a, tensor<any> b)->(tensor<any> c) |
2313
| argset | none | argset(var<any> value)->(var<any> name) | var argname = argvalue | argset(var<any> value)->(var<any> name) |
14+
| mulscalar | miaobyte | mulscalar(tensor<any> A, var<any> b)->(tensor<any> C) | T3=T1*scalar | mulscalar(tensor<any> A, var<any> b)->(tensor<any> C) |
15+
| vecset | none | vecset(vector<any> value)->(vector<any> name) | shape = [3 4 5] | vecset(vector<any> value)->(vector<any> name) |
16+
| newtensor | none | newtensor(vector<int32> shape)->(tensor<any> tensor1) | T1 =Tensor(shape=[...]) | newtensor(vector<int32> shape)->(tensor<any> tensor1) |
17+
| newtensor | none | newtensor(var<string> shape)->(tensor<any> tensor1) | T1 =Tensor(shape=[...]) | newtensor(var<string> shape)->(tensor<any> tensor1) |
18+
| print | miaobyte | print(tensor<any> )->() | print(T1) | print(tensor<any> )->() |
19+
| print | miaobyte | print(tensor<any> , var<string> )->() | print(T1) | print(tensor<any> , var<string> )->() |
20+
| divscalar | miaobyte | divscalar(tensor<any> A, var<any> scalar)->(tensor<any> C) | T3=T1/scalar | divscalar(tensor<any> A, var<any> scalar)->(tensor<any> C) |
21+
| constant | miaobyte | constant(tensor<any> t, var<any> value)->() | constant(T1,value) | constant(tensor<any> t, var<any> value)->() |
22+
| arange | miaobyte | arange(tensor<any> t, var<any> start, var<any> step)->() | arange(T1,start,step) | arange(tensor<any> t, var<any> start, var<any> step)->() |
23+
| subscalar | miaobyte | subscalar(tensor<any> a, var<any> scalar)->(tensor<any> c) | T3=T1-scalar | subscalar(tensor<any> a, var<any> scalar)->(tensor<any> c) |
24+
| uniform | miaobyte | uniform(tensor<any> t, var<any> low, var<any> high, var<int32> seed)->() | uniform(T1,low,high,seed) | uniform(tensor<any> t, var<any> low, var<any> high, var<int32> seed)->() |
25+
| add | cblas | add(tensor<float64|float32> a, tensor<float64|float32> b)->(tensor<float64|float32> c) | T3=T1+T2 | add(tensor<float64|float32> a, tensor<float64|float32> b)->(tensor<float64|float32> c) |
26+
| add | miaobyte | add(tensor<any> a, tensor<any> b)->(tensor<any> c) | T3=T1+T2 | add(tensor<any> a, tensor<any> b)->(tensor<any> c) |
27+
| addscalar | miaobyte | addscalar(tensor<any> a, var<any> scalar)->(tensor<any> c) | T3=T1+scalar | addscalar(tensor<any> a, var<any> scalar)->(tensor<any> c) |
28+
| mul | miaobyte | mul(tensor<any> A, tensor<any> B)->(tensor<any> C) | T3=T1*T2 | mul(tensor<any> A, tensor<any> B)->(tensor<any> C) |

excuter/cpp-common/src/deepx/tensorfunc/elementwise.hpp

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -150,13 +150,13 @@ namespace deepx::tensorfunc
150150
template <typename Author, typename T>
151151
struct rdivscalarDispatcher
152152
{
153-
static void rdivscalar(const Tensor<T> &input, const T value, Tensor<T> &output) = delete;
153+
static void rdivscalar(const T value, const Tensor<T> &input, Tensor<T> &output) = delete;
154154
};
155155

156156
template <typename Author, typename T>
157-
void rdivscalar(const Tensor<T> &input, const T value, Tensor<T> &output)
157+
void rdivscalar(const T value, const Tensor<T> &input, Tensor<T> &output)
158158
{
159-
rdivscalarDispatcher<Author, T>::rdivscalar(input, value, output);
159+
rdivscalarDispatcher<Author, T>::rdivscalar(value, input, output);
160160
}
161161

162162
template <typename Author, typename T>

excuter/op-mem-cuda/src/client/tfs.cpp

Lines changed: 46 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -143,24 +143,52 @@ namespace deepx::tf
143143
{
144144
Param("C", DataCategory::Tensor, Precision::Any),
145145
})));
146-
147-
// opfactory.add_op(Sub_cblas<float>());
148-
// opfactory.add_op(Sub_cblas<double>());
149-
150-
// opfactory.add_op(Mul_miaobyte<float>());
151-
// opfactory.add_op(Mul_miaobyte<double>());
152-
153-
// opfactory.add_op(Mulscalar_miaobyte<float>());
154-
// opfactory.add_op(Mulscalar_miaobyte<double>());
155-
156-
// opfactory.add_op(Div_miaobyte<float>());
157-
// opfactory.add_op(Div_miaobyte<double>());
158-
159-
// opfactory.add_op(Divscalar_miaobyte<float>());
160-
// opfactory.add_op(Divscalar_miaobyte<double>());
161-
162-
// opfactory.add_op(RDivscalar_miaobyte<float>());
163-
// opfactory.add_op(RDivscalar_miaobyte<double>());
146+
tffactory.add_tf(std::make_shared<Mul<miaobyte>>(vector<Param>(
147+
{
148+
Param("A", DataCategory::Tensor, Precision::Any),
149+
Param("B", DataCategory::Tensor, Precision::Any),
150+
}),
151+
vector<Param>(
152+
{
153+
Param("C", DataCategory::Tensor, Precision::Any),
154+
})));
155+
tffactory.add_tf(std::make_shared<MulScalar<miaobyte>>(vector<Param>(
156+
{
157+
Param("A", DataCategory::Tensor, Precision::Any),
158+
Param("b", DataCategory::Var, Precision::Any),
159+
}),
160+
vector<Param>(
161+
{
162+
Param("C", DataCategory::Tensor, Precision::Any),
163+
})));
164+
tffactory.add_tf(std::make_shared<Div<miaobyte>>(vector<Param>(
165+
{
166+
Param("A", DataCategory::Tensor, Precision::Any),
167+
Param("B", DataCategory::Tensor, Precision::Any),
168+
}),
169+
vector<Param>(
170+
{
171+
Param("C", DataCategory::Tensor, Precision::Any),
172+
})));
173+
tffactory.add_tf(std::make_shared<DivScalar<miaobyte>>(vector<Param>(
174+
{
175+
Param("A", DataCategory::Tensor, Precision::Any),
176+
Param("scalar", DataCategory::Var, Precision::Any),
177+
}),
178+
vector<Param>(
179+
{
180+
Param("C", DataCategory::Tensor, Precision::Any),
181+
})));
182+
tffactory.add_tf(std::make_shared<RDivScalar<miaobyte>>(vector<Param>(
183+
{
184+
Param("scalar", DataCategory::Var, Precision::Any),
185+
Param("A", DataCategory::Tensor, Precision::Any),
186+
}),
187+
vector<Param>(
188+
{
189+
Param("C", DataCategory::Tensor, Precision::Any),
190+
})));
191+
164192

165193
// opfactory.add_op(Sqrt_miaobyte<float>());
166194
// opfactory.add_op(Sqrt_miaobyte<double>());

0 commit comments

Comments
 (0)