Skip to content

Commit bd6e242

Browse files
committed
cuda:load,save,gather,编译通过,测试有点异常。
1 parent 7ce58e1 commit bd6e242

18 files changed

Lines changed: 630 additions & 209 deletions

File tree

deepxctl/.gitignore

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1 +1,2 @@
1-
.idea
1+
.idea
2+
deepxctl

doc/excuter/op-mem-cuda/list.md

Lines changed: 16 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -9,13 +9,6 @@
99
| vecset | none | vecset(vector<any> value)->(vector<any> name) | shape = [3 4 5] | vecset(vector<any> value)->(vector<any> name) |
1010
| argset | none | argset(var<any> value)->(var<any> name) | var argname = argvalue | argset(var<any> value)->(var<any> name) |
1111

12-
### io
13-
14-
| Operation | Author | Func Def | Math Formula | IR Instruction |
15-
|-----------|--------|------------|--------------|----------------|
16-
| print | miaobyte | print(tensor<any> )->() | print(T1) | print(tensor<any> )->() |
17-
| print | miaobyte | print(tensor<any> , var<string> )->() | print(T1) | print(tensor<any> , var<string> )->() |
18-
1912
### tensorlife
2013

2114
| Operation | Author | Func Def | Math Formula | IR Instruction |
@@ -25,13 +18,22 @@
2518
| newtensor | none | newtensor(var<string> shape)->(tensor<any> tensor1) | T1 = zeros(shape) | newtensor(var<string> shape)->(tensor<any> tensor1) |
2619
| deltensor | none | deltensor(tensor<any> t)->() | del T1 | deltensor(tensor<any> t)->() |
2720

21+
### io
22+
23+
| Operation | Author | Func Def | Math Formula | IR Instruction |
24+
|-----------|--------|------------|--------------|----------------|
25+
| save | none | save(tensor<any> t, var<string> path)->() | save(T1,path) | save(tensor<any> t, var<string> path)->() |
26+
| print | miaobyte | print(tensor<any> t)->() | print(T1) | print(tensor<any> t)->() |
27+
| print | miaobyte | print(tensor<any> t, var<string> format)->() | print(T1,format) | print(tensor<any> t, var<string> format)->() |
28+
| load | none | load(var<string> path)->() | load(path) | load(var<string> path)->() |
29+
2830
### init
2931

3032
| Operation | Author | Func Def | Math Formula | IR Instruction |
3133
|-----------|--------|------------|--------------|----------------|
34+
| arange | miaobyte | arange(tensor<any> t, var<any> start, var<any> step)->() | arange(T1,start,step) | arange(tensor<any> t, var<any> start, var<any> step)->() |
3235
| normal | miaobyte | normal(tensor<any> t, var<any> mean, var<any> stddev, var<int32> seed)->() | normal(T1,mean,stddev,seed) | normal(tensor<any> t, var<any> mean, var<any> stddev, var<int32> seed)->() |
3336
| uniform | miaobyte | uniform(tensor<any> t, var<any> low, var<any> high, var<int32> seed)->() | uniform(T1,low,high,seed) | uniform(tensor<any> t, var<any> low, var<any> high, var<int32> seed)->() |
34-
| arange | miaobyte | arange(tensor<any> t, var<any> start, var<any> step)->() | arange(T1,start,step) | arange(tensor<any> t, var<any> start, var<any> step)->() |
3537
| constant | miaobyte | constant(tensor<any> t, var<any> value)->() | constant(T1,value) | constant(tensor<any> t, var<any> value)->() |
3638

3739
### elementwise
@@ -43,19 +45,21 @@
4345
| equalscalar | miaobyte | equalscalar(tensor<any> A, var<any> scalar, var<float64> epsilon)->(tensor<bool> mask) | mask=compare(T1, scalar) | equalscalar(tensor<any> A, var<any> scalar, var<float64> epsilon)->(tensor<bool> mask) |
4446
| min | miaobyte | min(tensor<any> A, tensor<any> B)->(tensor<any> C) | T3=min(T1, T2) | min(tensor<any> A, tensor<any> B)->(tensor<any> C) |
4547
| maxscalar | miaobyte | maxscalar(tensor<any> A, var<any> scalar)->(tensor<any> C) | T3=max(T1, scalar) | maxscalar(tensor<any> A, var<any> scalar)->(tensor<any> C) |
46-
| addscalar | miaobyte | addscalar(tensor<any> A, var<any> b)->(tensor<any> C) | T3=T1+scalar | addscalar(tensor<any> A, var<any> b)->(tensor<any> C) |
47-
| log | miaobyte | log(tensor<float64|float32|float16|bfloat16> A)->(tensor<float64|float32|float16|bfloat16> C) | T3=log(T1) | log(tensor<float64|float32|float16|bfloat16> A)->(tensor<float64|float32|float16|bfloat16> C) |
48+
| tan | miaobyte | tan(tensor<float64|float32> A)->(tensor<float64|float32> C) | T3=tan(T1) | tan(tensor<float64|float32> A)->(tensor<float64|float32> C) |
4849
| divscalar | miaobyte | divscalar(tensor<any> A, var<any> scalar)->(tensor<any> C) | T3=T1/scalar | divscalar(tensor<any> A, var<any> scalar)->(tensor<any> C) |
4950
| sin | miaobyte | sin(tensor<float64|float32|float16|bfloat16> A)->(tensor<float64|float32|float16|bfloat16> C) | T3=sin(T1) | sin(tensor<float64|float32|float16|bfloat16> A)->(tensor<float64|float32|float16|bfloat16> C) |
50-
| tan | miaobyte | tan(tensor<float64|float32> A)->(tensor<float64|float32> C) | T3=tan(T1) | tan(tensor<float64|float32> A)->(tensor<float64|float32> C) |
5151
| add | cublas | add(tensor<any> a, tensor<any> b)->(tensor<any> c) | T3=T1+T2 | add(tensor<any> a, tensor<any> b)->(tensor<any> c) |
5252
| add | miaobyte | add(tensor<any> a, tensor<any> b)->(tensor<any> c) | T3=T1+T2 | add(tensor<any> a, tensor<any> b)->(tensor<any> c) |
5353
| greater | miaobyte | greater(tensor<any> A, tensor<any> B)->(tensor<bool> mask) | mask=compare(T1, T2) | greater(tensor<any> A, tensor<any> B)->(tensor<bool> mask) |
54+
| lessscalar | miaobyte | lessscalar(tensor<any> A, var<any> scalar)->(tensor<bool> mask) | mask=compare(T1, scalar) | lessscalar(tensor<any> A, var<any> scalar)->(tensor<bool> mask) |
55+
| cos | miaobyte | cos(tensor<float64|float32|float16|bfloat16> A)->(tensor<float64|float32|float16|bfloat16> C) | T3=cos(T1) | cos(tensor<float64|float32|float16|bfloat16> A)->(tensor<float64|float32|float16|bfloat16> C) |
5456
| less | miaobyte | less(tensor<any> A, tensor<any> B)->(tensor<bool> mask) | mask=compare(T1, T2) | less(tensor<any> A, tensor<any> B)->(tensor<bool> mask) |
5557
| powscalar | miaobyte | powscalar(tensor<float64|float32> A, var<float64|int32> scalar)->(tensor<float64|float32> C) | T3=pow(T1, scalar) | powscalar(tensor<float64|float32> A, var<float64|int32> scalar)->(tensor<float64|float32> C) |
5658
| minscalar | miaobyte | minscalar(tensor<any> A, var<any> scalar)->(tensor<any> C) | T3=min(T1, scalar) | minscalar(tensor<any> A, var<any> scalar)->(tensor<any> C) |
5759
| rdivscalar | miaobyte | rdivscalar(var<any> scalar, tensor<any> A)->(tensor<any> C) | T3=scalar/T1 | rdivscalar(var<any> scalar, tensor<any> A)->(tensor<any> C) |
5860
| rpowscalar | miaobyte | rpowscalar(var<float64|int32> scalar, tensor<float64|float32> A)->(tensor<float64|float32> C) | T3=pow(scalar, T1) | rpowscalar(var<float64|int32> scalar, tensor<float64|float32> A)->(tensor<float64|float32> C) |
61+
| log | miaobyte | log(tensor<float64|float32|float16|bfloat16> A)->(tensor<float64|float32|float16|bfloat16> C) | T3=log(T1) | log(tensor<float64|float32|float16|bfloat16> A)->(tensor<float64|float32|float16|bfloat16> C) |
62+
| addscalar | miaobyte | addscalar(tensor<any> A, var<any> b)->(tensor<any> C) | T3=T1+scalar | addscalar(tensor<any> A, var<any> b)->(tensor<any> C) |
5963
| sub | miaobyte | sub(tensor<any> A, tensor<any> B)->(tensor<any> C) | T3=T1-T2 | sub(tensor<any> A, tensor<any> B)->(tensor<any> C) |
6064
| sqrt | miaobyte | sqrt(tensor<float64|float32|float16|bfloat16> A)->(tensor<float64|float32|float16|bfloat16> C) | T3=sqrt(T1) | sqrt(tensor<float64|float32|float16|bfloat16> A)->(tensor<float64|float32|float16|bfloat16> C) |
6165
| subscalar | miaobyte | subscalar(tensor<any> A, var<any> b)->(tensor<any> C) | T3=T1-scalar | subscalar(tensor<any> A, var<any> b)->(tensor<any> C) |
@@ -67,8 +71,6 @@
6771
| pow | miaobyte | pow(tensor<float64|float32> A, tensor<float64|float32> B)->(tensor<float64|float32> C) | T3=pow(T1, T2) | pow(tensor<float64|float32> A, tensor<float64|float32> B)->(tensor<float64|float32> C) |
6872
| mul | miaobyte | mul(tensor<any> A, tensor<any> B)->(tensor<any> C) | T3=T1*T2 | mul(tensor<any> A, tensor<any> B)->(tensor<any> C) |
6973
| exp | miaobyte | exp(tensor<float64|float32|float16|bfloat16> A)->(tensor<float64|float32|float16|bfloat16> C) | T3=exp(T1) | exp(tensor<float64|float32|float16|bfloat16> A)->(tensor<float64|float32|float16|bfloat16> C) |
70-
| lessscalar | miaobyte | lessscalar(tensor<any> A, var<any> scalar)->(tensor<bool> mask) | mask=compare(T1, scalar) | lessscalar(tensor<any> A, var<any> scalar)->(tensor<bool> mask) |
71-
| cos | miaobyte | cos(tensor<float64|float32|float16|bfloat16> A)->(tensor<float64|float32|float16|bfloat16> C) | T3=cos(T1) | cos(tensor<float64|float32|float16|bfloat16> A)->(tensor<float64|float32|float16|bfloat16> C) |
7274

7375
### matmul
7476

@@ -80,6 +82,7 @@
8082

8183
| Operation | Author | Func Def | Math Formula | IR Instruction |
8284
|-----------|--------|------------|--------------|----------------|
85+
| gather | miaobyte | gather(tensor<any> A, tensor<int64|int32> indices, var<int32> axis)->(tensor<any> B) | T2 = T1.gather(indices=[1,2], axis=1) | gather(tensor<any> A, tensor<int64|int32> indices, var<int32> axis)->(tensor<any> B) |
8386
| broadcastTo | miaobyte | broadcastTo(tensor<any> A, vector<int32> new_shape)->(tensor<any> B) | T2 = T1.broadcastTo(new_shape=[4,3,2]) | broadcastTo(tensor<any> A, vector<int32> new_shape)->(tensor<any> B) |
8487
| concat | miaobyte | concat(listtensor<any> tensors, var<int32> dim)->(tensor<any> result) | Tresult = concat([T1, T2...], axis=3) | concat(listtensor<any> tensors, var<int32> dim)->(tensor<any> result) |
8588
| transpose | miaobyte | transpose(tensor<any> A, vector<int32> dim_order)->(tensor<any> C) | T2 = T1.transpose(dimorder=[1,0]) | transpose(tensor<any> A, vector<int32> dim_order)->(tensor<any> C) |

excuter/cpp-common/src/deepx/tensorfunc/io.hpp

Lines changed: 12 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22
#define DEEPX_TENSORFUNC_IO_HPP
33

44
#include "deepx/tensor.hpp"
5+
#include "stdutil/fs.hpp"
56

67
namespace deepx::tensorfunc{
78

@@ -21,7 +22,17 @@ namespace deepx::tensorfunc{
2122
template <typename T>
2223
pair<std::string,shared_ptr<Tensor<T>>> load(const std::string &path);
2324

24-
pair<std::string,Shape> loadShape(const std::string &path);
25+
/// Reads the shape description that accompanies a saved tensor.
///
/// The shape file is looked up at `path + ".shape"`, read in full, and handed
/// to Shape::fromYaml. The returned name is the result of
/// stdutil::filename(path) truncated at its last '.' (kept whole when it
/// contains no '.').
///
/// @param path  Base path of the saved tensor, without the ".shape" suffix.
/// @return      Pair of {tensor name, parsed shape}.
/// @throws std::ios_base::failure if the shape file cannot be opened.
inline pair<std::string,Shape> loadShape(const std::string &path)
{
    const std::string shapepath = path + ".shape";
    std::ifstream shape_fs(shapepath, std::ios::binary);
    if (!shape_fs.is_open())
    {
        // Fail loudly: without this check a missing file would silently feed
        // an empty document to Shape::fromYaml.
        throw std::ios_base::failure("loadShape: cannot open shape file: " + shapepath);
    }

    // Slurp the whole file; the extra parentheses avoid the most vexing parse.
    std::string shapedata((std::istreambuf_iterator<char>(shape_fs)), std::istreambuf_iterator<char>());

    Shape shape;
    shape.fromYaml(shapedata);

    // NOTE(review): truncating at the last '.' also shortens names that
    // legitimately contain dots (e.g. "layer.0.weight") when `path` carries no
    // extension - confirm against what save() passes as `path`.
    const std::string filename = stdutil::filename(path);
    const std::string tensor_name = filename.substr(0, filename.find_last_of('.'));
    return std::make_pair(tensor_name, shape);
}
2536

2637
}
2738

excuter/op-mem-cuda/CMakeLists.txt

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -40,7 +40,10 @@ include_directories(${CUDAToolkit_INCLUDE_DIRS})
4040
set(CMAKE_CUDA_STANDARD 17)
4141
set(CMAKE_CUDA_ARCHITECTURES 75) # 根据您的 GPU 计算能力进行调整
4242
set(CMAKE_CUDA_SEPARABLE_COMPILATION ON) #确保 CMake 能够正确识别 CUDA 文件并将其编译为目标
43-
set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} --ptxas-options=-v")
43+
44+
# 设置 CUDA 编译选项
45+
# 是否开启PTX 汇编展示
46+
# set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} --ptxas-options=-v")
4447

4548
find_package(yaml-cpp REQUIRED)
4649

excuter/op-mem-cuda/src/client/tfs.cpp

Lines changed: 29 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -107,16 +107,30 @@ namespace deepx::tf
107107
{
108108
opfactory.add_tf(std::make_shared<Print<miaobyte>>(vector<Param>(
109109
{
110-
Param("", DataCategory::Tensor, Precision::Any),
110+
Param("t", DataCategory::Tensor, Precision::Any),
111111
}),
112112
vector<Param>()));
113113

114114
opfactory.add_tf(std::make_shared<Print<miaobyte>>(vector<Param>(
115115
{
116-
Param("", DataCategory::Tensor, Precision::Any),
117-
Param("", DataCategory::Var, Precision::String),
116+
Param("t", DataCategory::Tensor, Precision::Any),
117+
Param("format", DataCategory::Var, Precision::String),
118118
}),
119119
vector<Param>()));
120+
121+
opfactory.add_tf(std::make_shared<Save>(vector<Param>(
122+
{
123+
Param("t", DataCategory::Tensor, Precision::Any),
124+
Param("path", DataCategory::Var, Precision::String),
125+
}),
126+
vector<Param>()));
127+
128+
opfactory.add_tf(std::make_shared<Load>(vector<Param>(
129+
{
130+
Param("path", DataCategory::Var, Precision::String),
131+
}),
132+
vector<Param>()));
133+
120134
}
121135

122136
// elementwise
@@ -422,7 +436,7 @@ namespace deepx::tf
422436
Param("C", DataCategory::Tensor, Precision::Any),
423437
})));
424438
}
425-
// // changeshape
439+
// changeshape
426440
void register_changeshape(TfFactory &tffactory)
427441
{
428442
// reshape
@@ -465,6 +479,17 @@ namespace deepx::tf
465479
{
466480
Param("B", DataCategory::Tensor, Precision::Any),
467481
})));
482+
// gather
483+
tffactory.add_tf(std::make_shared<Gather<miaobyte>>(vector<Param>(
484+
{
485+
Param("A", DataCategory::Tensor, Precision::Any),
486+
Param("indices", DataCategory::Tensor, Precision::Int64|Precision::Int32),
487+
Param("axis", DataCategory::Var, Precision::Int32),
488+
}),
489+
vector<Param>(
490+
{
491+
Param("B", DataCategory::Tensor, Precision::Any),
492+
})));
468493
}
469494
// reduce
470495
void register_reduce(TfFactory &tffactory)

excuter/op-mem-cuda/src/deepx/mem/mem_cuda.hpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -52,7 +52,7 @@ namespace deepx::mem
5252
auto ptr = mem.at(name);
5353
auto result = make_shared<Tensor<void>>();
5454
result->shape = ptr->shape;
55-
result->device = ptr->device;
55+
5656
result->deleter = nullptr;
5757
result->copyer = nullptr;
5858
result->newer = nullptr;

0 commit comments

Comments
 (0)