Skip to content

Commit f815237

Browse files
committed
tf:上移动functional层,保留tf,细化参数类型
1 parent ce862dc commit f815237

45 files changed

Lines changed: 1884 additions & 1228 deletions

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

.cursorrules

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,9 @@ Always respond in 中文
1111
关于概念
1212
deepx.Tensor仅仅就是一个tensor,不像pytorch的tensor,一个tensor其实包含了自身和梯度2个tensor的数据
1313

14+
关于任何编程语言
15+
注重设计函数时,通过多级的子函数,实现层级模块化分解
16+
1417
关于c++
1518
我的环境为ubuntu22,项目是c++17,使用cmake编译,
1619
返回c++代码区分header和source文件

doc/excuter/deepx.op.drawio

Lines changed: 57 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
<mxfile host="app.diagrams.net" agent="Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/130.0.0.0 Safari/537.36" version="26.1.0">
1+
<mxfile host="app.diagrams.net" agent="Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/130.0.0.0 Safari/537.36" version="26.1.0" pages="2">
22
<diagram name="第 1 页" id="Vvg-Fo-2kplNZ5rFatkM">
33
<mxGraphModel dx="2191" dy="993" grid="1" gridSize="10" guides="1" tooltips="1" connect="1" arrows="1" fold="1" page="1" pageScale="1" pageWidth="827" pageHeight="1169" math="0" shadow="0">
44
<root>
@@ -78,4 +78,60 @@
7878
</root>
7979
</mxGraphModel>
8080
</diagram>
81+
<diagram id="z9uejpskYJko357ewfTc" name="第 2 页">
82+
<mxGraphModel dx="1364" dy="2162" grid="1" gridSize="10" guides="1" tooltips="1" connect="1" arrows="1" fold="1" page="1" pageScale="1" pageWidth="827" pageHeight="1169" math="0" shadow="0">
83+
<root>
84+
<mxCell id="0" />
85+
<mxCell id="1" parent="0" />
86+
<mxCell id="JBEWLCwWRuB5Uu3qIstv-14" value="" style="group" vertex="1" connectable="0" parent="1">
87+
<mxGeometry x="90" y="180" width="160" as="geometry" />
88+
</mxCell>
89+
<mxCell id="JBEWLCwWRuB5Uu3qIstv-10" value="" style="rounded=1;whiteSpace=wrap;html=1;arcSize=8;" vertex="1" parent="JBEWLCwWRuB5Uu3qIstv-14">
90+
<mxGeometry x="20" y="30" width="350" height="400" as="geometry" />
91+
</mxCell>
92+
<mxCell id="JBEWLCwWRuB5Uu3qIstv-13" value="process&lt;br&gt;excuter-cpu" style="ellipse;whiteSpace=wrap;html=1;aspect=fixed;fillColor=#fff2cc;strokeColor=#d6b656;" vertex="1" parent="JBEWLCwWRuB5Uu3qIstv-14">
93+
<mxGeometry width="80" height="80" as="geometry" />
94+
</mxCell>
95+
<mxCell id="JBEWLCwWRuB5Uu3qIstv-1" value="tensorfunc" style="swimlane;childLayout=stackLayout;horizontal=1;startSize=50;horizontalStack=0;rounded=1;fontSize=14;fontStyle=0;strokeWidth=2;resizeParent=0;resizeLast=1;shadow=0;dashed=0;align=center;arcSize=4;whiteSpace=wrap;html=1;fillColor=#dae8fc;strokeColor=#6c8ebf;" vertex="1" parent="1">
96+
<mxGeometry x="240" y="470" width="160" height="120" as="geometry" />
97+
</mxCell>
98+
<mxCell id="JBEWLCwWRuB5Uu3qIstv-2" value="+A&lt;br&gt;+B&lt;br&gt;+C" style="align=left;strokeColor=none;fillColor=none;spacingLeft=4;spacingRight=4;fontSize=12;verticalAlign=top;resizable=0;rotatable=0;part=1;html=1;whiteSpace=wrap;" vertex="1" parent="JBEWLCwWRuB5Uu3qIstv-1">
99+
<mxGeometry y="50" width="160" height="70" as="geometry" />
100+
</mxCell>
101+
<mxCell id="JBEWLCwWRuB5Uu3qIstv-7" value="TF" style="swimlane;childLayout=stackLayout;horizontal=1;startSize=50;horizontalStack=0;rounded=1;fontSize=14;fontStyle=0;strokeWidth=2;resizeParent=0;resizeLast=1;shadow=0;dashed=0;align=center;arcSize=4;whiteSpace=wrap;html=1;fillColor=#dae8fc;strokeColor=#6c8ebf;" vertex="1" parent="1">
102+
<mxGeometry x="240" y="250" width="160" height="120" as="geometry" />
103+
</mxCell>
104+
<mxCell id="JBEWLCwWRuB5Uu3qIstv-8" value="+inputs&lt;br&gt;+returns" style="align=left;strokeColor=none;fillColor=none;spacingLeft=4;spacingRight=4;fontSize=12;verticalAlign=top;resizable=0;rotatable=0;part=1;html=1;whiteSpace=wrap;" vertex="1" parent="JBEWLCwWRuB5Uu3qIstv-7">
105+
<mxGeometry y="50" width="160" height="70" as="geometry" />
106+
</mxCell>
107+
<mxCell id="JBEWLCwWRuB5Uu3qIstv-9" style="edgeStyle=orthogonalEdgeStyle;rounded=0;orthogonalLoop=1;jettySize=auto;html=1;entryX=0.5;entryY=0;entryDx=0;entryDy=0;" edge="1" parent="1" source="JBEWLCwWRuB5Uu3qIstv-8" target="JBEWLCwWRuB5Uu3qIstv-1">
108+
<mxGeometry relative="1" as="geometry" />
109+
</mxCell>
110+
<mxCell id="JBEWLCwWRuB5Uu3qIstv-19" value="" style="group" vertex="1" connectable="0" parent="1">
111+
<mxGeometry x="55" y="-540" width="530" height="560" as="geometry" />
112+
</mxCell>
113+
<mxCell id="JBEWLCwWRuB5Uu3qIstv-20" value="" style="rounded=1;whiteSpace=wrap;html=1;arcSize=8;" vertex="1" parent="JBEWLCwWRuB5Uu3qIstv-19">
114+
<mxGeometry x="28.64864864864865" y="78.13953488372093" width="501.35135135135135" height="468.83720930232556" as="geometry" />
115+
</mxCell>
116+
<mxCell id="JBEWLCwWRuB5Uu3qIstv-21" value="&lt;div&gt;process&lt;/div&gt;front-py&lt;div&gt;&lt;br/&gt;&lt;/div&gt;" style="ellipse;whiteSpace=wrap;html=1;aspect=fixed;fillColor=#fff2cc;strokeColor=#d6b656;" vertex="1" parent="JBEWLCwWRuB5Uu3qIstv-19">
117+
<mxGeometry width="84.32432432432432" height="84.32432432432432" as="geometry" />
118+
</mxCell>
119+
<mxCell id="JBEWLCwWRuB5Uu3qIstv-22" value="TF" style="swimlane;childLayout=stackLayout;horizontal=1;startSize=50;horizontalStack=0;rounded=1;fontSize=14;fontStyle=0;strokeWidth=2;resizeParent=0;resizeLast=1;shadow=0;dashed=0;align=center;arcSize=4;whiteSpace=wrap;html=1;fillColor=#dae8fc;strokeColor=#6c8ebf;" vertex="1" parent="JBEWLCwWRuB5Uu3qIstv-19">
120+
<mxGeometry x="192.97179487179488" y="430" width="212" height="90.93" as="geometry" />
121+
</mxCell>
122+
<mxCell id="JBEWLCwWRuB5Uu3qIstv-23" value="+inputs&lt;br&gt;+returns" style="align=left;strokeColor=none;fillColor=none;spacingLeft=4;spacingRight=4;fontSize=12;verticalAlign=top;resizable=0;rotatable=0;part=1;html=1;whiteSpace=wrap;" vertex="1" parent="JBEWLCwWRuB5Uu3qIstv-22">
123+
<mxGeometry y="50" width="212" height="40.93000000000001" as="geometry" />
124+
</mxCell>
125+
<mxCell id="JBEWLCwWRuB5Uu3qIstv-27" style="edgeStyle=orthogonalEdgeStyle;rounded=0;orthogonalLoop=1;jettySize=auto;html=1;entryX=0.5;entryY=0;entryDx=0;entryDy=0;" edge="1" parent="JBEWLCwWRuB5Uu3qIstv-19" source="JBEWLCwWRuB5Uu3qIstv-24" target="JBEWLCwWRuB5Uu3qIstv-22">
126+
<mxGeometry relative="1" as="geometry" />
127+
</mxCell>
128+
<mxCell id="JBEWLCwWRuB5Uu3qIstv-24" value="Function" style="swimlane;childLayout=stackLayout;horizontal=1;startSize=50;horizontalStack=0;rounded=1;fontSize=14;fontStyle=0;strokeWidth=2;resizeParent=0;resizeLast=1;shadow=0;dashed=0;align=center;arcSize=4;whiteSpace=wrap;html=1;fillColor=#dae8fc;strokeColor=#6c8ebf;" vertex="1" parent="JBEWLCwWRuB5Uu3qIstv-19">
129+
<mxGeometry x="149.4871794871795" y="236.74" width="298.974358974359" height="103.26" as="geometry" />
130+
</mxCell>
131+
<mxCell id="JBEWLCwWRuB5Uu3qIstv-25" value="&lt;div&gt;&lt;span style=&quot;background-color: transparent; color: light-dark(rgb(0, 0, 0), rgb(255, 255, 255));&quot;&gt;+forward(args,returns)&lt;/span&gt;&lt;/div&gt;+backward(args,args_grad,returns,returns_grad)" style="align=left;strokeColor=none;fillColor=none;spacingLeft=4;spacingRight=4;fontSize=12;verticalAlign=top;resizable=0;rotatable=0;part=1;html=1;whiteSpace=wrap;" vertex="1" parent="JBEWLCwWRuB5Uu3qIstv-24">
132+
<mxGeometry y="50" width="298.974358974359" height="53.260000000000005" as="geometry" />
133+
</mxCell>
134+
</root>
135+
</mxGraphModel>
136+
</diagram>
81137
</mxfile>

doc/excuter/mix_precision.md

Lines changed: 37 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,37 @@
1+
# mix precision
2+
3+
## 1. 什么是 mix precision
4+
5+
mix precision 是一种混合精度训练方法,它在训练中混合使用 16 位浮点数(如 float16/bfloat16)和 32 位浮点数,从而在基本保持模型精度的同时,减少显存占用和计算时间。
6+
7+
## 2. 为什么需要 mix precision
8+
9+
在深度学习中,模型通常使用 32 位浮点数进行训练,这样可以确保模型的精度。但是,32 位浮点数占用的显存较大,计算时间较长。因此,为了减少显存占用和计算时间,可以使用 mix precision 训练方法。
10+
11+
## 3. 关于excuter的mix precision的实现
12+
13+
如:
14+
15+
matmul(A[float16],B[float16])->C[float32] //author=miaobyte id=1 create_time=1714512000 send_time=1714512000
16+
17+
我们在opfactory中,把实际参数用占位符替换,注册为
18+
19+
matmul[authora] Tensor@float16 Tensor@float16 -> Tensor@float32
20+
21+
如:
22+
23+
matmul[authora] A@float16 B@float16 -> C@float32
24+
25+
同样,在opfactory中,把实际参数用占位符替换,注册为
26+
27+
muladd[authora] Tensor@float16 Scalar@float32-> Tensor@float16
28+
29+
30+
31+
32+
33+
34+
35+
36+
37+

doc/excuter/op-mem-ompsimd/list.md

Lines changed: 11 additions & 35 deletions
Original file line numberDiff line numberDiff line change
@@ -1,39 +1,15 @@
11
## excuter/op-mem-ompsimd 支持算子列表
22

3-
本页面由 `excuter/op-mem-ompsimd/src/deepx/op/opfactory.hpp` 生成,请勿手动修改
3+
本页面由 `excuter/op-mem-ompsimd/src/deepx/tf/tffactory.hpp` 生成,请勿手动修改
44

5-
| Operation | Author | Data Types | Math Formula | IR Instruction |
5+
| Operation | Author | Func Def | Math Formula | IR Instruction |
66
|-----------|--------|------------|--------------|----------------|
7-
| divscalar | miaobyte | float32, float64 | T2 = T1 / 2.0 | divscalar@float32 T1 2.0 -> T2 |
8-
| addscalar | miaobyte | float32, float64 | T2 = T1 + 1.0 | addscalar@float32 T1 1.0 -> T2 |
9-
| uniform | | float32, float64 | uniform(-1.0, 1.0,T1) | uniform@float32 -1.0 1.0 -> T1 |
10-
| deltensor | | any | del T1 | deltensor@any T1 -> |
11-
| minscalar | | float32, float64 | B= min(A, 1.0) | minscalar@float32 A 1.0 -> B |
12-
| rdivscalar | miaobyte | float32, float64 | T3 =1 / T2 | rdivscalar@float32 1 T2 -> T3 |
13-
| constant | | float32, float64 | T1 = full(shape, 0.0) | constant@float32 0.0 -> T1 |
14-
| powscalar | miaobyte | float32, float64 | T2 = T1 ^ 2.0 | powscalar@float32 T1 2.0 -> T2 |
15-
| sub | cblas | float32, float64 | T3 = T1 - T2 | sub@int32 T1 T2 -> T3 |
16-
| sub | miaobyte | float32, float64 | T3 = T1 - T2 | sub@int32 T1 T2 -> T3 |
17-
| sum | | float32, float64 | T2 = sum(T1, dims=[1,2]) | sum@float32 T1 1 2 -> T2 |
18-
| argset | | float32, float64, int32 | shape = [3, 4, 5] | argset@int32 3 4 5 -> shape |
19-
| arange | | float32, float64 | arange(start=0.0, step=1.0,T1) | arange@float32 0.0 1.0 -> T1 |
20-
| transpose | | any | T2 = transpose(T1, dimorder=[1,0]) | transpose@float32 T1 1 0 -> T2 |
21-
| clonetensor | | float32, float64, int16, int32, int64, int8 | T2 = T1.clone() | clonetensor@float32 T1 -> T2 |
22-
| add | cblas | float32, float64 | T3 = T1 + T2 | add@int32 T1 T2 -> T3 |
23-
| add | miaobyte | float32, float64, int16, int32, int64, int8 | T3 = T1 + T2 | add@int32 T1 T2 -> T3 |
24-
| copytensor | | float32, float64, int16, int32, int64, int8 | T2 = T1.copy() | copytensor@float32 T1 -> T2 |
25-
| min | | float32, float64 | C = min(A,B) | min@float32 A B -> C |
26-
| print | | any | | print@any -> |
27-
| newtensor | | float32, float64, int16, int32, int64, int8 | T1 = zeros(shape) | newtensor@float32 shape -> T1 |
28-
| mulscalar | miaobyte | float32, float64 | T2 = T1 * 2.0 | mulscalar@float32 T1 2.0 -> T2 |
29-
| div | miaobyte | float32, float64 | T3 = T1 / T2 | div_miaobyte@float32 T1 T2 -> T3 |
30-
| sqrt | miaobyte | float32, float64 | T2 = sqrt(T1) | sqrt@float32 T1 -> T2 |
31-
| mul | miaobyte | float32, float64 | T3 = T1 * T2 | mul@float32 T1 T2 -> T3 |
32-
| exp | miaobyte | float32, float64 | T2 = exp(T1) | exp@float32 T1 -> T2 |
33-
| max | | float32, float64 | T3 = max(T1,T2) | max@float32 T1 -> T2 |
34-
| pow | miaobyte | float32, float64 | T3 = T1 ^ T2 | pow@float32 T1 T2 -> T3 |
35-
| maxscalar | | float32, float64 | T2 = max(T1, 0.0) | maxscalar@float32 T1 0.0 -> T2 |
36-
| matmul | | float32, float64 | T3 = T1 @ T2 | matmul@float32 T1 T2 -> T3 |
37-
| reshape | | any | T2 = reshape(T1, [2,3,4]) | reshape@float32 T1 2 3 4 -> T2 |
38-
| expand | | any | T2 = expand(T1, axis=[4,6,12]) | expand@float32 T1 4 6 12 -> T2 |
39-
| concat | | float32 | T3 = concat([T1, T2], axis=3) | concat@float32 T1 T2 3 -> T3 |
7+
| argset | none | (arg)->(double) | shape = [3 4 5] | argset(arg )->(double d1) |
8+
| argset | none | (arg)->(float) | shape = [3 4 5] | argset(arg )->(float f1) |
9+
| argset | none | (args)->(int32) | shape = [3 4 5] | argset(args )->(int32 shape) |
10+
| newtensor | none | (shape)->(double) | T1 = zeros(shape) | newtensor(shape )->(double tensor) |
11+
| newtensor | none | (shape)->(float) | T1 = zeros(shape) | newtensor(shape )->(float tensor) |
12+
| newtensor | none | (shape)->(int64) | T1 = zeros(shape) | newtensor(shape )->(int64 tensor) |
13+
| newtensor | none | (shape)->(int32) | T1 = zeros(shape) | newtensor(shape )->(int32 tensor) |
14+
| newtensor | none | (shape)->(int16) | T1 = zeros(shape) | newtensor(shape )->(int16 tensor) |
15+
| newtensor | none | (shape)->(int8) | T1 = zeros(shape) | newtensor(shape )->(int8 tensor) |

excuter/common/CMakeLists.txt

Lines changed: 13 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -15,13 +15,23 @@ include_directories(src)
1515
# 源文件
1616

1717
file(GLOB_RECURSE DEEPX_COMMON_SOURCES "src/*.cpp")
18-
19-
18+
2019
add_library(deepx_common SHARED
2120
${DEEPX_COMMON_SOURCES}
2221
)
2322

23+
24+
find_package(yaml-cpp REQUIRED)
25+
26+
target_link_libraries(deepx_common
27+
PUBLIC
28+
yaml-cpp
29+
)
30+
2431
target_include_directories(deepx_common PUBLIC
2532
$<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/src>
2633
$<INSTALL_INTERFACE:include>
27-
)
34+
)
35+
36+
37+
add_subdirectory(test)

excuter/common/src/client/udpserver.cpp

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,7 @@ namespace client
1616
close(sockfd);
1717
}
1818
}
19-
void udpserver::start(queue<deepx::op::Op> &queue)
19+
void udpserver::start(queue<deepx::tf::TF> &queue)
2020
{
2121
// 创建UDP套接字
2222
if ((sockfd = socket(AF_INET, SOCK_DGRAM, 0)) < 0)
@@ -49,10 +49,10 @@ namespace client
4949
string line;
5050
while (getline(ss, line)) {
5151
if (!line.empty()) {
52-
deepx::op::Op op;
53-
op.recv_at = chrono::system_clock::now();
54-
op.load(line);
55-
queue.push(op);
52+
deepx::tf::TF tf;
53+
tf.recv_at = chrono::system_clock::now();
54+
tf.parse(line,true);
55+
queue.push(tf);
5656
}
5757
}
5858
}

excuter/common/src/client/udpserver.hpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@
77
#include <sys/un.h>
88
#include <unistd.h>
99
#include <functional>
10-
#include "deepx/op/op.hpp"
10+
#include "deepx/tf/tf.hpp"
1111
#include <queue>
1212

1313
namespace client{
@@ -24,7 +24,7 @@ namespace client{
2424
public:
2525
udpserver(int port);
2626
~udpserver();
27-
void start(queue<deepx::op::Op> &tasks);
27+
void start(queue<deepx::tf::TF> &tasks);
2828
using handlefunc = std::function<void(const char *buffer)>;
2929
handlefunc func;
3030
void resp(string str);

0 commit comments

Comments
 (0)