Skip to content

Commit f815237

Browse files
committed
tf:上移动functional层,保留tf,细化参数类型
1 parent ce862dc commit f815237

45 files changed

Lines changed: 1884 additions & 1228 deletions

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

.cursorrules

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,9 @@ Always respond in 中文
1111
关于概念
1212
deepx.Tensor仅仅就是一个tensor,不像pytorch的tensor,一个tensor其实包含了自身和梯度2个tensor的数据
1313

14+
关于任何编程语言
15+
注重设计函数时,通过多级的子函数,实现层级模块化分解
16+
1417
关于c++
1518
我的环境为ubuntu22,项目是c++17,使用cmake编译,
1619
返回c++代码区分header和source文件

doc/excuter/deepx.op.drawio

Lines changed: 57 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
<mxfile host="app.diagrams.net" agent="Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/130.0.0.0 Safari/537.36" version="26.1.0">
1+
<mxfile host="app.diagrams.net" agent="Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/130.0.0.0 Safari/537.36" version="26.1.0" pages="2">
22
<diagram name="第 1 页" id="Vvg-Fo-2kplNZ5rFatkM">
33
<mxGraphModel dx="2191" dy="993" grid="1" gridSize="10" guides="1" tooltips="1" connect="1" arrows="1" fold="1" page="1" pageScale="1" pageWidth="827" pageHeight="1169" math="0" shadow="0">
44
<root>
@@ -78,4 +78,60 @@
7878
</root>
7979
</mxGraphModel>
8080
</diagram>
81+
<diagram id="z9uejpskYJko357ewfTc" name="第 2 页">
82+
<mxGraphModel dx="1364" dy="2162" grid="1" gridSize="10" guides="1" tooltips="1" connect="1" arrows="1" fold="1" page="1" pageScale="1" pageWidth="827" pageHeight="1169" math="0" shadow="0">
83+
<root>
84+
<mxCell id="0" />
85+
<mxCell id="1" parent="0" />
86+
<mxCell id="JBEWLCwWRuB5Uu3qIstv-14" value="" style="group" vertex="1" connectable="0" parent="1">
87+
<mxGeometry x="90" y="180" width="160" as="geometry" />
88+
</mxCell>
89+
<mxCell id="JBEWLCwWRuB5Uu3qIstv-10" value="" style="rounded=1;whiteSpace=wrap;html=1;arcSize=8;" vertex="1" parent="JBEWLCwWRuB5Uu3qIstv-14">
90+
<mxGeometry x="20" y="30" width="350" height="400" as="geometry" />
91+
</mxCell>
92+
<mxCell id="JBEWLCwWRuB5Uu3qIstv-13" value="process&lt;br&gt;excuter-cpu" style="ellipse;whiteSpace=wrap;html=1;aspect=fixed;fillColor=#fff2cc;strokeColor=#d6b656;" vertex="1" parent="JBEWLCwWRuB5Uu3qIstv-14">
93+
<mxGeometry width="80" height="80" as="geometry" />
94+
</mxCell>
95+
<mxCell id="JBEWLCwWRuB5Uu3qIstv-1" value="tensorfunc" style="swimlane;childLayout=stackLayout;horizontal=1;startSize=50;horizontalStack=0;rounded=1;fontSize=14;fontStyle=0;strokeWidth=2;resizeParent=0;resizeLast=1;shadow=0;dashed=0;align=center;arcSize=4;whiteSpace=wrap;html=1;fillColor=#dae8fc;strokeColor=#6c8ebf;" vertex="1" parent="1">
96+
<mxGeometry x="240" y="470" width="160" height="120" as="geometry" />
97+
</mxCell>
98+
<mxCell id="JBEWLCwWRuB5Uu3qIstv-2" value="+A&lt;br&gt;+B&lt;br&gt;+C" style="align=left;strokeColor=none;fillColor=none;spacingLeft=4;spacingRight=4;fontSize=12;verticalAlign=top;resizable=0;rotatable=0;part=1;html=1;whiteSpace=wrap;" vertex="1" parent="JBEWLCwWRuB5Uu3qIstv-1">
99+
<mxGeometry y="50" width="160" height="70" as="geometry" />
100+
</mxCell>
101+
<mxCell id="JBEWLCwWRuB5Uu3qIstv-7" value="TF" style="swimlane;childLayout=stackLayout;horizontal=1;startSize=50;horizontalStack=0;rounded=1;fontSize=14;fontStyle=0;strokeWidth=2;resizeParent=0;resizeLast=1;shadow=0;dashed=0;align=center;arcSize=4;whiteSpace=wrap;html=1;fillColor=#dae8fc;strokeColor=#6c8ebf;" vertex="1" parent="1">
102+
<mxGeometry x="240" y="250" width="160" height="120" as="geometry" />
103+
</mxCell>
104+
<mxCell id="JBEWLCwWRuB5Uu3qIstv-8" value="+inputs&lt;br&gt;+returns" style="align=left;strokeColor=none;fillColor=none;spacingLeft=4;spacingRight=4;fontSize=12;verticalAlign=top;resizable=0;rotatable=0;part=1;html=1;whiteSpace=wrap;" vertex="1" parent="JBEWLCwWRuB5Uu3qIstv-7">
105+
<mxGeometry y="50" width="160" height="70" as="geometry" />
106+
</mxCell>
107+
<mxCell id="JBEWLCwWRuB5Uu3qIstv-9" style="edgeStyle=orthogonalEdgeStyle;rounded=0;orthogonalLoop=1;jettySize=auto;html=1;entryX=0.5;entryY=0;entryDx=0;entryDy=0;" edge="1" parent="1" source="JBEWLCwWRuB5Uu3qIstv-8" target="JBEWLCwWRuB5Uu3qIstv-1">
108+
<mxGeometry relative="1" as="geometry" />
109+
</mxCell>
110+
<mxCell id="JBEWLCwWRuB5Uu3qIstv-19" value="" style="group" vertex="1" connectable="0" parent="1">
111+
<mxGeometry x="55" y="-540" width="530" height="560" as="geometry" />
112+
</mxCell>
113+
<mxCell id="JBEWLCwWRuB5Uu3qIstv-20" value="" style="rounded=1;whiteSpace=wrap;html=1;arcSize=8;" vertex="1" parent="JBEWLCwWRuB5Uu3qIstv-19">
114+
<mxGeometry x="28.64864864864865" y="78.13953488372093" width="501.35135135135135" height="468.83720930232556" as="geometry" />
115+
</mxCell>
116+
<mxCell id="JBEWLCwWRuB5Uu3qIstv-21" value="&lt;div&gt;process&lt;/div&gt;front-py&lt;div&gt;&lt;br/&gt;&lt;/div&gt;" style="ellipse;whiteSpace=wrap;html=1;aspect=fixed;fillColor=#fff2cc;strokeColor=#d6b656;" vertex="1" parent="JBEWLCwWRuB5Uu3qIstv-19">
117+
<mxGeometry width="84.32432432432432" height="84.32432432432432" as="geometry" />
118+
</mxCell>
119+
<mxCell id="JBEWLCwWRuB5Uu3qIstv-22" value="TF" style="swimlane;childLayout=stackLayout;horizontal=1;startSize=50;horizontalStack=0;rounded=1;fontSize=14;fontStyle=0;strokeWidth=2;resizeParent=0;resizeLast=1;shadow=0;dashed=0;align=center;arcSize=4;whiteSpace=wrap;html=1;fillColor=#dae8fc;strokeColor=#6c8ebf;" vertex="1" parent="JBEWLCwWRuB5Uu3qIstv-19">
120+
<mxGeometry x="192.97179487179488" y="430" width="212" height="90.93" as="geometry" />
121+
</mxCell>
122+
<mxCell id="JBEWLCwWRuB5Uu3qIstv-23" value="+inputs&lt;br&gt;+returns" style="align=left;strokeColor=none;fillColor=none;spacingLeft=4;spacingRight=4;fontSize=12;verticalAlign=top;resizable=0;rotatable=0;part=1;html=1;whiteSpace=wrap;" vertex="1" parent="JBEWLCwWRuB5Uu3qIstv-22">
123+
<mxGeometry y="50" width="212" height="40.93000000000001" as="geometry" />
124+
</mxCell>
125+
<mxCell id="JBEWLCwWRuB5Uu3qIstv-27" style="edgeStyle=orthogonalEdgeStyle;rounded=0;orthogonalLoop=1;jettySize=auto;html=1;entryX=0.5;entryY=0;entryDx=0;entryDy=0;" edge="1" parent="JBEWLCwWRuB5Uu3qIstv-19" source="JBEWLCwWRuB5Uu3qIstv-24" target="JBEWLCwWRuB5Uu3qIstv-22">
126+
<mxGeometry relative="1" as="geometry" />
127+
</mxCell>
128+
<mxCell id="JBEWLCwWRuB5Uu3qIstv-24" value="Function" style="swimlane;childLayout=stackLayout;horizontal=1;startSize=50;horizontalStack=0;rounded=1;fontSize=14;fontStyle=0;strokeWidth=2;resizeParent=0;resizeLast=1;shadow=0;dashed=0;align=center;arcSize=4;whiteSpace=wrap;html=1;fillColor=#dae8fc;strokeColor=#6c8ebf;" vertex="1" parent="JBEWLCwWRuB5Uu3qIstv-19">
129+
<mxGeometry x="149.4871794871795" y="236.74" width="298.974358974359" height="103.26" as="geometry" />
130+
</mxCell>
131+
<mxCell id="JBEWLCwWRuB5Uu3qIstv-25" value="&lt;div&gt;&lt;span style=&quot;background-color: transparent; color: light-dark(rgb(0, 0, 0), rgb(255, 255, 255));&quot;&gt;+forward(args,returns)&lt;/span&gt;&lt;/div&gt;+backward(args,args_grad,returns,returns_grad)" style="align=left;strokeColor=none;fillColor=none;spacingLeft=4;spacingRight=4;fontSize=12;verticalAlign=top;resizable=0;rotatable=0;part=1;html=1;whiteSpace=wrap;" vertex="1" parent="JBEWLCwWRuB5Uu3qIstv-24">
132+
<mxGeometry y="50" width="298.974358974359" height="53.260000000000005" as="geometry" />
133+
</mxCell>
134+
</root>
135+
</mxGraphModel>
136+
</diagram>
81137
</mxfile>

doc/excuter/mix_precision.md

Lines changed: 37 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,37 @@
1+
# mix precision
2+
3+
## 1. 什么是 mix precision
4+
5+
mix precision 是一种混合精度训练方法,它在训练中混合使用 16 位浮点数(如 float16/bfloat16)和 32 位浮点数,从而在基本保持模型精度的同时,减少显存占用和计算时间。
6+
7+
## 2. 为什么需要 mix precision
8+
9+
在深度学习中,模型通常使用 32 位浮点数进行训练,这样可以确保模型的精度。但是,32 位浮点数占用的显存较大,计算时间较长。因此,为了减少显存占用和计算时间,可以使用 mix precision 训练方法。
10+
11+
## 3. 关于excuter的mix precision的实现
12+
13+
如:
14+
15+
matmul(A[float16],B[float16])->C[float32] //author=miaobyte id=1 create_time=1714512000 send_time=1714512000
16+
17+
我们在opfactory中,把实际参数用占位符替换,注册为
18+
19+
matmul[authora] Tensor@float16 Tensor@float16 -> Tensor@float32
20+
21+
如:
22+
23+
matmul[authora] A@float16 B@float16 -> C@float32
24+
25+
同样,在opfactory中,把实际参数用占位符替换,注册为
26+
27+
muladd[authora] Tensor@float16 Scalar@float32-> Tensor@float16
28+
29+
30+
31+
32+
33+
34+
35+
36+
37+

doc/excuter/op-mem-ompsimd/list.md

Lines changed: 11 additions & 35 deletions
Original file line numberDiff line numberDiff line change
@@ -1,39 +1,15 @@
11
## excuter/op-mem-ompsimd 支持算子列表
22

3-
本页面由 `excuter/op-mem-ompsimd/src/deepx/op/opfactory.hpp` 生成,请勿手动修改
3+
本页面由 `excuter/op-mem-ompsimd/src/deepx/tf/tffactory.hpp` 生成,请勿手动修改
44

5-
| Operation | Author | Data Types | Math Formula | IR Instruction |
5+
| Operation | Author | Func Def | Math Formula | IR Instruction |
66
|-----------|--------|------------|--------------|----------------|
7-
| divscalar | miaobyte | float32, float64 | T2 = T1 / 2.0 | divscalar@float32 T1 2.0 -> T2 |
8-
| addscalar | miaobyte | float32, float64 | T2 = T1 + 1.0 | addscalar@float32 T1 1.0 -> T2 |
9-
| uniform | | float32, float64 | uniform(-1.0, 1.0,T1) | uniform@float32 -1.0 1.0 -> T1 |
10-
| deltensor | | any | del T1 | deltensor@any T1 -> |
11-
| minscalar | | float32, float64 | B= min(A, 1.0) | minscalar@float32 A 1.0 -> B |
12-
| rdivscalar | miaobyte | float32, float64 | T3 =1 / T2 | rdivscalar@float32 1 T2 -> T3 |
13-
| constant | | float32, float64 | T1 = full(shape, 0.0) | constant@float32 0.0 -> T1 |
14-
| powscalar | miaobyte | float32, float64 | T2 = T1 ^ 2.0 | powscalar@float32 T1 2.0 -> T2 |
15-
| sub | cblas | float32, float64 | T3 = T1 - T2 | sub@int32 T1 T2 -> T3 |
16-
| sub | miaobyte | float32, float64 | T3 = T1 - T2 | sub@int32 T1 T2 -> T3 |
17-
| sum | | float32, float64 | T2 = sum(T1, dims=[1,2]) | sum@float32 T1 1 2 -> T2 |
18-
| argset | | float32, float64, int32 | shape = [3, 4, 5] | argset@int32 3 4 5 -> shape |
19-
| arange | | float32, float64 | arange(start=0.0, step=1.0,T1) | arange@float32 0.0 1.0 -> T1 |
20-
| transpose | | any | T2 = transpose(T1, dimorder=[1,0]) | transpose@float32 T1 1 0 -> T2 |
21-
| clonetensor | | float32, float64, int16, int32, int64, int8 | T2 = T1.clone() | clonetensor@float32 T1 -> T2 |
22-
| add | cblas | float32, float64 | T3 = T1 + T2 | add@int32 T1 T2 -> T3 |
23-
| add | miaobyte | float32, float64, int16, int32, int64, int8 | T3 = T1 + T2 | add@int32 T1 T2 -> T3 |
24-
| copytensor | | float32, float64, int16, int32, int64, int8 | T2 = T1.copy() | copytensor@float32 T1 -> T2 |
25-
| min | | float32, float64 | C = min(A,B) | min@float32 A B -> C |
26-
| print | | any | | print@any -> |
27-
| newtensor | | float32, float64, int16, int32, int64, int8 | T1 = zeros(shape) | newtensor@float32 shape -> T1 |
28-
| mulscalar | miaobyte | float32, float64 | T2 = T1 * 2.0 | mulscalar@float32 T1 2.0 -> T2 |
29-
| div | miaobyte | float32, float64 | T3 = T1 / T2 | div_miaobyte@float32 T1 T2 -> T3 |
30-
| sqrt | miaobyte | float32, float64 | T2 = sqrt(T1) | sqrt@float32 T1 -> T2 |
31-
| mul | miaobyte | float32, float64 | T3 = T1 * T2 | mul@float32 T1 T2 -> T3 |
32-
| exp | miaobyte | float32, float64 | T2 = exp(T1) | exp@float32 T1 -> T2 |
33-
| max | | float32, float64 | T3 = max(T1,T2) | max@float32 T1 -> T2 |
34-
| pow | miaobyte | float32, float64 | T3 = T1 ^ T2 | pow@float32 T1 T2 -> T3 |
35-
| maxscalar | | float32, float64 | T2 = max(T1, 0.0) | maxscalar@float32 T1 0.0 -> T2 |
36-
| matmul | | float32, float64 | T3 = T1 @ T2 | matmul@float32 T1 T2 -> T3 |
37-
| reshape | | any | T2 = reshape(T1, [2,3,4]) | reshape@float32 T1 2 3 4 -> T2 |
38-
| expand | | any | T2 = expand(T1, axis=[4,6,12]) | expand@float32 T1 4 6 12 -> T2 |
39-
| concat | | float32 | T3 = concat([T1, T2], axis=3) | concat@float32 T1 T2 3 -> T3 |
7+
| argset | none | (arg)->(double) | shape = [3 4 5] | argset(arg )->(double d1) |
8+
| argset | none | (arg)->(float) | shape = [3 4 5] | argset(arg )->(float f1) |
9+
| argset | none | (args)->(int32) | shape = [3 4 5] | argset(args )->(int32 shape) |
10+
| newtensor | none | (shape)->(double) | T1 = zeros(shape) | newtensor(shape )->(double tensor) |
11+
| newtensor | none | (shape)->(float) | T1 = zeros(shape) | newtensor(shape )->(float tensor) |
12+
| newtensor | none | (shape)->(int64) | T1 = zeros(shape) | newtensor(shape )->(int64 tensor) |
13+
| newtensor | none | (shape)->(int32) | T1 = zeros(shape) | newtensor(shape )->(int32 tensor) |
14+
| newtensor | none | (shape)->(int16) | T1 = zeros(shape) | newtensor(shape )->(int16 tensor) |
15+
| newtensor | none | (shape)->(int8) | T1 = zeros(shape) | newtensor(shape )->(int8 tensor) |

excuter/common/CMakeLists.txt

Lines changed: 13 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -15,13 +15,23 @@ include_directories(src)
1515
# 源文件
1616

1717
file(GLOB_RECURSE DEEPX_COMMON_SOURCES "src/*.cpp")
18-
19-
18+
2019
add_library(deepx_common SHARED
2120
${DEEPX_COMMON_SOURCES}
2221
)
2322

23+
24+
find_package(yaml-cpp REQUIRED)
25+
26+
target_link_libraries(deepx_common
27+
PUBLIC
28+
yaml-cpp
29+
)
30+
2431
target_include_directories(deepx_common PUBLIC
2532
$<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/src>
2633
$<INSTALL_INTERFACE:include>
27-
)
34+
)
35+
36+
37+
add_subdirectory(test)

excuter/common/src/client/udpserver.cpp

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,7 @@ namespace client
1616
close(sockfd);
1717
}
1818
}
19-
void udpserver::start(queue<deepx::op::Op> &queue)
19+
void udpserver::start(queue<deepx::tf::TF> &queue)
2020
{
2121
// 创建UDP套接字
2222
if ((sockfd = socket(AF_INET, SOCK_DGRAM, 0)) < 0)
@@ -49,10 +49,10 @@ namespace client
4949
string line;
5050
while (getline(ss, line)) {
5151
if (!line.empty()) {
52-
deepx::op::Op op;
53-
op.recv_at = chrono::system_clock::now();
54-
op.load(line);
55-
queue.push(op);
52+
deepx::tf::TF tf;
53+
tf.recv_at = chrono::system_clock::now();
54+
tf.parse(line,true);
55+
queue.push(tf);
5656
}
5757
}
5858
}

excuter/common/src/client/udpserver.hpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@
77
#include <sys/un.h>
88
#include <unistd.h>
99
#include <functional>
10-
#include "deepx/op/op.hpp"
10+
#include "deepx/tf/tf.hpp"
1111
#include <queue>
1212

1313
namespace client{
@@ -24,7 +24,7 @@ namespace client{
2424
public:
2525
udpserver(int port);
2626
~udpserver();
27-
void start(queue<deepx::op::Op> &tasks);
27+
void start(queue<deepx::tf::TF> &tasks);
2828
using handlefunc = std::function<void(const char *buffer)>;
2929
handlefunc func;
3030
void resp(string str);

0 commit comments

Comments
 (0)