
Commit 5397848

relu+uniform: tests OK
1 parent 7ec711d commit 5397848

16 files changed

Lines changed: 191 additions & 58 deletions

File tree

excuter/op-mem-ompsimd/src/deepx/op/elementwise.hpp

Lines changed: 13 additions & 4 deletions
@@ -251,10 +251,19 @@ namespace deepx::op
         // verified, 2025-02-19, lipeng
         void forward(mem::Mem &mem) override
         {
-            auto a = mem.gettensor<T>(this->args[0]).get();
-            auto b = mem.getarg<T>(this->args[1]);
-            auto c = mem.gettensor<T>(this->returns[0]).get();
-            deepx::tensorfunc::div(*a, b, *c); // use division directly
+            if (mem.existstensor(this->args[0])){
+                // C = A / b
+                auto A = mem.gettensor<T>(this->args[0]).get();
+                auto b = mem.getarg<T>(this->args[1]);
+                auto C = mem.gettensor<T>(this->returns[0]).get();
+                tensorfunc::div_scalar(*A, b, *C); // use division directly
+            }else{
+                // C = a / B
+                auto a = mem.getarg<T>(this->args[0]);
+                auto B = mem.gettensor<T>(this->args[1]).get();
+                auto C = mem.gettensor<T>(this->returns[0]).get();
+                tensorfunc::div_scalar(a, *B, *C); // use division directly
+            }
         }
 
         // verified, 2025-02-19, lipeng
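The executor now picks a branch by whether args[0] names a tensor stored in Mem. A minimal plain-Python sketch of that dispatch rule (stand-in code, not part of deepx): an argument naming a stored tensor selects C = A / b; anything else is treated as the scalar numerator, giving C = a / B.

    # Stand-in sketch of the mem.existstensor(...) dispatch above.
    def pick_branch(args, stored_tensors):
        if args[0] in stored_tensors:
            return "C = A / b"   # tensor divided by scalar
        return "C = a / B"       # scalar divided by tensor

    print(pick_branch(["T1", "2.0"], {"T1", "T2"}))  # C = A / b
    print(pick_branch(["2.0", "T2"], {"T1", "T2"}))  # C = a / B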

excuter/op-mem-ompsimd/src/deepx/op/opfactory.cpp

Lines changed: 6 additions & 2 deletions
@@ -112,12 +112,16 @@ namespace deepx::op
     }
     //reduce
     void register_reduce(OpFactory &opfactory){
-        opfactory.add_op(Sum<float>());
-        opfactory.add_op(Sum<double>());
         opfactory.add_op(Max<float>());
         opfactory.add_op(Max<double>());
+        opfactory.add_op(Max_scalar<float>());
+        opfactory.add_op(Max_scalar<double>());
         opfactory.add_op(Min<float>());
         opfactory.add_op(Min<double>());
+        opfactory.add_op(Min_scalar<float>());
+        opfactory.add_op(Min_scalar<double>());
+        opfactory.add_op(Sum<float>());
+        opfactory.add_op(Sum<double>());
     }
     int register_all(OpFactory &opfactory){
         register_new(opfactory);

excuter/op-mem-ompsimd/src/deepx/op/reduce.hpp

Lines changed: 25 additions & 4 deletions
@@ -5,6 +5,7 @@
 #include "deepx/tensorfunc/reduce.hpp"
 #include "deepx/tensorfunc/broadcast.hpp"
 #include "deepx/tensorfunc/compare.hpp"
+#include "stdutil/num.hpp"
 
 namespace deepx::op
 {
@@ -82,14 +83,24 @@ namespace deepx::op
 
         void forward(mem::Mem &mem) override{
             auto A=mem.gettensor<T>(this->args[0]);
-            auto b=mem.getarg<T>(this->args[1]);
+            T b;
+            if (!is_float(this->args[1])){
+                b=mem.getarg<T>(this->args[1]);
+            }else{
+                b=T(atof(this->args[1].c_str()));
+            }
             auto output=mem.gettensor<T>(this->returns[0]);
             deepx::tensorfunc::max(*A, b, *output);
         }
 
         void backward(mem::Mem &mem) override{
             auto A=mem.gettensor<T>(this->args[0]);
-            auto b=mem.getarg<T>(this->args[1]);
+            T b;
+            if (!is_float(this->args[1])){
+                b=mem.getarg<T>(this->args[1]);
+            }else{
+                b=T(atof(this->args[1].c_str()));
+            }
             auto A_grad=mem.gettensor<T>(this->args_grad[0]);
             auto output_grad=mem.gettensor<T>(this->returns_grad[0]);
             deepx::tensorfunc::max_grad(*A, b, *A_grad, *output_grad);
@@ -139,14 +150,24 @@ namespace deepx::op
         }
         void forward(mem::Mem &mem) override{
             auto A=mem.gettensor<T>(this->args[0]);
-            auto b=mem.getarg<T>(this->args[1]);
+            T b;
+            if (!is_float(this->args[1])){
+                b=mem.getarg<T>(this->args[1]);
+            }else{
+                b=T(atof(this->args[1].c_str()));
+            }
             auto output=mem.gettensor<T>(this->returns[0]);
             deepx::tensorfunc::min(*A, b, *output);
         }
 
         void backward(mem::Mem &mem) override{
             auto A=mem.gettensor<T>(this->args[0]);
-            auto b=mem.getarg<T>(this->args[1]);
+            T b;
+            if (!is_float(this->args[1])){
+                b=mem.getarg<T>(this->args[1]);
+            }else{
+                b=T(atof(this->args[1].c_str()));
+            }
             auto A_grad=mem.gettensor<T>(this->args_grad[0]);
             auto output_grad=mem.gettensor<T>(this->returns_grad[0]);
             deepx::tensorfunc::min_grad(*A, b, *A_grad, *output_grad);
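The reason for the is_float() branch is visible in the Python front end of this same commit: relu() sends DeepxIR("max_scalar", ..., [t._node.name, 0], ...), so args[1] can arrive either as a numeric literal or as the name of a stored argument. A plain-Python sketch of that resolution rule (is_float itself lives in stdutil/num.hpp and is not shown here):

    # Stand-in for the literal-vs-named-argument handling above.
    def resolve_scalar(arg, named_args):
        try:
            return float(arg)        # numeric literal, e.g. the "0" sent by relu()
        except ValueError:
            return named_args[arg]   # otherwise look up the named argument

    print(resolve_scalar("0", {}))                          # 0.0
    print(resolve_scalar("threshold", {"threshold": 0.5}))  # 0.5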

excuter/op-mem-ompsimd/src/deepx/tensorfunc/elementwise.hpp

Lines changed: 47 additions & 2 deletions
@@ -506,6 +506,8 @@ namespace deepx::tensorfunc
         }
     }
 
+    //div
+    // C = A / B
     template <typename T>
     void div(const Tensor<T> &A, const Tensor<T> &B, Tensor<T> &C)
     {
@@ -685,9 +687,10 @@ namespace deepx::tensorfunc
         }
     }
 
-
+    //div_scalar
+    // C = A / value
     template <typename T>
-    void div(const Tensor<T> &input, const T value, Tensor<T> &output)
+    void div_scalar(const Tensor<T> &input, const T value, Tensor<T> &output)
     {
         if (input.shape == output.shape)
         {
@@ -726,6 +729,48 @@ namespace deepx::tensorfunc
         }
     }
 
+    //div_scalar
+    // C = value / A
+    template <typename T>
+    void div_scalar(const T value, const Tensor<T> &t, Tensor<T> &output)
+    {
+        if (t.shape == output.shape)
+        {
+            output.shape.rangeParallel(output.shape.dim - 1, [&t, &output, &value](int i)
+            {
+                int shape_last = output.shape[-1];
+                const ScalableTag<T> tag;
+                const size_t lanes = Lanes(tag);
+                size_t j = 0;
+
+                // 1. handle the leading unaligned elements
+                while (j < shape_last && !IsAligned(tag, t.data + i + j)) {
+                    output.data[i + j] = value / t.data[i + j];
+                    ++j;
+                }
+
+                // 2. handle the aligned middle part with SIMD
+                size_t aligned_end = shape_last - (shape_last % lanes);
+                for (; j + lanes <= aligned_end; j += lanes)
+                {
+                    auto vec = Load(tag, t.data + i + j);
+                    auto scalar = Set(tag, value);
+                    auto vec_result = Div(scalar, vec);
+                    Store(vec_result, tag, output.data + i + j);
+                }
+
+                // 3. handle the remaining tail elements
+                for (; j < shape_last; j++)
+                {
+                    output.data[i + j] = value / t.data[i + j];
+                } });
+        }
+        else
+        {
+            throw std::invalid_argument("shape mismatch");
+        }
+    }
+
     template <typename T>
     void sqrt(const Tensor<T> &input, Tensor<T> &output)
     {
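Ignoring the SIMD and alignment machinery, the new overload simply writes value / t[i] into every output slot. A tiny plain-Python reference of that semantics (not deepx code), useful as a cross-check against the vectorized kernel:

    # Reference behaviour of div_scalar(value, t, output): out[i] = value / t[i].
    def div_scalar_ref(value, t):
        return [value / x for x in t]

    print(div_scalar_ref(1.0, [2.0, 4.0, 5.0]))  # [0.5, 0.25, 0.2]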

front/py/deepx/nn/functional/__init__.py

Lines changed: 2 additions & 2 deletions
@@ -2,15 +2,15 @@
 from .new import newtensor
 from .print import printtensor
 from .matmul import matmul
-from .init import constant,full,zeros,ones,arange,rand,randn,eye
+from .init import constant,full,zeros,ones,uniform_,arange,rand,randn,eye
 from .reduce import max,min,sum,prod,mean
 from .transpose import transpose
 from .activite import relu
 
 __all__ = [
     "newtensor",
     "printtensor",
-    "constant","full","zeros","ones","arange","rand","randn","eye",
+    "constant","full","zeros","ones","uniform_","arange","rand","randn","eye",
     "add","sub","mul","div","clamp",
     "matmul",
     "max","min","sum","prod","mean",

front/py/deepx/nn/functional/activite.py

Lines changed: 8 additions & 1 deletion
@@ -9,4 +9,11 @@ def relu(t: Tensor,inplace:bool=False)->Tensor:
     ir=DeepxIR("max_scalar",t.dtype,[t._node.name,0], [out._node.name])
     send(ir)
     return out
-
+
+# math formula: σ(x) = 1 / (1 + exp(-x))
+def sigmoid(t: Tensor,inplace:bool=False)->Tensor:
+    out=t
+    if not inplace:
+        out=Tensor(shape=t.shape, dtype=t.dtype, device=t.device)
+    out=1/(1+(t*(-1)).exp())
+    return out
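The new sigmoid() is built purely from existing tensor operations, mirroring σ(x) = 1 / (1 + exp(-x)). A plain-Python numeric check of that formula (independent of the Tensor operator overloads it relies on):

    import math

    # Reference values for the composition used in sigmoid() above.
    def sigmoid_ref(x):
        return 1.0 / (1.0 + math.exp(-x))

    print(sigmoid_ref(0.0))             # 0.5
    print(round(sigmoid_ref(2.0), 4))   # 0.8808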

front/py/deepx/nn/functional/elementwise.py

Lines changed: 15 additions & 4 deletions
@@ -58,7 +58,6 @@ def sub(
     else:
         _A_b_elementwiseop_C(a,b,"sub_scalar",out)
 
-
 #mul
 OpNode.register("mul")
 OpNode.register("mul_scalar")
@@ -78,15 +77,15 @@ def mul(
 OpNode.register("div_scalar")
 
 def div(
-    a:Tensor,
+    a: Optional[Union[Tensor, float, int]] = None,
     b: Optional[Union[Tensor, float, int]] = None,
     out:Tensor=None):
-    if isinstance(b,Tensor):
+    if isinstance(b,Tensor) and isinstance(a,Tensor):
         _A_B_elementwiseop_C(a,b,"div",out)
     else:
         _A_b_elementwiseop_C(a,b,"div_scalar",out)
 
-
+
 #clamp
 OpNode.register("clamp")
 def clamp(
@@ -107,6 +106,18 @@ def clamp(
     varir=DeepxIR("clamp", a.dtype, [a.node.name,min,max], [out.node.name])
     send(str(varir))
 
+#exp
+OpNode.register("exp")
+def exp(
+    a:Tensor,
+    out:Tensor=None):
+    opnode = a.graph.add_op("exp")
+    opnode.add_input(a.node)
+    out.node.add_input(opnode)
+    if a.graph.eager:
+        ir=DeepxIR("exp", a.dtype, [a.node.name], [out.node.name])
+        send(ir)
+
 # OpNode.register("ReLU", 101)
 # OpNode.register("Placeholder", 102)
 # OpNode.register("Neg", 103)

front/py/deepx/nn/functional/init.py

Lines changed: 23 additions & 1 deletion
@@ -1,10 +1,16 @@
 from typing import Optional
 
-from deepx.tensor import Tensor
+from deepx import Tensor
+from deepx.autograd.graph import OpNode
 from deepx.nn.deepxir import DeepxIR
 from deepx.scheduler import send
 
 def constant(t:Tensor, fill_value):
+    opnode = t.graph.add_op("constant")
+    opnode.add_input(t.node)
+    argnode=t.graph.add_var('',fill_value)
+    opnode.add_input(argnode)
+    t.node.add_input(opnode)
     if t.graph.eager:
         ir=DeepxIR("constant", t.dtype, [fill_value], [t.node.name])
         send(ir)
@@ -23,6 +29,22 @@ def zeros(*shape, dtype=None, device=None):
 def ones(*size, dtype=None, device=None):
     return full(*size, fill_value=1, dtype=dtype, device=device)
 
+OpNode.register("uniform")
+def uniform_(t:Tensor,low=0, high=1)->Tensor:
+    if low >= high:
+        raise ValueError(f"low({low}) must be less than high({high})")
+    opnode = t.graph.add_op("uniform")
+    opnode.add_input(t.node)
+    arglow=t.graph.add_var('',low)
+    arghigh=t.graph.add_var('',high)
+    opnode.add_input(arglow)
+    opnode.add_input(arghigh)
+    t.node.add_input(opnode)
+    if t.graph.eager:
+        ir=DeepxIR("uniform", t.dtype, [low, high], [t.node.name])
+        send(ir)
+    return t
+
 def rand(*size, dtype=None, device=None):
     #TODO
     pass
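A short usage sketch for the new in-place initializer, assuming an eager graph, a running executor, and that zeros() (defined in this same file) returns a Tensor on the default device:

    from deepx.nn.functional import zeros, uniform_

    w = zeros(3, 4)                    # assumed to allocate a float tensor
    uniform_(w, low=-0.1, high=0.1)    # fills w in place via DeepxIR("uniform", ...)
    # low must be strictly less than high; otherwise ValueError is raised.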

front/py/deepx/nn/modules/activation.py

Lines changed: 9 additions & 28 deletions
@@ -4,33 +4,6 @@
 
 #copy from pytorch
 class ReLU(Module):
-    r"""Applies the rectified linear unit function element-wise.
-
-    :math:`\text{ReLU}(x) = (x)^+ = \max(0, x)`
-
-    Args:
-        inplace: can optionally do the operation in-place. Default: ``False``
-
-    Shape:
-        - Input: :math:`(*)`, where :math:`*` means any number of dimensions.
-        - Output: :math:`(*)`, same shape as the input.
-
-    .. image:: ../scripts/activation_images/ReLU.png
-
-    Examples::
-
-        >>> m = nn.ReLU()
-        >>> input = torch.randn(2)
-        >>> output = m(input)
-
-
-      An implementation of CReLU - https://arxiv.org/abs/1603.05201
-
-        >>> m = nn.ReLU()
-        >>> input = torch.randn(2).unsqueeze(0)
-        >>> output = torch.cat((m(input), m(-input)))
-    """
-
     __constants__ = ["inplace"]
     inplace: bool
 
@@ -43,4 +16,12 @@ def forward(self, input: Tensor) -> Tensor:
 
     def extra_repr(self) -> str:
         inplace_str = "inplace=True" if self.inplace else ""
-        return inplace_str
+        return inplace_str
+
+class Sigmoid(Module):
+    def __init__(self):
+        super().__init__()
+
+    def forward(self, input: Tensor) -> Tensor:
+        return F.sigmoid(input)
+
front/py/deepx/nn/modules/module.py

Lines changed: 6 additions & 4 deletions
@@ -21,10 +21,12 @@ def _generate_default_name(self) -> str:
         return f"{base_name}_{count}"
 
     def __setattr__(self, name: str, value: Any) -> None:
-        if isinstance(value, Module):
-            self.register_module(name, value)
-        elif isinstance(value, Tensor):
-            self.register_parameter(name, value)
+        if not name.startswith('_'):
+            if isinstance(value, Module):
+                self.register_module(name, value)
+            elif isinstance(value, Tensor):
+                self.register_parameter(name, value)
+        # set the attribute via the parent class to avoid recursion
         super().__setattr__(name, value)
 
     def register_module(self, name: str, module: Optional['Module']) -> None:
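The underscore guard keeps internal bookkeeping attributes (which are themselves assigned through __setattr__) from being routed into register_module()/register_parameter() before the containers backing them exist. A stand-in sketch of the behaviour, not the real Module class:

    # Names starting with "_" are set silently; public names would be registered.
    class _Sketch:
        def __setattr__(self, name, value):
            if not name.startswith('_'):
                print(f"would register {name!r}")
            super().__setattr__(name, value)

    s = _Sketch()
    s._modules = {}       # internal state: no registration attempt
    s.weight = object()   # public attribute: prints "would register 'weight'"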
