Skip to content

Commit 1a67ac1

Browse files
committed
py: resolve the naming scheme for Tensors inside Modules (module.Tensor name system) [original: py:module.Tensor的name体系解决]
1 parent dc9a7df commit 1a67ac1

File tree

13 files changed

+641
-315
lines changed

13 files changed

+641
-315
lines changed

front/py/deepx/nn/functional/activite.py

Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -36,3 +36,36 @@ def sigmoid(
3636
from .elementwise import exp
3737
outtensor=1/(1+(t*(-1)).exp())
3838
return outtensor
39+
40+
def swiglu(
        x: Tensor,
        w: Tensor,              # first projection matrix (gate branch)
        v: Tensor,              # second projection matrix (value branch)
        beta: float = 1.0,      # scaling factor of the Swish gate
        out: Union[Tensor, str] = '') -> Tensor:
    """SwiGLU activation: Swish_beta(x @ w) * (x @ v).

    Args:
        x: input tensor
        w: first projection matrix (gated through Swish)
        v: second projection matrix (multiplied element-wise)
        beta: beta parameter of the Swish gate, defaults to 1.0
        out: graph name for the result when given as a string.
             NOTE(review): a Tensor passed as ``out`` is currently
             ignored — the result is always a freshly built tensor;
             confirm whether in-place output should be supported here.

    Returns:
        Tensor holding Swish(x @ w) * (x @ v).
    """
    # The two linear projections of the input.
    xw = x @ w
    xv = x @ v

    # Swish(xw) = xw * sigmoid(beta * xw).
    # Use `* (-1)` instead of unary `-` for consistency with sigmoid()
    # above — Tensor is not shown to implement __neg__, only scalar mul.
    beta_xw = xw * beta
    sigmoid_beta_xw = 1 / (1 + (beta_xw * (-1)).exp())
    swish = xw * sigmoid_beta_xw

    # Element-wise gating of the second projection.
    result = swish * xv

    # Register the result in the graph under the requested name.
    if isinstance(out, str):
        result.addtograph(out)

    return result

front/py/deepx/nn/functional/elementwise.py

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -165,6 +165,29 @@ def exp(
165165
ir=DeepxIR("exp", a.dtype, [a.node.name], [outtensor.node.name])
166166
send(ir)
167167

168+
#pow
OpNode.register("pow")
def pow(
        a:Tensor,
        b:Union[float,int],
        out:Union[Tensor,str]=''):
    """Element-wise power: raise every element of ``a`` to the scalar ``b``.

    Args:
        a: base tensor
        b: scalar exponent
        out: either a name for a freshly created output tensor (str),
             or an existing Tensor to receive the op node as input.

    Returns:
        The output tensor (new when ``out`` is a str, else ``out`` itself).
    """
    g=a.graph
    opnode = g.add_op("pow")
    opnode.add_input(a.node)
    # The scalar exponent is recorded as an anonymous graph variable.
    opnode.add_input(g.add_var('',b))

    outtensor=None
    if isinstance(out,str):
        outtensor=Tensor(shape=a.shape, dtype=a.dtype, device=a.device)
        outtensor.addtograph(out)
    else:
        outtensor=out
    outtensor.node.add_input(opnode)
    if a.graph.eager:
        # NOTE(review): args mix a node name (str) and the raw scalar b —
        # presumably DeepxIR serializes scalars itself; verify.
        ir=DeepxIR("pow", a.dtype, [a.node.name,b], [outtensor.node.name])
        send(ir)
    # Bug fix: the original fell off the end and returned None, so
    # Tensor.pow()/pow_() (which return this value) produced None.
    return outtensor
190+
168191
#sqrt
169192
OpNode.register("sqrt")
170193
def sqrt(

front/py/deepx/nn/modules/linear.py

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,6 @@ def __init__(
2020
self.in_features = in_features
2121
self.out_features = out_features
2222
self.weight = Tensor(shape=(out_features, in_features),dtype=dtype)
23-
2423
if bias:
2524
self.bias = Tensor(shape=(out_features,),dtype=dtype)
2625
else:

front/py/deepx/tensor/elementwise.py

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -139,6 +139,22 @@ def exp_(self):
139139
exp_func(self,self)
140140
return self
141141

142+
@tensor_method
def pow(self,
        b:Union[float,int],
        out:Union[Tensor,str]=''):
    """Element-wise power ``self ** b`` via the functional pow op.

    Args:
        b: scalar exponent.
        out: output tensor, or a graph name for a new one.
    """
    from deepx.nn.functional import pow as pow_func
    return pow_func(self, b, out)
149+
150+
@tensor_method
def pow_(self,
        b:Union[float,int]):
    """In-place element-wise power: ``self = self ** b``.

    Args:
        b: scalar exponent.

    Returns:
        self, matching the in-place convention used by exp_() above.
    """
    from deepx.nn.functional import pow as pow_func
    pow_func(self,b,self)
    # exp_() returns self directly; do the same so the chained result is
    # well-defined even if the functional op returns None.
    return self
156+
157+
142158
@tensor_method
143159
def sqrt(self,out:Optional[Union[str]]=None):
144160
result = Tensor(dtype=self.dtype,shape=self.shape)
Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,18 @@
1+
from deepx.nn.modules import Module
2+
from deepx import Tensor,ones,rsqrt
3+
class LlamaRMSNorm(Module):
    """Root-mean-square layer normalization as used by Llama.

    LlamaRMSNorm is equivalent to T5LayerNorm: it rescales by the RMS of
    the last dimension (no mean subtraction) and applies a learned gain.
    """

    def __init__(self, hidden_size, eps=1e-6):
        """Create the norm with a per-channel gain of ones.

        Args:
            hidden_size: size of the normalized (last) dimension.
            eps: small constant added to the variance for stability.
        """
        super().__init__()
        self.weight = ones(hidden_size)      # learnable gain, init to 1
        self.variance_epsilon = eps          # numerical-stability term

    def forward(self, hidden_states:Tensor):
        # Mean of squares over the last axis, kept for broadcasting.
        mean_square = hidden_states.pow(2).mean(-1, keepdim=True)
        # Scale inputs by 1/sqrt(ms + eps), then apply the gain.
        normed = hidden_states * rsqrt(mean_square + self.variance_epsilon)
        return self.weight * normed

    def extra_repr(self):
        return f"{tuple(self.weight.shape)}, eps={self.variance_epsilon}"

front/py/examples/2_ir/5_reduce.py

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -5,21 +5,21 @@
55
t.addtograph("t")
66
t.uniform_(low=-1,high=1)
77
print((t))
8-
s=sum(t,dims=[0,2],out="s")
8+
s=sum(t,dim=[0,2],out="s")
99
print(s)
1010

1111

1212
t1=ones(4,5,6,name="t1")
1313
print(t1)
14-
t2=sum(t1,dims=[0,1],out='t2')
14+
t2=sum(t1,dim=[0,1],out='t2')
1515
print(t2)
1616

1717
t3=arange(0,120,1,name="t3")
1818
t3.reshape_(4,5,6)
1919
print(t3)
2020

21-
t3_mean=mean(t3,dims=[0,1],out='t3_mean')
21+
t3_mean=mean(t3,dim=[0,1],out='t3_mean')
2222
print(t3_mean)
2323

2424
gviz=t.graph.to_dot()
25-
gviz.render('sum.dot',format='svg')
25+
gviz.render('reduce.dot',format='svg')

front/py/examples/2_ir/math.dot

Lines changed: 38 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,38 @@
1+
// Computational Graph
// Generated artifact (examples/2_ir/math.dot) — node keys are opaque
// numeric ids (presumably Python object ids; regenerated on each run).
// Box styles: skyblue = tensors, gray = ops, orange = scalar vars.
digraph {
	rankdir=TB
	node [shape=record]
	// --- nodes ---
	128076282530160 [label="tensor_1
(2, 3)" color=skyblue fillcolor=aliceblue fontname="Sans-Serif" labeljust=l shape=box style=filled]
	128076282537744 [label=constant color=darkslategray fillcolor=lightgray fontname="Courier Bold" labeljust=l shape=box style=filled]
	128076282538512 [label="var_1
1" color=orange fillcolor=moccasin fontname="Sans-Serif" labeljust=l shape=box style=filled]
	128076283260656 [label="tensor_2
(2, 3)" color=skyblue fillcolor=aliceblue fontname="Sans-Serif" labeljust=l shape=box style=filled]
	128076283260752 [label=add_scalar color=darkslategray fillcolor=lightgray fontname="Courier Bold" labeljust=l shape=box style=filled]
	128076283260704 [label="var_2
3" color=orange fillcolor=moccasin fontname="Sans-Serif" labeljust=l shape=box style=filled]
	128076283260848 [label="tensor_3
(2, 3)" color=skyblue fillcolor=aliceblue fontname="Sans-Serif" labeljust=l shape=box style=filled]
	128076283261088 [label=sqrt color=darkslategray fillcolor=lightgray fontname="Courier Bold" labeljust=l shape=box style=filled]
	128076283261184 [label="tensor_4
(2, 3)" color=skyblue fillcolor=aliceblue fontname="Sans-Serif" labeljust=l shape=box style=filled]
	128076283261424 [label=sqrt color=darkslategray fillcolor=lightgray fontname="Courier Bold" labeljust=l shape=box style=filled]
	128076283261664 [label="tensor_5
(2, 3)" color=skyblue fillcolor=aliceblue fontname="Sans-Serif" labeljust=l shape=box style=filled]
	128076283261760 [label=rdiv_scalar color=darkslategray fillcolor=lightgray fontname="Courier Bold" labeljust=l shape=box style=filled]
	128076283261712 [label="var_3
1" color=orange fillcolor=moccasin fontname="Sans-Serif" labeljust=l shape=box style=filled]
	// --- edges (op -> produced tensor, inputs -> op) ---
	128076282537744 -> 128076282530160 [arrowsize=0.8 color=gray40 penwidth=1.2]
	128076282538512 -> 128076282537744 [arrowsize=0.8 color=gray40 penwidth=1.2]
	128076283260752 -> 128076283260656 [arrowsize=0.8 color=gray40 penwidth=1.2]
	128076282530160 -> 128076283260752 [arrowsize=0.8 color=gray40 penwidth=1.2]
	128076283260704 -> 128076283260752 [arrowsize=0.8 color=gray40 penwidth=1.2]
	128076283261088 -> 128076283260848 [arrowsize=0.8 color=gray40 penwidth=1.2]
	128076283260656 -> 128076283261088 [arrowsize=0.8 color=gray40 penwidth=1.2]
	128076283261424 -> 128076283261184 [arrowsize=0.8 color=gray40 penwidth=1.2]
	128076283260656 -> 128076283261424 [arrowsize=0.8 color=gray40 penwidth=1.2]
	128076283261760 -> 128076283261664 [arrowsize=0.8 color=gray40 penwidth=1.2]
	128076283261712 -> 128076283261760 [arrowsize=0.8 color=gray40 penwidth=1.2]
	128076283261184 -> 128076283261760 [arrowsize=0.8 color=gray40 penwidth=1.2]
}
Lines changed: 171 additions & 0 deletions
Loading

0 commit comments

Comments
 (0)