array2d
diff --git a/‎excuter/op-mem-cuda/src/deepx/tensorfunc/cuda.hpp‎
Lines changed: 39 additions & 0 deletions b/‎excuter/op-mem-cuda/src/deepx/tensorfunc/cuda.hpp‎
Lines changed: 39 additions & 0 deletions
diff --git a/‎excuter/op-mem-cuda/src/deepx/tensorfunc/elementwise_cublas_basic.hpp‎
Lines changed: 2 additions & 23 deletions b/‎excuter/op-mem-cuda/src/deepx/tensorfunc/elementwise_cublas_basic.hpp‎
Lines changed: 2 additions & 23 deletions
diff --git a/‎excuter/op-mem-cuda/src/deepx/tensorfunc/matmul.hpp‎
Lines changed: 0 additions & 56 deletions b/‎excuter/op-mem-cuda/src/deepx/tensorfunc/matmul.hpp‎
Lines changed: 0 additions & 56 deletions
diff --git a/‎excuter/op-mem-cuda/src/deepx/tensorfunc/matmul_cublas.hpp‎
Lines changed: 63 additions & 0 deletions b/‎excuter/op-mem-cuda/src/deepx/tensorfunc/matmul_cublas.hpp‎
Lines changed: 63 additions & 0 deletions
@@ -0,0 +1,39 @@
+#ifndef DEEPX_TENSORFUNC_CUDA_HPP
+#define DEEPX_TENSORFUNC_CUDA_HPP
+#include <cuda_fp16.h> // for half-precision support
+#include <cuda_bf16.h>
+#include <cublas_v2.h>
+#include <cstdint>
+#include <stdexcept>
+
+#include "deepx/tensor.hpp"
+#include "authors.hpp"
+
+namespace deepx::tensorfunc
+{
+    // RAII owner of a cuBLAS library handle.
+    //
+    // The constructor creates the handle with cublasCreate and throws
+    // std::runtime_error if that fails; the destructor releases it with
+    // cublasDestroy. The type is non-copyable: a copy would hold the same raw
+    // handle and destroy it a second time.
+    class CublasHandle
+    {
+    public:
+        CublasHandle()
+        {
+            if (cublasCreate(&handle_) != CUBLAS_STATUS_SUCCESS)
+            {
+                throw std::runtime_error("Failed to create cuBLAS handle");
+            }
+        }
+
+        ~CublasHandle()
+        {
+            if (handle_)
+                cublasDestroy(handle_);
+        }
+
+        // Copying would lead to a double cublasDestroy on the shared handle.
+        CublasHandle(const CublasHandle &) = delete;
+        CublasHandle &operator=(const CublasHandle &) = delete;
+
+        // Returns the raw handle; ownership stays with this object.
+        cublasHandle_t get() const { return handle_; }
+
+    private:
+        // Null until cublasCreate succeeds, so the destructor check is well defined.
+        cublasHandle_t handle_ = nullptr;
+    };
+
+}
+
+#endif
@@ -11,32 +11,11 @@
 #include "deepx/tensorfunc/elementwise.hpp"
 #include "deepx/tensorfunc/elementwise_basic.hpp"
 #include "deepx/tensorfunc/authors.hpp"
+#include "deepx/tensorfunc/cuda.hpp"
 namespace deepx::tensorfunc
 {
     // cuBLAS handle管理
-    class CublasHandle
-    {
-    public:
-        CublasHandle()
-        {
-            if (cublasCreate(&handle_) != CUBLAS_STATUS_SUCCESS)
-            {
-                throw std::runtime_error("Failed to create cuBLAS handle");
-            }
-        }
-
-        ~CublasHandle()
-        {
-            if (handle_)
-                cublasDestroy(handle_);
-        }
-
-        cublasHandle_t get() { return handle_; }
-
-    private:
-        cublasHandle_t handle_;
-    };
-
+   
     // cublas作者的特化实现
     template <>
     struct _author_add<cublas>
 
@@ -0,0 +1,63 @@
+#ifndef DEEPX_TENSORFUNC_MATMUL_CUBLAS_HPP
+#define DEEPX_TENSORFUNC_MATMUL_CUBLAS_HPP
+ 
+#include "deepx/tensor.hpp"
+#include "authors.hpp"
+
+namespace deepx::tensorfunc
+{
+  
+   // matmulDispatcher specialization selected by the `cublas` author tag.
+   //
+   // NOTE(review): despite the tag, the body is a plain triple loop that
+   // dereferences A.data / B.data / C.data directly and makes no cuBLAS call.
+   // Confirm that Tensor::data is host-accessible in this backend.
+   template <typename T>
+    struct matmulDispatcher<cublas,T>
+    {
+        // Multiplies the trailing two dimensions of A and B into C, once for
+        // every index vector that C.shape.rangeParallel hands to the callback
+        // (presumably one per leading/batch position — TODO confirm).
+        //
+        // Throws std::invalid_argument when check_matmul_shape rejects the
+        // pair (A.shape, B.shape). C's shape is not validated here.
+        static void matmul(const Tensor<T> &A, const Tensor<T> &B, Tensor<T> &C)
+        {
+            if (!check_matmul_shape(A.shape, B.shape))
+            {
+                throw std::invalid_argument("A.shape could not matmul with B.shape");
+            }
+            // The matrix extents do not depend on the batch index; read them once.
+            const int m = A.shape[-2];
+            const int k = A.shape[-1];
+            const int n = B.shape[-1];
+            C.shape.rangeParallel(C.shape.dim - 2, [&](const std::vector<int> &indices)
+                                  {
+                        // Flat offsets of the current m*k, k*n and m*n matrices.
+                        const int aIdx = A.shape.linearat(indices);
+                        const int bIdx = B.shape.linearat(indices);
+                        const int cIdx = C.shape.linearat(indices);
+                        for (int i = 0; i < m; i++) {
+                            for (int j = 0; j < n; j++) {
+                                T sum = 0;
+                                for (int l = 0; l < k; l++) {
+                                    sum += A.data[aIdx + i * k + l] * B.data[bIdx + l * n + j];
+                                }
+                                C.data[cIdx + i * n + j] = sum;
+                            }
+                        } });
+        }
+    };
+
+  // float specialization of matmul.
+  // Placeholder: the body is empty, so a call performs no computation and
+  // leaves `c` unmodified. TODO: implement.
+  // Marked inline because an explicit specialization defined in a header is an
+  // ordinary function and would otherwise be multiply defined when the header
+  // is included from more than one translation unit.
+  template <>
+  inline void matmul<float>(const Tensor<float> &a, const Tensor<float> &b, Tensor<float> &c)
+  {
+  }
+
+  // double specialization of matmul.
+  // Placeholder: the body is empty, so a call performs no computation and
+  // leaves `c` unmodified. TODO: implement.
+  // Marked inline because an explicit specialization defined in a header is an
+  // ordinary function and would otherwise be multiply defined when the header
+  // is included from more than one translation unit.
+  template <>
+  inline void matmul<double>(const Tensor<double> &a, const Tensor<double> &b, Tensor<double> &c)
+  {
+  }
+  // Generic matmuladd for element type T.
+  // Placeholder: the body is empty, so a call performs no computation and
+  // leaves `c` unmodified.
+  // NOTE(review): presumably intended to compute c = alpha * (a x b) + beta * c,
+  // judging by the name and parameters — TODO confirm and implement.
+  template <typename T>
+  void matmuladd(const Tensor<T> &a, const Tensor<T> &b, const T &alpha, const T &beta, Tensor<T> &c)
+  {
+  }
+
+  // float specialization of matmuladd.
+  // Placeholder: the body is empty, so a call performs no computation and
+  // leaves `c` unmodified. TODO: implement.
+  // Marked inline because an explicit specialization defined in a header is an
+  // ordinary function and would otherwise be multiply defined when the header
+  // is included from more than one translation unit.
+  template <>
+  inline void matmuladd<float>(const Tensor<float> &a, const Tensor<float> &b, const float &alpha, const float &beta, Tensor<float> &c)
+  {
+  }
+
+  // double specialization of matmuladd.
+  // Placeholder: the body is empty, so a call performs no computation and
+  // leaves `c` unmodified. TODO: implement.
+  // Marked inline because an explicit specialization defined in a header is an
+  // ordinary function and would otherwise be multiply defined when the header
+  // is included from more than one translation unit.
+  template <>
+  inline void matmuladd<double>(const Tensor<double> &a, const Tensor<double> &b, const double &alpha, const double &beta, Tensor<double> &c)
+  {
+  }
+}
+#endif // DEEPX_TENSORFUNC_MATMUL_CUBLAS_HPP