Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
147 changes: 74 additions & 73 deletions doc/excuter/op-mem-cuda/list.md

Large diffs are not rendered by default.

143 changes: 72 additions & 71 deletions doc/excuter/op-mem-ompsimd/list.md

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion excuter/cpp-common/src/deepx/shape.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -66,7 +66,7 @@ namespace deepx

// rangeParallel 支持omp,但omp内无需线程local变量
void rangeParallel(int dimCount, std::function<void(const std::vector<int> &indices)> func) const;
void rangeParallel(int dimCount, std::function<void(const int idx_linear)> func) const;
void rangeElementwiseParallel( std::function<void(const int idx_linear,const int idx_linear_end)> func) const;
void rangeParallel(int dimCount, std::function<void(const int idx_linear, const std::vector<int> &indices)> func) const;

// 支持omp,但omp内需要线程local变量
Expand Down
29 changes: 18 additions & 11 deletions excuter/cpp-common/src/deepx/shape_range.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
#include <vector>
#include <functional>
#include <any>
#include <thread>

#include <omp.h>
#include "deepx/shape.hpp"
Expand Down Expand Up @@ -113,18 +114,24 @@ namespace deepx
}
}
}
void Shape::rangeParallel(int dimCount, std::function<void(const int idx_linear)> func) const
{
dimCount = checkdim(dimCount, dim());
int stride = checkStride(dimCount, shape);

// 计算总循环次数
int total = size / stride;

#pragma omp parallel for
for (int idx = 0; idx < total; idx++)
// Split the flat element range [0, size) into contiguous chunks and invoke
// func(idx_linear, idx_linear_end) on each chunk, in parallel via OpenMP.
// Chunks are at least `minblock` elements so per-chunk overhead stays small.
void Shape::rangeElementwiseParallel(std::function<void(const int idx_linear, const int idx_linear_end)> func) const
{
    // Nothing to do for an empty shape; also prevents num_threads(0) below.
    if (size <= 0)
    {
        return;
    }
    // hardware_concurrency() may legally return 0 when the value is not
    // computable — fall back to one thread instead of dividing by zero.
    int num_threads = static_cast<int>(std::thread::hardware_concurrency());
    if (num_threads <= 0)
    {
        num_threads = 1;
    }
    int alignblock = size / num_threads;
    const int minblock = 256; // minimum chunk size worth dispatching to a thread
    if (alignblock < minblock)
    {
        alignblock = minblock;
        // Recompute the thread count for the larger chunks; clamp to >= 1 —
        // num_threads(0) is invalid for OpenMP (happens whenever size < minblock).
        num_threads = size / alignblock;
        if (num_threads < 1)
        {
            num_threads = 1;
        }
    }
#pragma omp parallel for num_threads(num_threads)
    for (int idx = 0; idx < size; idx += alignblock)
    {
        // Clamp the last chunk so [idx, end) never runs past size.
        int end = idx + alignblock;
        if (end > size)
        {
            end = size;
        }
        func(idx, end);
    }
}

Expand Down
1 change: 1 addition & 0 deletions excuter/cpp-common/src/deepx/tensor.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@ namespace deepx
DeleteFn deleter; // 释放内存

using CopyFn = void (*)(T *, T *, int);
//copyer(src, dest, size)
CopyFn copyer; // 拷贝内存

using SaveFn = void (*)(T *,size_t,const std::string &);
Expand Down
4 changes: 4 additions & 0 deletions excuter/cpp-common/src/deepx/tensorfunc/elementwise.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,10 @@

namespace deepx::tensorfunc
{
//todtype
template <typename T,typename Dtype>
void todtype(const Tensor<T> &input, Tensor<Dtype> &output);

template <typename Author, typename T>
struct addDispatcher
{
Expand Down
7 changes: 3 additions & 4 deletions excuter/cpp-common/src/deepx/tf/tffactory.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -103,15 +103,14 @@ namespace deepx::tf
// 为每个tftype生成一个表格
for (const auto &[tftype, tfs] : tf_by_type) {
ss << "### " << tftype << "\n\n";
ss << "| Operation | Author | Func Def | Math Formula | IR Instruction |\n";
ss << "|-----------|--------|------------|--------------|----------------|\n";
ss << "| Operation | Author | Math Formula | IR Instruction |\n";
ss << "|-----------|--------|--------------|----------------|\n";

for (const auto &tf : tfs) {
ss << "| " << tf->name << " | ";
ss << (tf->metadata.author.empty() ? " none " : tf->metadata.author) << " | ";
ss << tf->to_string(false, true) << " | ";
ss << tf->math_formula() << " | ";
ss << tf->to_string(false, true) << " |\n";
ss << stdutil::escape_markdown(tf->to_string(false, true)) << " |\n";
}

ss << "\n";
Expand Down
42 changes: 41 additions & 1 deletion excuter/cpp-common/src/stdutil/string.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -8,9 +8,49 @@ namespace stdutil
str.erase(str.find_last_not_of(" ") + 1);
}

// Strip every leading and trailing occurrence of the characters in `chars`
// from `str`, in place. A string consisting only of `chars` becomes empty.
void trim(string &str, const string &chars)
{
    const auto first = str.find_first_not_of(chars);
    if (first == string::npos)
    {
        // Nothing but trim characters (or already empty) — result is empty.
        str.clear();
        return;
    }
    const auto last = str.find_last_not_of(chars);
    str = str.substr(first, last - first + 1);
}

// Escape `str` so it can be embedded in generated Markdown — in particular
// inside a table cell — without breaking the layout. Backslash-escapes
// backslashes, quotes and the cell separator '|', and rewrites control
// characters (\n, \t, \r, ...) as visible escape sequences so the cell
// stays on a single line. Returns the escaped copy; `str` is untouched.
string escape_markdown(const string &str)
{
    std::string result;
    result.reserve(str.size()); // at least as long as the input
    for (char c : str)
    {
        switch (c)
        {
        case '\\':
            result += "\\\\";
            break;
        case '\"':
            result += "\\\"";
            break;
        case '\'':
            result += "\\\'";
            break;
        case '|':
            // '|' delimits Markdown table cells; an unescaped one would
            // split the cell in the generated operator tables.
            result += "\\|";
            break;
        case '\n':
            result += "\\n";
            break;
        case '\t':
            result += "\\t";
            break;
        case '\r':
            result += "\\r";
            break;
        case '\b':
            result += "\\b";
            break;
        case '\f':
            result += "\\f";
            break;
        default:
            // Ordinary characters pass through unchanged.
            result += c;
        }
    }
    return result;
}

} // namespace stdutil
1 change: 1 addition & 0 deletions excuter/cpp-common/src/stdutil/string.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@ namespace stdutil
void trimspace(string &str);
void trim(string &str,const string &chars=" \t\n\r\f\v");

string escape_markdown(const string &str);
}


Expand Down
12 changes: 12 additions & 0 deletions excuter/op-mem-cuda/src/client/tfs.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -164,6 +164,18 @@ namespace deepx::tf
// elementwise
void register_elementwise(TfFactory &tffactory)
{
//todtype
tffactory.add_tf(std::make_shared<Todtype>(vector<Param>(
{
Param("a", DataCategory::Tensor, Precision::Any),
}),
vector<Param>(
{
Param("b", DataCategory::Tensor, Precision::Any),
})));


// add
tffactory.add_tf(std::make_shared<Add<miaobyte>>(vector<Param>(
{
Param("a", DataCategory::Tensor, Precision::Any),
Expand Down
Loading
Loading