We read every piece of feedback, and take your input very seriously.
To see all available qualifiers, see our documentation.
1 parent 73addbb commit 1f1e9a9 Copy full SHA for 1f1e9a9
1 file changed
excuter/op-mem-cuda/src/deepx/dtype_cuda.hpp
@@ -3,6 +3,8 @@
3
4
#include <cuda_fp16.h>
5
#include <cuda_bf16.h>
6
+#include <cuda_fp8.h>
7
+#include <cuda_fp4.h>
8
9
#include "deepx/dtype.hpp"
10
@@ -34,6 +36,27 @@ namespace deepx
34
36
else
35
37
return Precision::Any;
38
}
39
+
40
41
+ template <> // explicit specialization of to_tensor_type for the BFloat16 precision tag
42
+ struct to_tensor_type<PrecisionWrapper<Precision::BFloat16>> {
43
+ using type = nv_bfloat16; // element type exposed for this precision
44
+ };
45
46
+ template <> // explicit specialization of to_tensor_type for the Float16 precision tag
47
+ struct to_tensor_type<PrecisionWrapper<Precision::Float16>> {
48
+ using type = half; // element type exposed for this precision
49
+ };
50
51
+ template <> // explicit specialization of to_tensor_type for the Float8E5M2 precision tag
52
+ struct to_tensor_type<PrecisionWrapper<Precision::Float8E5M2>> {
53
+ using type = __nv_fp8_e5m2; // element type exposed for this precision
54
+ };
55
56
+ template <> // explicit specialization of to_tensor_type for the Float8e4m3 precision tag
57
+ struct to_tensor_type<PrecisionWrapper<Precision::Float8e4m3>> { // NOTE(review): casing differs from Float8E5M2 - confirm the enumerator name in deepx/dtype.hpp
58
+ using type = __nv_fp8_e4m3; // element type exposed for this precision
59
+ }; // the terminating ';' is required after a struct definition
60
61
62
#endif // DEEPX_DTYPE_CUDA_HPP
0 commit comments