From 68ed4d5825257f5419d5452e3f9327709a22a708 Mon Sep 17 00:00:00 2001
From: yuchunliu-metax <yuchun.liu@metax-tech.com>
Date: Fri, 12 Sep 2025 16:34:34 +0800
Subject: [PATCH] Add is_custom_device checks to support metax-gpu

---
 cpp/open3d/ml/paddle/PaddleHelper.h                        | 7 ++++---
 cpp/open3d/ml/paddle/continuous_conv/ContinuousConvOps.cpp | 4 ++--
 .../paddle/continuous_conv/ContinuousConvTransposeOps.cpp  | 4 ++--
 cpp/open3d/ml/paddle/misc/BuildSpatialHashTableOps.cpp     | 2 +-
 cpp/open3d/ml/paddle/misc/FixedRadiusSearchOps.cpp         | 2 +-
 cpp/open3d/ml/paddle/misc/InvertNeighborsListOps.cpp       | 3 ++-
 cpp/open3d/ml/paddle/misc/KnnSearchOps.cpp                 | 2 +-
 cpp/open3d/ml/paddle/misc/NmsOps.cpp                       | 2 +-
 cpp/open3d/ml/paddle/misc/RadiusSearchOps.cpp              | 2 +-
 cpp/open3d/ml/paddle/misc/RaggedToDenseOps.cpp             | 2 +-
 cpp/open3d/ml/paddle/misc/ReduceSubarraysSumOps.cpp        | 2 +-
 cpp/open3d/ml/paddle/misc/VoxelPoolingOps.cpp              | 4 ++--
 cpp/open3d/ml/paddle/misc/VoxelizeOps.cpp                  | 2 +-
 cpp/open3d/ml/paddle/sparse_conv/SparseConvOps.cpp         | 4 ++--
 .../ml/paddle/sparse_conv/SparseConvTransposeOps.cpp       | 4 ++--
 15 files changed, 24 insertions(+), 22 deletions(-)

diff --git a/cpp/open3d/ml/paddle/PaddleHelper.h b/cpp/open3d/ml/paddle/PaddleHelper.h
index 17f86e21e..6ef7f5b2f 100644
--- a/cpp/open3d/ml/paddle/PaddleHelper.h
+++ b/cpp/open3d/ml/paddle/PaddleHelper.h
@@ -19,9 +19,10 @@
 #include "paddle/phi/core/allocator.h"
 
 // Macros for checking tensor properties
-#define CHECK_CUDA(x)                                      \
-    do {                                                   \
-        PD_CHECK(x.is_gpu(), #x " must be a CUDA tensor"); \
+#define CHECK_CUDA(x)                                    \
+    do { /* custom devices share the CUDA path */        \
+        PD_CHECK((x).is_gpu() || (x).is_custom_device(), \
+                 #x " must be a CUDA tensor");           \
     } while (0)
 
 // NOTE: The input Tensor will be preprocessed into a contiguous Tensor within
diff --git a/cpp/open3d/ml/paddle/continuous_conv/ContinuousConvOps.cpp b/cpp/open3d/ml/paddle/continuous_conv/ContinuousConvOps.cpp
index 677086e60..cc0c6415f 100644
--- a/cpp/open3d/ml/paddle/continuous_conv/ContinuousConvOps.cpp
+++ b/cpp/open3d/ml/paddle/continuous_conv/ContinuousConvOps.cpp
@@ -97,7 +97,7 @@ std::vector<paddle::Tensor> ContinuousConvForward(
         return {out_features};                             \
     }
 
-    if (inp_features.is_gpu()) {
+    if (inp_features.is_gpu() || inp_features.is_custom_device()) {
 #ifdef BUILD_CUDA_MODULE
         CALL(float, float, float, int32_t, ::ContinuousConvCUDA)
 #else
@@ -189,7 +189,7 @@ std::vector<paddle::Tensor> ContinuousConvBackward(
     }
 
     bool dispatch_success = false;
-    if (inp_features.is_gpu()) {
+    if (inp_features.is_gpu() || inp_features.is_custom_device()) {
 #ifdef BUILD_CUDA_MODULE
         CALL(float, float, float, int32_t, CUDA)
 #else
diff --git a/cpp/open3d/ml/paddle/continuous_conv/ContinuousConvTransposeOps.cpp b/cpp/open3d/ml/paddle/continuous_conv/ContinuousConvTransposeOps.cpp
index ca438b39b..19bb030df 100644
--- a/cpp/open3d/ml/paddle/continuous_conv/ContinuousConvTransposeOps.cpp
+++ b/cpp/open3d/ml/paddle/continuous_conv/ContinuousConvTransposeOps.cpp
@@ -110,7 +110,7 @@ std::vector<paddle::Tensor> ContinuousConvTransposeForward(
         return {out_features};                             \
     }
 
-    if (inp_features.is_gpu()) {
+    if (inp_features.is_gpu() || inp_features.is_custom_device()) {
 #ifdef BUILD_CUDA_MODULE
         CALL(float, float, float, int32_t, ::ContinuousConvTransposeCUDA)
 #else
@@ -206,7 +206,7 @@ std::vector<paddle::Tensor> ContinuousConvTransposeBackward(
     }
 
     bool dispatch_success = false;
-    if (inp_features.is_gpu()) {
+    if (inp_features.is_gpu() || inp_features.is_custom_device()) {
 #ifdef BUILD_CUDA_MODULE
         CALL(float, float, float, int32_t, CUDA)
 #else
diff --git a/cpp/open3d/ml/paddle/misc/BuildSpatialHashTableOps.cpp b/cpp/open3d/ml/paddle/misc/BuildSpatialHashTableOps.cpp
index a0f49e6a4..6c212f6b4 100644
--- a/cpp/open3d/ml/paddle/misc/BuildSpatialHashTableOps.cpp
+++ b/cpp/open3d/ml/paddle/misc/BuildSpatialHashTableOps.cpp
@@ -86,7 +86,7 @@ std::vector<paddle::Tensor> BuildSpatialHashTable(
         return {hash_table_index, hash_table_cell_splits, \
                 out_hash_table_splits};                   \
     }
-    if (points.is_gpu()) {
+    if (points.is_gpu() || points.is_custom_device()) {
 #ifdef BUILD_CUDA_MODULE
         // pass to cuda function
         CALL(float, BuildSpatialHashTableCUDA)
diff --git a/cpp/open3d/ml/paddle/misc/FixedRadiusSearchOps.cpp b/cpp/open3d/ml/paddle/misc/FixedRadiusSearchOps.cpp
index 26feb76bd..944c267db 100644
--- a/cpp/open3d/ml/paddle/misc/FixedRadiusSearchOps.cpp
+++ b/cpp/open3d/ml/paddle/misc/FixedRadiusSearchOps.cpp
@@ -116,7 +116,7 @@ std::vector<paddle::Tensor> FixedRadiusSearch(
             metric, ignore_query_point, return_distances, neighbors_index, \
             neighbors_row_splits, neighbors_distance
 
-    if (points.is_gpu()) {
+    if (points.is_gpu() || points.is_custom_device()) {
 #ifdef BUILD_CUDA_MODULE
         // pass to cuda function
         if (ComparePaddleDtype<float>(point_type)) {
diff --git a/cpp/open3d/ml/paddle/misc/InvertNeighborsListOps.cpp b/cpp/open3d/ml/paddle/misc/InvertNeighborsListOps.cpp
index 2fb3967d8..a157d2f74 100644
--- a/cpp/open3d/ml/paddle/misc/InvertNeighborsListOps.cpp
+++ b/cpp/open3d/ml/paddle/misc/InvertNeighborsListOps.cpp
@@ -46,7 +46,8 @@ std::vector<paddle::Tensor> InvertNeighborsList(
 
     CHECK_SAME_DEVICE_TYPE(inp_neighbors_index, inp_neighbors_row_splits,
                            inp_neighbors_attributes);
-    if (inp_neighbors_index.is_gpu()) {
+    if (inp_neighbors_index.is_gpu() ||
+        inp_neighbors_index.is_custom_device()) {
 #ifdef BUILD_CUDA_MODULE
         // pass to cuda function
         CALL(int32_t, uint8_t, InvertNeighborsListCUDA)
diff --git a/cpp/open3d/ml/paddle/misc/KnnSearchOps.cpp b/cpp/open3d/ml/paddle/misc/KnnSearchOps.cpp
index e1d9f1432..8ae017c6d 100644
--- a/cpp/open3d/ml/paddle/misc/KnnSearchOps.cpp
+++ b/cpp/open3d/ml/paddle/misc/KnnSearchOps.cpp
@@ -83,7 +83,7 @@ std::vector<paddle::Tensor> KnnSearch(paddle::Tensor& points,
             ignore_query_point, return_distances, neighbors_index,     \
             neighbors_row_splits, neighbors_distance
 
-    if (points.is_gpu()) {
+    if (points.is_gpu() || points.is_custom_device()) {
         PD_CHECK(false, "KnnSearch does not support CUDA");
     } else {
         if (ComparePaddleDtype<float>(point_type)) {
diff --git a/cpp/open3d/ml/paddle/misc/NmsOps.cpp b/cpp/open3d/ml/paddle/misc/NmsOps.cpp
index 0a160515b..50839ffd2 100644
--- a/cpp/open3d/ml/paddle/misc/NmsOps.cpp
+++ b/cpp/open3d/ml/paddle/misc/NmsOps.cpp
@@ -18,7 +18,7 @@ std::vector<paddle::Tensor> Nms(paddle::Tensor& boxes,
     CHECK_TYPE(scores, phi::DataType::FLOAT32);
 
     std::vector<int64_t> keep_indices_blob;
-    if (boxes.is_gpu()) {
+    if (boxes.is_gpu() || boxes.is_custom_device()) {
 #ifdef BUILD_CUDA_MODULE
         keep_indices_blob = open3d::ml::contrib::NmsCUDAKernel(
                 boxes.data<float>(), scores.data<float>(), boxes.shape()[0],
diff --git a/cpp/open3d/ml/paddle/misc/RadiusSearchOps.cpp b/cpp/open3d/ml/paddle/misc/RadiusSearchOps.cpp
index 4f4805742..6fbff5566 100644
--- a/cpp/open3d/ml/paddle/misc/RadiusSearchOps.cpp
+++ b/cpp/open3d/ml/paddle/misc/RadiusSearchOps.cpp
@@ -85,7 +85,7 @@ std::vector<paddle::Tensor> MultiRadiusSearch(
             ignore_query_point, return_distances, normalize_distances,     \
             neighbors_index, neighbors_row_splits, neighbors_distance
 
-    if (points.is_gpu()) {
+    if (points.is_gpu() || points.is_custom_device()) {
         PD_CHECK(false, "MultiRadiusSearch does not support CUDA");
     } else {
         if (ComparePaddleDtype<float>(point_type)) {
diff --git a/cpp/open3d/ml/paddle/misc/RaggedToDenseOps.cpp b/cpp/open3d/ml/paddle/misc/RaggedToDenseOps.cpp
index 60ea9885d..8c8e296e8 100644
--- a/cpp/open3d/ml/paddle/misc/RaggedToDenseOps.cpp
+++ b/cpp/open3d/ml/paddle/misc/RaggedToDenseOps.cpp
@@ -59,7 +59,7 @@ std::vector<paddle::Tensor> RaggedToDense(paddle::Tensor& values,
         return {fn<value_t>(values, row_splits, out_col_size, default_value)}; \
     }
 
-    if (values.is_gpu()) {
+    if (values.is_gpu() || values.is_custom_device()) {
 #ifdef BUILD_CUDA_MODULE
         // pass to cuda function
         CALL(uint8_t, RaggedToDenseCUDA)
diff --git a/cpp/open3d/ml/paddle/misc/ReduceSubarraysSumOps.cpp b/cpp/open3d/ml/paddle/misc/ReduceSubarraysSumOps.cpp
index c55db9217..917e0adfa 100644
--- a/cpp/open3d/ml/paddle/misc/ReduceSubarraysSumOps.cpp
+++ b/cpp/open3d/ml/paddle/misc/ReduceSubarraysSumOps.cpp
@@ -33,7 +33,7 @@ std::vector<paddle::Tensor> ReduceSubarraysSum(paddle::Tensor& values,
 
     CHECK_SAME_DEVICE_TYPE(values, row_splits);
 
-    if (values.is_gpu()) {
+    if (values.is_gpu() || values.is_custom_device()) {
 #ifdef BUILD_CUDA_MODULE
         // pass to cuda function
         CALL(int32_t, ReduceSubarraysSumCUDA)
diff --git a/cpp/open3d/ml/paddle/misc/VoxelPoolingOps.cpp b/cpp/open3d/ml/paddle/misc/VoxelPoolingOps.cpp
index 670b5a231..724979acb 100644
--- a/cpp/open3d/ml/paddle/misc/VoxelPoolingOps.cpp
+++ b/cpp/open3d/ml/paddle/misc/VoxelPoolingOps.cpp
@@ -92,7 +92,7 @@ std::vector<paddle::Tensor> VoxelPoolingForward(
     }
 
     CHECK_SAME_DEVICE_TYPE(positions, features);
-    if (positions.is_gpu()) {
+    if (positions.is_gpu() || positions.is_custom_device()) {
         PD_CHECK(false, "VoxelPooling does not support CUDA");
     } else {
         CALL(float, float, VoxelPoolingCPU)
@@ -170,7 +170,7 @@ std::vector<paddle::Tensor> VoxelPoolingBackward(
     }
 
     CHECK_SAME_DEVICE_TYPE(positions, features);
-    if (positions.is_gpu()) {
+    if (positions.is_gpu() || positions.is_custom_device()) {
         PD_CHECK(false, "VoxelPooling backward does not support CUDA");
     } else {
         CALL(float, float, VoxelPoolingGradCPU)
diff --git a/cpp/open3d/ml/paddle/misc/VoxelizeOps.cpp b/cpp/open3d/ml/paddle/misc/VoxelizeOps.cpp
index 100d829be..a5dcabd2c 100644
--- a/cpp/open3d/ml/paddle/misc/VoxelizeOps.cpp
+++ b/cpp/open3d/ml/paddle/misc/VoxelizeOps.cpp
@@ -58,7 +58,7 @@ std::vector<paddle::Tensor> Voxelize(paddle::Tensor& points,
                 voxel_batch_splits};                                           \
     }
 
-    if (points.is_gpu()) {
+    if (points.is_gpu() || points.is_custom_device()) {
 #ifdef BUILD_CUDA_MODULE
         // pass to cuda function
         CALL(float, VoxelizeCUDA)
diff --git a/cpp/open3d/ml/paddle/sparse_conv/SparseConvOps.cpp b/cpp/open3d/ml/paddle/sparse_conv/SparseConvOps.cpp
index 55d01bc87..450ecaae7 100644
--- a/cpp/open3d/ml/paddle/sparse_conv/SparseConvOps.cpp
+++ b/cpp/open3d/ml/paddle/sparse_conv/SparseConvOps.cpp
@@ -74,7 +74,7 @@ std::vector<paddle::Tensor> SparseConvForward(
         return {out_features};                                     \
     }
 
-    if (inp_features.is_gpu()) {
+    if (inp_features.is_gpu() || inp_features.is_custom_device()) {
 #ifdef BUILD_CUDA_MODULE
         CALL(float, float, int32_t, uint8_t, ::SparseConvCUDA)
 #else
@@ -166,7 +166,7 @@ std::vector<paddle::Tensor> SparseConvBackward(
     }
 
     bool dispatch_success = false;
-    if (inp_features.is_gpu()) {
+    if (inp_features.is_gpu() || inp_features.is_custom_device()) {
 #ifdef BUILD_CUDA_MODULE
         CALL(float, float, int32_t, uint8_t, CUDA)
 #else
diff --git a/cpp/open3d/ml/paddle/sparse_conv/SparseConvTransposeOps.cpp b/cpp/open3d/ml/paddle/sparse_conv/SparseConvTransposeOps.cpp
index 490b7b9a7..ae6345f89 100644
--- a/cpp/open3d/ml/paddle/sparse_conv/SparseConvTransposeOps.cpp
+++ b/cpp/open3d/ml/paddle/sparse_conv/SparseConvTransposeOps.cpp
@@ -87,7 +87,7 @@ std::vector<paddle::Tensor> SparseConvTransposeForward(
         return {out_features};                                     \
     }
 
-    if (inp_features.is_gpu()) {
+    if (inp_features.is_gpu() || inp_features.is_custom_device()) {
 #ifdef BUILD_CUDA_MODULE
         CALL(float, float, int32_t, uint8_t, ::SparseConvTransposeCUDA)
 #else
@@ -176,7 +176,7 @@ std::vector<paddle::Tensor> SparseConvTransposeBackward(
     }
 
     bool dispatch_success = false;
-    if (inp_features.is_gpu()) {
+    if (inp_features.is_gpu() || inp_features.is_custom_device()) {
 #ifdef BUILD_CUDA_MODULE
         CALL(float, float, int32_t, uint8_t, CUDA)
 #else