diff --git a/torchvision/csrc/ops/cuda/deform_conv2d_kernel.cu b/torchvision/csrc/ops/cuda/deform_conv2d_kernel.cu index ae496b37d48..155064670a3 100644 --- a/torchvision/csrc/ops/cuda/deform_conv2d_kernel.cu +++ b/torchvision/csrc/ops/cuda/deform_conv2d_kernel.cu @@ -247,10 +247,11 @@ void deformable_im2col( out_w > std::numeric_limits::max()); + const cudaStream_t stream = at::cuda::getCurrentCUDAStream(); if (use_64bits_indexing) { AT_DISPATCH_FLOATING_TYPES_AND_HALF( input.scalar_type(), "deformable_im2col", ([&] { - deformable_im2col_kernel<<>>( + deformable_im2col_kernel<<>>( num_kernels, input.data_ptr(), data_offset.data_ptr(), @@ -277,7 +278,7 @@ void deformable_im2col( } else { AT_DISPATCH_FLOATING_TYPES_AND_HALF( input.scalar_type(), "deformable_im2col", ([&] { - deformable_im2col_kernel<<>>( + deformable_im2col_kernel<<>>( num_kernels, input.data_ptr(), data_offset.data_ptr(), @@ -436,10 +437,11 @@ void compute_grad_input( at::globalContext().alertNotDeterministic("compute_grad_input"); + const cudaStream_t stream = at::cuda::getCurrentCUDAStream(); if (use_64bits_indexing) { AT_DISPATCH_FLOATING_TYPES_AND_HALF( columns.scalar_type(), "compute_grad_input", ([&] { - deformable_col2im_kernel<<>>( + deformable_col2im_kernel<<>>( num_kernels, columns.data_ptr(), offset.data_ptr(), @@ -465,7 +467,7 @@ void compute_grad_input( } else { AT_DISPATCH_FLOATING_TYPES_AND_HALF( columns.scalar_type(), "compute_grad_input", ([&] { - deformable_col2im_kernel<<>>( + deformable_col2im_kernel<<>>( num_kernels, columns.data_ptr(), offset.data_ptr(), @@ -678,11 +680,12 @@ void compute_grad_offset_and_mask( ((int64_t)channels * weight_h * weight_w * parallel_imgs * out_h * out_w > std::numeric_limits::max()); + const cudaStream_t stream = at::cuda::getCurrentCUDAStream(); if (use_64bits_indexing) { AT_DISPATCH_FLOATING_TYPES_AND_HALF( columns.scalar_type(), "compute_grad_offset_and_mask", ([&] { deformable_col2im_coord_kernel - <<>>( + <<>>( num_kernels, columns.data_ptr(), input.data_ptr(), @@ -711,7 +714,7 @@ void compute_grad_offset_and_mask( } else { AT_DISPATCH_FLOATING_TYPES_AND_HALF( columns.scalar_type(), "compute_grad_offset_and_mask", ([&] { - deformable_col2im_coord_kernel<<>>( + deformable_col2im_coord_kernel<<>>( num_kernels, columns.data_ptr(), input.data_ptr(),