From 8903e6d0655d42be5b4053b7739608cceb676018 Mon Sep 17 00:00:00 2001 From: Donato Capitella Date: Mon, 1 Jun 2026 11:07:55 +0100 Subject: [PATCH] fix(rocm): allow chunked model copy without OOM When DS4_CUDA_COPY_MODEL_CHUNKED is set, skip cudaHostRegister. Registering a large memory map page-locks (pins) it, which prevents posix_madvise(POSIX_MADV_DONTNEED) from freeing pages during the chunked copy, leading to catastrophic system RAM exhaustion on APUs with unified memory. --- ds4_cuda.cu | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/ds4_cuda.cu b/ds4_cuda.cu index 8b6241ca3..5db8b15b2 100644 --- a/ds4_cuda.cu +++ b/ds4_cuda.cu @@ -1430,6 +1430,11 @@ extern "C" int ds4_gpu_set_model_map(const void *model_map, uint64_t model_size) } } + const char *chunked_env = getenv("DS4_CUDA_COPY_MODEL_CHUNKED"); + if (chunked_env && chunked_env[0]) { + return 1; + } + cudaError_t err = cudaHostRegister((void *)model_map, (size_t)model_size, cudaHostRegisterMapped | cudaHostRegisterReadOnly); if (err == cudaSuccess) {