diff --git a/.github/workflows/style.yaml b/.github/workflows/style.yaml index 87e39337..d346da5d 100644 --- a/.github/workflows/style.yaml +++ b/.github/workflows/style.yaml @@ -6,11 +6,12 @@ on: pull_request: jobs: - cpplint: + lint: runs-on: ubuntu-latest steps: - - uses: actions/checkout@master - - uses: reviewdog/action-cpplint@master + - uses: actions/checkout@v3 + - name: Run clang-format style check for C/C++ programs. + uses: jidicula/clang-format-action@v4.18.0 with: - github_token: ${{ secrets.github_token }} - args: --linelength=120 + clang-format-version: '18' + check-path: 'src' diff --git a/src/.clang-format b/src/.clang-format new file mode 100644 index 00000000..4ca6bcb7 --- /dev/null +++ b/src/.clang-format @@ -0,0 +1,3 @@ +BasedOnStyle: Google +IndentWidth: 4 +ColumnLimit: 100 diff --git a/src/allocator/allocator.c b/src/allocator/allocator.c old mode 100755 new mode 100644 index b990129f..0ce5eb87 --- a/src/allocator/allocator.c +++ b/src/allocator/allocator.c @@ -1,44 +1,42 @@ #include "allocator.h" -#include "include/log_utils.h" + #include "include/libcuda_hook.h" +#include "include/log_utils.h" #include "multiprocess/multiprocess_memory_limit.h" - size_t BITSIZE = 512; size_t IPCSIZE = 2097152; size_t OVERSIZE = 134217728; -//int pidfound; +// int pidfound; region_list *r_list; allocated_list *device_overallocated; allocated_list *device_allocasync; -#define ALIGN 2097152 +#define ALIGN 2097152 #define MULTI_PARAM 1 -#define CHUNK_SIZE (OVERSIZE/BITSIZE) -#define __CHUNK_SIZE__ CHUNK_SIZE +#define CHUNK_SIZE (OVERSIZE / BITSIZE) +#define __CHUNK_SIZE__ CHUNK_SIZE extern size_t initial_offset; -extern CUresult - cuMemoryAllocate(CUdeviceptr* dptr, size_t bytesize, void* data); +extern CUresult cuMemoryAllocate(CUdeviceptr *dptr, size_t bytesize, void *data); extern CUresult cuMemoryFree(CUdeviceptr dptr); pthread_once_t allocator_allocate_flag = PTHREAD_ONCE_INIT; pthread_mutex_t mutex = PTHREAD_MUTEX_INITIALIZER; size_t round_up(size_t size, 
size_t unit) { - if (size & (unit-1)) - return ((size / unit) + 1 ) * unit; + if (size & (unit - 1)) return ((size / unit) + 1) * unit; return size; } int oom_check(const int dev, size_t addon) { CUdevice d; - if (dev==-1) + if (dev == -1) cuCtxGetDevice(&d); else - d=dev; + d = dev; uint64_t limit = get_current_device_memory_limit(d); size_t _usage = get_gpu_memory_usage(d); @@ -47,12 +45,11 @@ int oom_check(const int dev, size_t addon) { } size_t new_allocated = _usage + addon; - LOG_INFO("_usage=%lu limit=%lu new_allocated=%lu",_usage,limit,new_allocated); + LOG_INFO("_usage=%lu limit=%lu new_allocated=%lu", _usage, limit, new_allocated); if (new_allocated > limit) { LOG_ERROR("Device %d OOM %lu / %lu", d, new_allocated, limit); - if (clear_proc_slot_nolock(1) > 0) - return oom_check(dev,addon); + if (clear_proc_slot_nolock(1) > 0) return oom_check(dev, addon); return 1; } return 0; @@ -61,27 +58,27 @@ int oom_check(const int dev, size_t addon) { CUresult view_vgpu_allocator() { allocated_list_entry *al; size_t total; - total=0; + total = 0; LOG_INFO("[view1]:overallocated:"); - for (al=device_overallocated->head;al!=NULL;al=al->next){ - LOG_INFO("(%p %lu)\t",(void *)al->entry->address,al->entry->length); - total+=al->entry->length; + for (al = device_overallocated->head; al != NULL; al = al->next) { + LOG_INFO("(%p %lu)\t", (void *)al->entry->address, al->entry->length); + total += al->entry->length; } - LOG_INFO("total=%lu",total); + LOG_INFO("total=%lu", total); size_t t = get_current_device_memory_usage(0); - LOG_INFO("current_device_memory_usage:%lu",t); + LOG_INFO("current_device_memory_usage:%lu", t); return 0; } CUresult get_listsize(allocated_list *al, size_t *size) { - if (al->length == 0){ + if (al->length == 0) { *size = 0; return CUDA_SUCCESS; } - size_t count=0; + size_t count = 0; allocated_list_entry *val; - for (val=al->head;val!=NULL;val=val->next){ - count+=val->entry->length; + for (val = al->head; val != NULL; val = val->next) { + count += 
val->entry->length; } *size = count; return CUDA_SUCCESS; @@ -92,10 +89,10 @@ void allocator_init() { device_overallocated = malloc(sizeof(allocated_list)); LIST_INIT(device_overallocated); - device_allocasync=malloc(sizeof(allocated_list)); + device_allocasync = malloc(sizeof(allocated_list)); LIST_INIT(device_allocasync); - pthread_mutex_init(&mutex,NULL); + pthread_mutex_init(&mutex, NULL); } int add_chunk(CUdeviceptr *address, size_t size) { @@ -105,8 +102,7 @@ int add_chunk(CUdeviceptr *address, size_t size) { cuCtxGetDevice(&dev); /* OOM pre-check without lock */ - if (oom_check(dev, size)) - return CUDA_ERROR_OUT_OF_MEMORY; + if (oom_check(dev, size)) return CUDA_ERROR_OUT_OF_MEMORY; /* GPU allocation outside lock — the expensive part */ if (size <= IPCSIZE) { @@ -141,17 +137,17 @@ int add_chunk(CUdeviceptr *address, size_t size) { int add_chunk_only(CUdeviceptr address, size_t size, CUdevice dev) { pthread_mutex_lock(&mutex); - size_t addr=0; + size_t addr = 0; size_t allocsize; - if (oom_check(dev,size)){ + if (oom_check(dev, size)) { pthread_mutex_unlock(&mutex); return -1; } allocated_list_entry *e; INIT_ALLOCATED_LIST_ENTRY(e, addr, size, dev); - LIST_ADD(device_overallocated,e); - //uint64_t t_size; - e->entry->address=address; + LIST_ADD(device_overallocated, e); + // uint64_t t_size; + e->entry->address = address; allocsize = size; add_gpu_device_memory_usage(getpid(), dev, allocsize, 2); pthread_mutex_unlock(&mutex); @@ -161,8 +157,9 @@ int add_chunk_only(CUdeviceptr address, size_t size, CUdevice dev) { int check_memory_type(CUdeviceptr address) { allocated_list_entry *cursor; cursor = device_overallocated->head; - for (cursor=device_overallocated->head;cursor!=NULL;cursor=cursor->next){ - if ((cursor->entry->address <= address) && (cursor->entry->address+cursor->entry->length>=address)) + for (cursor = device_overallocated->head; cursor != NULL; cursor = cursor->next) { + if ((cursor->entry->address <= address) && + (cursor->entry->address + 
cursor->entry->length >= address)) return CU_MEMORYTYPE_DEVICE; } return CU_MEMORYTYPE_HOST; @@ -218,16 +215,11 @@ int remove_chunk_only(CUdeviceptr dptr) { return -1; } -int allocate_raw(CUdeviceptr *dptr, size_t size) { - return add_chunk(dptr, size); -} +int allocate_raw(CUdeviceptr *dptr, size_t size) { return add_chunk(dptr, size); } -int free_raw(CUdeviceptr dptr) { - return remove_chunk(device_overallocated, dptr); -} +int free_raw(CUdeviceptr dptr) { return remove_chunk(device_overallocated, dptr); } -int remove_chunk_async( - allocated_list *a_list, CUdeviceptr dptr, CUstream hStream) { +int remove_chunk_async(allocated_list *a_list, CUdeviceptr dptr, CUstream hStream) { size_t t_size; if (a_list->length == 0) { return -1; @@ -235,13 +227,13 @@ int remove_chunk_async( allocated_list_entry *val; for (val = a_list->head; val != NULL; val = val->next) { if (val->entry->address == dptr) { - t_size=val->entry->length; - CUDA_OVERRIDE_CALL(cuda_library_entry,cuMemFreeAsync,dptr,hStream); - LIST_REMOVE(a_list,val); - a_list->limit-=t_size; + t_size = val->entry->length; + CUDA_OVERRIDE_CALL(cuda_library_entry, cuMemFreeAsync, dptr, hStream); + LIST_REMOVE(a_list, val); + a_list->limit -= t_size; CUdevice dev; cuCtxGetDevice(&dev); - rm_gpu_device_memory_usage(getpid(),dev,t_size,2); + rm_gpu_device_memory_usage(getpid(), dev, t_size, 2); return 0; } } @@ -256,53 +248,56 @@ int free_raw_async(CUdeviceptr dptr, CUstream hStream) { } int add_chunk_async(CUdeviceptr *address, size_t size, CUstream hStream) { - size_t addr=0; + size_t addr = 0; size_t allocsize; CUresult res = CUDA_SUCCESS; CUdevice dev; cuCtxGetDevice(&dev); - if (oom_check(dev,size)) - return -1; + if (oom_check(dev, size)) return -1; allocated_list_entry *e; INIT_ALLOCATED_LIST_ENTRY(e, addr, size, dev); - res = CUDA_OVERRIDE_CALL(cuda_library_entry,cuMemAllocAsync,&e->entry->address,size,hStream); + res = + CUDA_OVERRIDE_CALL(cuda_library_entry, cuMemAllocAsync, &e->entry->address, size, hStream); 
if (res != CUDA_SUCCESS) { - LOG_ERROR("cuMemoryAllocate failed res=%d",res); + LOG_ERROR("cuMemoryAllocate failed res=%d", res); return res; } *address = e->entry->address; CUmemoryPool pool; - res = CUDA_OVERRIDE_CALL(cuda_library_entry,cuDeviceGetMemPool,&pool,dev); + res = CUDA_OVERRIDE_CALL(cuda_library_entry, cuDeviceGetMemPool, &pool, dev); if (res != CUDA_SUCCESS) { - LOG_ERROR("cuDeviceGetMemPool failed res=%d",res); + LOG_ERROR("cuDeviceGetMemPool failed res=%d", res); return res; } size_t poollimit; - res = CUDA_OVERRIDE_CALL(cuda_library_entry,cuMemPoolGetAttribute,pool,CU_MEMPOOL_ATTR_RESERVED_MEM_HIGH,&poollimit); + res = CUDA_OVERRIDE_CALL(cuda_library_entry, cuMemPoolGetAttribute, pool, + CU_MEMPOOL_ATTR_RESERVED_MEM_HIGH, &poollimit); if (res != CUDA_SUCCESS) { - LOG_ERROR("cuMemPoolGetAttribute failed res=%d",res); + LOG_ERROR("cuMemPoolGetAttribute failed res=%d", res); return res; } if (poollimit != 0) { - if (poollimit> device_allocasync->limit) { - allocsize = (poollimit-device_allocasync->limit < size)? poollimit-device_allocasync->limit : size; + if (poollimit > device_allocasync->limit) { + allocsize = (poollimit - device_allocasync->limit < size) + ? 
poollimit - device_allocasync->limit + : size; cuCtxGetDevice(&dev); add_gpu_device_memory_usage(getpid(), dev, allocsize, 2); - device_allocasync->limit=device_allocasync->limit+allocsize; - e->entry->length=allocsize; - }else{ - e->entry->length=0; + device_allocasync->limit = device_allocasync->limit + allocsize; + e->entry->length = allocsize; + } else { + e->entry->length = 0; } } - LIST_ADD(device_allocasync,e); + LIST_ADD(device_allocasync, e); return 0; } int allocate_async_raw(CUdeviceptr *dptr, size_t size, CUstream hStream) { int tmp; pthread_mutex_lock(&mutex); - tmp = add_chunk_async(dptr,size,hStream); + tmp = add_chunk_async(dptr, size, hStream); pthread_mutex_unlock(&mutex); return tmp; } diff --git a/src/allocator/allocator.h b/src/allocator/allocator.h old mode 100755 new mode 100644 index 38933287..518a4022 --- a/src/allocator/allocator.h +++ b/src/allocator/allocator.h @@ -1,16 +1,16 @@ -#include -#include #include -#include +#include #include +#include #include +#include #include -#include +#include #define CUMALLOC 0 #define CUCREATE 1 -struct allocated_device_memory_struct{ +struct allocated_device_memory_struct { CUdeviceptr address; size_t length; CUcontext ctx; @@ -19,13 +19,13 @@ struct allocated_device_memory_struct{ }; typedef struct allocated_device_memory_struct allocated_device_memory; -struct allocated_list_entry_struct{ +struct allocated_list_entry_struct { allocated_device_memory *entry; - struct allocated_list_entry_struct *next,*prev; + struct allocated_list_entry_struct *next, *prev; }; typedef struct allocated_list_entry_struct allocated_list_entry; -struct allocated_list_struct{ +struct allocated_list_struct { allocated_list_entry *head; allocated_list_entry *tail; size_t length; @@ -33,7 +33,7 @@ struct allocated_list_struct{ }; typedef struct allocated_list_struct allocated_list; -struct region_struct{ +struct region_struct { size_t start; size_t freemark; size_t freed_map; @@ -45,15 +45,15 @@ struct region_struct{ }; 
typedef struct region_struct region; -struct region_list_entry_struct{ +struct region_list_entry_struct { region *entry; - struct region_list_entry_struct *next,*prev; + struct region_list_entry_struct *next, *prev; }; typedef struct region_list_entry_struct region_list_entry; -struct region_list_struct{ - region_list_entry *head; - region_list_entry *tail; +struct region_list_struct { + region_list_entry *head; + region_list_entry *tail; size_t length; }; typedef struct region_list_struct region_list; @@ -63,90 +63,90 @@ extern allocated_list *device_overallocated; extern allocated_list *device_allocasync; extern pthread_mutex_t mutex; -#define LIST_INIT(list) { \ - list->head=NULL; \ - list->tail=NULL; \ - list->length=0; \ - list->limit=0; \ +#define LIST_INIT(list) \ + { \ + list->head = NULL; \ + list->tail = NULL; \ + list->length = 0; \ + list->limit = 0; \ } #define __LIST_INIT(list) LIST_INIT(list) -#define QUIT_WITH_ERROR(__message) { \ - LOG_ERROR("%s\n",#__message); \ - return -1; \ -} - -#define LIST_REMOVE(list,val) { \ - if (val->prev!=NULL) \ - val->prev->next=val->next; \ - if (val->next!=NULL) \ - val->next->prev=val->prev; \ - if (val == list->tail) \ - list->tail = val->prev; \ - if (val == list->head) \ - list->head = val->next; \ - free(val->entry->allocHandle); \ - free(val->entry); \ - free(val); \ - list->length--; \ -} - -#define INIT_ALLOCATED_LIST_ENTRY(__list_entry, __address, __size, __dev) { \ - CUcontext __ctx; \ - CUresult __res=cuCtxGetCurrent(&__ctx); \ - if (__res!=CUDA_SUCCESS) QUIT_WITH_ERROR("cuCtxGetCurrent failed"); \ - __list_entry = malloc(sizeof(allocated_list_entry)); \ - if (__list_entry == NULL) QUIT_WITH_ERROR("malloc failed"); \ - __list_entry->entry = malloc(sizeof(allocated_device_memory)); \ - if (__list_entry->entry == NULL) QUIT_WITH_ERROR("malloc failed"); \ - __list_entry->entry->address=__address; \ - __list_entry->entry->length=__size; \ - __list_entry->entry->dev = __dev; \ - 
__list_entry->entry->allocHandle=malloc(sizeof(CUmemGenericAllocationHandle)); \ - __list_entry->entry->ctx=__ctx; \ - __list_entry->next=NULL; \ - __list_entry->prev=NULL; \ -} - -#define INIT_REGION_LIST_ENTRY(__list_entry,__address,__size) \ - do{ \ - CUcontext __ctx; \ - CUresult __res; \ - __res = cuCtxGetCurrent(&__ctx); \ - if (__res!=CUDA_SUCCESS) QUIT_WITH_ERROR("cuCtxGetCurrent failed"); \ - __list_entry = malloc(sizeof(region_list_entry)); \ - if (__list_entry == NULL) QUIT_WITH_ERROR("malloc failed"); \ - __list_entry->entry = malloc(sizeof(region)); \ - if (__list_entry->entry == NULL) QUIT_WITH_ERROR("malloc failed") \ - __list_entry->entry->region_allocs = malloc(sizeof(allocated_list)); \ - if (__list_entry->entry->region_allocs == NULL) QUIT_WITH_ERROR("malloc failed") \ - __list_entry->entry->start=__address; \ - __list_entry->entry->freed_map=__CHUNK_SIZE__; \ - __list_entry->entry->freemark=0; \ - __list_entry->entry->length=0; \ - __list_entry->entry->ctx=__ctx; \ - __list_entry->entry->allocHandle=malloc(sizeof(CUmemGenericAllocationHandle)); \ - __list_entry->entry->bitmap=malloc(__CHUNK_SIZE__); \ - memset(__list_entry->entry->bitmap,0,__CHUNK_SIZE__); \ - __LIST_INIT(__list_entry->entry->region_allocs); \ - region_fill(__list_entry->entry,0,__size); \ - __list_entry->next=NULL; \ - __list_entry->prev=NULL; \ - }while(0); - -#define LIST_ADD(list,__entry) { \ - if (list->head == NULL) { \ - list->head = __entry; \ - list->tail = __entry; \ - list->length=1; \ - }else{ \ - __entry->prev = list->tail; \ - list->tail->next=__entry; \ - list->tail = __entry; \ - list->length++; \ - } \ -} +#define QUIT_WITH_ERROR(__message) \ + { \ + LOG_ERROR("%s\n", #__message); \ + return -1; \ + } + +#define LIST_REMOVE(list, val) \ + { \ + if (val->prev != NULL) val->prev->next = val->next; \ + if (val->next != NULL) val->next->prev = val->prev; \ + if (val == list->tail) list->tail = val->prev; \ + if (val == list->head) list->head = val->next; \ + 
free(val->entry->allocHandle); \ + free(val->entry); \ + free(val); \ + list->length--; \ + } + +#define INIT_ALLOCATED_LIST_ENTRY(__list_entry, __address, __size, __dev) \ + { \ + CUcontext __ctx; \ + CUresult __res = cuCtxGetCurrent(&__ctx); \ + if (__res != CUDA_SUCCESS) QUIT_WITH_ERROR("cuCtxGetCurrent failed"); \ + __list_entry = malloc(sizeof(allocated_list_entry)); \ + if (__list_entry == NULL) QUIT_WITH_ERROR("malloc failed"); \ + __list_entry->entry = malloc(sizeof(allocated_device_memory)); \ + if (__list_entry->entry == NULL) QUIT_WITH_ERROR("malloc failed"); \ + __list_entry->entry->address = __address; \ + __list_entry->entry->length = __size; \ + __list_entry->entry->dev = __dev; \ + __list_entry->entry->allocHandle = malloc(sizeof(CUmemGenericAllocationHandle)); \ + __list_entry->entry->ctx = __ctx; \ + __list_entry->next = NULL; \ + __list_entry->prev = NULL; \ + } +#define INIT_REGION_LIST_ENTRY(__list_entry, __address, __size) \ + do { \ + CUcontext __ctx; \ + CUresult __res; \ + __res = cuCtxGetCurrent(&__ctx); \ + if (__res != CUDA_SUCCESS) QUIT_WITH_ERROR("cuCtxGetCurrent failed"); \ + __list_entry = malloc(sizeof(region_list_entry)); \ + if (__list_entry == NULL) QUIT_WITH_ERROR("malloc failed"); \ + __list_entry->entry = malloc(sizeof(region)); \ + if (__list_entry->entry == NULL) QUIT_WITH_ERROR("malloc failed") \ + __list_entry->entry->region_allocs = malloc(sizeof(allocated_list)); \ + if (__list_entry->entry->region_allocs == NULL) QUIT_WITH_ERROR("malloc failed") \ + __list_entry->entry->start = __address; \ + __list_entry->entry->freed_map = __CHUNK_SIZE__; \ + __list_entry->entry->freemark = 0; \ + __list_entry->entry->length = 0; \ + __list_entry->entry->ctx = __ctx; \ + __list_entry->entry->allocHandle = malloc(sizeof(CUmemGenericAllocationHandle)); \ + __list_entry->entry->bitmap = malloc(__CHUNK_SIZE__); \ + memset(__list_entry->entry->bitmap, 0, __CHUNK_SIZE__); \ + __LIST_INIT(__list_entry->entry->region_allocs); \ + 
region_fill(__list_entry->entry, 0, __size); \ + __list_entry->next = NULL; \ + __list_entry->prev = NULL; \ + } while (0); + +#define LIST_ADD(list, __entry) \ + { \ + if (list->head == NULL) { \ + list->head = __entry; \ + list->tail = __entry; \ + list->length = 1; \ + } else { \ + __entry->prev = list->tail; \ + list->tail->next = __entry; \ + list->tail = __entry; \ + list->length++; \ + } \ + } int getallochandle(CUmemGenericAllocationHandle *handle, size_t size, size_t *allocsize); @@ -154,7 +154,7 @@ int getallochandle(CUmemGenericAllocationHandle *handle, size_t size, size_t *al CUresult view_vgpu_allocator(); // Checks if oom -int oom_check(const int dev,size_t addon); +int oom_check(const int dev, size_t addon); // Allocate and free device memory int allocate_raw(CUdeviceptr *dptr, size_t size); @@ -166,4 +166,3 @@ int free_raw_async(CUdeviceptr dptr, CUstream hStream); // Checks memory type int check_memory_type(CUdeviceptr address); - diff --git a/src/cuda/context.c b/src/cuda/context.c old mode 100755 new mode 100644 index 3838970e..9e33fc13 --- a/src/cuda/context.c +++ b/src/cuda/context.c @@ -4,149 +4,154 @@ extern size_t context_size; extern int ctx_activate[16]; - -CUresult cuDevicePrimaryCtxGetState( CUdevice dev, unsigned int* flags, int* active ){ - LOG_DEBUG("into cuDevicePrimaryCtxGetState dev=%d",dev); - CUresult res = CUDA_OVERRIDE_CALL(cuda_library_entry,cuDevicePrimaryCtxGetState,dev,flags,active); +CUresult cuDevicePrimaryCtxGetState(CUdevice dev, unsigned int* flags, int* active) { + LOG_DEBUG("into cuDevicePrimaryCtxGetState dev=%d", dev); + CUresult res = + CUDA_OVERRIDE_CALL(cuda_library_entry, cuDevicePrimaryCtxGetState, dev, flags, active); return res; } -CUresult cuDevicePrimaryCtxRetain(CUcontext *pctx, CUdevice dev){ - LOG_INFO("dev=%d context_size=%ld",dev,context_size); - //for Initialization only - CUresult res = CUDA_OVERRIDE_CALL(cuda_library_entry,cuDevicePrimaryCtxRetain,pctx,dev); +CUresult 
cuDevicePrimaryCtxRetain(CUcontext* pctx, CUdevice dev) { + LOG_INFO("dev=%d context_size=%ld", dev, context_size); + // for Initialization only + CUresult res = CUDA_OVERRIDE_CALL(cuda_library_entry, cuDevicePrimaryCtxRetain, pctx, dev); if (ctx_activate[dev] == 0) { - add_gpu_device_memory_usage(getpid(),dev,context_size,0); + add_gpu_device_memory_usage(getpid(), dev, context_size, 0); } - if (context_size>0) { + if (context_size > 0) { ctx_activate[dev] = 1; } return res; } - -CUresult cuDevicePrimaryCtxSetFlags_v2( CUdevice dev, unsigned int flags ){ - LOG_DEBUG("into cuDevicePrimaryCtxSetFlags dev=%d flags=%d",dev,flags); - return CUDA_OVERRIDE_CALL(cuda_library_entry,cuDevicePrimaryCtxSetFlags_v2,dev,flags); +CUresult cuDevicePrimaryCtxSetFlags_v2(CUdevice dev, unsigned int flags) { + LOG_DEBUG("into cuDevicePrimaryCtxSetFlags dev=%d flags=%d", dev, flags); + return CUDA_OVERRIDE_CALL(cuda_library_entry, cuDevicePrimaryCtxSetFlags_v2, dev, flags); } -CUresult cuDevicePrimaryCtxRelease_v2( CUdevice dev ){ +CUresult cuDevicePrimaryCtxRelease_v2(CUdevice dev) { if (ctx_activate[dev] == 1) { - rm_gpu_device_memory_usage(getpid(),dev,context_size,0); + rm_gpu_device_memory_usage(getpid(), dev, context_size, 0); } ctx_activate[dev] = 0; - CUresult res = CUDA_OVERRIDE_CALL(cuda_library_entry,cuDevicePrimaryCtxRelease_v2,dev); + CUresult res = CUDA_OVERRIDE_CALL(cuda_library_entry, cuDevicePrimaryCtxRelease_v2, dev); return res; } CUresult cuCtxGetDevice(CUdevice* device) { - CUresult res = CUDA_OVERRIDE_CALL(cuda_library_entry,cuCtxGetDevice,device); + CUresult res = CUDA_OVERRIDE_CALL(cuda_library_entry, cuCtxGetDevice, device); return res; } #if CUDA_VERSION < 13000 -CUresult cuCtxCreate_v2 ( CUcontext* pctx, unsigned int flags, CUdevice dev ){ - LOG_DEBUG("into cuCtxCreate pctx=%p flags=%d dev=%d",pctx,flags,dev); - CUresult res = CUDA_OVERRIDE_CALL(cuda_library_entry,cuCtxCreate_v2,pctx,flags,dev); +CUresult cuCtxCreate_v2(CUcontext* pctx, unsigned int flags, 
CUdevice dev) { + LOG_DEBUG("into cuCtxCreate pctx=%p flags=%d dev=%d", pctx, flags, dev); + CUresult res = CUDA_OVERRIDE_CALL(cuda_library_entry, cuCtxCreate_v2, pctx, flags, dev); return res; } -CUresult cuCtxCreate_v3 ( CUcontext* pctx, CUexecAffinityParam* paramsArray, int numParams, unsigned int flags, CUdevice dev ){ - LOG_DEBUG("into cuCtxCreate_v3 pctx=%p paramsArray=%p numParams=%d flags=%d dev=%d",pctx,paramsArray,numParams,flags,dev); - CUresult res = CUDA_OVERRIDE_CALL(cuda_library_entry,cuCtxCreate_v3,pctx,paramsArray,numParams,flags,dev); +CUresult cuCtxCreate_v3(CUcontext* pctx, CUexecAffinityParam* paramsArray, int numParams, + unsigned int flags, CUdevice dev) { + LOG_DEBUG("into cuCtxCreate_v3 pctx=%p paramsArray=%p numParams=%d flags=%d dev=%d", pctx, + paramsArray, numParams, flags, dev); + CUresult res = CUDA_OVERRIDE_CALL(cuda_library_entry, cuCtxCreate_v3, pctx, paramsArray, + numParams, flags, dev); return res; } #endif -CUresult cuCtxCreate_v4(CUcontext* pctx, CUctxCreateParams* ctxCreateParams, unsigned int flags, CUdevice dev) { - LOG_DEBUG("into cuCtxCreate_v4 pctx=%p ctxCreateParams=%p flags=%d dev=%d", pctx, ctxCreateParams, flags, dev); - CUresult res = CUDA_OVERRIDE_CALL(cuda_library_entry, cuCtxCreate_v4, pctx, ctxCreateParams, flags, dev); +CUresult cuCtxCreate_v4(CUcontext* pctx, CUctxCreateParams* ctxCreateParams, unsigned int flags, + CUdevice dev) { + LOG_DEBUG("into cuCtxCreate_v4 pctx=%p ctxCreateParams=%p flags=%d dev=%d", pctx, + ctxCreateParams, flags, dev); + CUresult res = + CUDA_OVERRIDE_CALL(cuda_library_entry, cuCtxCreate_v4, pctx, ctxCreateParams, flags, dev); return res; } -CUresult cuCtxDestroy_v2 ( CUcontext ctx ){ - LOG_DEBUG("into cuCtxDestroy_v2 ctx=%p",ctx); - return CUDA_OVERRIDE_CALL(cuda_library_entry,cuCtxDestroy_v2,ctx); +CUresult cuCtxDestroy_v2(CUcontext ctx) { + LOG_DEBUG("into cuCtxDestroy_v2 ctx=%p", ctx); + return CUDA_OVERRIDE_CALL(cuda_library_entry, cuCtxDestroy_v2, ctx); } -CUresult 
cuCtxGetApiVersion ( CUcontext ctx, unsigned int* version ){ - LOG_INFO("into cuCtxGetApiVersion ctx=%p",ctx); - CUresult res = CUDA_OVERRIDE_CALL(cuda_library_entry,cuCtxGetApiVersion,ctx,version); - if (res!=CUDA_SUCCESS){ - LOG_ERROR("cuCtxGetApiVersion res=%d",res); +CUresult cuCtxGetApiVersion(CUcontext ctx, unsigned int* version) { + LOG_INFO("into cuCtxGetApiVersion ctx=%p", ctx); + CUresult res = CUDA_OVERRIDE_CALL(cuda_library_entry, cuCtxGetApiVersion, ctx, version); + if (res != CUDA_SUCCESS) { + LOG_ERROR("cuCtxGetApiVersion res=%d", res); } return res; } -CUresult cuCtxGetCacheConfig ( CUfunc_cache* pconfig ){ +CUresult cuCtxGetCacheConfig(CUfunc_cache* pconfig) { LOG_DEBUG("into cuCtxGetCacheConfig"); - return CUDA_OVERRIDE_CALL(cuda_library_entry,cuCtxGetCacheConfig,pconfig); + return CUDA_OVERRIDE_CALL(cuda_library_entry, cuCtxGetCacheConfig, pconfig); } -CUresult cuCtxGetCurrent ( CUcontext* pctx ){ - CUresult res = CUDA_OVERRIDE_CALL(cuda_library_entry,cuCtxGetCurrent,pctx); +CUresult cuCtxGetCurrent(CUcontext* pctx) { + CUresult res = CUDA_OVERRIDE_CALL(cuda_library_entry, cuCtxGetCurrent, pctx); return res; } -CUresult cuCtxGetFlags ( unsigned int* flags ){ - LOG_DEBUG("into cuCtxGetFlags flags=%p",flags); - return CUDA_OVERRIDE_CALL(cuda_library_entry,cuCtxGetFlags,flags); +CUresult cuCtxGetFlags(unsigned int* flags) { + LOG_DEBUG("into cuCtxGetFlags flags=%p", flags); + return CUDA_OVERRIDE_CALL(cuda_library_entry, cuCtxGetFlags, flags); } -CUresult cuCtxGetLimit ( size_t* pvalue, CUlimit limit ){ - LOG_DEBUG("into cuCtxGetLimit pvalue=%p",pvalue); - return CUDA_OVERRIDE_CALL(cuda_library_entry,cuCtxGetLimit,pvalue,limit); +CUresult cuCtxGetLimit(size_t* pvalue, CUlimit limit) { + LOG_DEBUG("into cuCtxGetLimit pvalue=%p", pvalue); + return CUDA_OVERRIDE_CALL(cuda_library_entry, cuCtxGetLimit, pvalue, limit); } -CUresult cuCtxGetSharedMemConfig ( CUsharedconfig* pConfig ){ - LOG_DEBUG("cuCtxGetSharedMemConfig pConfig=%p",pConfig); - return 
CUDA_OVERRIDE_CALL(cuda_library_entry,cuCtxGetSharedMemConfig,pConfig); +CUresult cuCtxGetSharedMemConfig(CUsharedconfig* pConfig) { + LOG_DEBUG("cuCtxGetSharedMemConfig pConfig=%p", pConfig); + return CUDA_OVERRIDE_CALL(cuda_library_entry, cuCtxGetSharedMemConfig, pConfig); } -CUresult cuCtxGetStreamPriorityRange ( int* leastPriority, int* greatestPriority ){ - CUresult res = CUDA_OVERRIDE_CALL(cuda_library_entry,cuCtxGetStreamPriorityRange,leastPriority,greatestPriority); - if (res!=CUDA_SUCCESS){ - LOG_ERROR("cuCtxGetStreamPriorityRange err=%d",res); +CUresult cuCtxGetStreamPriorityRange(int* leastPriority, int* greatestPriority) { + CUresult res = CUDA_OVERRIDE_CALL(cuda_library_entry, cuCtxGetStreamPriorityRange, + leastPriority, greatestPriority); + if (res != CUDA_SUCCESS) { + LOG_ERROR("cuCtxGetStreamPriorityRange err=%d", res); } return res; } -CUresult cuCtxPopCurrent_v2 ( CUcontext* pctx ){ - LOG_INFO("cuCtxPopCurrent pctx=%p",pctx); - return CUDA_OVERRIDE_CALL(cuda_library_entry,cuCtxPopCurrent_v2,pctx); +CUresult cuCtxPopCurrent_v2(CUcontext* pctx) { + LOG_INFO("cuCtxPopCurrent pctx=%p", pctx); + return CUDA_OVERRIDE_CALL(cuda_library_entry, cuCtxPopCurrent_v2, pctx); } -CUresult cuCtxPushCurrent_v2 ( CUcontext ctx ){ - LOG_INFO("cuCtxPushCurrent ctx=%p",ctx); - return CUDA_OVERRIDE_CALL(cuda_library_entry,cuCtxPushCurrent_v2,ctx); +CUresult cuCtxPushCurrent_v2(CUcontext ctx) { + LOG_INFO("cuCtxPushCurrent ctx=%p", ctx); + return CUDA_OVERRIDE_CALL(cuda_library_entry, cuCtxPushCurrent_v2, ctx); } -CUresult cuCtxSetCacheConfig ( CUfunc_cache config ){ - LOG_DEBUG("cuCtxSetCacheConfig config=%d",config); - return CUDA_OVERRIDE_CALL(cuda_library_entry,cuCtxSetCacheConfig,config); +CUresult cuCtxSetCacheConfig(CUfunc_cache config) { + LOG_DEBUG("cuCtxSetCacheConfig config=%d", config); + return CUDA_OVERRIDE_CALL(cuda_library_entry, cuCtxSetCacheConfig, config); } -CUresult cuCtxSetCurrent ( CUcontext ctx ){ - CUresult res = 
CUDA_OVERRIDE_CALL(cuda_library_entry,cuCtxSetCurrent,ctx); - if (res!=CUDA_SUCCESS){ - LOG_ERROR("cuCtxSetCurrent111 failed res=%d ctx=%p",res,ctx); +CUresult cuCtxSetCurrent(CUcontext ctx) { + CUresult res = CUDA_OVERRIDE_CALL(cuda_library_entry, cuCtxSetCurrent, ctx); + if (res != CUDA_SUCCESS) { + LOG_ERROR("cuCtxSetCurrent111 failed res=%d ctx=%p", res, ctx); } return res; } -CUresult cuCtxSetLimit ( CUlimit limit, size_t value ){ +CUresult cuCtxSetLimit(CUlimit limit, size_t value) { LOG_DEBUG("cuCtxSetLimit"); - return CUDA_OVERRIDE_CALL(cuda_library_entry,cuCtxSetLimit,limit,value); + return CUDA_OVERRIDE_CALL(cuda_library_entry, cuCtxSetLimit, limit, value); } -CUresult cuCtxSetSharedMemConfig ( CUsharedconfig config ){ +CUresult cuCtxSetSharedMemConfig(CUsharedconfig config) { LOG_DEBUG("cuCtxSetSharedMemConfig"); - return CUDA_OVERRIDE_CALL(cuda_library_entry,cuCtxSetSharedMemConfig,config); + return CUDA_OVERRIDE_CALL(cuda_library_entry, cuCtxSetSharedMemConfig, config); } -CUresult cuCtxSynchronize ( void ){ +CUresult cuCtxSynchronize(void) { LOG_DEBUG("INTO CtxSync"); - CUresult res = CUDA_OVERRIDE_CALL(cuda_library_entry,cuCtxSynchronize); + CUresult res = CUDA_OVERRIDE_CALL(cuda_library_entry, cuCtxSynchronize); return res; } - diff --git a/src/cuda/device.c b/src/cuda/device.c old mode 100755 new mode 100644 index ac1833ab..bd7efad2 --- a/src/cuda/device.c +++ b/src/cuda/device.c @@ -1,27 +1,26 @@ +#include "allocator/allocator.h" #include "include/libcuda_hook.h" -#include "multiprocess/multiprocess_memory_limit.h" -#include "include/nvml_prefix.h" #include "include/libnvml_hook.h" - -#include "allocator/allocator.h" #include "include/memory_limit.h" +#include "include/nvml_prefix.h" +#include "multiprocess/multiprocess_memory_limit.h" -CUresult CUDAAPI cuDeviceGetAttribute ( int* pi, CUdevice_attribute attrib, CUdevice dev ) { - CUresult res = CUDA_OVERRIDE_CALL(cuda_library_entry,cuDeviceGetAttribute,pi,attrib,dev); - 
//LOG_DEBUG("[%d]cuDeviceGetAttribute dev=%d attrib=%d %d",res,dev,(int)attrib,*pi); +CUresult CUDAAPI cuDeviceGetAttribute(int *pi, CUdevice_attribute attrib, CUdevice dev) { + CUresult res = CUDA_OVERRIDE_CALL(cuda_library_entry, cuDeviceGetAttribute, pi, attrib, dev); + // LOG_DEBUG("[%d]cuDeviceGetAttribute dev=%d attrib=%d %d",res,dev,(int)attrib,*pi); return res; } -CUresult cuDeviceGet(CUdevice *device,int ordinal){ - LOG_DEBUG("into cuDeviceGet ordinal=%d\n",ordinal); - CUresult res = CUDA_OVERRIDE_CALL(cuda_library_entry,cuDeviceGet,device,ordinal); +CUresult cuDeviceGet(CUdevice *device, int ordinal) { + LOG_DEBUG("into cuDeviceGet ordinal=%d\n", ordinal); + CUresult res = CUDA_OVERRIDE_CALL(cuda_library_entry, cuDeviceGet, device, ordinal); return res; } -CUresult cuDeviceGetCount( int* count ) { +CUresult cuDeviceGetCount(int *count) { LOG_DEBUG("into cuDeviceGetCount"); - CUresult res = CUDA_OVERRIDE_CALL(cuda_library_entry,cuDeviceGetCount,count); - LOG_DEBUG("cuDeviceGetCount res=%d count=%d",res,*count); + CUresult res = CUDA_OVERRIDE_CALL(cuda_library_entry, cuDeviceGetCount, count); + LOG_DEBUG("cuDeviceGetCount res=%d count=%d", res, *count); return res; } @@ -31,39 +30,38 @@ CUresult cuDeviceGetName(char *name, int len, CUdevice dev) { return res; } -CUresult cuDeviceCanAccessPeer( int* canAccessPeer, CUdevice dev, CUdevice peerDev ) { - LOG_INFO("into cuDeviceCanAccessPeer %d %d",dev,peerDev); - return CUDA_OVERRIDE_CALL(cuda_library_entry,cuDeviceCanAccessPeer,canAccessPeer,dev,peerDev); +CUresult cuDeviceCanAccessPeer(int *canAccessPeer, CUdevice dev, CUdevice peerDev) { + LOG_INFO("into cuDeviceCanAccessPeer %d %d", dev, peerDev); + return CUDA_OVERRIDE_CALL(cuda_library_entry, cuDeviceCanAccessPeer, canAccessPeer, dev, + peerDev); } -CUresult cuDeviceGetP2PAttribute(int *value, CUdevice_P2PAttribute attrib, - CUdevice srcDevice, CUdevice dstDevice) { +CUresult cuDeviceGetP2PAttribute(int *value, CUdevice_P2PAttribute attrib, CUdevice 
srcDevice, + CUdevice dstDevice) { LOG_DEBUG("into cuDeviceGetP2PAttribute\n"); - return CUDA_OVERRIDE_CALL(cuda_library_entry, cuDeviceGetP2PAttribute, value, - attrib, srcDevice, dstDevice); + return CUDA_OVERRIDE_CALL(cuda_library_entry, cuDeviceGetP2PAttribute, value, attrib, srcDevice, + dstDevice); } CUresult cuDeviceGetByPCIBusId(CUdevice *dev, const char *pciBusId) { - return CUDA_OVERRIDE_CALL(cuda_library_entry, cuDeviceGetByPCIBusId, dev, - pciBusId); + return CUDA_OVERRIDE_CALL(cuda_library_entry, cuDeviceGetByPCIBusId, dev, pciBusId); } CUresult cuDeviceGetPCIBusId(char *pciBusId, int len, CUdevice dev) { - LOG_INFO("into cuDeviceGetPCIBusId dev=%d len=%d",dev,len); - CUresult res = CUDA_OVERRIDE_CALL(cuda_library_entry, cuDeviceGetPCIBusId, pciBusId, len, - dev); + LOG_INFO("into cuDeviceGetPCIBusId dev=%d len=%d", dev, len); + CUresult res = CUDA_OVERRIDE_CALL(cuda_library_entry, cuDeviceGetPCIBusId, pciBusId, len, dev); return res; } #if CUDA_VERSION < 13000 -CUresult cuDeviceGetUuid(CUuuid* uuid,CUdevice dev) { - LOG_DEBUG("into cuDeviceGetUuid dev=%d",dev); - CUresult res = CUDA_OVERRIDE_CALL(cuda_library_entry,cuDeviceGetUuid,uuid,dev); +CUresult cuDeviceGetUuid(CUuuid *uuid, CUdevice dev) { + LOG_DEBUG("into cuDeviceGetUuid dev=%d", dev); + CUresult res = CUDA_OVERRIDE_CALL(cuda_library_entry, cuDeviceGetUuid, uuid, dev); return res; } #endif -CUresult cuDeviceGetUuid_v2(CUuuid* uuid, CUdevice dev) { +CUresult cuDeviceGetUuid_v2(CUuuid *uuid, CUdevice dev) { LOG_DEBUG("into cuDeviceGetUuid_v2 dev=%d", dev); CUresult res = CUDA_OVERRIDE_CALL(cuda_library_entry, cuDeviceGetUuid_v2, uuid, dev); return res; @@ -71,23 +69,20 @@ CUresult cuDeviceGetUuid_v2(CUuuid* uuid, CUdevice dev) { CUresult cuDeviceGetDefaultMemPool(CUmemoryPool *pool_out, CUdevice dev) { LOG_DEBUG("cuDeviceGetDefaultMemPool"); - return CUDA_OVERRIDE_CALL(cuda_library_entry, cuDeviceGetDefaultMemPool, - pool_out, dev); + return CUDA_OVERRIDE_CALL(cuda_library_entry, 
cuDeviceGetDefaultMemPool, pool_out, dev); } -CUresult cuDeviceGetMemPool(CUmemoryPool *pool, CUdevice dev){ +CUresult cuDeviceGetMemPool(CUmemoryPool *pool, CUdevice dev) { LOG_DEBUG("cuDeviceGetMemPool"); return CUDA_OVERRIDE_CALL(cuda_library_entry, cuDeviceGetMemPool, pool, dev); } -CUresult cuDeviceGetLuid(char *luid, unsigned int *deviceNodeMask, - CUdevice dev) { - LOG_DEBUG("cuDeviceGetLuid"); - return CUDA_OVERRIDE_CALL(cuda_library_entry, cuDeviceGetLuid, luid, - deviceNodeMask, dev); +CUresult cuDeviceGetLuid(char *luid, unsigned int *deviceNodeMask, CUdevice dev) { + LOG_DEBUG("cuDeviceGetLuid"); + return CUDA_OVERRIDE_CALL(cuda_library_entry, cuDeviceGetLuid, luid, deviceNodeMask, dev); } -CUresult cuDeviceTotalMem_v2 ( size_t* bytes, CUdevice dev ) { +CUresult cuDeviceTotalMem_v2(size_t *bytes, CUdevice dev) { LOG_DEBUG("into cuDeviceTotalMem"); ENSURE_INITIALIZED(); size_t limit = get_current_device_memory_limit(dev); @@ -97,29 +92,32 @@ CUresult cuDeviceTotalMem_v2 ( size_t* bytes, CUdevice dev ) { CUresult cuDriverGetVersion(int *driverVersion) { LOG_DEBUG("into cuDriverGetVersion__"); - - //stub dlsym to prelaod cuda functions - dlsym(RTLD_DEFAULT,"cuDriverGetVersion"); - CUresult res = CUDA_OVERRIDE_CALL(cuda_library_entry,cuDriverGetVersion,driverVersion); + // stub dlsym to prelaod cuda functions + dlsym(RTLD_DEFAULT, "cuDriverGetVersion"); + + CUresult res = CUDA_OVERRIDE_CALL(cuda_library_entry, cuDriverGetVersion, driverVersion); //*driverVersion=11030; - if ((res==CUDA_SUCCESS) && (driverVersion!=NULL)) { - LOG_INFO("driver version=%d",*driverVersion); + if ((res == CUDA_SUCCESS) && (driverVersion != NULL)) { + LOG_INFO("driver version=%d", *driverVersion); } return res; } -CUresult cuDeviceGetTexture1DLinearMaxWidth(size_t *maxWidthInElements, CUarray_format format, unsigned numChannels, CUdevice dev){ +CUresult cuDeviceGetTexture1DLinearMaxWidth(size_t *maxWidthInElements, CUarray_format format, + unsigned numChannels, CUdevice dev) { 
LOG_DEBUG("cuDeviceGetTexture1DLinearMaxWidth"); - return CUDA_OVERRIDE_CALL(cuda_library_entry,cuDeviceGetTexture1DLinearMaxWidth,maxWidthInElements,format,numChannels,dev); + return CUDA_OVERRIDE_CALL(cuda_library_entry, cuDeviceGetTexture1DLinearMaxWidth, + maxWidthInElements, format, numChannels, dev); } CUresult cuDeviceSetMemPool(CUdevice dev, CUmemoryPool pool) { LOG_DEBUG("cuDeviceSetMemPool"); - return CUDA_OVERRIDE_CALL(cuda_library_entry,cuDeviceSetMemPool,dev,pool); + return CUDA_OVERRIDE_CALL(cuda_library_entry, cuDeviceSetMemPool, dev, pool); } -CUresult cuFlushGPUDirectRDMAWrites(CUflushGPUDirectRDMAWritesTarget target, CUflushGPUDirectRDMAWritesScope scope) { - LOG_DEBUG("cuFlushGPUDirectRDMAWrites"); - return CUDA_OVERRIDE_CALL(cuda_library_entry,cuFlushGPUDirectRDMAWrites,target,scope); +CUresult cuFlushGPUDirectRDMAWrites(CUflushGPUDirectRDMAWritesTarget target, + CUflushGPUDirectRDMAWritesScope scope) { + LOG_DEBUG("cuFlushGPUDirectRDMAWrites"); + return CUDA_OVERRIDE_CALL(cuda_library_entry, cuFlushGPUDirectRDMAWrites, target, scope); } diff --git a/src/cuda/event.c b/src/cuda/event.c index 469c199b..9ca5648b 100644 --- a/src/cuda/event.c +++ b/src/cuda/event.c @@ -1,103 +1,113 @@ -#include "include/libcuda_hook.h" #include -CUresult cuEventCreate ( CUevent* phEvent, unsigned int Flags ){ - LOG_DEBUG("cuEventCreate Event=%p",phEvent); - return CUDA_OVERRIDE_CALL(cuda_library_entry,cuEventCreate,phEvent,Flags); +#include "include/libcuda_hook.h" + +CUresult cuEventCreate(CUevent* phEvent, unsigned int Flags) { + LOG_DEBUG("cuEventCreate Event=%p", phEvent); + return CUDA_OVERRIDE_CALL(cuda_library_entry, cuEventCreate, phEvent, Flags); } -CUresult cuEventDestroy_v2 ( CUevent hEvent ){ - LOG_DEBUG("cuEventDestroy_v2 hEvent=%p",hEvent); - return CUDA_OVERRIDE_CALL(cuda_library_entry,cuEventDestroy_v2,hEvent); +CUresult cuEventDestroy_v2(CUevent hEvent) { + LOG_DEBUG("cuEventDestroy_v2 hEvent=%p", hEvent); + return 
CUDA_OVERRIDE_CALL(cuda_library_entry, cuEventDestroy_v2, hEvent); } -CUresult cuModuleLoad ( CUmodule* module, const char* fname ){ - LOG_DEBUG(" cuModuleLoad fname=%s",fname); - return CUDA_OVERRIDE_CALL(cuda_library_entry,cuModuleLoad,module,fname); +CUresult cuModuleLoad(CUmodule* module, const char* fname) { + LOG_DEBUG(" cuModuleLoad fname=%s", fname); + return CUDA_OVERRIDE_CALL(cuda_library_entry, cuModuleLoad, module, fname); } -CUresult cuModuleLoadData( CUmodule* module, const void* image){ - LOG_DEBUG("cuModuleLoadData module=%p",module); - return CUDA_OVERRIDE_CALL(cuda_library_entry,cuModuleLoadData,module,image); +CUresult cuModuleLoadData(CUmodule* module, const void* image) { + LOG_DEBUG("cuModuleLoadData module=%p", module); + return CUDA_OVERRIDE_CALL(cuda_library_entry, cuModuleLoadData, module, image); } -CUresult cuModuleLoadDataEx ( CUmodule* module, const void* image, unsigned int numOptions, CUjit_option* options, void** optionValues ){ - LOG_DEBUG("cuModuleLoadDataEx module=%p",module); - return CUDA_OVERRIDE_CALL(cuda_library_entry,cuModuleLoadDataEx,module,image,numOptions,options,optionValues); +CUresult cuModuleLoadDataEx(CUmodule* module, const void* image, unsigned int numOptions, + CUjit_option* options, void** optionValues) { + LOG_DEBUG("cuModuleLoadDataEx module=%p", module); + return CUDA_OVERRIDE_CALL(cuda_library_entry, cuModuleLoadDataEx, module, image, numOptions, + options, optionValues); } -CUresult cuModuleLoadFatBinary ( CUmodule* module, const void* fatCubin ){ - LOG_DEBUG("cuModuleLoadFatBinary module=%p",module); - return CUDA_OVERRIDE_CALL(cuda_library_entry,cuModuleLoadFatBinary,module,fatCubin); +CUresult cuModuleLoadFatBinary(CUmodule* module, const void* fatCubin) { + LOG_DEBUG("cuModuleLoadFatBinary module=%p", module); + return CUDA_OVERRIDE_CALL(cuda_library_entry, cuModuleLoadFatBinary, module, fatCubin); } -CUresult cuModuleGetFunction ( CUfunction* hfunc, CUmodule hmod, const char* name ){ - CUresult res = 
CUDA_OVERRIDE_CALL(cuda_library_entry,cuModuleGetFunction,hfunc,hmod,name); +CUresult cuModuleGetFunction(CUfunction* hfunc, CUmodule hmod, const char* name) { + CUresult res = CUDA_OVERRIDE_CALL(cuda_library_entry, cuModuleGetFunction, hfunc, hmod, name); return res; } CUresult cuModuleUnload(CUmodule hmod) { LOG_DEBUG("cuModuleUnload"); - return CUDA_OVERRIDE_CALL(cuda_library_entry,cuModuleUnload,hmod); + return CUDA_OVERRIDE_CALL(cuda_library_entry, cuModuleUnload, hmod); } -CUresult cuModuleGetGlobal_v2(CUdeviceptr *dptr, size_t *bytes, CUmodule hmod, const char *name) { - CUresult res = CUDA_OVERRIDE_CALL(cuda_library_entry,cuModuleGetGlobal_v2,dptr,bytes,hmod,name); +CUresult cuModuleGetGlobal_v2(CUdeviceptr* dptr, size_t* bytes, CUmodule hmod, const char* name) { + CUresult res = + CUDA_OVERRIDE_CALL(cuda_library_entry, cuModuleGetGlobal_v2, dptr, bytes, hmod, name); return res; } -CUresult cuModuleGetTexRef(CUtexref *pTexRef, CUmodule hmod, const char *name) { +CUresult cuModuleGetTexRef(CUtexref* pTexRef, CUmodule hmod, const char* name) { LOG_INFO("cuModuleGetTexRef"); - return CUDA_OVERRIDE_CALL(cuda_library_entry,cuModuleGetTexRef,pTexRef,hmod,name); + return CUDA_OVERRIDE_CALL(cuda_library_entry, cuModuleGetTexRef, pTexRef, hmod, name); } -CUresult cuModuleGetSurfRef(CUsurfref *pSurfRef, CUmodule hmod, const char *name) { +CUresult cuModuleGetSurfRef(CUsurfref* pSurfRef, CUmodule hmod, const char* name) { LOG_INFO("cuModuleGetSurfRef"); - return CUDA_OVERRIDE_CALL(cuda_library_entry,cuModuleGetSurfRef,pSurfRef,hmod,name); + return CUDA_OVERRIDE_CALL(cuda_library_entry, cuModuleGetSurfRef, pSurfRef, hmod, name); } -CUresult cuLinkAddData_v2 ( CUlinkState state, CUjitInputType type, void* data, size_t size, const char* name, unsigned int numOptions, CUjit_option* options, void** optionValues ) { +CUresult cuLinkAddData_v2(CUlinkState state, CUjitInputType type, void* data, size_t size, + const char* name, unsigned int numOptions, CUjit_option* options, 
+ void** optionValues) { LOG_DEBUG("into cuLinkAddData_v2"); - return CUDA_OVERRIDE_CALL(cuda_library_entry,cuLinkAddData_v2,state,type,data,size,name,numOptions,options,optionValues); + return CUDA_OVERRIDE_CALL(cuda_library_entry, cuLinkAddData_v2, state, type, data, size, name, + numOptions, options, optionValues); } -CUresult cuLinkCreate_v2 ( unsigned int numOptions, CUjit_option* options, void** optionValues, CUlinkState* stateOut ) { +CUresult cuLinkCreate_v2(unsigned int numOptions, CUjit_option* options, void** optionValues, + CUlinkState* stateOut) { LOG_DEBUG("into cuLinkCreate_v2"); - return CUDA_OVERRIDE_CALL(cuda_library_entry,cuLinkCreate_v2,numOptions,options,optionValues,stateOut); + return CUDA_OVERRIDE_CALL(cuda_library_entry, cuLinkCreate_v2, numOptions, options, + optionValues, stateOut); } -CUresult cuLinkAddFile_v2(CUlinkState state, CUjitInputType type, const char *path, - unsigned int numOptions, CUjit_option *options, void **optionValues) { +CUresult cuLinkAddFile_v2(CUlinkState state, CUjitInputType type, const char* path, + unsigned int numOptions, CUjit_option* options, void** optionValues) { LOG_DEBUG("cuLinkAddFile_v2"); - return CUDA_OVERRIDE_CALL(cuda_library_entry,cuLinkAddFile_v2,state,type,path,numOptions,options,optionValues); - } + return CUDA_OVERRIDE_CALL(cuda_library_entry, cuLinkAddFile_v2, state, type, path, numOptions, + options, optionValues); +} -CUresult cuLinkComplete(CUlinkState state, void **cubinOut, size_t *sizeOut) { +CUresult cuLinkComplete(CUlinkState state, void** cubinOut, size_t* sizeOut) { LOG_DEBUG("cuLinkComplete"); - return CUDA_OVERRIDE_CALL(cuda_library_entry,cuLinkComplete,state,cubinOut,sizeOut); + return CUDA_OVERRIDE_CALL(cuda_library_entry, cuLinkComplete, state, cubinOut, sizeOut); } CUresult cuLinkDestroy(CUlinkState state) { LOG_DEBUG("cuLinkDestroy"); - return CUDA_OVERRIDE_CALL(cuda_library_entry,cuLinkDestroy,state); + return CUDA_OVERRIDE_CALL(cuda_library_entry, cuLinkDestroy, state); } 
-CUresult cuFuncSetCacheConfig ( CUfunction hfunc, CUfunc_cache config ){ +CUresult cuFuncSetCacheConfig(CUfunction hfunc, CUfunc_cache config) { LOG_INFO("cuFUncSetCacheConfig"); - return CUDA_OVERRIDE_CALL(cuda_library_entry,cuFuncSetCacheConfig,hfunc,config); + return CUDA_OVERRIDE_CALL(cuda_library_entry, cuFuncSetCacheConfig, hfunc, config); } CUresult cuFuncSetSharedMemConfig(CUfunction hfunc, CUsharedconfig config) { LOG_INFO("cuFuncSetSharedMemConfig"); - return CUDA_OVERRIDE_CALL(cuda_library_entry,cuFuncSetSharedMemConfig,hfunc,config); + return CUDA_OVERRIDE_CALL(cuda_library_entry, cuFuncSetSharedMemConfig, hfunc, config); } -CUresult cuFuncGetAttribute(int *pi, CUfunction_attribute attrib, CUfunction hfunc) { +CUresult cuFuncGetAttribute(int* pi, CUfunction_attribute attrib, CUfunction hfunc) { LOG_DEBUG("cuFuncGetAttribute"); - return CUDA_OVERRIDE_CALL(cuda_library_entry,cuFuncGetAttribute,pi,attrib,hfunc); + return CUDA_OVERRIDE_CALL(cuda_library_entry, cuFuncGetAttribute, pi, attrib, hfunc); } CUresult cuFuncSetAttribute(CUfunction hfunc, CUfunction_attribute attrib, int value) { LOG_DEBUG("cuFuncSetAttribute"); - return CUDA_OVERRIDE_CALL(cuda_library_entry,cuFuncSetAttribute,hfunc,attrib,value); + return CUDA_OVERRIDE_CALL(cuda_library_entry, cuFuncSetAttribute, hfunc, attrib, value); } diff --git a/src/cuda/graph.c b/src/cuda/graph.c index 412743fb..04472692 100644 --- a/src/cuda/graph.c +++ b/src/cuda/graph.c @@ -1,280 +1,341 @@ #include "include/libcuda_hook.h" -CUresult cuGraphCreate(CUgraph *phGraph, unsigned int flags){ - LOG_DEBUG("cuGraphCreate"); - return CUDA_OVERRIDE_CALL(cuda_library_entry,cuGraphCreate,phGraph,flags); +CUresult cuGraphCreate(CUgraph *phGraph, unsigned int flags) { + LOG_DEBUG("cuGraphCreate"); + return CUDA_OVERRIDE_CALL(cuda_library_entry, cuGraphCreate, phGraph, flags); } -CUresult cuGraphAddKernelNode_v2(CUgraphNode *phGraphNode, CUgraph hGraph, const CUgraphNode *dependencies, size_t numDependencies, const 
CUDA_KERNEL_NODE_PARAMS *nodeParams) { - LOG_DEBUG("cuGraphAddKernelNode_v2"); - return CUDA_OVERRIDE_CALL(cuda_library_entry,cuGraphAddKernelNode_v2,phGraphNode,hGraph,dependencies,numDependencies,nodeParams); +CUresult cuGraphAddKernelNode_v2(CUgraphNode *phGraphNode, CUgraph hGraph, + const CUgraphNode *dependencies, size_t numDependencies, + const CUDA_KERNEL_NODE_PARAMS *nodeParams) { + LOG_DEBUG("cuGraphAddKernelNode_v2"); + return CUDA_OVERRIDE_CALL(cuda_library_entry, cuGraphAddKernelNode_v2, phGraphNode, hGraph, + dependencies, numDependencies, nodeParams); } CUresult cuGraphKernelNodeGetParams_v2(CUgraphNode hNode, CUDA_KERNEL_NODE_PARAMS *nodeParams) { - LOG_DEBUG("cuGraphKernelNodeGetParams_v2"); - return CUDA_OVERRIDE_CALL(cuda_library_entry,cuGraphKernelNodeGetParams_v2,hNode,nodeParams); + LOG_DEBUG("cuGraphKernelNodeGetParams_v2"); + return CUDA_OVERRIDE_CALL(cuda_library_entry, cuGraphKernelNodeGetParams_v2, hNode, nodeParams); } -CUresult cuGraphKernelNodeSetParams_v2(CUgraphNode hNode, const CUDA_KERNEL_NODE_PARAMS *nodeParams) { - LOG_DEBUG("cuGraphKernelNodeSetParams_v2"); - return CUDA_OVERRIDE_CALL(cuda_library_entry,cuGraphKernelNodeSetParams_v2,hNode,nodeParams); +CUresult cuGraphKernelNodeSetParams_v2(CUgraphNode hNode, + const CUDA_KERNEL_NODE_PARAMS *nodeParams) { + LOG_DEBUG("cuGraphKernelNodeSetParams_v2"); + return CUDA_OVERRIDE_CALL(cuda_library_entry, cuGraphKernelNodeSetParams_v2, hNode, nodeParams); } -CUresult cuGraphAddMemcpyNode(CUgraphNode *phGraphNode, CUgraph hGraph, const CUgraphNode *dependencies, size_t numDependencies, const CUDA_MEMCPY3D *copyParams, CUcontext ctx) { - LOG_DEBUG("cuGraphAddMemcpyNode"); - return CUDA_OVERRIDE_CALL(cuda_library_entry,cuGraphAddMemcpyNode,phGraphNode,hGraph,dependencies,numDependencies,copyParams,ctx); +CUresult cuGraphAddMemcpyNode(CUgraphNode *phGraphNode, CUgraph hGraph, + const CUgraphNode *dependencies, size_t numDependencies, + const CUDA_MEMCPY3D *copyParams, CUcontext ctx) { + 
LOG_DEBUG("cuGraphAddMemcpyNode"); + return CUDA_OVERRIDE_CALL(cuda_library_entry, cuGraphAddMemcpyNode, phGraphNode, hGraph, + dependencies, numDependencies, copyParams, ctx); } CUresult cuGraphMemcpyNodeGetParams(CUgraphNode hNode, CUDA_MEMCPY3D *nodeParams) { - LOG_DEBUG("cuGraphMemcpyNodeGetParams"); - return CUDA_OVERRIDE_CALL(cuda_library_entry,cuGraphMemcpyNodeGetParams,hNode,nodeParams); + LOG_DEBUG("cuGraphMemcpyNodeGetParams"); + return CUDA_OVERRIDE_CALL(cuda_library_entry, cuGraphMemcpyNodeGetParams, hNode, nodeParams); } CUresult cuGraphMemcpyNodeSetParams(CUgraphNode hNode, const CUDA_MEMCPY3D *nodeParams) { - LOG_DEBUG("cuGraphMemcpyNodeSetParams"); - return CUDA_OVERRIDE_CALL(cuda_library_entry,cuGraphMemcpyNodeSetParams,hNode,nodeParams); + LOG_DEBUG("cuGraphMemcpyNodeSetParams"); + return CUDA_OVERRIDE_CALL(cuda_library_entry, cuGraphMemcpyNodeSetParams, hNode, nodeParams); } -CUresult cuGraphAddMemsetNode(CUgraphNode *phGraphNode, CUgraph hGraph, const CUgraphNode *dependencies, size_t numDependencies, const CUDA_MEMSET_NODE_PARAMS *memsetParams, CUcontext ctx) { - LOG_DEBUG("cuGraphAddMemsetNode"); - return CUDA_OVERRIDE_CALL(cuda_library_entry,cuGraphAddMemsetNode,phGraphNode,hGraph,dependencies,numDependencies,memsetParams,ctx); +CUresult cuGraphAddMemsetNode(CUgraphNode *phGraphNode, CUgraph hGraph, + const CUgraphNode *dependencies, size_t numDependencies, + const CUDA_MEMSET_NODE_PARAMS *memsetParams, CUcontext ctx) { + LOG_DEBUG("cuGraphAddMemsetNode"); + return CUDA_OVERRIDE_CALL(cuda_library_entry, cuGraphAddMemsetNode, phGraphNode, hGraph, + dependencies, numDependencies, memsetParams, ctx); } CUresult cuGraphMemsetNodeGetParams(CUgraphNode hNode, CUDA_MEMSET_NODE_PARAMS *nodeParams) { - LOG_DEBUG("cuGraphMemsetNodeGetParams"); - return CUDA_OVERRIDE_CALL(cuda_library_entry,cuGraphMemsetNodeGetParams,hNode,nodeParams); + LOG_DEBUG("cuGraphMemsetNodeGetParams"); + return CUDA_OVERRIDE_CALL(cuda_library_entry, cuGraphMemsetNodeGetParams, 
hNode, nodeParams); } CUresult cuGraphMemsetNodeSetParams(CUgraphNode hNode, const CUDA_MEMSET_NODE_PARAMS *nodeParams) { - LOG_DEBUG("cuGraphMemsetNodeSetParams"); - return CUDA_OVERRIDE_CALL(cuda_library_entry,cuGraphMemsetNodeSetParams,hNode,nodeParams); + LOG_DEBUG("cuGraphMemsetNodeSetParams"); + return CUDA_OVERRIDE_CALL(cuda_library_entry, cuGraphMemsetNodeSetParams, hNode, nodeParams); } -CUresult cuGraphAddHostNode(CUgraphNode *phGraphNode, CUgraph hGraph, const CUgraphNode *dependencies, size_t numDependencies, const CUDA_HOST_NODE_PARAMS *nodeParams) { - LOG_DEBUG("cuGraphAddHostNode"); - return CUDA_OVERRIDE_CALL(cuda_library_entry,cuGraphAddHostNode,phGraphNode,hGraph,dependencies,numDependencies,nodeParams); +CUresult cuGraphAddHostNode(CUgraphNode *phGraphNode, CUgraph hGraph, + const CUgraphNode *dependencies, size_t numDependencies, + const CUDA_HOST_NODE_PARAMS *nodeParams) { + LOG_DEBUG("cuGraphAddHostNode"); + return CUDA_OVERRIDE_CALL(cuda_library_entry, cuGraphAddHostNode, phGraphNode, hGraph, + dependencies, numDependencies, nodeParams); } CUresult cuGraphHostNodeGetParams(CUgraphNode hNode, CUDA_HOST_NODE_PARAMS *nodeParams) { - LOG_DEBUG("cuGraphHostNodeGetParams"); - return CUDA_OVERRIDE_CALL(cuda_library_entry,cuGraphHostNodeGetParams,hNode,nodeParams); + LOG_DEBUG("cuGraphHostNodeGetParams"); + return CUDA_OVERRIDE_CALL(cuda_library_entry, cuGraphHostNodeGetParams, hNode, nodeParams); } CUresult cuGraphHostNodeSetParams(CUgraphNode hNode, const CUDA_HOST_NODE_PARAMS *nodeParams) { - LOG_DEBUG("cuGraphHostNodeSetParams"); - return CUDA_OVERRIDE_CALL(cuda_library_entry,cuGraphHostNodeSetParams,hNode,nodeParams); + LOG_DEBUG("cuGraphHostNodeSetParams"); + return CUDA_OVERRIDE_CALL(cuda_library_entry, cuGraphHostNodeSetParams, hNode, nodeParams); } -CUresult cuGraphAddChildGraphNode(CUgraphNode *phGraphNode, CUgraph hGraph, const CUgraphNode *dependencies, size_t numDependencies, CUgraph childGraph) { - LOG_DEBUG("cuGraphAddChildGraphNode"); 
- return CUDA_OVERRIDE_CALL(cuda_library_entry,cuGraphAddChildGraphNode,phGraphNode,hGraph,dependencies,numDependencies,childGraph); +CUresult cuGraphAddChildGraphNode(CUgraphNode *phGraphNode, CUgraph hGraph, + const CUgraphNode *dependencies, size_t numDependencies, + CUgraph childGraph) { + LOG_DEBUG("cuGraphAddChildGraphNode"); + return CUDA_OVERRIDE_CALL(cuda_library_entry, cuGraphAddChildGraphNode, phGraphNode, hGraph, + dependencies, numDependencies, childGraph); } CUresult cuGraphChildGraphNodeGetGraph(CUgraphNode hNode, CUgraph *phGraph) { - LOG_DEBUG("cuGraphChildGraphNodeGetGraph"); - return CUDA_OVERRIDE_CALL(cuda_library_entry,cuGraphChildGraphNodeGetGraph,hNode,phGraph); + LOG_DEBUG("cuGraphChildGraphNodeGetGraph"); + return CUDA_OVERRIDE_CALL(cuda_library_entry, cuGraphChildGraphNodeGetGraph, hNode, phGraph); } -CUresult cuGraphAddEmptyNode(CUgraphNode *phGraphNode, CUgraph hGraph, const CUgraphNode *dependencies, size_t numDependencies) { - LOG_DEBUG("cuGraphAddEmptyNode"); - return CUDA_OVERRIDE_CALL(cuda_library_entry,cuGraphAddEmptyNode,phGraphNode,hGraph,dependencies,numDependencies); +CUresult cuGraphAddEmptyNode(CUgraphNode *phGraphNode, CUgraph hGraph, + const CUgraphNode *dependencies, size_t numDependencies) { + LOG_DEBUG("cuGraphAddEmptyNode"); + return CUDA_OVERRIDE_CALL(cuda_library_entry, cuGraphAddEmptyNode, phGraphNode, hGraph, + dependencies, numDependencies); } -CUresult cuGraphAddEventRecordNode(CUgraphNode *phGraphNode, CUgraph hGraph, const CUgraphNode *dependencies, size_t numDependencies, CUevent event) { - LOG_DEBUG("cuGraphAddEventRecordNode"); - return CUDA_OVERRIDE_CALL(cuda_library_entry,cuGraphAddEventRecordNode,phGraphNode,hGraph,dependencies,numDependencies,event); +CUresult cuGraphAddEventRecordNode(CUgraphNode *phGraphNode, CUgraph hGraph, + const CUgraphNode *dependencies, size_t numDependencies, + CUevent event) { + LOG_DEBUG("cuGraphAddEventRecordNode"); + return CUDA_OVERRIDE_CALL(cuda_library_entry, 
cuGraphAddEventRecordNode, phGraphNode, hGraph, + dependencies, numDependencies, event); } CUresult cuGraphEventRecordNodeGetEvent(CUgraphNode hNode, CUevent *event_out) { - LOG_DEBUG("cuGraphEventRecordNodeGetEvent"); - return CUDA_OVERRIDE_CALL(cuda_library_entry,cuGraphEventRecordNodeGetEvent,hNode,event_out); + LOG_DEBUG("cuGraphEventRecordNodeGetEvent"); + return CUDA_OVERRIDE_CALL(cuda_library_entry, cuGraphEventRecordNodeGetEvent, hNode, event_out); } CUresult cuGraphEventRecordNodeSetEvent(CUgraphNode hNode, CUevent event) { - LOG_DEBUG("cuGraphEventRecordNodeSetEvent"); - return CUDA_OVERRIDE_CALL(cuda_library_entry,cuGraphEventRecordNodeSetEvent,hNode,event); + LOG_DEBUG("cuGraphEventRecordNodeSetEvent"); + return CUDA_OVERRIDE_CALL(cuda_library_entry, cuGraphEventRecordNodeSetEvent, hNode, event); } -CUresult cuGraphAddEventWaitNode(CUgraphNode *phGraphNode, CUgraph hGraph, const CUgraphNode *dependencies, size_t numDependencies, CUevent event) { - LOG_DEBUG("cuGraphAddEventWaitNode"); - return CUDA_OVERRIDE_CALL(cuda_library_entry,cuGraphAddEventWaitNode,phGraphNode,hGraph,dependencies,numDependencies,event); +CUresult cuGraphAddEventWaitNode(CUgraphNode *phGraphNode, CUgraph hGraph, + const CUgraphNode *dependencies, size_t numDependencies, + CUevent event) { + LOG_DEBUG("cuGraphAddEventWaitNode"); + return CUDA_OVERRIDE_CALL(cuda_library_entry, cuGraphAddEventWaitNode, phGraphNode, hGraph, + dependencies, numDependencies, event); } CUresult cuGraphEventWaitNodeGetEvent(CUgraphNode hNode, CUevent *event_out) { - LOG_DEBUG("cuGraphEventWaitNodeGetEvent"); - return CUDA_OVERRIDE_CALL(cuda_library_entry,cuGraphEventWaitNodeGetEvent,hNode,event_out); + LOG_DEBUG("cuGraphEventWaitNodeGetEvent"); + return CUDA_OVERRIDE_CALL(cuda_library_entry, cuGraphEventWaitNodeGetEvent, hNode, event_out); } CUresult cuGraphEventWaitNodeSetEvent(CUgraphNode hNode, CUevent event) { - LOG_DEBUG("cuGraphEventWaitNodeSetEvent"); - return 
CUDA_OVERRIDE_CALL(cuda_library_entry,cuGraphEventWaitNodeSetEvent,hNode,event); + LOG_DEBUG("cuGraphEventWaitNodeSetEvent"); + return CUDA_OVERRIDE_CALL(cuda_library_entry, cuGraphEventWaitNodeSetEvent, hNode, event); } -CUresult cuGraphAddExternalSemaphoresSignalNode(CUgraphNode *phGraphNode, CUgraph hGraph, const CUgraphNode *dependencies, size_t numDependencies, const CUDA_EXT_SEM_SIGNAL_NODE_PARAMS *nodeParams) { - LOG_DEBUG("cuGraphAddExternalSemaphoresSignalNode"); - return CUDA_OVERRIDE_CALL(cuda_library_entry,cuGraphAddExternalSemaphoresSignalNode,phGraphNode,hGraph,dependencies,numDependencies,nodeParams); +CUresult cuGraphAddExternalSemaphoresSignalNode(CUgraphNode *phGraphNode, CUgraph hGraph, + const CUgraphNode *dependencies, + size_t numDependencies, + const CUDA_EXT_SEM_SIGNAL_NODE_PARAMS *nodeParams) { + LOG_DEBUG("cuGraphAddExternalSemaphoresSignalNode"); + return CUDA_OVERRIDE_CALL(cuda_library_entry, cuGraphAddExternalSemaphoresSignalNode, + phGraphNode, hGraph, dependencies, numDependencies, nodeParams); } -CUresult cuGraphExternalSemaphoresSignalNodeGetParams(CUgraphNode hNode, CUDA_EXT_SEM_SIGNAL_NODE_PARAMS *params_out) { - LOG_DEBUG("cuGraphExternalSemaphoresSignalNodeGetParams"); - return CUDA_OVERRIDE_CALL(cuda_library_entry,cuGraphExternalSemaphoresSignalNodeGetParams,hNode,params_out); +CUresult cuGraphExternalSemaphoresSignalNodeGetParams(CUgraphNode hNode, + CUDA_EXT_SEM_SIGNAL_NODE_PARAMS *params_out) { + LOG_DEBUG("cuGraphExternalSemaphoresSignalNodeGetParams"); + return CUDA_OVERRIDE_CALL(cuda_library_entry, cuGraphExternalSemaphoresSignalNodeGetParams, + hNode, params_out); } -CUresult cuGraphExternalSemaphoresSignalNodeSetParams(CUgraphNode hNode, const CUDA_EXT_SEM_SIGNAL_NODE_PARAMS *nodeParams) { - LOG_DEBUG("cuGraphExternalSemaphoresSignalNodeSetParams"); - return CUDA_OVERRIDE_CALL(cuda_library_entry,cuGraphExternalSemaphoresSignalNodeSetParams,hNode,nodeParams); +CUresult cuGraphExternalSemaphoresSignalNodeSetParams( + 
CUgraphNode hNode, const CUDA_EXT_SEM_SIGNAL_NODE_PARAMS *nodeParams) { + LOG_DEBUG("cuGraphExternalSemaphoresSignalNodeSetParams"); + return CUDA_OVERRIDE_CALL(cuda_library_entry, cuGraphExternalSemaphoresSignalNodeSetParams, + hNode, nodeParams); } -CUresult cuGraphAddExternalSemaphoresWaitNode(CUgraphNode *phGraphNode, CUgraph hGraph, const CUgraphNode *dependencies, size_t numDependencies, const CUDA_EXT_SEM_WAIT_NODE_PARAMS *nodeParams) { - LOG_DEBUG("cuGraphAddExternalSemaphoresWaitNode"); - return CUDA_OVERRIDE_CALL(cuda_library_entry,cuGraphAddExternalSemaphoresWaitNode,phGraphNode,hGraph,dependencies,numDependencies,nodeParams); +CUresult cuGraphAddExternalSemaphoresWaitNode(CUgraphNode *phGraphNode, CUgraph hGraph, + const CUgraphNode *dependencies, + size_t numDependencies, + const CUDA_EXT_SEM_WAIT_NODE_PARAMS *nodeParams) { + LOG_DEBUG("cuGraphAddExternalSemaphoresWaitNode"); + return CUDA_OVERRIDE_CALL(cuda_library_entry, cuGraphAddExternalSemaphoresWaitNode, phGraphNode, + hGraph, dependencies, numDependencies, nodeParams); } -CUresult cuGraphExternalSemaphoresWaitNodeGetParams(CUgraphNode hNode, CUDA_EXT_SEM_WAIT_NODE_PARAMS *params_out) { - LOG_DEBUG("cuGraphExternalSemaphoresWaitNodeGetParams"); - return CUDA_OVERRIDE_CALL(cuda_library_entry,cuGraphExternalSemaphoresWaitNodeGetParams,hNode,params_out); +CUresult cuGraphExternalSemaphoresWaitNodeGetParams(CUgraphNode hNode, + CUDA_EXT_SEM_WAIT_NODE_PARAMS *params_out) { + LOG_DEBUG("cuGraphExternalSemaphoresWaitNodeGetParams"); + return CUDA_OVERRIDE_CALL(cuda_library_entry, cuGraphExternalSemaphoresWaitNodeGetParams, hNode, + params_out); } -CUresult cuGraphExternalSemaphoresWaitNodeSetParams(CUgraphNode hNode, const CUDA_EXT_SEM_WAIT_NODE_PARAMS *nodeParams) { - LOG_DEBUG("cuGraphExternalSemaphoresWaitNodeSetParams"); - return CUDA_OVERRIDE_CALL(cuda_library_entry,cuGraphExternalSemaphoresWaitNodeSetParams,hNode,nodeParams); +CUresult cuGraphExternalSemaphoresWaitNodeSetParams( + CUgraphNode 
hNode, const CUDA_EXT_SEM_WAIT_NODE_PARAMS *nodeParams) { + LOG_DEBUG("cuGraphExternalSemaphoresWaitNodeSetParams"); + return CUDA_OVERRIDE_CALL(cuda_library_entry, cuGraphExternalSemaphoresWaitNodeSetParams, hNode, + nodeParams); } -CUresult cuGraphExecExternalSemaphoresSignalNodeSetParams(CUgraphExec hGraphExec, CUgraphNode hNode, const CUDA_EXT_SEM_SIGNAL_NODE_PARAMS *nodeParams) { - LOG_DEBUG("cuGraphExecExternalSemaphoresSignalNodeSetParams"); - return CUDA_OVERRIDE_CALL(cuda_library_entry,cuGraphExecExternalSemaphoresSignalNodeSetParams,hGraphExec,hNode,nodeParams); +CUresult cuGraphExecExternalSemaphoresSignalNodeSetParams( + CUgraphExec hGraphExec, CUgraphNode hNode, const CUDA_EXT_SEM_SIGNAL_NODE_PARAMS *nodeParams) { + LOG_DEBUG("cuGraphExecExternalSemaphoresSignalNodeSetParams"); + return CUDA_OVERRIDE_CALL(cuda_library_entry, cuGraphExecExternalSemaphoresSignalNodeSetParams, + hGraphExec, hNode, nodeParams); } -CUresult cuGraphExecExternalSemaphoresWaitNodeSetParams(CUgraphExec hGraphExec, CUgraphNode hNode, const CUDA_EXT_SEM_WAIT_NODE_PARAMS *nodeParams) { - LOG_DEBUG("cuGraphExecExternalSemaphoresWaitNodeSetParams"); - return CUDA_OVERRIDE_CALL(cuda_library_entry,cuGraphExecExternalSemaphoresWaitNodeSetParams,hGraphExec,hNode,nodeParams); +CUresult cuGraphExecExternalSemaphoresWaitNodeSetParams( + CUgraphExec hGraphExec, CUgraphNode hNode, const CUDA_EXT_SEM_WAIT_NODE_PARAMS *nodeParams) { + LOG_DEBUG("cuGraphExecExternalSemaphoresWaitNodeSetParams"); + return CUDA_OVERRIDE_CALL(cuda_library_entry, cuGraphExecExternalSemaphoresWaitNodeSetParams, + hGraphExec, hNode, nodeParams); } CUresult cuGraphClone(CUgraph *phGraphClone, CUgraph originalGraph) { - LOG_DEBUG("cuGraphClone"); - return CUDA_OVERRIDE_CALL(cuda_library_entry,cuGraphClone,phGraphClone,originalGraph); + LOG_DEBUG("cuGraphClone"); + return CUDA_OVERRIDE_CALL(cuda_library_entry, cuGraphClone, phGraphClone, originalGraph); } -CUresult cuGraphNodeFindInClone(CUgraphNode *phNode, CUgraphNode 
hOriginalNode, CUgraph hClonedGraph) { - LOG_DEBUG("cuGraphNodeFindInClone"); - return CUDA_OVERRIDE_CALL(cuda_library_entry,cuGraphNodeFindInClone,phNode,hOriginalNode,hClonedGraph); +CUresult cuGraphNodeFindInClone(CUgraphNode *phNode, CUgraphNode hOriginalNode, + CUgraph hClonedGraph) { + LOG_DEBUG("cuGraphNodeFindInClone"); + return CUDA_OVERRIDE_CALL(cuda_library_entry, cuGraphNodeFindInClone, phNode, hOriginalNode, + hClonedGraph); } CUresult cuGraphNodeGetType(CUgraphNode hNode, CUgraphNodeType *type) { - LOG_DEBUG("cuGraphNodeGetType"); - return CUDA_OVERRIDE_CALL(cuda_library_entry,cuGraphNodeGetType,hNode,type); + LOG_DEBUG("cuGraphNodeGetType"); + return CUDA_OVERRIDE_CALL(cuda_library_entry, cuGraphNodeGetType, hNode, type); } -CUresult cuGraphGetNodes(CUgraph hGraph, CUgraphNode *nodes, size_t *numNodes){ - LOG_DEBUG("cuGraphGetNodes"); - return CUDA_OVERRIDE_CALL(cuda_library_entry,cuGraphGetNodes,hGraph,nodes,numNodes); +CUresult cuGraphGetNodes(CUgraph hGraph, CUgraphNode *nodes, size_t *numNodes) { + LOG_DEBUG("cuGraphGetNodes"); + return CUDA_OVERRIDE_CALL(cuda_library_entry, cuGraphGetNodes, hGraph, nodes, numNodes); } CUresult cuGraphGetRootNodes(CUgraph hGraph, CUgraphNode *rootNodes, size_t *numRootNodes) { - LOG_DEBUG("cuGraphGetRootNodes"); - return CUDA_OVERRIDE_CALL(cuda_library_entry,cuGraphGetRootNodes,hGraph,rootNodes,numRootNodes); + LOG_DEBUG("cuGraphGetRootNodes"); + return CUDA_OVERRIDE_CALL(cuda_library_entry, cuGraphGetRootNodes, hGraph, rootNodes, + numRootNodes); } #if CUDA_VERSION < 13000 CUresult cuGraphGetEdges(CUgraph hGraph, CUgraphNode *from, CUgraphNode *to, size_t *numEdges) { - LOG_DEBUG("cuGraphGetEdges"); - return CUDA_OVERRIDE_CALL(cuda_library_entry,cuGraphGetEdges,hGraph,from,to,numEdges); + LOG_DEBUG("cuGraphGetEdges"); + return CUDA_OVERRIDE_CALL(cuda_library_entry, cuGraphGetEdges, hGraph, from, to, numEdges); } #endif -CUresult cuGraphGetEdges_v2(CUgraph hGraph, CUgraphNode *from, CUgraphNode *to, 
CUgraphEdgeData *edgeData, - size_t *numEdges) { +CUresult cuGraphGetEdges_v2(CUgraph hGraph, CUgraphNode *from, CUgraphNode *to, + CUgraphEdgeData *edgeData, size_t *numEdges) { LOG_DEBUG("cuGraphGetEdges_v2"); - return CUDA_OVERRIDE_CALL(cuda_library_entry, cuGraphGetEdges_v2, hGraph, from, to, edgeData, numEdges); + return CUDA_OVERRIDE_CALL(cuda_library_entry, cuGraphGetEdges_v2, hGraph, from, to, edgeData, + numEdges); } #if CUDA_VERSION < 13000 -CUresult cuGraphNodeGetDependencies(CUgraphNode hNode, CUgraphNode *dependencies, size_t *numDependencies) { - LOG_DEBUG("cuGraphNodeGetDependencies"); - return CUDA_OVERRIDE_CALL(cuda_library_entry,cuGraphNodeGetDependencies,hNode,dependencies,numDependencies); +CUresult cuGraphNodeGetDependencies(CUgraphNode hNode, CUgraphNode *dependencies, + size_t *numDependencies) { + LOG_DEBUG("cuGraphNodeGetDependencies"); + return CUDA_OVERRIDE_CALL(cuda_library_entry, cuGraphNodeGetDependencies, hNode, dependencies, + numDependencies); } #endif -CUresult cuGraphNodeGetDependencies_v2(CUgraphNode hNode, CUgraphNode *dependencies, CUgraphEdgeData *edgeData, - size_t *numDependencies) { +CUresult cuGraphNodeGetDependencies_v2(CUgraphNode hNode, CUgraphNode *dependencies, + CUgraphEdgeData *edgeData, size_t *numDependencies) { LOG_DEBUG("cuGraphNodeGetDependencies_v2"); - return CUDA_OVERRIDE_CALL(cuda_library_entry, cuGraphNodeGetDependencies_v2, hNode, dependencies, edgeData, - numDependencies); + return CUDA_OVERRIDE_CALL(cuda_library_entry, cuGraphNodeGetDependencies_v2, hNode, + dependencies, edgeData, numDependencies); } #if CUDA_VERSION < 13000 -CUresult cuGraphNodeGetDependentNodes(CUgraphNode hNode, CUgraphNode *dependentNodes, size_t *numDependentNodes) { - LOG_DEBUG("cuGraphNodeGetDependentNodes"); - return CUDA_OVERRIDE_CALL(cuda_library_entry,cuGraphNodeGetDependentNodes,hNode,dependentNodes,numDependentNodes); +CUresult cuGraphNodeGetDependentNodes(CUgraphNode hNode, CUgraphNode *dependentNodes, + size_t 
*numDependentNodes) { + LOG_DEBUG("cuGraphNodeGetDependentNodes"); + return CUDA_OVERRIDE_CALL(cuda_library_entry, cuGraphNodeGetDependentNodes, hNode, + dependentNodes, numDependentNodes); } #endif -CUresult cuGraphNodeGetDependentNodes_v2(CUgraphNode hNode, CUgraphNode *dependentNodes, CUgraphEdgeData *edgeData, - size_t *numDependentNodes) { +CUresult cuGraphNodeGetDependentNodes_v2(CUgraphNode hNode, CUgraphNode *dependentNodes, + CUgraphEdgeData *edgeData, size_t *numDependentNodes) { LOG_DEBUG("cuGraphNodeGetDependentNodes_v2"); - return CUDA_OVERRIDE_CALL(cuda_library_entry, cuGraphNodeGetDependentNodes_v2, hNode, dependentNodes, edgeData, - numDependentNodes); + return CUDA_OVERRIDE_CALL(cuda_library_entry, cuGraphNodeGetDependentNodes_v2, hNode, + dependentNodes, edgeData, numDependentNodes); } #if CUDA_VERSION < 13000 -CUresult cuGraphAddDependencies(CUgraph hGraph, const CUgraphNode *from, const CUgraphNode *to, size_t numDependencies) { - LOG_DEBUG("cuGraphAddDependencies"); - return CUDA_OVERRIDE_CALL(cuda_library_entry,cuGraphAddDependencies,hGraph,from,to,numDependencies); +CUresult cuGraphAddDependencies(CUgraph hGraph, const CUgraphNode *from, const CUgraphNode *to, + size_t numDependencies) { + LOG_DEBUG("cuGraphAddDependencies"); + return CUDA_OVERRIDE_CALL(cuda_library_entry, cuGraphAddDependencies, hGraph, from, to, + numDependencies); } #endif CUresult cuGraphAddDependencies_v2(CUgraph hGraph, const CUgraphNode *from, const CUgraphNode *to, const CUgraphEdgeData *edgeData, size_t numDependencies) { LOG_DEBUG("cuGraphAddDependencies_v2"); - return CUDA_OVERRIDE_CALL(cuda_library_entry, cuGraphAddDependencies_v2, hGraph, from, to, edgeData, - numDependencies); + return CUDA_OVERRIDE_CALL(cuda_library_entry, cuGraphAddDependencies_v2, hGraph, from, to, + edgeData, numDependencies); } #if CUDA_VERSION < 13000 -CUresult cuGraphRemoveDependencies(CUgraph hGraph, const CUgraphNode *from, const CUgraphNode *to, size_t numDependencies) { - 
LOG_DEBUG("cuGraphRemoveDependencies"); - return CUDA_OVERRIDE_CALL(cuda_library_entry,cuGraphRemoveDependencies,hGraph,from,to,numDependencies); +CUresult cuGraphRemoveDependencies(CUgraph hGraph, const CUgraphNode *from, const CUgraphNode *to, + size_t numDependencies) { + LOG_DEBUG("cuGraphRemoveDependencies"); + return CUDA_OVERRIDE_CALL(cuda_library_entry, cuGraphRemoveDependencies, hGraph, from, to, + numDependencies); } #endif -CUresult cuGraphRemoveDependencies_v2(CUgraph hGraph, const CUgraphNode *from, const CUgraphNode *to, - const CUgraphEdgeData *edgeData, size_t numDependencies) { +CUresult cuGraphRemoveDependencies_v2(CUgraph hGraph, const CUgraphNode *from, + const CUgraphNode *to, const CUgraphEdgeData *edgeData, + size_t numDependencies) { LOG_DEBUG("cuGraphRemoveDependencies_v2"); - return CUDA_OVERRIDE_CALL(cuda_library_entry, cuGraphRemoveDependencies_v2, hGraph, from, to, edgeData, - numDependencies); + return CUDA_OVERRIDE_CALL(cuda_library_entry, cuGraphRemoveDependencies_v2, hGraph, from, to, + edgeData, numDependencies); } CUresult cuGraphDestroyNode(CUgraphNode hNode) { - LOG_DEBUG("cuGraphDestroyNode"); - return CUDA_OVERRIDE_CALL(cuda_library_entry,cuGraphDestroyNode,hNode); + LOG_DEBUG("cuGraphDestroyNode"); + return CUDA_OVERRIDE_CALL(cuda_library_entry, cuGraphDestroyNode, hNode); } -CUresult cuGraphInstantiate(CUgraphExec *phGraphExec, CUgraph hGraph, CUgraphNode *phErrorNode, char *logBuffer, size_t bufferSize) { - LOG_DEBUG("cuGraphInstantiate"); - return CUDA_OVERRIDE_CALL(cuda_library_entry,cuGraphInstantiate,phGraphExec,hGraph,phErrorNode,logBuffer,bufferSize); +CUresult cuGraphInstantiate(CUgraphExec *phGraphExec, CUgraph hGraph, CUgraphNode *phErrorNode, + char *logBuffer, size_t bufferSize) { + LOG_DEBUG("cuGraphInstantiate"); + return CUDA_OVERRIDE_CALL(cuda_library_entry, cuGraphInstantiate, phGraphExec, hGraph, + phErrorNode, logBuffer, bufferSize); } -CUresult cuGraphInstantiateWithFlags(CUgraphExec *phGraphExec, CUgraph 
hGraph, unsigned long long flags) { - LOG_DEBUG("cuGraphInstantiateWithFlags"); - return CUDA_OVERRIDE_CALL(cuda_library_entry,cuGraphInstantiateWithFlags,phGraphExec,hGraph,flags); +CUresult cuGraphInstantiateWithFlags(CUgraphExec *phGraphExec, CUgraph hGraph, + unsigned long long flags) { + LOG_DEBUG("cuGraphInstantiateWithFlags"); + return CUDA_OVERRIDE_CALL(cuda_library_entry, cuGraphInstantiateWithFlags, phGraphExec, hGraph, + flags); } CUresult cuGraphUpload(CUgraphExec hGraphExec, CUstream hStream) { - LOG_DEBUG("cuGraphUpload"); - return CUDA_OVERRIDE_CALL(cuda_library_entry,cuGraphUpload,hGraphExec,hStream); + LOG_DEBUG("cuGraphUpload"); + return CUDA_OVERRIDE_CALL(cuda_library_entry, cuGraphUpload, hGraphExec, hStream); } CUresult cuGraphLaunch(CUgraphExec hGraphExec, CUstream hStream) { - LOG_DEBUG("cuGraphLaunch"); - return CUDA_OVERRIDE_CALL(cuda_library_entry,cuGraphLaunch,hGraphExec,hStream); + LOG_DEBUG("cuGraphLaunch"); + return CUDA_OVERRIDE_CALL(cuda_library_entry, cuGraphLaunch, hGraphExec, hStream); } CUresult cuGraphExecDestroy(CUgraphExec hGraphExec) { - LOG_DEBUG("cuGraphExecDestroy"); - return CUDA_OVERRIDE_CALL(cuda_library_entry,cuGraphExecDestroy,hGraphExec); + LOG_DEBUG("cuGraphExecDestroy"); + return CUDA_OVERRIDE_CALL(cuda_library_entry, cuGraphExecDestroy, hGraphExec); } CUresult cuGraphDestroy(CUgraph hGraph) { - LOG_DEBUG("cuGraphDestroy"); - return CUDA_OVERRIDE_CALL(cuda_library_entry,cuGraphDestroy,hGraph); + LOG_DEBUG("cuGraphDestroy"); + return CUDA_OVERRIDE_CALL(cuda_library_entry, cuGraphDestroy, hGraph); } diff --git a/src/cuda/hook.c b/src/cuda/hook.c index d2b143a9..3a81e770 100644 --- a/src/cuda/hook.c +++ b/src/cuda/hook.c @@ -1,14 +1,14 @@ -#include "include/libcuda_hook.h" #include + +#include "include/libcuda_hook.h" #include "include/libvgpu.h" #include "include/multi_func_hook.h" - -typedef void* (*fp_dlsym)(void*, const char*); +typedef void *(*fp_dlsym)(void *, const char *); extern fp_dlsym real_dlsym; 
cuda_entry_t cuda_library_entry[] = { - /* Init Part */ + /* Init Part */ {.name = "cuInit"}, /* Device Part */ {.name = "cuDeviceGetAttribute"}, @@ -236,9 +236,9 @@ cuda_entry_t cuda_library_entry[] = { int prior_function(char tmp[500]) { char *pos = tmp + strlen(tmp) - 3; - if (pos[0]=='_' && pos[1]=='v') { - if (pos[2]=='2') - pos[0]='\0'; + if (pos[0] == '_' && pos[1] == 'v') { + if (pos[2] == '2') + pos[0] = '\0'; else pos[2]--; return 1; @@ -254,7 +254,7 @@ void load_cuda_libraries() { LOG_INFO("Start hijacking"); - snprintf(cuda_filename, FILENAME_MAX - 1, "%s","libcuda.so.1"); + snprintf(cuda_filename, FILENAME_MAX - 1, "%s", "libcuda.so.1"); cuda_filename[FILENAME_MAX - 1] = '\0'; table = dlopen(cuda_filename, RTLD_NOW | RTLD_NODELETE); @@ -263,55 +263,54 @@ void load_cuda_libraries() { } for (i = 0; i < CUDA_ENTRY_END; i++) { - LOG_DEBUG("LOADING %s %d",cuda_library_entry[i].name,i); + LOG_DEBUG("LOADING %s %d", cuda_library_entry[i].name, i); cuda_library_entry[i].fn_ptr = real_dlsym(table, cuda_library_entry[i].name); if (!cuda_library_entry[i].fn_ptr) { - cuda_library_entry[i].fn_ptr=real_dlsym(RTLD_NEXT,cuda_library_entry[i].name); - if (!cuda_library_entry[i].fn_ptr){ - LOG_INFO("can't find function %s in %s", cuda_library_entry[i].name,cuda_filename); - memset(tmpfunc,0,500); - strcpy(tmpfunc,cuda_library_entry[i].name); + cuda_library_entry[i].fn_ptr = real_dlsym(RTLD_NEXT, cuda_library_entry[i].name); + if (!cuda_library_entry[i].fn_ptr) { + LOG_INFO("can't find function %s in %s", cuda_library_entry[i].name, cuda_filename); + memset(tmpfunc, 0, 500); + strcpy(tmpfunc, cuda_library_entry[i].name); while (prior_function(tmpfunc)) { - cuda_library_entry[i].fn_ptr=real_dlsym(RTLD_NEXT,tmpfunc); + cuda_library_entry[i].fn_ptr = real_dlsym(RTLD_NEXT, tmpfunc); if (cuda_library_entry[i].fn_ptr) { - LOG_INFO("found prior function %s",tmpfunc); + LOG_INFO("found prior function %s", tmpfunc); break; - } + } } } } } LOG_INFO("loaded_cuda_libraries"); - if 
(cuda_library_entry[0].fn_ptr==NULL){ + if (cuda_library_entry[0].fn_ptr == NULL) { LOG_WARN("is NULL"); } dlclose(table); } - // find func by cuda version -const char* get_real_func_name(const char* base_name,int cuda_version) { - int i = 0; - for (i = 0; i < sizeof(g_func_map)/sizeof(g_func_map[0]); ++i) { - CudaFuncMapEntry *entry = &g_func_map[i]; - // check fun name - if (strcmp(entry->func_name, base_name) != 0) continue; - // check cuda version - if (cuda_version >= entry->min_ver && cuda_version <= entry->max_ver) { - return entry->real_name; +const char *get_real_func_name(const char *base_name, int cuda_version) { + int i = 0; + for (i = 0; i < sizeof(g_func_map) / sizeof(g_func_map[0]); ++i) { + CudaFuncMapEntry *entry = &g_func_map[i]; + // check fun name + if (strcmp(entry->func_name, base_name) != 0) continue; + // check cuda version + if (cuda_version >= entry->min_ver && cuda_version <= entry->max_ver) { + return entry->real_name; + } } - } - return NULL; // if not found + return NULL; // if not found } -void* find_real_symbols_in_table(const char *symbol) { - void *pfn; - //this symbol always has suffix like _v2,_v3 - pfn = __dlsym_hook_section(NULL,symbol); - if (pfn!=NULL) { - return pfn; - } - return NULL; +void *find_real_symbols_in_table(const char *symbol) { + void *pfn; + // this symbol always has suffix like _v2,_v3 + pfn = __dlsym_hook_section(NULL, symbol); + if (pfn != NULL) { + return pfn; + } + return NULL; } void *find_symbols_in_table(const char *symbol) { @@ -321,101 +320,110 @@ void *find_symbols_in_table(const char *symbol) { if (strncmp(symbol, "cuGraph", 7) == 0) { return NULL; } - strcpy(symbol_v,symbol); - strcat(symbol_v,"_v3"); - pfn = __dlsym_hook_section(NULL,symbol_v); - if (pfn!=NULL) { + strcpy(symbol_v, symbol); + strcat(symbol_v, "_v3"); + pfn = __dlsym_hook_section(NULL, symbol_v); + if (pfn != NULL) { return pfn; } - symbol_v[strlen(symbol_v)-1]='2'; - pfn = __dlsym_hook_section(NULL,symbol_v); - if (pfn!=NULL) { + 
symbol_v[strlen(symbol_v) - 1] = '2'; + pfn = __dlsym_hook_section(NULL, symbol_v); + if (pfn != NULL) { return pfn; } - pfn = __dlsym_hook_section(NULL,symbol); - if (pfn!=NULL) { + pfn = __dlsym_hook_section(NULL, symbol); + if (pfn != NULL) { return pfn; } return NULL; } -void *find_symbols_in_table_by_cudaversion(const char *symbol,int cudaVersion) { - void *pfn; - const char *real_symbol; - real_symbol = get_real_func_name(symbol,cudaVersion); - if (real_symbol == NULL) { - // if not find in multi func version def, use origin logic - pfn = find_symbols_in_table(symbol); - } else { - pfn = find_real_symbols_in_table(real_symbol); - } - return pfn; +void *find_symbols_in_table_by_cudaversion(const char *symbol, int cudaVersion) { + void *pfn; + const char *real_symbol; + real_symbol = get_real_func_name(symbol, cudaVersion); + if (real_symbol == NULL) { + // if not find in multi func version def, use origin logic + pfn = find_symbols_in_table(symbol); + } else { + pfn = find_real_symbols_in_table(real_symbol); + } + return pfn; } +CUresult (*cuGetProcAddress_real)(const char *symbol, void **pfn, int cudaVersion, + cuuint64_t flags); -CUresult (*cuGetProcAddress_real) ( const char* symbol, void** pfn, int cudaVersion, cuuint64_t flags ); - -CUresult _cuGetProcAddress ( const char* symbol, void** pfn, int cudaVersion, cuuint64_t flags ) { - LOG_INFO("into _cuGetProcAddress symbol=%s:%d",symbol,cudaVersion); +CUresult _cuGetProcAddress(const char *symbol, void **pfn, int cudaVersion, cuuint64_t flags) { + LOG_INFO("into _cuGetProcAddress symbol=%s:%d", symbol, cudaVersion); *pfn = find_symbols_in_table_by_cudaversion(symbol, cudaVersion); - if (*pfn==NULL){ - CUresult res = CUDA_OVERRIDE_CALL(cuda_library_entry,cuGetProcAddress,symbol,pfn,cudaVersion,flags); + if (*pfn == NULL) { + CUresult res = CUDA_OVERRIDE_CALL(cuda_library_entry, cuGetProcAddress, symbol, pfn, + cudaVersion, flags); return res; - }else{ - LOG_DEBUG("found symbol %s",symbol); + } else { + 
LOG_DEBUG("found symbol %s", symbol); return CUDA_SUCCESS; } } -CUresult cuGetProcAddress ( const char* symbol, void** pfn, int cudaVersion, cuuint64_t flags ) { - LOG_INFO("into cuGetProcAddress symbol=%s:%d",symbol,cudaVersion); +CUresult cuGetProcAddress(const char *symbol, void **pfn, int cudaVersion, cuuint64_t flags) { + LOG_INFO("into cuGetProcAddress symbol=%s:%d", symbol, cudaVersion); *pfn = find_symbols_in_table_by_cudaversion(symbol, cudaVersion); - if (strcmp(symbol,"cuGetProcAddress")==0) { - CUresult res = CUDA_OVERRIDE_CALL(cuda_library_entry,cuGetProcAddress,symbol,pfn,cudaVersion,flags); - if (res==CUDA_SUCCESS) { - cuGetProcAddress_real=*pfn; - *pfn=_cuGetProcAddress; + if (strcmp(symbol, "cuGetProcAddress") == 0) { + CUresult res = CUDA_OVERRIDE_CALL(cuda_library_entry, cuGetProcAddress, symbol, pfn, + cudaVersion, flags); + if (res == CUDA_SUCCESS) { + cuGetProcAddress_real = *pfn; + *pfn = _cuGetProcAddress; } return res; } - if (*pfn==NULL){ - CUresult res = CUDA_OVERRIDE_CALL(cuda_library_entry,cuGetProcAddress,symbol,pfn,cudaVersion,flags); + if (*pfn == NULL) { + CUresult res = CUDA_OVERRIDE_CALL(cuda_library_entry, cuGetProcAddress, symbol, pfn, + cudaVersion, flags); return res; - }else{ - LOG_DEBUG("found symbol %s",symbol); + } else { + LOG_DEBUG("found symbol %s", symbol); return CUDA_SUCCESS; } } -CUresult _cuGetProcAddress_v2(const char *symbol, void **pfn, int cudaVersion, cuuint64_t flags, CUdriverProcAddressQueryResult *symbolStatus){ - LOG_INFO("into _cuGetProcAddress_v2 symbol=%s:%d",symbol,cudaVersion); +CUresult _cuGetProcAddress_v2(const char *symbol, void **pfn, int cudaVersion, cuuint64_t flags, + CUdriverProcAddressQueryResult *symbolStatus) { + LOG_INFO("into _cuGetProcAddress_v2 symbol=%s:%d", symbol, cudaVersion); *pfn = find_symbols_in_table_by_cudaversion(symbol, cudaVersion); - if (*pfn==NULL){ - CUresult res = CUDA_OVERRIDE_CALL(cuda_library_entry,cuGetProcAddress_v2,symbol,pfn,cudaVersion,flags,symbolStatus); + if 
(*pfn == NULL) { + CUresult res = CUDA_OVERRIDE_CALL(cuda_library_entry, cuGetProcAddress_v2, symbol, pfn, + cudaVersion, flags, symbolStatus); return res; - }else{ - LOG_DEBUG("found symbol %s",symbol); + } else { + LOG_DEBUG("found symbol %s", symbol); return CUDA_SUCCESS; - } + } } -CUresult cuGetProcAddress_v2(const char *symbol, void **pfn, int cudaVersion, cuuint64_t flags, CUdriverProcAddressQueryResult *symbolStatus){ - LOG_INFO("into cuGetProcAddress_v2 symbol=%s:%d",symbol,cudaVersion); +CUresult cuGetProcAddress_v2(const char *symbol, void **pfn, int cudaVersion, cuuint64_t flags, + CUdriverProcAddressQueryResult *symbolStatus) { + LOG_INFO("into cuGetProcAddress_v2 symbol=%s:%d", symbol, cudaVersion); *pfn = find_symbols_in_table_by_cudaversion(symbol, cudaVersion); - if (strcmp(symbol,"cuGetProcAddress_v2")==0) { - CUresult res = CUDA_OVERRIDE_CALL(cuda_library_entry,cuGetProcAddress_v2,symbol,pfn,cudaVersion,flags,symbolStatus); - if (res==CUDA_SUCCESS) { - cuGetProcAddress_real=*pfn; - *pfn=_cuGetProcAddress_v2; + if (strcmp(symbol, "cuGetProcAddress_v2") == 0) { + CUresult res = CUDA_OVERRIDE_CALL(cuda_library_entry, cuGetProcAddress_v2, symbol, pfn, + cudaVersion, flags, symbolStatus); + if (res == CUDA_SUCCESS) { + cuGetProcAddress_real = *pfn; + *pfn = _cuGetProcAddress_v2; } return res; } - if (*pfn==NULL){ - CUresult res = CUDA_OVERRIDE_CALL(cuda_library_entry,cuGetProcAddress_v2,symbol,pfn,cudaVersion,flags,symbolStatus); + if (*pfn == NULL) { + CUresult res = CUDA_OVERRIDE_CALL(cuda_library_entry, cuGetProcAddress_v2, symbol, pfn, + cudaVersion, flags, symbolStatus); return res; - }else{ - LOG_DEBUG("found symbol %s",symbol); + } else { + LOG_DEBUG("found symbol %s", symbol); void *optr; - return CUDA_OVERRIDE_CALL(cuda_library_entry,cuGetProcAddress_v2,symbol,&optr,cudaVersion,flags,symbolStatus); - } + return CUDA_OVERRIDE_CALL(cuda_library_entry, cuGetProcAddress_v2, symbol, &optr, + cudaVersion, flags, symbolStatus); + } } diff --git 
a/src/cuda/memory.c b/src/cuda/memory.c old mode 100755 new mode 100644 index 00857f30..3375de49 --- a/src/cuda/memory.c +++ b/src/cuda/memory.c @@ -39,25 +39,25 @@ const size_t cuarray_format_bytes[33] = { 0, // 0x1c 0, // 0x1d 0, // 0x1e - 0, // 0x1f + 0, // 0x1f 4 // CU_AD_FORMAT_FLOAT = 0x20 }; -extern size_t round_up(size_t size,size_t align); +extern size_t round_up(size_t size, size_t align); extern void rate_limiter(int grids, int blocks); int check_oom() { -// return 0; + // return 0; CUdevice dev; CHECK_DRV_API(cuCtxGetDevice(&dev)); - return oom_check(dev,0); + return oom_check(dev, 0); } -uint64_t compute_3d_array_alloc_bytes(const CUDA_ARRAY3D_DESCRIPTOR* desc) { - if (desc==NULL) { +uint64_t compute_3d_array_alloc_bytes(const CUDA_ARRAY3D_DESCRIPTOR *desc) { + if (desc == NULL) { LOG_WARN("compute_3d_array_alloc_bytes desc is null"); - }else{ - LOG_DEBUG("compute_3d_array_alloc_bytes height=%ld width=%ld",desc->Height,desc->Width); + } else { + LOG_DEBUG("compute_3d_array_alloc_bytes height=%ld width=%ld", desc->Height, desc->Width); } uint64_t bytes = desc->Width * desc->NumChannels; if (desc->Height != 0) { @@ -73,12 +73,11 @@ uint64_t compute_3d_array_alloc_bytes(const CUDA_ARRAY3D_DESCRIPTOR* desc) { return bytes; } - -uint64_t compute_array_alloc_bytes(const CUDA_ARRAY_DESCRIPTOR* desc) { - if (desc==NULL) { +uint64_t compute_array_alloc_bytes(const CUDA_ARRAY_DESCRIPTOR *desc) { + if (desc == NULL) { LOG_WARN("compute_array_alloc_bytes desc is null"); - }else{ - LOG_DEBUG("compute_array_alloc_bytes height=%ld width=%ld",desc->Height,desc->Width); + } else { + LOG_DEBUG("compute_array_alloc_bytes height=%ld width=%ld", desc->Height, desc->Width); } uint64_t bytes = desc->Width * desc->NumChannels; @@ -92,97 +91,95 @@ uint64_t compute_array_alloc_bytes(const CUDA_ARRAY_DESCRIPTOR* desc) { return bytes; } -CUresult cuArray3DCreate_v2(CUarray* arr, const CUDA_ARRAY3D_DESCRIPTOR* desc) { +CUresult cuArray3DCreate_v2(CUarray *arr, const 
CUDA_ARRAY3D_DESCRIPTOR *desc) { LOG_DEBUG("cuArray3DCreate_v2"); /*uint64_t bytes*/ compute_3d_array_alloc_bytes(desc); - CUresult res = CUDA_OVERRIDE_CALL(cuda_library_entry,cuArray3DCreate_v2, arr, desc); + CUresult res = CUDA_OVERRIDE_CALL(cuda_library_entry, cuArray3DCreate_v2, arr, desc); if (res != CUDA_SUCCESS) { return res; } return res; } - -CUresult cuArrayCreate_v2(CUarray* arr, const CUDA_ARRAY_DESCRIPTOR* desc) { +CUresult cuArrayCreate_v2(CUarray *arr, const CUDA_ARRAY_DESCRIPTOR *desc) { LOG_DEBUG("cuArrayCreate_v2"); /*uint64_t bytes*/ compute_array_alloc_bytes(desc); - CUresult res = CUDA_OVERRIDE_CALL(cuda_library_entry,cuArrayCreate_v2, arr, desc); + CUresult res = CUDA_OVERRIDE_CALL(cuda_library_entry, cuArrayCreate_v2, arr, desc); if (res != CUDA_SUCCESS) { return res; } return res; } - CUresult cuArrayDestroy(CUarray arr) { CUDA_ARRAY3D_DESCRIPTOR desc; LOG_DEBUG("cuArrayDestroy"); CHECK_DRV_API(cuArray3DGetDescriptor(&desc, arr)); /*uint64_t bytes*/ compute_3d_array_alloc_bytes(&desc); - CUresult res = CUDA_OVERRIDE_CALL(cuda_library_entry,cuArrayDestroy, arr); + CUresult res = CUDA_OVERRIDE_CALL(cuda_library_entry, cuArrayDestroy, arr); return res; } -CUresult cuMemoryAllocate(CUdeviceptr* dptr, size_t bytesize, void* data) { +CUresult cuMemoryAllocate(CUdeviceptr *dptr, size_t bytesize, void *data) { CUresult res; - res = CUDA_OVERRIDE_CALL(cuda_library_entry,cuMemAlloc_v2,dptr,bytesize); + res = CUDA_OVERRIDE_CALL(cuda_library_entry, cuMemAlloc_v2, dptr, bytesize); return res; } -CUresult cuMemAlloc_v2(CUdeviceptr* dptr, size_t bytesize) { - LOG_INFO("into cuMemAllocing_v2 dptr=%p bytesize=%ld",dptr,bytesize); +CUresult cuMemAlloc_v2(CUdeviceptr *dptr, size_t bytesize) { + LOG_INFO("into cuMemAllocing_v2 dptr=%p bytesize=%ld", dptr, bytesize); ENSURE_RUNNING(); - CUresult res = allocate_raw(dptr,bytesize); - if (res!=CUDA_SUCCESS) - return res; - LOG_INFO("res=%d, cuMemAlloc_v2 success dptr=%p bytesize=%lu",0,(void *)*dptr,bytesize); + 
CUresult res = allocate_raw(dptr, bytesize); + if (res != CUDA_SUCCESS) return res; + LOG_INFO("res=%d, cuMemAlloc_v2 success dptr=%p bytesize=%lu", 0, (void *)*dptr, bytesize); return CUDA_SUCCESS; } -CUresult cuMemAllocHost_v2(void** hptr, size_t bytesize) { - LOG_DEBUG("cuMemAllocHost_v2 hptr=%p bytesize=%ld",hptr,bytesize); +CUresult cuMemAllocHost_v2(void **hptr, size_t bytesize) { + LOG_DEBUG("cuMemAllocHost_v2 hptr=%p bytesize=%ld", hptr, bytesize); ENSURE_RUNNING(); - CUresult res = CUDA_OVERRIDE_CALL(cuda_library_entry,cuMemAllocHost_v2, hptr, bytesize); + CUresult res = CUDA_OVERRIDE_CALL(cuda_library_entry, cuMemAllocHost_v2, hptr, bytesize); if (res != CUDA_SUCCESS) { return res; } if (check_oom()) { - CUDA_OVERRIDE_CALL(cuda_library_entry,cuMemFreeHost, *hptr); + CUDA_OVERRIDE_CALL(cuda_library_entry, cuMemFreeHost, *hptr); return CUDA_ERROR_OUT_OF_MEMORY; } return res; } -CUresult cuMemAllocManaged(CUdeviceptr* dptr, size_t bytesize, unsigned int flags) { - LOG_DEBUG("cuMemAllocManaged dptr=%p bytesize=%ld",dptr,bytesize); +CUresult cuMemAllocManaged(CUdeviceptr *dptr, size_t bytesize, unsigned int flags) { + LOG_DEBUG("cuMemAllocManaged dptr=%p bytesize=%ld", dptr, bytesize); ENSURE_RUNNING(); CUdevice dev; CHECK_DRV_API(cuCtxGetDevice(&dev)); - if (oom_check(dev,bytesize)){ + if (oom_check(dev, bytesize)) { return CUDA_ERROR_OUT_OF_MEMORY; } - CUresult res = CUDA_OVERRIDE_CALL(cuda_library_entry,cuMemAllocManaged, dptr, bytesize, flags); + CUresult res = CUDA_OVERRIDE_CALL(cuda_library_entry, cuMemAllocManaged, dptr, bytesize, flags); if (res == CUDA_SUCCESS) { add_chunk_only(*dptr, bytesize, dev); } return res; } -CUresult cuMemAllocPitch_v2(CUdeviceptr* dptr, size_t* pPitch, size_t WidthInBytes, - size_t Height, unsigned int ElementSizeBytes) { - LOG_DEBUG("cuMemAllocPitch_v2 dptr=%p (%ld,%ld)",dptr,WidthInBytes,Height); +CUresult cuMemAllocPitch_v2(CUdeviceptr *dptr, size_t *pPitch, size_t WidthInBytes, size_t Height, + unsigned int 
ElementSizeBytes) { + LOG_DEBUG("cuMemAllocPitch_v2 dptr=%p (%ld,%ld)", dptr, WidthInBytes, Height); size_t guess_pitch = (((WidthInBytes - 1) / ElementSizeBytes) + 1) * ElementSizeBytes; size_t bytesize = guess_pitch * Height; ENSURE_RUNNING(); CUdevice dev; CHECK_DRV_API(cuCtxGetDevice(&dev)); - if (oom_check(dev,bytesize)){ + if (oom_check(dev, bytesize)) { return CUDA_ERROR_OUT_OF_MEMORY; } - CUresult res = CUDA_OVERRIDE_CALL(cuda_library_entry,cuMemAllocPitch_v2, dptr, pPitch, WidthInBytes, Height, ElementSizeBytes); + CUresult res = CUDA_OVERRIDE_CALL(cuda_library_entry, cuMemAllocPitch_v2, dptr, pPitch, + WidthInBytes, Height, ElementSizeBytes); if (res == CUDA_SUCCESS) { add_chunk_only(*dptr, bytesize, dev); } @@ -190,69 +187,67 @@ CUresult cuMemAllocPitch_v2(CUdeviceptr* dptr, size_t* pPitch, size_t WidthInByt } CUresult cuMemFree_v2(CUdeviceptr dptr) { - LOG_DEBUG("cuMemFree_v2 dptr=%llx",dptr); + LOG_DEBUG("cuMemFree_v2 dptr=%llx", dptr); if (dptr == 0) { // NULL return CUDA_SUCCESS; } CUresult res = free_raw(dptr); - LOG_INFO("after free_raw dptr=%p res=%d",(void *)dptr,res); + LOG_INFO("after free_raw dptr=%p res=%d", (void *)dptr, res); return res; } - -CUresult cuMemFreeHost(void* hptr) { +CUresult cuMemFreeHost(void *hptr) { /*CUdeviceptr dptr;*/ /*CHECK_DRV_API(cuMemHostGetDevicePointer(&dptr, hptr, 0));*/ /*size_t bytesize;*/ /*CHECK_DRV_API(cuMemGetAddressRange(NULL, &bytesize, dptr));*/ - LOG_DEBUG("cuMemFreeHost_v2 hptr=%p",hptr); - CUresult res = CUDA_OVERRIDE_CALL(cuda_library_entry,cuMemFreeHost, hptr); + LOG_DEBUG("cuMemFreeHost_v2 hptr=%p", hptr); + CUresult res = CUDA_OVERRIDE_CALL(cuda_library_entry, cuMemFreeHost, hptr); return res; } -CUresult cuMemHostAlloc(void** hptr, size_t bytesize, unsigned int flags) { - LOG_DEBUG("cuMemHostAlloc hptr=%p bytesize=%lu",hptr,bytesize); +CUresult cuMemHostAlloc(void **hptr, size_t bytesize, unsigned int flags) { + LOG_DEBUG("cuMemHostAlloc hptr=%p bytesize=%lu", hptr, bytesize); ENSURE_RUNNING(); - 
CUresult res = CUDA_OVERRIDE_CALL(cuda_library_entry,cuMemHostAlloc, hptr, bytesize, flags); + CUresult res = CUDA_OVERRIDE_CALL(cuda_library_entry, cuMemHostAlloc, hptr, bytesize, flags); if (res != CUDA_SUCCESS) { return res; } if (check_oom()) { - CUDA_OVERRIDE_CALL(cuda_library_entry,cuMemFreeHost, *hptr); + CUDA_OVERRIDE_CALL(cuda_library_entry, cuMemFreeHost, *hptr); *hptr = NULL; return CUDA_ERROR_OUT_OF_MEMORY; } return res; } - -CUresult cuMemHostRegister_v2(void* hptr, size_t bytesize, unsigned int flags) { +CUresult cuMemHostRegister_v2(void *hptr, size_t bytesize, unsigned int flags) { /*int trackable = 1;*/ /*if (flags != CU_MEMHOSTREGISTER_DEVICEMAP) {*/ /* fprintf(stderr, "only CU_MEMHOSTREGISTER_DEVICEMAP can be freed, current=%u\n", flags);*/ /* trackable = 0;*/ /*}*/ // TODO: process flags properly - LOG_DEBUG("cuMemHostRegister_v2 hptr=%p bytesize=%ld",hptr,bytesize); + LOG_DEBUG("cuMemHostRegister_v2 hptr=%p bytesize=%ld", hptr, bytesize); CUdevice dev; cuCtxGetDevice(&dev); ENSURE_RUNNING(); - CUresult res = CUDA_OVERRIDE_CALL(cuda_library_entry,cuMemHostRegister_v2, hptr, bytesize, flags); - LOG_DEBUG("cuMemHostRegister_v2 returned :%d(%p:%ld)",res,hptr,bytesize); + CUresult res = + CUDA_OVERRIDE_CALL(cuda_library_entry, cuMemHostRegister_v2, hptr, bytesize, flags); + LOG_DEBUG("cuMemHostRegister_v2 returned :%d(%p:%ld)", res, hptr, bytesize); if (res != CUDA_SUCCESS) { return res; } if (check_oom()) { - CUDA_OVERRIDE_CALL(cuda_library_entry,cuMemHostUnregister, hptr); + CUDA_OVERRIDE_CALL(cuda_library_entry, cuMemHostUnregister, hptr); return CUDA_ERROR_OUT_OF_MEMORY; } - //LOG_WARN("222:%d(%p:%ld)",res,hptr,bytesize); + // LOG_WARN("222:%d(%p:%ld)",res,hptr,bytesize); return res; - //return CUDA_SUCCESS; + // return CUDA_SUCCESS; } - -CUresult cuMemHostUnregister(void* hptr) { +CUresult cuMemHostUnregister(void *hptr) { /*CUdeviceptr dptr;*/ /*CUresult flag = cuMemHostGetDevicePointer(&dptr, hptr, 0);*/ /*size_t bytesize = 0;*/ @@ -260,246 
+255,274 @@ CUresult cuMemHostUnregister(void* hptr) { /* // only device map registry is trackable*/ /* CHECK_DRV_API(cuMemGetAddressRange(NULL, &bytesize, dptr));*/ /*}*/ - LOG_DEBUG("cuMemHostUnregister hptr=%p",hptr); + LOG_DEBUG("cuMemHostUnregister hptr=%p", hptr); ENSURE_RUNNING(); - CUresult res = CUDA_OVERRIDE_CALL(cuda_library_entry,cuMemHostUnregister, hptr); - + CUresult res = CUDA_OVERRIDE_CALL(cuda_library_entry, cuMemHostUnregister, hptr); + /*if (flag == CUDA_SUCCESS && bytesize > 0) {*/ /* // only device map registry is trackable*/ /* DECL_MEMORY_ON_SUCCESS(res, bytesize);*/ /*}*/ - //return CUDA_SUCCESS; + // return CUDA_SUCCESS; return res; } - -CUresult cuMemcpy(CUdeviceptr dst, CUdeviceptr src, size_t ByteCount ){ - LOG_DEBUG("cuMemcpy dst=%llx src=%llx count=%lu",dst,src,ByteCount); +CUresult cuMemcpy(CUdeviceptr dst, CUdeviceptr src, size_t ByteCount) { + LOG_DEBUG("cuMemcpy dst=%llx src=%llx count=%lu", dst, src, ByteCount); ENSURE_RUNNING(); - CUresult res = CUDA_OVERRIDE_CALL(cuda_library_entry,cuMemcpy,dst,src,ByteCount); + CUresult res = CUDA_OVERRIDE_CALL(cuda_library_entry, cuMemcpy, dst, src, ByteCount); return res; } -CUresult cuPointerGetAttribute ( void* data, CUpointer_attribute attribute, CUdeviceptr ptr ){ - LOG_DEBUG("cuPointGetAttribute data=%p attribute=%d ptr=%llx", data, (int)attribute,ptr); +CUresult cuPointerGetAttribute(void *data, CUpointer_attribute attribute, CUdeviceptr ptr) { + LOG_DEBUG("cuPointGetAttribute data=%p attribute=%d ptr=%llx", data, (int)attribute, ptr); ENSURE_RUNNING(); - CUresult res = CUDA_OVERRIDE_CALL(cuda_library_entry,cuPointerGetAttribute,data,attribute,ptr); + CUresult res = + CUDA_OVERRIDE_CALL(cuda_library_entry, cuPointerGetAttribute, data, attribute, ptr); return res; } -CUresult cuPointerGetAttributes ( unsigned int numAttributes, CUpointer_attribute* attributes, void** data, CUdeviceptr ptr ) { +CUresult cuPointerGetAttributes(unsigned int numAttributes, CUpointer_attribute *attributes, + 
void **data, CUdeviceptr ptr) { LOG_DEBUG("cuPointGetAttribute data=%p ptr=%llx", data, ptr); ENSURE_RUNNING(); - CUresult res = CUDA_OVERRIDE_CALL(cuda_library_entry,cuPointerGetAttributes,numAttributes,attributes,data,ptr); - int cur=0; - for (cur=0;cur *total) ? *total : limit; *free = (actual_limit > usage) ? (actual_limit - usage) : 0; *total = actual_limit; - LOG_INFO("after free=%ld total=%ld limit=%ld usage=%ld", - *free, *total, limit, usage); + LOG_INFO("after free=%ld total=%ld limit=%ld usage=%ld", *free, *total, limit, usage); return CUDA_SUCCESS; } } #endif -CUresult cuMipmappedArrayCreate(CUmipmappedArray* pHandle, - const CUDA_ARRAY3D_DESCRIPTOR* pMipmappedArrayDesc, - unsigned int numMipmapLevels) { +CUresult cuMipmappedArrayCreate(CUmipmappedArray *pHandle, + const CUDA_ARRAY3D_DESCRIPTOR *pMipmappedArrayDesc, + unsigned int numMipmapLevels) { // TODO: compute bytesize LOG_DEBUG("cuMipmappedArrayCreate\n"); - CUresult res = CUDA_OVERRIDE_CALL(cuda_library_entry,cuMipmappedArrayCreate, pHandle, pMipmappedArrayDesc, numMipmapLevels); + CUresult res = CUDA_OVERRIDE_CALL(cuda_library_entry, cuMipmappedArrayCreate, pHandle, + pMipmappedArrayDesc, numMipmapLevels); if (res != CUDA_SUCCESS) { return res; } if (check_oom()) { - CUDA_OVERRIDE_CALL(cuda_library_entry,cuMipmappedArrayDestroy, *pHandle); + CUDA_OVERRIDE_CALL(cuda_library_entry, cuMipmappedArrayDestroy, *pHandle); return CUDA_ERROR_OUT_OF_MEMORY; } return res; @@ -549,53 +571,66 @@ CUresult cuMipmappedArrayCreate(CUmipmappedArray* pHandle, CUresult cuMipmappedArrayDestroy(CUmipmappedArray hMipmappedArray) { // TODO: compute bytesize LOG_DEBUG("cuMipmappedArrayDestroy\n"); - CUresult res = CUDA_OVERRIDE_CALL(cuda_library_entry,cuMipmappedArrayDestroy, hMipmappedArray); + CUresult res = CUDA_OVERRIDE_CALL(cuda_library_entry, cuMipmappedArrayDestroy, hMipmappedArray); return res; } -CUresult cuLaunchKernel ( CUfunction f, unsigned int gridDimX, unsigned int gridDimY, unsigned int gridDimZ, 
unsigned int blockDimX, unsigned int blockDimY, unsigned int blockDimZ, unsigned int sharedMemBytes, CUstream hStream, void** kernelParams, void** extra ){ +CUresult cuLaunchKernel(CUfunction f, unsigned int gridDimX, unsigned int gridDimY, + unsigned int gridDimZ, unsigned int blockDimX, unsigned int blockDimY, + unsigned int blockDimZ, unsigned int sharedMemBytes, CUstream hStream, + void **kernelParams, void **extra) { ENSURE_RUNNING(); pre_launch_kernel(); - if (pidfound==1){ - rate_limiter(gridDimX * gridDimY * gridDimZ, - blockDimX * blockDimY * blockDimZ); + if (pidfound == 1) { + rate_limiter(gridDimX * gridDimY * gridDimZ, blockDimX * blockDimY * blockDimZ); } - CUresult res = CUDA_OVERRIDE_CALL(cuda_library_entry,cuLaunchKernel,f,gridDimX,gridDimY,gridDimZ,blockDimX,blockDimY,blockDimZ,sharedMemBytes,hStream,kernelParams,extra); + CUresult res = CUDA_OVERRIDE_CALL(cuda_library_entry, cuLaunchKernel, f, gridDimX, gridDimY, + gridDimZ, blockDimX, blockDimY, blockDimZ, sharedMemBytes, + hStream, kernelParams, extra); return res; } -CUresult cuLaunchKernelEx(const CUlaunchConfig *config, CUfunction f, void **kernelParams, void **extra) { +CUresult cuLaunchKernelEx(const CUlaunchConfig *config, CUfunction f, void **kernelParams, + void **extra) { ENSURE_RUNNING(); pre_launch_kernel(); - if (pidfound==1){ + if (pidfound == 1) { rate_limiter(config->gridDimX * config->gridDimY * config->gridDimZ, - config->blockDimX * config->blockDimY * config->blockDimZ); + config->blockDimX * config->blockDimY * config->blockDimZ); } - CUresult res = CUDA_OVERRIDE_CALL(cuda_library_entry,cuLaunchKernelEx,config,f,kernelParams,extra); + CUresult res = + CUDA_OVERRIDE_CALL(cuda_library_entry, cuLaunchKernelEx, config, f, kernelParams, extra); return res; } -CUresult cuLaunchCooperativeKernel ( CUfunction f, unsigned int gridDimX, unsigned int gridDimY, unsigned int gridDimZ, unsigned int blockDimX, unsigned int blockDimY, unsigned int blockDimZ, unsigned int sharedMemBytes, 
CUstream hStream, void** kernelParams ){ +CUresult cuLaunchCooperativeKernel(CUfunction f, unsigned int gridDimX, unsigned int gridDimY, + unsigned int gridDimZ, unsigned int blockDimX, + unsigned int blockDimY, unsigned int blockDimZ, + unsigned int sharedMemBytes, CUstream hStream, + void **kernelParams) { ENSURE_RUNNING(); pre_launch_kernel(); - CUresult res = CUDA_OVERRIDE_CALL(cuda_library_entry,cuLaunchCooperativeKernel,f,gridDimX,gridDimY,gridDimZ,blockDimX,blockDimY,blockDimZ,sharedMemBytes,hStream,kernelParams); + CUresult res = CUDA_OVERRIDE_CALL(cuda_library_entry, cuLaunchCooperativeKernel, f, gridDimX, + gridDimY, gridDimZ, blockDimX, blockDimY, blockDimZ, + sharedMemBytes, hStream, kernelParams); return res; } CUresult cuMemoryFree(CUdeviceptr dptr) { - CUresult res = CUDA_OVERRIDE_CALL(cuda_library_entry,cuMemFree_v2,dptr); + CUresult res = CUDA_OVERRIDE_CALL(cuda_library_entry, cuMemFree_v2, dptr); return res; } -CUresult cuMemAddressReserve(CUdeviceptr* ptr, size_t size, - size_t alignment, CUdeviceptr addr, unsigned long long flags ) { - CUresult res = CUDA_OVERRIDE_CALL(cuda_library_entry, - cuMemAddressReserve, ptr, size, alignment, addr, flags); +CUresult cuMemAddressReserve(CUdeviceptr *ptr, size_t size, size_t alignment, CUdeviceptr addr, + unsigned long long flags) { + CUresult res = CUDA_OVERRIDE_CALL(cuda_library_entry, cuMemAddressReserve, ptr, size, alignment, + addr, flags); LOG_INFO("cuMemAddressReserve:%lx %llx", size, *ptr); return res; } -CUresult cuMemCreate ( CUmemGenericAllocationHandle* handle, size_t size, const CUmemAllocationProp* prop, unsigned long long flags ) { +CUresult cuMemCreate(CUmemGenericAllocationHandle *handle, size_t size, + const CUmemAllocationProp *prop, unsigned long long flags) { LOG_INFO("cuMemCreate:%lld:%d", size, prop->location.id); ENSURE_RUNNING(); CUdevice dev; @@ -606,8 +641,7 @@ CUresult cuMemCreate ( CUmemGenericAllocationHandle* handle, size_t size, const if (do_oom_check && oom_check(dev, size)) 
{ return CUDA_ERROR_OUT_OF_MEMORY; } - CUresult res = CUDA_OVERRIDE_CALL(cuda_library_entry, - cuMemCreate, handle, size, prop, flags); + CUresult res = CUDA_OVERRIDE_CALL(cuda_library_entry, cuMemCreate, handle, size, prop, flags); if (do_oom_check && res == CUDA_SUCCESS) { add_chunk_only(*handle, size, dev); } @@ -623,109 +657,117 @@ CUresult cuMemRelease(CUmemGenericAllocationHandle handle) { return res; } -CUresult cuMemMap( CUdeviceptr ptr, size_t size, size_t offset, CUmemGenericAllocationHandle handle, unsigned long long flags ) { +CUresult cuMemMap(CUdeviceptr ptr, size_t size, size_t offset, CUmemGenericAllocationHandle handle, + unsigned long long flags) { LOG_INFO("cuMemMap:%lld(%llx,%llx)", size, ptr, offset); - CUresult res = CUDA_OVERRIDE_CALL(cuda_library_entry,cuMemMap,ptr,size,offset,handle,flags); + CUresult res = + CUDA_OVERRIDE_CALL(cuda_library_entry, cuMemMap, ptr, size, offset, handle, flags); return res; } -CUresult cuMemImportFromShareableHandle(CUmemGenericAllocationHandle* handle, - void* osHandle, CUmemAllocationHandleType shHandleType) { +CUresult cuMemImportFromShareableHandle(CUmemGenericAllocationHandle *handle, void *osHandle, + CUmemAllocationHandleType shHandleType) { LOG_INFO("cuMemImportFromSharableHandle"); - CUresult res = CUDA_OVERRIDE_CALL(cuda_library_entry, - cuMemImportFromShareableHandle, handle, osHandle, shHandleType); + CUresult res = CUDA_OVERRIDE_CALL(cuda_library_entry, cuMemImportFromShareableHandle, handle, + osHandle, shHandleType); return res; } CUresult cuMemAllocAsync(CUdeviceptr *dptr, size_t bytesize, CUstream hStream) { - LOG_DEBUG("cuMemAllocAsync:%ld",bytesize); - return allocate_async_raw(dptr,bytesize,hStream); + LOG_DEBUG("cuMemAllocAsync:%ld", bytesize); + return allocate_async_raw(dptr, bytesize, hStream); } CUresult cuMemFreeAsync(CUdeviceptr dptr, CUstream hStream) { - LOG_DEBUG("cuMemFreeAsync dptr=%llx",dptr); + LOG_DEBUG("cuMemFreeAsync dptr=%llx", dptr); if (dptr == 0) { // NULL return 
CUDA_SUCCESS; } - CUresult res = free_raw_async(dptr,hStream); - //CUresult res = CUDA_OVERRIDE_CALL(cuda_library_entry,cuMemFreeAsync,dptr,hStream); - LOG_DEBUG("after free_raw_async dptr=%p res=%d",(void *)dptr,res); + CUresult res = free_raw_async(dptr, hStream); + // CUresult res = CUDA_OVERRIDE_CALL(cuda_library_entry,cuMemFreeAsync,dptr,hStream); + LOG_DEBUG("after free_raw_async dptr=%p res=%d", (void *)dptr, res); return res; } -CUresult cuMemHostGetDevicePointer_v2(CUdeviceptr *pdptr, void *p, unsigned int Flags){ +CUresult cuMemHostGetDevicePointer_v2(CUdeviceptr *pdptr, void *p, unsigned int Flags) { LOG_INFO("cuMemHostGetDevicePointer"); - return CUDA_OVERRIDE_CALL(cuda_library_entry,cuMemHostGetDevicePointer_v2,pdptr,p,Flags); + return CUDA_OVERRIDE_CALL(cuda_library_entry, cuMemHostGetDevicePointer_v2, pdptr, p, Flags); } -CUresult cuMemHostGetFlags(unsigned int *pFlags, void *p){ +CUresult cuMemHostGetFlags(unsigned int *pFlags, void *p) { LOG_INFO("cuMemHostGetFlags"); - return CUDA_OVERRIDE_CALL(cuda_library_entry,cuMemHostGetFlags,pFlags,p); + return CUDA_OVERRIDE_CALL(cuda_library_entry, cuMemHostGetFlags, pFlags, p); } -CUresult cuMemPoolTrimTo(CUmemoryPool pool, size_t minBytesToKeep){ +CUresult cuMemPoolTrimTo(CUmemoryPool pool, size_t minBytesToKeep) { LOG_DEBUG("cuMemPoolTrimTo"); - return CUDA_OVERRIDE_CALL(cuda_library_entry,cuMemPoolTrimTo,pool,minBytesToKeep); + return CUDA_OVERRIDE_CALL(cuda_library_entry, cuMemPoolTrimTo, pool, minBytesToKeep); } CUresult cuMemPoolSetAttribute(CUmemoryPool pool, CUmemPool_attribute attr, void *value) { LOG_DEBUG("cuMemPoolSetAttribute"); - return CUDA_OVERRIDE_CALL(cuda_library_entry,cuMemPoolSetAttribute,pool,attr,value); + return CUDA_OVERRIDE_CALL(cuda_library_entry, cuMemPoolSetAttribute, pool, attr, value); } CUresult cuMemPoolGetAttribute(CUmemoryPool pool, CUmemPool_attribute attr, void *value) { - CUresult res = CUDA_OVERRIDE_CALL(cuda_library_entry,cuMemPoolGetAttribute,pool,attr,value); - 
LOG_INFO("cuMemPoolGetAttribute %d %ld",attr,*(long *)value); + CUresult res = CUDA_OVERRIDE_CALL(cuda_library_entry, cuMemPoolGetAttribute, pool, attr, value); + LOG_INFO("cuMemPoolGetAttribute %d %ld", attr, *(long *)value); return res; } CUresult cuMemPoolSetAccess(CUmemoryPool pool, const CUmemAccessDesc *map, size_t count) { LOG_DEBUG("cuMemPoolSetAccess"); - return CUDA_OVERRIDE_CALL(cuda_library_entry,cuMemPoolSetAccess,pool,map,count); + return CUDA_OVERRIDE_CALL(cuda_library_entry, cuMemPoolSetAccess, pool, map, count); } -CUresult cuMemPoolGetAccess(CUmemAccess_flags *flags, CUmemoryPool memPool, CUmemLocation *location) { +CUresult cuMemPoolGetAccess(CUmemAccess_flags *flags, CUmemoryPool memPool, + CUmemLocation *location) { LOG_DEBUG("cuMemPoolGetAccess"); - return CUDA_OVERRIDE_CALL(cuda_library_entry,cuMemPoolGetAccess,flags,memPool,location); + return CUDA_OVERRIDE_CALL(cuda_library_entry, cuMemPoolGetAccess, flags, memPool, location); } CUresult cuMemPoolCreate(CUmemoryPool *pool, const CUmemPoolProps *poolProps) { LOG_DEBUG("cuMemPoolCreate"); - return CUDA_OVERRIDE_CALL(cuda_library_entry,cuMemPoolCreate,pool,poolProps); + return CUDA_OVERRIDE_CALL(cuda_library_entry, cuMemPoolCreate, pool, poolProps); } CUresult cuMemPoolDestroy(CUmemoryPool pool) { LOG_DEBUG("cuMemPoolDestroy"); - return CUDA_OVERRIDE_CALL(cuda_library_entry,cuMemPoolDestroy,pool); + return CUDA_OVERRIDE_CALL(cuda_library_entry, cuMemPoolDestroy, pool); } -CUresult cuMemAllocFromPoolAsync(CUdeviceptr *dptr, size_t bytesize, CUmemoryPool pool, CUstream hStream) { +CUresult cuMemAllocFromPoolAsync(CUdeviceptr *dptr, size_t bytesize, CUmemoryPool pool, + CUstream hStream) { LOG_DEBUG("cuMemAllocFromPoolAsync"); - return CUDA_OVERRIDE_CALL(cuda_library_entry,cuMemAllocFromPoolAsync,dptr,bytesize,pool,hStream); + return CUDA_OVERRIDE_CALL(cuda_library_entry, cuMemAllocFromPoolAsync, dptr, bytesize, pool, + hStream); } -CUresult cuMemPoolExportToShareableHandle(void *handle_out, 
CUmemoryPool pool, CUmemAllocationHandleType handleType, unsigned long long flags) { +CUresult cuMemPoolExportToShareableHandle(void *handle_out, CUmemoryPool pool, + CUmemAllocationHandleType handleType, + unsigned long long flags) { LOG_DEBUG("cuMemPoolExportToShareableHandle"); - return CUDA_OVERRIDE_CALL(cuda_library_entry,cuMemPoolExportToShareableHandle,handle_out,pool,handleType,flags); + return CUDA_OVERRIDE_CALL(cuda_library_entry, cuMemPoolExportToShareableHandle, handle_out, + pool, handleType, flags); } -CUresult cuMemPoolImportFromShareableHandle( - CUmemoryPool *pool_out, - void *handle, - CUmemAllocationHandleType handleType, - unsigned long long flags) { - LOG_DEBUG("cuMemPoolImportFromShareableHandle"); - return CUDA_OVERRIDE_CALL(cuda_library_entry,cuMemPoolImportFromShareableHandle,pool_out,handle,handleType,flags); - } +CUresult cuMemPoolImportFromShareableHandle(CUmemoryPool *pool_out, void *handle, + CUmemAllocationHandleType handleType, + unsigned long long flags) { + LOG_DEBUG("cuMemPoolImportFromShareableHandle"); + return CUDA_OVERRIDE_CALL(cuda_library_entry, cuMemPoolImportFromShareableHandle, pool_out, + handle, handleType, flags); +} CUresult cuMemPoolExportPointer(CUmemPoolPtrExportData *shareData_out, CUdeviceptr ptr) { LOG_DEBUG("cuMemPoolExportPointer"); - return CUDA_OVERRIDE_CALL(cuda_library_entry,cuMemPoolExportPointer,shareData_out,ptr); + return CUDA_OVERRIDE_CALL(cuda_library_entry, cuMemPoolExportPointer, shareData_out, ptr); } -CUresult cuMemPoolImportPointer(CUdeviceptr *ptr_out, CUmemoryPool pool, CUmemPoolPtrExportData *shareData) { +CUresult cuMemPoolImportPointer(CUdeviceptr *ptr_out, CUmemoryPool pool, + CUmemPoolPtrExportData *shareData) { LOG_DEBUG("cuMemPoolImportPointer"); - return CUDA_OVERRIDE_CALL(cuda_library_entry,cuMemPoolImportPointer,ptr_out,pool,shareData); + return CUDA_OVERRIDE_CALL(cuda_library_entry, cuMemPoolImportPointer, ptr_out, pool, shareData); } /* CUresult cuMemcpy2D(const CUDA_MEMCPY2D 
*pCopy) { @@ -745,7 +787,7 @@ CUresult cuMemcpy2DUnaligned(const CUDA_MEMCPY2D *pCopy) { CUresult cuMemcpy2DUnaligned_v2(const CUDA_MEMCPY2D *pCopy) { LOG_DEBUG("cuMemcpy2DUnaligned_v2"); - return CUDA_OVERRIDE_CALL(cuda_library_entry,cuMemcpy2DUnaligned_v2,pCopy); + return CUDA_OVERRIDE_CALL(cuda_library_entry, cuMemcpy2DUnaligned_v2, pCopy); } /* CUresult cuMemcpy2DAsync(const CUDA_MEMCPY2D *pCopy, CUstream hStream) { @@ -755,7 +797,7 @@ CUresult cuMemcpy2DAsync(const CUDA_MEMCPY2D *pCopy, CUstream hStream) { CUresult cuMemcpy2DAsync_v2(const CUDA_MEMCPY2D *pCopy, CUstream hStream) { LOG_DEBUG("cuMemcpy2DAsync_v2"); - return CUDA_OVERRIDE_CALL(cuda_library_entry,cuMemcpy2DAsync,pCopy,hStream); + return CUDA_OVERRIDE_CALL(cuda_library_entry, cuMemcpy2DAsync, pCopy, hStream); } /* @@ -766,7 +808,7 @@ CUresult cuMemcpy3D(const CUDA_MEMCPY3D *pCopy) { CUresult cuMemcpy3D_v2(const CUDA_MEMCPY3D *pCopy) { LOG_DEBUG("cuMemcpy3D_v2"); - return CUDA_OVERRIDE_CALL(cuda_library_entry,cuMemcpy3D_v2,pCopy); + return CUDA_OVERRIDE_CALL(cuda_library_entry, cuMemcpy3D_v2, pCopy); } /* CUresult cuMemcpy3DAsync(const CUDA_MEMCPY3D *pCopy, CUstream hStream) { @@ -776,79 +818,101 @@ CUresult cuMemcpy3DAsync(const CUDA_MEMCPY3D *pCopy, CUstream hStream) { CUresult cuMemcpy3DAsync_v2(const CUDA_MEMCPY3D *pCopy, CUstream hStream) { LOG_DEBUG("cuMemcpy3DAsync_v2"); - return CUDA_OVERRIDE_CALL(cuda_library_entry,cuMemcpy3DAsync_v2,pCopy,hStream); + return CUDA_OVERRIDE_CALL(cuda_library_entry, cuMemcpy3DAsync_v2, pCopy, hStream); } CUresult cuMemcpy3DPeer(const CUDA_MEMCPY3D_PEER *pCopy) { LOG_DEBUG("cuMemcpy3DPeer"); - return CUDA_OVERRIDE_CALL(cuda_library_entry,cuMemcpy3DPeer,pCopy); + return CUDA_OVERRIDE_CALL(cuda_library_entry, cuMemcpy3DPeer, pCopy); } CUresult cuMemcpy3DPeerAsync(const CUDA_MEMCPY3D_PEER *pCopy, CUstream hStream) { LOG_DEBUG("cuMemcpy3DPeerAsync"); - return CUDA_OVERRIDE_CALL(cuda_library_entry,cuMemcpy3DPeerAsync,pCopy,hStream); + return 
CUDA_OVERRIDE_CALL(cuda_library_entry, cuMemcpy3DPeerAsync, pCopy, hStream); } #if CUDA_VERSION < 13000 -CUresult cuMemPrefetchAsync(CUdeviceptr devPtr, size_t count, CUdevice dstDevice, CUstream hStream) { +CUresult cuMemPrefetchAsync(CUdeviceptr devPtr, size_t count, CUdevice dstDevice, + CUstream hStream) { LOG_DEBUG("cuMemPrefetchAsync"); - return CUDA_OVERRIDE_CALL(cuda_library_entry,cuMemPrefetchAsync,devPtr,count,dstDevice,hStream); + return CUDA_OVERRIDE_CALL(cuda_library_entry, cuMemPrefetchAsync, devPtr, count, dstDevice, + hStream); } #endif -CUresult cuMemPrefetchAsync_v2(CUdeviceptr devPtr, size_t count, CUmemLocation location, unsigned int flags, - CUstream hStream) { +CUresult cuMemPrefetchAsync_v2(CUdeviceptr devPtr, size_t count, CUmemLocation location, + unsigned int flags, CUstream hStream) { LOG_DEBUG("cuMemPrefetchAsync_v2"); - return CUDA_OVERRIDE_CALL(cuda_library_entry, cuMemPrefetchAsync_v2, devPtr, count, location, flags, hStream); + return CUDA_OVERRIDE_CALL(cuda_library_entry, cuMemPrefetchAsync_v2, devPtr, count, location, + flags, hStream); } -CUresult cuMemRangeGetAttribute(void *data, size_t dataSize, CUmem_range_attribute attribute, CUdeviceptr devPtr, size_t count) { +CUresult cuMemRangeGetAttribute(void *data, size_t dataSize, CUmem_range_attribute attribute, + CUdeviceptr devPtr, size_t count) { LOG_DEBUG("cuMemRangeGetAttribute"); - return CUDA_OVERRIDE_CALL(cuda_library_entry,cuMemRangeGetAttribute,data,dataSize,attribute,devPtr,count); + return CUDA_OVERRIDE_CALL(cuda_library_entry, cuMemRangeGetAttribute, data, dataSize, attribute, + devPtr, count); } -CUresult cuMemRangeGetAttributes(void **data, size_t *dataSizes, CUmem_range_attribute *attributes, size_t numAttributes, CUdeviceptr devPtr, size_t count) { +CUresult cuMemRangeGetAttributes(void **data, size_t *dataSizes, CUmem_range_attribute *attributes, + size_t numAttributes, CUdeviceptr devPtr, size_t count) { LOG_DEBUG("cuMemRangeGetAttributes"); - return 
CUDA_OVERRIDE_CALL(cuda_library_entry,cuMemRangeGetAttributes,data,dataSizes,attributes,numAttributes,devPtr,count); + return CUDA_OVERRIDE_CALL(cuda_library_entry, cuMemRangeGetAttributes, data, dataSizes, + attributes, numAttributes, devPtr, count); } /* External Resource Management */ -CUresult cuImportExternalMemory(CUexternalMemory *extMem_out, const CUDA_EXTERNAL_MEMORY_HANDLE_DESC *memHandleDesc) { +CUresult cuImportExternalMemory(CUexternalMemory *extMem_out, + const CUDA_EXTERNAL_MEMORY_HANDLE_DESC *memHandleDesc) { LOG_DEBUG("cuImportExternalMemory"); - return CUDA_OVERRIDE_CALL(cuda_library_entry,cuImportExternalMemory,extMem_out,memHandleDesc); + return CUDA_OVERRIDE_CALL(cuda_library_entry, cuImportExternalMemory, extMem_out, + memHandleDesc); } -CUresult cuExternalMemoryGetMappedBuffer(CUdeviceptr *devPtr, CUexternalMemory extMem, const CUDA_EXTERNAL_MEMORY_BUFFER_DESC *bufferDesc) { +CUresult cuExternalMemoryGetMappedBuffer(CUdeviceptr *devPtr, CUexternalMemory extMem, + const CUDA_EXTERNAL_MEMORY_BUFFER_DESC *bufferDesc) { LOG_DEBUG("cuExternalMemoryGetMappedBuffer"); - return CUDA_OVERRIDE_CALL(cuda_library_entry,cuExternalMemoryGetMappedBuffer,devPtr,extMem,bufferDesc); + return CUDA_OVERRIDE_CALL(cuda_library_entry, cuExternalMemoryGetMappedBuffer, devPtr, extMem, + bufferDesc); } -CUresult cuExternalMemoryGetMappedMipmappedArray(CUmipmappedArray *mipmap, CUexternalMemory extMem, const CUDA_EXTERNAL_MEMORY_MIPMAPPED_ARRAY_DESC *mipmapDesc) { +CUresult cuExternalMemoryGetMappedMipmappedArray( + CUmipmappedArray *mipmap, CUexternalMemory extMem, + const CUDA_EXTERNAL_MEMORY_MIPMAPPED_ARRAY_DESC *mipmapDesc) { LOG_DEBUG("cuExternalMemoryGetMappedMipmappedArray"); - return CUDA_OVERRIDE_CALL(cuda_library_entry,cuExternalMemoryGetMappedMipmappedArray,mipmap,extMem,mipmapDesc); + return CUDA_OVERRIDE_CALL(cuda_library_entry, cuExternalMemoryGetMappedMipmappedArray, mipmap, + extMem, mipmapDesc); } CUresult cuDestroyExternalMemory(CUexternalMemory 
extMem) { LOG_DEBUG("cuDestroyExternalMemory"); - return CUDA_OVERRIDE_CALL(cuda_library_entry,cuDestroyExternalMemory,extMem); + return CUDA_OVERRIDE_CALL(cuda_library_entry, cuDestroyExternalMemory, extMem); } -CUresult cuImportExternalSemaphore(CUexternalSemaphore *extSem_out, const CUDA_EXTERNAL_SEMAPHORE_HANDLE_DESC *semHandleDesc) { +CUresult cuImportExternalSemaphore(CUexternalSemaphore *extSem_out, + const CUDA_EXTERNAL_SEMAPHORE_HANDLE_DESC *semHandleDesc) { LOG_DEBUG("cuImportExternalSemaphore"); - return CUDA_OVERRIDE_CALL(cuda_library_entry,cuImportExternalSemaphore,extSem_out,semHandleDesc); + return CUDA_OVERRIDE_CALL(cuda_library_entry, cuImportExternalSemaphore, extSem_out, + semHandleDesc); } -CUresult cuSignalExternalSemaphoresAsync(const CUexternalSemaphore *extSemArray, const CUDA_EXTERNAL_SEMAPHORE_SIGNAL_PARAMS *paramsArray, unsigned int numExtSems, CUstream stream) { +CUresult cuSignalExternalSemaphoresAsync(const CUexternalSemaphore *extSemArray, + const CUDA_EXTERNAL_SEMAPHORE_SIGNAL_PARAMS *paramsArray, + unsigned int numExtSems, CUstream stream) { LOG_DEBUG("cuSignalExternalSemaphoresAsync"); - return CUDA_OVERRIDE_CALL(cuda_library_entry,cuSignalExternalSemaphoresAsync,extSemArray,paramsArray,numExtSems,stream); + return CUDA_OVERRIDE_CALL(cuda_library_entry, cuSignalExternalSemaphoresAsync, extSemArray, + paramsArray, numExtSems, stream); } -CUresult cuWaitExternalSemaphoresAsync(const CUexternalSemaphore *extSemArray, const CUDA_EXTERNAL_SEMAPHORE_WAIT_PARAMS *paramsArray, unsigned int numExtSems, CUstream stream) { +CUresult cuWaitExternalSemaphoresAsync(const CUexternalSemaphore *extSemArray, + const CUDA_EXTERNAL_SEMAPHORE_WAIT_PARAMS *paramsArray, + unsigned int numExtSems, CUstream stream) { LOG_DEBUG("cuWaitExternalSemaphoresAsync"); - return CUDA_OVERRIDE_CALL(cuda_library_entry,cuWaitExternalSemaphoresAsync,extSemArray,paramsArray,numExtSems,stream); + return CUDA_OVERRIDE_CALL(cuda_library_entry, cuWaitExternalSemaphoresAsync, 
extSemArray, + paramsArray, numExtSems, stream); } CUresult cuDestroyExternalSemaphore(CUexternalSemaphore extSem) { LOG_DEBUG("cuDestroyExternalSemaphore"); - return CUDA_OVERRIDE_CALL(cuda_library_entry,cuDestroyExternalSemaphore,extSem); + return CUDA_OVERRIDE_CALL(cuda_library_entry, cuDestroyExternalSemaphore, extSem); } diff --git a/src/cuda/stream.c b/src/cuda/stream.c old mode 100755 new mode 100644 index a8643e14..c37289d7 --- a/src/cuda/stream.c +++ b/src/cuda/stream.c @@ -1,18 +1,18 @@ #include "include/libcuda_hook.h" -CUresult cuStreamCreate(CUstream *phstream, unsigned int flags){ - LOG_INFO("cuStreamCreate %p",phstream); - CUresult res = CUDA_OVERRIDE_CALL(cuda_library_entry,cuStreamCreate,phstream,flags); +CUresult cuStreamCreate(CUstream *phstream, unsigned int flags) { + LOG_INFO("cuStreamCreate %p", phstream); + CUresult res = CUDA_OVERRIDE_CALL(cuda_library_entry, cuStreamCreate, phstream, flags); return res; } -CUresult cuStreamDestroy_v2 ( CUstream hStream ){ - LOG_DEBUG("cuStreamDestroy_v2 %p",hStream); - return CUDA_OVERRIDE_CALL(cuda_library_entry,cuStreamDestroy_v2,hStream); +CUresult cuStreamDestroy_v2(CUstream hStream) { + LOG_DEBUG("cuStreamDestroy_v2 %p", hStream); + return CUDA_OVERRIDE_CALL(cuda_library_entry, cuStreamDestroy_v2, hStream); } -CUresult cuStreamSynchronize(CUstream hstream){ - LOG_DEBUG("cuStreamSync %p",hstream); - CUresult res = CUDA_OVERRIDE_CALL(cuda_library_entry,cuStreamSynchronize,hstream); +CUresult cuStreamSynchronize(CUstream hstream) { + LOG_DEBUG("cuStreamSync %p", hstream); + CUresult res = CUDA_OVERRIDE_CALL(cuda_library_entry, cuStreamSynchronize, hstream); return res; } diff --git a/src/include/libcuda_hook.h b/src/include/libcuda_hook.h index 1e5cb777..ed5eaf92 100644 --- a/src/include/libcuda_hook.h +++ b/src/include/libcuda_hook.h @@ -1,23 +1,24 @@ #ifndef __LIBCUDA_HOOK_H__ #define __LIBCUDA_HOOK_H__ +#include +#include #include #include #include #include #include #include -#include #include 
-#include #define NVML_NO_UNVERSIONED_FUNC_DEFS #include #include + #include "include/log_utils.h" typedef struct { - void *fn_ptr; - char *name; + void *fn_ptr; + char *name; } cuda_entry_t; #define FILENAME_MAX 4096 @@ -30,15 +31,15 @@ typedef CUresult (*cuda_sym_t)(); #define CUDA_FIND_ENTRY(table, sym) ({ (table)[CUDA_OVERRIDE_ENUM(sym)].fn_ptr; }) -#define CUDA_OVERRIDE_CALL(table, sym, ...) \ - ({ \ - LOG_DEBUG("Hijacking %s", #sym); \ - cuda_sym_t _entry = (cuda_sym_t)CUDA_FIND_ENTRY(table, sym); \ - if (_entry == NULL) { \ - LOG_ERROR("Hijack failed: %s is NULL", #sym); \ - } \ - _entry(__VA_ARGS__); \ - }) +#define CUDA_OVERRIDE_CALL(table, sym, ...) \ + ({ \ + LOG_DEBUG("Hijacking %s", #sym); \ + cuda_sym_t _entry = (cuda_sym_t)CUDA_FIND_ENTRY(table, sym); \ + if (_entry == NULL) { \ + LOG_ERROR("Hijack failed: %s is NULL", #sym); \ + } \ + _entry(__VA_ARGS__); \ + }) typedef enum { /* cuInit Part */ @@ -87,7 +88,7 @@ typedef enum { CUDA_OVERRIDE_ENUM(cuCtxSetLimit), CUDA_OVERRIDE_ENUM(cuCtxSetSharedMemConfig), CUDA_OVERRIDE_ENUM(cuCtxSynchronize), - //CUDA_OVERRIDE_ENUM(cuCtxEnablePeerAccess), + // CUDA_OVERRIDE_ENUM(cuCtxEnablePeerAccess), CUDA_OVERRIDE_ENUM(cuGetExportTable), /* cuStream Part */ @@ -268,14 +269,14 @@ typedef enum { CUDA_OVERRIDE_ENUM(cuGetProcAddress), CUDA_OVERRIDE_ENUM(cuGetProcAddress_v2), CUDA_ENTRY_END -}cuda_override_enum_t; +} cuda_override_enum_t; extern cuda_entry_t cuda_library_entry[]; #endif #undef cuGetProcAddress -CUresult cuGetProcAddress( const char* symbol, void** pfn, int cudaVersion, cuuint64_t flags ); +CUresult cuGetProcAddress(const char *symbol, void **pfn, int cudaVersion, cuuint64_t flags); #undef cuGraphInstantiate -CUresult cuGraphInstantiate(CUgraphExec *phGraphExec, CUgraph hGraph, CUgraphNode *phErrorNode, char *logBuffer, size_t bufferSize); - +CUresult cuGraphInstantiate(CUgraphExec *phGraphExec, CUgraph hGraph, CUgraphNode *phErrorNode, + char *logBuffer, size_t bufferSize); diff --git 
a/src/include/libnvml_hook.h b/src/include/libnvml_hook.h index 0e0b564e..03dd1efe 100644 --- a/src/include/libnvml_hook.h +++ b/src/include/libnvml_hook.h @@ -1,19 +1,20 @@ #ifndef __LIBNVML_HOOK_H__ #define __LIBNVML_HOOK_H__ +#include +#include +#include #include #include +#include #include #include #include #include -#include #include -#include -#include -#include -#include "include/nvml-subset.h" + #include "include/log_utils.h" +#include "include/nvml-subset.h" #include "include/nvml_prefix.h" #define FILENAME_MAX 4096 @@ -24,512 +25,512 @@ typedef nvmlReturn_t (*driver_sym_t)(); #define NVML_FIND_ENTRY(table, sym) ({ (table)[NVML_OVERRIDE_ENUM(sym)].fn_ptr; }) -#define NVML_OVERRIDE_CALL(table, sym, ...) \ - ({ \ - LOG_DEBUG("Hijacking %s", #sym); \ - driver_sym_t _entry = NVML_FIND_ENTRY(table, sym); \ - _entry(__VA_ARGS__); \ - }) +#define NVML_OVERRIDE_CALL(table, sym, ...) \ + ({ \ + LOG_DEBUG("Hijacking %s", #sym); \ + driver_sym_t _entry = NVML_FIND_ENTRY(table, sym); \ + _entry(__VA_ARGS__); \ + }) -#define NVML_OVERRIDE_CALL_NO_LOG(table, sym, ...) \ - ({ \ - driver_sym_t _entry = NVML_FIND_ENTRY(table, sym); \ - _entry(__VA_ARGS__); \ - }) +#define NVML_OVERRIDE_CALL_NO_LOG(table, sym, ...) 
\ + ({ \ + driver_sym_t _entry = NVML_FIND_ENTRY(table, sym); \ + _entry(__VA_ARGS__); \ + }) /** * NVML management library enumerator entry */ typedef enum { - /** nvmlInit */ - NVML_OVERRIDE_ENUM(nvmlInit), - /** nvmlShutdown */ - NVML_OVERRIDE_ENUM(nvmlShutdown), - /** nvmlErrorString */ - NVML_OVERRIDE_ENUM(nvmlErrorString), - /** nvmlDeviceGetHandleByIndex */ - NVML_OVERRIDE_ENUM(nvmlDeviceGetHandleByIndex), - /** nvmlDeviceGetComputeRunningProcesses */ - NVML_OVERRIDE_ENUM(nvmlDeviceGetComputeRunningProcesses), - /** nvmlDeviceGetPciInfo */ - NVML_OVERRIDE_ENUM(nvmlDeviceGetPciInfo), - /** nvmlDeviceGetProcessUtilization */ - NVML_OVERRIDE_ENUM(nvmlDeviceGetProcessUtilization), - /** nvmlDeviceGetCount */ - NVML_OVERRIDE_ENUM(nvmlDeviceGetCount), - /** nvmlDeviceClearAccountingPids */ - NVML_OVERRIDE_ENUM(nvmlDeviceClearAccountingPids), - /** nvmlDeviceClearCpuAffinity */ - NVML_OVERRIDE_ENUM(nvmlDeviceClearCpuAffinity), - /** nvmlDeviceClearEccErrorCounts */ - NVML_OVERRIDE_ENUM(nvmlDeviceClearEccErrorCounts), - /** nvmlDeviceDiscoverGpus */ - NVML_OVERRIDE_ENUM(nvmlDeviceDiscoverGpus), - /** nvmlDeviceFreezeNvLinkUtilizationCounter */ - NVML_OVERRIDE_ENUM(nvmlDeviceFreezeNvLinkUtilizationCounter), - /** nvmlDeviceGetAccountingBufferSize */ - NVML_OVERRIDE_ENUM(nvmlDeviceGetAccountingBufferSize), - /** nvmlDeviceGetAccountingMode */ - NVML_OVERRIDE_ENUM(nvmlDeviceGetAccountingMode), - /** nvmlDeviceGetAccountingPids */ - NVML_OVERRIDE_ENUM(nvmlDeviceGetAccountingPids), - /** nvmlDeviceGetAccountingStats */ - NVML_OVERRIDE_ENUM(nvmlDeviceGetAccountingStats), - /** nvmlDeviceGetActiveVgpus */ - NVML_OVERRIDE_ENUM(nvmlDeviceGetActiveVgpus), - /** nvmlDeviceGetAPIRestriction */ - NVML_OVERRIDE_ENUM(nvmlDeviceGetAPIRestriction), - /** nvmlDeviceGetApplicationsClock */ - NVML_OVERRIDE_ENUM(nvmlDeviceGetApplicationsClock), - /** nvmlDeviceGetAutoBoostedClocksEnabled */ - NVML_OVERRIDE_ENUM(nvmlDeviceGetAutoBoostedClocksEnabled), - /** nvmlDeviceGetBAR1MemoryInfo */ 
- NVML_OVERRIDE_ENUM(nvmlDeviceGetBAR1MemoryInfo), - /** nvmlDeviceGetBoardId */ - NVML_OVERRIDE_ENUM(nvmlDeviceGetBoardId), - /** nvmlDeviceGetBoardPartNumber */ - NVML_OVERRIDE_ENUM(nvmlDeviceGetBoardPartNumber), - /** nvmlDeviceGetBrand */ - NVML_OVERRIDE_ENUM(nvmlDeviceGetBrand), - /** nvmlDeviceGetBridgeChipInfo */ - NVML_OVERRIDE_ENUM(nvmlDeviceGetBridgeChipInfo), - /** nvmlDeviceGetClock */ - NVML_OVERRIDE_ENUM(nvmlDeviceGetClock), - /** nvmlDeviceGetClockInfo */ - NVML_OVERRIDE_ENUM(nvmlDeviceGetClockInfo), - /** nvmlDeviceGetComputeMode */ - NVML_OVERRIDE_ENUM(nvmlDeviceGetComputeMode), - /** nvmlDeviceGetCount_v2 */ - NVML_OVERRIDE_ENUM(nvmlDeviceGetCount_v2), - /** nvmlDeviceGetCpuAffinity */ - NVML_OVERRIDE_ENUM(nvmlDeviceGetCpuAffinity), - /** nvmlDeviceGetCreatableVgpus */ - NVML_OVERRIDE_ENUM(nvmlDeviceGetCreatableVgpus), - /** nvmlDeviceGetCudaComputeCapability */ - NVML_OVERRIDE_ENUM(nvmlDeviceGetCudaComputeCapability), - /** nvmlDeviceGetCurrentClocksThrottleReasons */ - NVML_OVERRIDE_ENUM(nvmlDeviceGetCurrentClocksThrottleReasons), - /** nvmlDeviceGetCurrPcieLinkGeneration */ - NVML_OVERRIDE_ENUM(nvmlDeviceGetCurrPcieLinkGeneration), - /** nvmlDeviceGetCurrPcieLinkWidth */ - NVML_OVERRIDE_ENUM(nvmlDeviceGetCurrPcieLinkWidth), - /** nvmlDeviceGetDecoderUtilization */ - NVML_OVERRIDE_ENUM(nvmlDeviceGetDecoderUtilization), - /** nvmlDeviceGetDefaultApplicationsClock */ - NVML_OVERRIDE_ENUM(nvmlDeviceGetDefaultApplicationsClock), - /** nvmlDeviceGetDetailedEccErrors */ - NVML_OVERRIDE_ENUM(nvmlDeviceGetDetailedEccErrors), - /** nvmlDeviceGetDisplayActive */ - NVML_OVERRIDE_ENUM(nvmlDeviceGetDisplayActive), - /** nvmlDeviceGetDisplayMode */ - NVML_OVERRIDE_ENUM(nvmlDeviceGetDisplayMode), - /** nvmlDeviceGetDriverModel */ - NVML_OVERRIDE_ENUM(nvmlDeviceGetDriverModel), - /** nvmlDeviceGetEccMode */ - NVML_OVERRIDE_ENUM(nvmlDeviceGetEccMode), - /** nvmlDeviceGetEncoderCapacity */ - NVML_OVERRIDE_ENUM(nvmlDeviceGetEncoderCapacity), - /** 
nvmlDeviceGetEncoderSessions */ - NVML_OVERRIDE_ENUM(nvmlDeviceGetEncoderSessions), - /** nvmlDeviceGetEncoderStats */ - NVML_OVERRIDE_ENUM(nvmlDeviceGetEncoderStats), - /** nvmlDeviceGetEncoderUtilization */ - NVML_OVERRIDE_ENUM(nvmlDeviceGetEncoderUtilization), - /** nvmlDeviceGetEnforcedPowerLimit */ - NVML_OVERRIDE_ENUM(nvmlDeviceGetEnforcedPowerLimit), - /** nvmlDeviceGetFanSpeed */ - NVML_OVERRIDE_ENUM(nvmlDeviceGetFanSpeed), - /** nvmlDeviceGetFanSpeed_v2 */ - NVML_OVERRIDE_ENUM(nvmlDeviceGetFanSpeed_v2), - /** nvmlDeviceGetFieldValues */ - NVML_OVERRIDE_ENUM(nvmlDeviceGetFieldValues), - /** nvmlDeviceGetGpuOperationMode */ - NVML_OVERRIDE_ENUM(nvmlDeviceGetGpuOperationMode), - /** nvmlDeviceGetGraphicsRunningProcesses */ - NVML_OVERRIDE_ENUM(nvmlDeviceGetGraphicsRunningProcesses), - /** nvmlDeviceGetGridLicensableFeatures */ - NVML_OVERRIDE_ENUM(nvmlDeviceGetGridLicensableFeatures), - /** nvmlDeviceGetHandleByIndex_v2 */ - NVML_OVERRIDE_ENUM(nvmlDeviceGetHandleByIndex_v2), - /** nvmlDeviceGetHandleByPciBusId */ - NVML_OVERRIDE_ENUM(nvmlDeviceGetHandleByPciBusId), - /** nvmlDeviceGetHandleByPciBusId_v2 */ - NVML_OVERRIDE_ENUM(nvmlDeviceGetHandleByPciBusId_v2), - /** nvmlDeviceGetHandleBySerial */ - NVML_OVERRIDE_ENUM(nvmlDeviceGetHandleBySerial), - /** nvmlDeviceGetHandleByUUID */ - NVML_OVERRIDE_ENUM(nvmlDeviceGetHandleByUUID), - /** nvmlDeviceGetIndex */ - NVML_OVERRIDE_ENUM(nvmlDeviceGetIndex), - /** nvmlDeviceGetInforomConfigurationChecksum */ - NVML_OVERRIDE_ENUM(nvmlDeviceGetInforomConfigurationChecksum), - /** nvmlDeviceGetInforomImageVersion */ - NVML_OVERRIDE_ENUM(nvmlDeviceGetInforomImageVersion), - /** nvmlDeviceGetInforomVersion */ - NVML_OVERRIDE_ENUM(nvmlDeviceGetInforomVersion), - /** nvmlDeviceGetMaxClockInfo */ - NVML_OVERRIDE_ENUM(nvmlDeviceGetMaxClockInfo), - /** nvmlDeviceGetMaxCustomerBoostClock */ - NVML_OVERRIDE_ENUM(nvmlDeviceGetMaxCustomerBoostClock), - /** nvmlDeviceGetMaxPcieLinkGeneration */ - 
NVML_OVERRIDE_ENUM(nvmlDeviceGetMaxPcieLinkGeneration), - /** nvmlDeviceGetMaxPcieLinkWidth */ - NVML_OVERRIDE_ENUM(nvmlDeviceGetMaxPcieLinkWidth), - /** nvmlDeviceGetMemoryErrorCounter */ - NVML_OVERRIDE_ENUM(nvmlDeviceGetMemoryErrorCounter), - /** nvmlDeviceGetMemoryInfo */ - NVML_OVERRIDE_ENUM(nvmlDeviceGetMemoryInfo), - /** nvmlDeviceGetMemoryInfo_v2 */ - NVML_OVERRIDE_ENUM(nvmlDeviceGetMemoryInfo_v2), - /** nvmlDeviceGetMinorNumber */ - NVML_OVERRIDE_ENUM(nvmlDeviceGetMinorNumber), - /** nvmlDeviceGetMPSComputeRunningProcesses */ - NVML_OVERRIDE_ENUM(nvmlDeviceGetMPSComputeRunningProcesses), - /** nvmlDeviceGetMultiGpuBoard */ - NVML_OVERRIDE_ENUM(nvmlDeviceGetMultiGpuBoard), - /** nvmlDeviceGetName */ - NVML_OVERRIDE_ENUM(nvmlDeviceGetName), - /** nvmlDeviceGetNvLinkCapability */ - NVML_OVERRIDE_ENUM(nvmlDeviceGetNvLinkCapability), - /** nvmlDeviceGetNvLinkErrorCounter */ - NVML_OVERRIDE_ENUM(nvmlDeviceGetNvLinkErrorCounter), - /** nvmlDeviceGetNvLinkRemotePciInfo */ - NVML_OVERRIDE_ENUM(nvmlDeviceGetNvLinkRemotePciInfo), - /** nvmlDeviceGetNvLinkRemotePciInfo_v2 */ - NVML_OVERRIDE_ENUM(nvmlDeviceGetNvLinkRemotePciInfo_v2), - /** nvmlDeviceGetNvLinkState */ - NVML_OVERRIDE_ENUM(nvmlDeviceGetNvLinkState), - /** nvmlDeviceGetNvLinkUtilizationControl */ - NVML_OVERRIDE_ENUM(nvmlDeviceGetNvLinkUtilizationControl), - /** nvmlDeviceGetNvLinkUtilizationCounter */ - NVML_OVERRIDE_ENUM(nvmlDeviceGetNvLinkUtilizationCounter), - /** nvmlDeviceGetNvLinkVersion */ - NVML_OVERRIDE_ENUM(nvmlDeviceGetNvLinkVersion), - /** nvmlDeviceGetP2PStatus */ - NVML_OVERRIDE_ENUM(nvmlDeviceGetP2PStatus), - /** nvmlDeviceGetPcieReplayCounter */ - NVML_OVERRIDE_ENUM(nvmlDeviceGetPcieReplayCounter), - /** nvmlDeviceGetPcieThroughput */ - NVML_OVERRIDE_ENUM(nvmlDeviceGetPcieThroughput), - /** nvmlDeviceGetPciInfo_v2 */ - NVML_OVERRIDE_ENUM(nvmlDeviceGetPciInfo_v2), - /** nvmlDeviceGetPciInfo_v3 */ - NVML_OVERRIDE_ENUM(nvmlDeviceGetPciInfo_v3), - /** nvmlDeviceGetPerformanceState */ - 
NVML_OVERRIDE_ENUM(nvmlDeviceGetPerformanceState), - /** nvmlDeviceGetPersistenceMode */ - NVML_OVERRIDE_ENUM(nvmlDeviceGetPersistenceMode), - /** nvmlDeviceGetPowerManagementDefaultLimit */ - NVML_OVERRIDE_ENUM(nvmlDeviceGetPowerManagementDefaultLimit), - /** nvmlDeviceGetPowerManagementLimit */ - NVML_OVERRIDE_ENUM(nvmlDeviceGetPowerManagementLimit), - /** nvmlDeviceGetPowerManagementLimitConstraints */ - NVML_OVERRIDE_ENUM(nvmlDeviceGetPowerManagementLimitConstraints), - /** nvmlDeviceGetPowerManagementMode */ - NVML_OVERRIDE_ENUM(nvmlDeviceGetPowerManagementMode), - /** nvmlDeviceGetPowerState */ - NVML_OVERRIDE_ENUM(nvmlDeviceGetPowerState), - /** nvmlDeviceGetPowerUsage */ - NVML_OVERRIDE_ENUM(nvmlDeviceGetPowerUsage), - /** nvmlDeviceGetRetiredPages */ - NVML_OVERRIDE_ENUM(nvmlDeviceGetRetiredPages), - /** nvmlDeviceGetRetiredPagesPendingStatus */ - NVML_OVERRIDE_ENUM(nvmlDeviceGetRetiredPagesPendingStatus), - /** nvmlDeviceGetSamples */ - NVML_OVERRIDE_ENUM(nvmlDeviceGetSamples), - /** nvmlDeviceGetSerial */ - NVML_OVERRIDE_ENUM(nvmlDeviceGetSerial), - /** nvmlDeviceGetSupportedClocksThrottleReasons */ - NVML_OVERRIDE_ENUM(nvmlDeviceGetSupportedClocksThrottleReasons), - /** nvmlDeviceGetSupportedEventTypes */ - NVML_OVERRIDE_ENUM(nvmlDeviceGetSupportedEventTypes), - /** nvmlDeviceGetSupportedGraphicsClocks */ - NVML_OVERRIDE_ENUM(nvmlDeviceGetSupportedGraphicsClocks), - /** nvmlDeviceGetSupportedMemoryClocks */ - NVML_OVERRIDE_ENUM(nvmlDeviceGetSupportedMemoryClocks), - /** nvmlDeviceGetSupportedVgpus */ - NVML_OVERRIDE_ENUM(nvmlDeviceGetSupportedVgpus), - /** nvmlDeviceGetTemperature */ - NVML_OVERRIDE_ENUM(nvmlDeviceGetTemperature), - /** nvmlDeviceGetTemperatureThreshold */ - NVML_OVERRIDE_ENUM(nvmlDeviceGetTemperatureThreshold), - /** nvmlDeviceGetTopologyCommonAncestor */ - NVML_OVERRIDE_ENUM(nvmlDeviceGetTopologyCommonAncestor), - /** nvmlDeviceGetTopologyNearestGpus */ - NVML_OVERRIDE_ENUM(nvmlDeviceGetTopologyNearestGpus), - /** 
nvmlDeviceGetTotalEccErrors */ - NVML_OVERRIDE_ENUM(nvmlDeviceGetTotalEccErrors), - /** nvmlDeviceGetTotalEnergyConsumption */ - NVML_OVERRIDE_ENUM(nvmlDeviceGetTotalEnergyConsumption), - /** nvmlDeviceGetUtilizationRates */ - NVML_OVERRIDE_ENUM(nvmlDeviceGetUtilizationRates), - /** nvmlDeviceGetUUID */ - NVML_OVERRIDE_ENUM(nvmlDeviceGetUUID), - /** nvmlDeviceGetVbiosVersion */ - NVML_OVERRIDE_ENUM(nvmlDeviceGetVbiosVersion), - /** nvmlDeviceGetVgpuMetadata */ - NVML_OVERRIDE_ENUM(nvmlDeviceGetVgpuMetadata), - /** nvmlDeviceGetVgpuProcessUtilization */ - NVML_OVERRIDE_ENUM(nvmlDeviceGetVgpuProcessUtilization), - /** nvmlDeviceGetVgpuUtilization */ - NVML_OVERRIDE_ENUM(nvmlDeviceGetVgpuUtilization), - /** nvmlDeviceGetViolationStatus */ - NVML_OVERRIDE_ENUM(nvmlDeviceGetViolationStatus), - /** nvmlDeviceGetVirtualizationMode */ - NVML_OVERRIDE_ENUM(nvmlDeviceGetVirtualizationMode), - /** nvmlDeviceModifyDrainState */ - NVML_OVERRIDE_ENUM(nvmlDeviceModifyDrainState), - /** nvmlDeviceOnSameBoard */ - NVML_OVERRIDE_ENUM(nvmlDeviceOnSameBoard), - /** nvmlDeviceQueryDrainState */ - NVML_OVERRIDE_ENUM(nvmlDeviceQueryDrainState), - /** nvmlDeviceRegisterEvents */ - NVML_OVERRIDE_ENUM(nvmlDeviceRegisterEvents), - /** nvmlDeviceRemoveGpu */ - NVML_OVERRIDE_ENUM(nvmlDeviceRemoveGpu), - /** nvmlDeviceRemoveGpu_v2 */ - NVML_OVERRIDE_ENUM(nvmlDeviceRemoveGpu_v2), - /** nvmlDeviceResetApplicationsClocks */ - NVML_OVERRIDE_ENUM(nvmlDeviceResetApplicationsClocks), - /** nvmlDeviceResetNvLinkErrorCounters */ - NVML_OVERRIDE_ENUM(nvmlDeviceResetNvLinkErrorCounters), - /** nvmlDeviceResetNvLinkUtilizationCounter */ - NVML_OVERRIDE_ENUM(nvmlDeviceResetNvLinkUtilizationCounter), - /** nvmlDeviceSetAccountingMode */ - NVML_OVERRIDE_ENUM(nvmlDeviceSetAccountingMode), - /** nvmlDeviceSetAPIRestriction */ - NVML_OVERRIDE_ENUM(nvmlDeviceSetAPIRestriction), - /** nvmlDeviceSetApplicationsClocks */ - NVML_OVERRIDE_ENUM(nvmlDeviceSetApplicationsClocks), - /** 
nvmlDeviceSetAutoBoostedClocksEnabled */ - NVML_OVERRIDE_ENUM(nvmlDeviceSetAutoBoostedClocksEnabled), - /** nvmlDeviceSetComputeMode */ - NVML_OVERRIDE_ENUM(nvmlDeviceSetComputeMode), - /** nvmlDeviceSetCpuAffinity */ - NVML_OVERRIDE_ENUM(nvmlDeviceSetCpuAffinity), - /** nvmlDeviceSetDefaultAutoBoostedClocksEnabled */ - NVML_OVERRIDE_ENUM(nvmlDeviceSetDefaultAutoBoostedClocksEnabled), - /** nvmlDeviceSetDriverModel */ - NVML_OVERRIDE_ENUM(nvmlDeviceSetDriverModel), - /** nvmlDeviceSetEccMode */ - NVML_OVERRIDE_ENUM(nvmlDeviceSetEccMode), - /** nvmlDeviceSetGpuOperationMode */ - NVML_OVERRIDE_ENUM(nvmlDeviceSetGpuOperationMode), - /** nvmlDeviceSetNvLinkUtilizationControl */ - NVML_OVERRIDE_ENUM(nvmlDeviceSetNvLinkUtilizationControl), - /** nvmlDeviceSetPersistenceMode */ - NVML_OVERRIDE_ENUM(nvmlDeviceSetPersistenceMode), - /** nvmlDeviceSetPowerManagementLimit */ - NVML_OVERRIDE_ENUM(nvmlDeviceSetPowerManagementLimit), - /** nvmlDeviceSetVirtualizationMode */ - NVML_OVERRIDE_ENUM(nvmlDeviceSetVirtualizationMode), - /** nvmlDeviceValidateInforom */ - NVML_OVERRIDE_ENUM(nvmlDeviceValidateInforom), - /** nvmlEventSetCreate */ - NVML_OVERRIDE_ENUM(nvmlEventSetCreate), - /** nvmlEventSetFree */ - NVML_OVERRIDE_ENUM(nvmlEventSetFree), - /** nvmlEventSetWait */ - NVML_OVERRIDE_ENUM(nvmlEventSetWait), - /** nvmlGetVgpuCompatibility */ - NVML_OVERRIDE_ENUM(nvmlGetVgpuCompatibility), - /** nvmlInit_v2 */ - NVML_OVERRIDE_ENUM(nvmlInit_v2), - /** nvmlInitWithFlags */ - NVML_OVERRIDE_ENUM(nvmlInitWithFlags), - /** nvmlInternalGetExportTable */ - NVML_OVERRIDE_ENUM(nvmlInternalGetExportTable), - /** nvmlSystemGetCudaDriverVersion */ - NVML_OVERRIDE_ENUM(nvmlSystemGetCudaDriverVersion), - /** nvmlSystemGetCudaDriverVersion_v2 */ - NVML_OVERRIDE_ENUM(nvmlSystemGetCudaDriverVersion_v2), - /** nvmlSystemGetDriverVersion */ - NVML_OVERRIDE_ENUM(nvmlSystemGetDriverVersion), - /** nvmlSystemGetHicVersion */ - NVML_OVERRIDE_ENUM(nvmlSystemGetHicVersion), - /** nvmlSystemGetNVMLVersion 
*/ - NVML_OVERRIDE_ENUM(nvmlSystemGetNVMLVersion), - /** nvmlSystemGetProcessName */ - NVML_OVERRIDE_ENUM(nvmlSystemGetProcessName), - /** nvmlSystemGetTopologyGpuSet */ - NVML_OVERRIDE_ENUM(nvmlSystemGetTopologyGpuSet), - /** nvmlUnitGetCount */ - NVML_OVERRIDE_ENUM(nvmlUnitGetCount), - /** nvmlUnitGetDevices */ - NVML_OVERRIDE_ENUM(nvmlUnitGetDevices), - /** nvmlUnitGetFanSpeedInfo */ - NVML_OVERRIDE_ENUM(nvmlUnitGetFanSpeedInfo), - /** nvmlUnitGetHandleByIndex */ - NVML_OVERRIDE_ENUM(nvmlUnitGetHandleByIndex), - /** nvmlUnitGetLedState */ - NVML_OVERRIDE_ENUM(nvmlUnitGetLedState), - /** nvmlUnitGetPsuInfo */ - NVML_OVERRIDE_ENUM(nvmlUnitGetPsuInfo), - /** nvmlUnitGetTemperature */ - NVML_OVERRIDE_ENUM(nvmlUnitGetTemperature), - /** nvmlUnitGetUnitInfo */ - NVML_OVERRIDE_ENUM(nvmlUnitGetUnitInfo), - /** nvmlUnitSetLedState */ - NVML_OVERRIDE_ENUM(nvmlUnitSetLedState), - /** nvmlVgpuInstanceGetEncoderCapacity */ - NVML_OVERRIDE_ENUM(nvmlVgpuInstanceGetEncoderCapacity), - /** nvmlVgpuInstanceGetEncoderSessions */ - NVML_OVERRIDE_ENUM(nvmlVgpuInstanceGetEncoderSessions), - /** nvmlVgpuInstanceGetEncoderStats */ - NVML_OVERRIDE_ENUM(nvmlVgpuInstanceGetEncoderStats), - /** nvmlVgpuInstanceGetFbUsage */ - NVML_OVERRIDE_ENUM(nvmlVgpuInstanceGetFbUsage), - /** nvmlVgpuInstanceGetFrameRateLimit */ - NVML_OVERRIDE_ENUM(nvmlVgpuInstanceGetFrameRateLimit), - /** nvmlVgpuInstanceGetLicenseStatus */ - NVML_OVERRIDE_ENUM(nvmlVgpuInstanceGetLicenseStatus), - /** nvmlVgpuInstanceGetMetadata */ - NVML_OVERRIDE_ENUM(nvmlVgpuInstanceGetMetadata), - /** nvmlVgpuInstanceGetType */ - NVML_OVERRIDE_ENUM(nvmlVgpuInstanceGetType), - /** nvmlVgpuInstanceGetUUID */ - NVML_OVERRIDE_ENUM(nvmlVgpuInstanceGetUUID), - /** nvmlVgpuInstanceGetVmDriverVersion */ - NVML_OVERRIDE_ENUM(nvmlVgpuInstanceGetVmDriverVersion), - /** nvmlVgpuInstanceGetVmID */ - NVML_OVERRIDE_ENUM(nvmlVgpuInstanceGetVmID), - /** nvmlVgpuInstanceSetEncoderCapacity */ - NVML_OVERRIDE_ENUM(nvmlVgpuInstanceSetEncoderCapacity), 
- /** nvmlVgpuTypeGetClass */ - NVML_OVERRIDE_ENUM(nvmlVgpuTypeGetClass), - /** nvmlVgpuTypeGetDeviceID */ - NVML_OVERRIDE_ENUM(nvmlVgpuTypeGetDeviceID), - /** nvmlVgpuTypeGetFramebufferSize */ - NVML_OVERRIDE_ENUM(nvmlVgpuTypeGetFramebufferSize), - /** nvmlVgpuTypeGetFrameRateLimit */ - NVML_OVERRIDE_ENUM(nvmlVgpuTypeGetFrameRateLimit), - /** nvmlVgpuTypeGetLicense */ - NVML_OVERRIDE_ENUM(nvmlVgpuTypeGetLicense), - /** nvmlVgpuTypeGetMaxInstances */ - NVML_OVERRIDE_ENUM(nvmlVgpuTypeGetMaxInstances), - /** nvmlVgpuTypeGetName */ - NVML_OVERRIDE_ENUM(nvmlVgpuTypeGetName), - /** nvmlVgpuTypeGetNumDisplayHeads */ - NVML_OVERRIDE_ENUM(nvmlVgpuTypeGetNumDisplayHeads), - /** nvmlVgpuTypeGetResolution */ - NVML_OVERRIDE_ENUM(nvmlVgpuTypeGetResolution), - /** nvmlDeviceGetFBCSessions */ - NVML_OVERRIDE_ENUM(nvmlDeviceGetFBCSessions), - /** nvmlDeviceGetFBCStats */ - NVML_OVERRIDE_ENUM(nvmlDeviceGetFBCStats), - /** nvmlDeviceGetGridLicensableFeatures_v2 */ - NVML_OVERRIDE_ENUM(nvmlDeviceGetGridLicensableFeatures_v2), - /** nvmlDeviceGetRetiredPages_v2 */ - NVML_OVERRIDE_ENUM(nvmlDeviceGetRetiredPages_v2), - /** nvmlDeviceResetGpuLockedClocks */ - NVML_OVERRIDE_ENUM(nvmlDeviceResetGpuLockedClocks), - /** nvmlDeviceSetGpuLockedClocks */ - NVML_OVERRIDE_ENUM(nvmlDeviceSetGpuLockedClocks), - /** nvmlGetBlacklistDeviceCount */ - NVML_OVERRIDE_ENUM(nvmlVgpuInstanceGetAccountingMode), - /** nvmlVgpuInstanceGetAccountingPids */ - NVML_OVERRIDE_ENUM(nvmlVgpuInstanceGetAccountingPids), - /** nvmlVgpuInstanceGetAccountingStats */ - NVML_OVERRIDE_ENUM(nvmlVgpuInstanceGetAccountingStats), - /** nvmlVgpuInstanceGetFBCSessions */ - NVML_OVERRIDE_ENUM(nvmlVgpuInstanceGetFBCSessions), - /** nvmlVgpuInstanceGetFBCStats */ - NVML_OVERRIDE_ENUM(nvmlVgpuInstanceGetFBCStats), - /** nvmlVgpuTypeGetMaxInstancesPerVm */ - NVML_OVERRIDE_ENUM(nvmlVgpuTypeGetMaxInstancesPerVm), - /** nvmlGetVgpuVersion */ - NVML_OVERRIDE_ENUM(nvmlGetVgpuVersion), - /** nvmlSetVgpuVersion */ - 
NVML_OVERRIDE_ENUM(nvmlSetVgpuVersion), - /** nvmlDeviceGetGridLicensableFeatures_v3 */ - NVML_OVERRIDE_ENUM(nvmlDeviceGetGridLicensableFeatures_v3), - /** nvmlDeviceGetHostVgpuMode */ - NVML_OVERRIDE_ENUM(nvmlDeviceGetHostVgpuMode), - /** nvmlDeviceGetPgpuMetadataString */ - NVML_OVERRIDE_ENUM(nvmlDeviceGetPgpuMetadataString), - /** nvmlVgpuInstanceGetEccMode */ - NVML_OVERRIDE_ENUM(nvmlVgpuInstanceGetEccMode), - /** nvmlComputeInstanceDestroy */ - NVML_OVERRIDE_ENUM(nvmlComputeInstanceDestroy), - /** nvmlComputeInstanceGetInfo */ - NVML_OVERRIDE_ENUM(nvmlComputeInstanceGetInfo), - /** nvmlDeviceCreateGpuInstance */ - NVML_OVERRIDE_ENUM(nvmlDeviceCreateGpuInstance), - /** nvmlDeviceGetArchitecture */ - NVML_OVERRIDE_ENUM(nvmlDeviceGetArchitecture), - /** nvmlDeviceGetAttributes */ - NVML_OVERRIDE_ENUM(nvmlDeviceGetAttributes), - /** nvmlDeviceGetAttributes_v2 */ - NVML_OVERRIDE_ENUM(nvmlDeviceGetAttributes_v2), - /** nvmlDeviceGetComputeInstanceId */ - NVML_OVERRIDE_ENUM(nvmlDeviceGetComputeInstanceId), - /** nvmlDeviceGetCpuAffinityWithinScope */ - NVML_OVERRIDE_ENUM(nvmlDeviceGetCpuAffinityWithinScope), - /** nvmlDeviceGetDeviceHandleFromMigDeviceHandle */ - NVML_OVERRIDE_ENUM(nvmlDeviceGetDeviceHandleFromMigDeviceHandle), - /** nvmlDeviceGetGpuInstanceById */ - NVML_OVERRIDE_ENUM(nvmlDeviceGetGpuInstanceById), - /** nvmlDeviceGetGpuInstanceId */ - NVML_OVERRIDE_ENUM(nvmlDeviceGetGpuInstanceId), - /** nvmlDeviceGetGpuInstancePossiblePlacements */ - NVML_OVERRIDE_ENUM(nvmlDeviceGetGpuInstancePossiblePlacements), - /** nvmlDeviceGetGpuInstanceProfileInfo */ - NVML_OVERRIDE_ENUM(nvmlDeviceGetGpuInstanceProfileInfo), - /** nvmlDeviceGetGpuInstanceRemainingCapacity */ - NVML_OVERRIDE_ENUM(nvmlDeviceGetGpuInstanceRemainingCapacity), - /** nvmlDeviceGetGpuInstances */ - NVML_OVERRIDE_ENUM(nvmlDeviceGetGpuInstances), - /** nvmlDeviceGetMaxMigDeviceCount */ - NVML_OVERRIDE_ENUM(nvmlDeviceGetMaxMigDeviceCount), - /** nvmlDeviceGetMemoryAffinity */ - 
NVML_OVERRIDE_ENUM(nvmlDeviceGetMemoryAffinity), - /** nvmlDeviceGetMigDeviceHandleByIndex */ - NVML_OVERRIDE_ENUM(nvmlDeviceGetMigDeviceHandleByIndex), - /** nvmlDeviceGetMigMode */ - NVML_OVERRIDE_ENUM(nvmlDeviceGetMigMode), - /** nvmlDeviceGetRemappedRows */ - NVML_OVERRIDE_ENUM(nvmlDeviceGetRemappedRows), - /** nvmlDeviceGetRowRemapperHistogram */ - NVML_OVERRIDE_ENUM(nvmlDeviceGetRowRemapperHistogram), - /** nvmlDeviceIsMigDeviceHandle */ - NVML_OVERRIDE_ENUM(nvmlDeviceIsMigDeviceHandle), - /** nvmlDeviceSetMigMode */ - NVML_OVERRIDE_ENUM(nvmlDeviceSetMigMode), - /** nvmlEventSetWait_v2 */ - NVML_OVERRIDE_ENUM(nvmlEventSetWait_v2), - /** nvmlGpuInstanceCreateComputeInstance */ - NVML_OVERRIDE_ENUM(nvmlGpuInstanceCreateComputeInstance), - /** nvmlGpuInstanceDestroy */ - NVML_OVERRIDE_ENUM(nvmlGpuInstanceDestroy), - /** nvmlGpuInstanceGetComputeInstanceById */ - NVML_OVERRIDE_ENUM(nvmlGpuInstanceGetComputeInstanceById), - /** nvmlGpuInstanceGetComputeInstanceProfileInfo */ - NVML_OVERRIDE_ENUM(nvmlGpuInstanceGetComputeInstanceProfileInfo), - /** nvmlGpuInstanceGetComputeInstanceRemainingCapacity */ - NVML_OVERRIDE_ENUM(nvmlGpuInstanceGetComputeInstanceRemainingCapacity), - /** nvmlGpuInstanceGetComputeInstances */ - NVML_OVERRIDE_ENUM(nvmlGpuInstanceGetComputeInstances), - /** nvmlGpuInstanceGetInfo */ - NVML_OVERRIDE_ENUM(nvmlGpuInstanceGetInfo), - /** nvmlVgpuInstanceClearAccountingPids */ - NVML_OVERRIDE_ENUM(nvmlVgpuInstanceClearAccountingPids), - /** nvmlVgpuInstanceGetMdevUUID */ - NVML_OVERRIDE_ENUM(nvmlVgpuInstanceGetMdevUUID), - /** nvmlComputeInstanceGetInfo_v2 */ - NVML_OVERRIDE_ENUM(nvmlComputeInstanceGetInfo_v2), - /** nvmlDeviceGetComputeRunningProcesses_v2 */ - NVML_OVERRIDE_ENUM(nvmlDeviceGetComputeRunningProcesses_v2), - /** nvmlDeviceGetGraphicsRunningProcesses_v2 */ - NVML_OVERRIDE_ENUM(nvmlDeviceGetGraphicsRunningProcesses_v2), - /** nvmlDeviceSetTemperatureThreshold */ - NVML_OVERRIDE_ENUM(nvmlDeviceSetTemperatureThreshold), - /** 
nvmlRetry_NvRmControl */ - //NVML_OVERRIDE_ENUM(nvmlRetry_NvRmControl), - /** nvmlVgpuInstanceGetGpuInstanceId */ - NVML_OVERRIDE_ENUM(nvmlVgpuInstanceGetGpuInstanceId), - /** nvmlVgpuTypeGetGpuInstanceProfileId */ - NVML_OVERRIDE_ENUM(nvmlVgpuTypeGetGpuInstanceProfileId), - NVML_ENTRY_END + /** nvmlInit */ + NVML_OVERRIDE_ENUM(nvmlInit), + /** nvmlShutdown */ + NVML_OVERRIDE_ENUM(nvmlShutdown), + /** nvmlErrorString */ + NVML_OVERRIDE_ENUM(nvmlErrorString), + /** nvmlDeviceGetHandleByIndex */ + NVML_OVERRIDE_ENUM(nvmlDeviceGetHandleByIndex), + /** nvmlDeviceGetComputeRunningProcesses */ + NVML_OVERRIDE_ENUM(nvmlDeviceGetComputeRunningProcesses), + /** nvmlDeviceGetPciInfo */ + NVML_OVERRIDE_ENUM(nvmlDeviceGetPciInfo), + /** nvmlDeviceGetProcessUtilization */ + NVML_OVERRIDE_ENUM(nvmlDeviceGetProcessUtilization), + /** nvmlDeviceGetCount */ + NVML_OVERRIDE_ENUM(nvmlDeviceGetCount), + /** nvmlDeviceClearAccountingPids */ + NVML_OVERRIDE_ENUM(nvmlDeviceClearAccountingPids), + /** nvmlDeviceClearCpuAffinity */ + NVML_OVERRIDE_ENUM(nvmlDeviceClearCpuAffinity), + /** nvmlDeviceClearEccErrorCounts */ + NVML_OVERRIDE_ENUM(nvmlDeviceClearEccErrorCounts), + /** nvmlDeviceDiscoverGpus */ + NVML_OVERRIDE_ENUM(nvmlDeviceDiscoverGpus), + /** nvmlDeviceFreezeNvLinkUtilizationCounter */ + NVML_OVERRIDE_ENUM(nvmlDeviceFreezeNvLinkUtilizationCounter), + /** nvmlDeviceGetAccountingBufferSize */ + NVML_OVERRIDE_ENUM(nvmlDeviceGetAccountingBufferSize), + /** nvmlDeviceGetAccountingMode */ + NVML_OVERRIDE_ENUM(nvmlDeviceGetAccountingMode), + /** nvmlDeviceGetAccountingPids */ + NVML_OVERRIDE_ENUM(nvmlDeviceGetAccountingPids), + /** nvmlDeviceGetAccountingStats */ + NVML_OVERRIDE_ENUM(nvmlDeviceGetAccountingStats), + /** nvmlDeviceGetActiveVgpus */ + NVML_OVERRIDE_ENUM(nvmlDeviceGetActiveVgpus), + /** nvmlDeviceGetAPIRestriction */ + NVML_OVERRIDE_ENUM(nvmlDeviceGetAPIRestriction), + /** nvmlDeviceGetApplicationsClock */ + NVML_OVERRIDE_ENUM(nvmlDeviceGetApplicationsClock), + /** 
nvmlDeviceGetAutoBoostedClocksEnabled */ + NVML_OVERRIDE_ENUM(nvmlDeviceGetAutoBoostedClocksEnabled), + /** nvmlDeviceGetBAR1MemoryInfo */ + NVML_OVERRIDE_ENUM(nvmlDeviceGetBAR1MemoryInfo), + /** nvmlDeviceGetBoardId */ + NVML_OVERRIDE_ENUM(nvmlDeviceGetBoardId), + /** nvmlDeviceGetBoardPartNumber */ + NVML_OVERRIDE_ENUM(nvmlDeviceGetBoardPartNumber), + /** nvmlDeviceGetBrand */ + NVML_OVERRIDE_ENUM(nvmlDeviceGetBrand), + /** nvmlDeviceGetBridgeChipInfo */ + NVML_OVERRIDE_ENUM(nvmlDeviceGetBridgeChipInfo), + /** nvmlDeviceGetClock */ + NVML_OVERRIDE_ENUM(nvmlDeviceGetClock), + /** nvmlDeviceGetClockInfo */ + NVML_OVERRIDE_ENUM(nvmlDeviceGetClockInfo), + /** nvmlDeviceGetComputeMode */ + NVML_OVERRIDE_ENUM(nvmlDeviceGetComputeMode), + /** nvmlDeviceGetCount_v2 */ + NVML_OVERRIDE_ENUM(nvmlDeviceGetCount_v2), + /** nvmlDeviceGetCpuAffinity */ + NVML_OVERRIDE_ENUM(nvmlDeviceGetCpuAffinity), + /** nvmlDeviceGetCreatableVgpus */ + NVML_OVERRIDE_ENUM(nvmlDeviceGetCreatableVgpus), + /** nvmlDeviceGetCudaComputeCapability */ + NVML_OVERRIDE_ENUM(nvmlDeviceGetCudaComputeCapability), + /** nvmlDeviceGetCurrentClocksThrottleReasons */ + NVML_OVERRIDE_ENUM(nvmlDeviceGetCurrentClocksThrottleReasons), + /** nvmlDeviceGetCurrPcieLinkGeneration */ + NVML_OVERRIDE_ENUM(nvmlDeviceGetCurrPcieLinkGeneration), + /** nvmlDeviceGetCurrPcieLinkWidth */ + NVML_OVERRIDE_ENUM(nvmlDeviceGetCurrPcieLinkWidth), + /** nvmlDeviceGetDecoderUtilization */ + NVML_OVERRIDE_ENUM(nvmlDeviceGetDecoderUtilization), + /** nvmlDeviceGetDefaultApplicationsClock */ + NVML_OVERRIDE_ENUM(nvmlDeviceGetDefaultApplicationsClock), + /** nvmlDeviceGetDetailedEccErrors */ + NVML_OVERRIDE_ENUM(nvmlDeviceGetDetailedEccErrors), + /** nvmlDeviceGetDisplayActive */ + NVML_OVERRIDE_ENUM(nvmlDeviceGetDisplayActive), + /** nvmlDeviceGetDisplayMode */ + NVML_OVERRIDE_ENUM(nvmlDeviceGetDisplayMode), + /** nvmlDeviceGetDriverModel */ + NVML_OVERRIDE_ENUM(nvmlDeviceGetDriverModel), + /** nvmlDeviceGetEccMode */ + 
NVML_OVERRIDE_ENUM(nvmlDeviceGetEccMode), + /** nvmlDeviceGetEncoderCapacity */ + NVML_OVERRIDE_ENUM(nvmlDeviceGetEncoderCapacity), + /** nvmlDeviceGetEncoderSessions */ + NVML_OVERRIDE_ENUM(nvmlDeviceGetEncoderSessions), + /** nvmlDeviceGetEncoderStats */ + NVML_OVERRIDE_ENUM(nvmlDeviceGetEncoderStats), + /** nvmlDeviceGetEncoderUtilization */ + NVML_OVERRIDE_ENUM(nvmlDeviceGetEncoderUtilization), + /** nvmlDeviceGetEnforcedPowerLimit */ + NVML_OVERRIDE_ENUM(nvmlDeviceGetEnforcedPowerLimit), + /** nvmlDeviceGetFanSpeed */ + NVML_OVERRIDE_ENUM(nvmlDeviceGetFanSpeed), + /** nvmlDeviceGetFanSpeed_v2 */ + NVML_OVERRIDE_ENUM(nvmlDeviceGetFanSpeed_v2), + /** nvmlDeviceGetFieldValues */ + NVML_OVERRIDE_ENUM(nvmlDeviceGetFieldValues), + /** nvmlDeviceGetGpuOperationMode */ + NVML_OVERRIDE_ENUM(nvmlDeviceGetGpuOperationMode), + /** nvmlDeviceGetGraphicsRunningProcesses */ + NVML_OVERRIDE_ENUM(nvmlDeviceGetGraphicsRunningProcesses), + /** nvmlDeviceGetGridLicensableFeatures */ + NVML_OVERRIDE_ENUM(nvmlDeviceGetGridLicensableFeatures), + /** nvmlDeviceGetHandleByIndex_v2 */ + NVML_OVERRIDE_ENUM(nvmlDeviceGetHandleByIndex_v2), + /** nvmlDeviceGetHandleByPciBusId */ + NVML_OVERRIDE_ENUM(nvmlDeviceGetHandleByPciBusId), + /** nvmlDeviceGetHandleByPciBusId_v2 */ + NVML_OVERRIDE_ENUM(nvmlDeviceGetHandleByPciBusId_v2), + /** nvmlDeviceGetHandleBySerial */ + NVML_OVERRIDE_ENUM(nvmlDeviceGetHandleBySerial), + /** nvmlDeviceGetHandleByUUID */ + NVML_OVERRIDE_ENUM(nvmlDeviceGetHandleByUUID), + /** nvmlDeviceGetIndex */ + NVML_OVERRIDE_ENUM(nvmlDeviceGetIndex), + /** nvmlDeviceGetInforomConfigurationChecksum */ + NVML_OVERRIDE_ENUM(nvmlDeviceGetInforomConfigurationChecksum), + /** nvmlDeviceGetInforomImageVersion */ + NVML_OVERRIDE_ENUM(nvmlDeviceGetInforomImageVersion), + /** nvmlDeviceGetInforomVersion */ + NVML_OVERRIDE_ENUM(nvmlDeviceGetInforomVersion), + /** nvmlDeviceGetMaxClockInfo */ + NVML_OVERRIDE_ENUM(nvmlDeviceGetMaxClockInfo), + /** nvmlDeviceGetMaxCustomerBoostClock */ + 
NVML_OVERRIDE_ENUM(nvmlDeviceGetMaxCustomerBoostClock), + /** nvmlDeviceGetMaxPcieLinkGeneration */ + NVML_OVERRIDE_ENUM(nvmlDeviceGetMaxPcieLinkGeneration), + /** nvmlDeviceGetMaxPcieLinkWidth */ + NVML_OVERRIDE_ENUM(nvmlDeviceGetMaxPcieLinkWidth), + /** nvmlDeviceGetMemoryErrorCounter */ + NVML_OVERRIDE_ENUM(nvmlDeviceGetMemoryErrorCounter), + /** nvmlDeviceGetMemoryInfo */ + NVML_OVERRIDE_ENUM(nvmlDeviceGetMemoryInfo), + /** nvmlDeviceGetMemoryInfo_v2 */ + NVML_OVERRIDE_ENUM(nvmlDeviceGetMemoryInfo_v2), + /** nvmlDeviceGetMinorNumber */ + NVML_OVERRIDE_ENUM(nvmlDeviceGetMinorNumber), + /** nvmlDeviceGetMPSComputeRunningProcesses */ + NVML_OVERRIDE_ENUM(nvmlDeviceGetMPSComputeRunningProcesses), + /** nvmlDeviceGetMultiGpuBoard */ + NVML_OVERRIDE_ENUM(nvmlDeviceGetMultiGpuBoard), + /** nvmlDeviceGetName */ + NVML_OVERRIDE_ENUM(nvmlDeviceGetName), + /** nvmlDeviceGetNvLinkCapability */ + NVML_OVERRIDE_ENUM(nvmlDeviceGetNvLinkCapability), + /** nvmlDeviceGetNvLinkErrorCounter */ + NVML_OVERRIDE_ENUM(nvmlDeviceGetNvLinkErrorCounter), + /** nvmlDeviceGetNvLinkRemotePciInfo */ + NVML_OVERRIDE_ENUM(nvmlDeviceGetNvLinkRemotePciInfo), + /** nvmlDeviceGetNvLinkRemotePciInfo_v2 */ + NVML_OVERRIDE_ENUM(nvmlDeviceGetNvLinkRemotePciInfo_v2), + /** nvmlDeviceGetNvLinkState */ + NVML_OVERRIDE_ENUM(nvmlDeviceGetNvLinkState), + /** nvmlDeviceGetNvLinkUtilizationControl */ + NVML_OVERRIDE_ENUM(nvmlDeviceGetNvLinkUtilizationControl), + /** nvmlDeviceGetNvLinkUtilizationCounter */ + NVML_OVERRIDE_ENUM(nvmlDeviceGetNvLinkUtilizationCounter), + /** nvmlDeviceGetNvLinkVersion */ + NVML_OVERRIDE_ENUM(nvmlDeviceGetNvLinkVersion), + /** nvmlDeviceGetP2PStatus */ + NVML_OVERRIDE_ENUM(nvmlDeviceGetP2PStatus), + /** nvmlDeviceGetPcieReplayCounter */ + NVML_OVERRIDE_ENUM(nvmlDeviceGetPcieReplayCounter), + /** nvmlDeviceGetPcieThroughput */ + NVML_OVERRIDE_ENUM(nvmlDeviceGetPcieThroughput), + /** nvmlDeviceGetPciInfo_v2 */ + NVML_OVERRIDE_ENUM(nvmlDeviceGetPciInfo_v2), + /** 
nvmlDeviceGetPciInfo_v3 */ + NVML_OVERRIDE_ENUM(nvmlDeviceGetPciInfo_v3), + /** nvmlDeviceGetPerformanceState */ + NVML_OVERRIDE_ENUM(nvmlDeviceGetPerformanceState), + /** nvmlDeviceGetPersistenceMode */ + NVML_OVERRIDE_ENUM(nvmlDeviceGetPersistenceMode), + /** nvmlDeviceGetPowerManagementDefaultLimit */ + NVML_OVERRIDE_ENUM(nvmlDeviceGetPowerManagementDefaultLimit), + /** nvmlDeviceGetPowerManagementLimit */ + NVML_OVERRIDE_ENUM(nvmlDeviceGetPowerManagementLimit), + /** nvmlDeviceGetPowerManagementLimitConstraints */ + NVML_OVERRIDE_ENUM(nvmlDeviceGetPowerManagementLimitConstraints), + /** nvmlDeviceGetPowerManagementMode */ + NVML_OVERRIDE_ENUM(nvmlDeviceGetPowerManagementMode), + /** nvmlDeviceGetPowerState */ + NVML_OVERRIDE_ENUM(nvmlDeviceGetPowerState), + /** nvmlDeviceGetPowerUsage */ + NVML_OVERRIDE_ENUM(nvmlDeviceGetPowerUsage), + /** nvmlDeviceGetRetiredPages */ + NVML_OVERRIDE_ENUM(nvmlDeviceGetRetiredPages), + /** nvmlDeviceGetRetiredPagesPendingStatus */ + NVML_OVERRIDE_ENUM(nvmlDeviceGetRetiredPagesPendingStatus), + /** nvmlDeviceGetSamples */ + NVML_OVERRIDE_ENUM(nvmlDeviceGetSamples), + /** nvmlDeviceGetSerial */ + NVML_OVERRIDE_ENUM(nvmlDeviceGetSerial), + /** nvmlDeviceGetSupportedClocksThrottleReasons */ + NVML_OVERRIDE_ENUM(nvmlDeviceGetSupportedClocksThrottleReasons), + /** nvmlDeviceGetSupportedEventTypes */ + NVML_OVERRIDE_ENUM(nvmlDeviceGetSupportedEventTypes), + /** nvmlDeviceGetSupportedGraphicsClocks */ + NVML_OVERRIDE_ENUM(nvmlDeviceGetSupportedGraphicsClocks), + /** nvmlDeviceGetSupportedMemoryClocks */ + NVML_OVERRIDE_ENUM(nvmlDeviceGetSupportedMemoryClocks), + /** nvmlDeviceGetSupportedVgpus */ + NVML_OVERRIDE_ENUM(nvmlDeviceGetSupportedVgpus), + /** nvmlDeviceGetTemperature */ + NVML_OVERRIDE_ENUM(nvmlDeviceGetTemperature), + /** nvmlDeviceGetTemperatureThreshold */ + NVML_OVERRIDE_ENUM(nvmlDeviceGetTemperatureThreshold), + /** nvmlDeviceGetTopologyCommonAncestor */ + NVML_OVERRIDE_ENUM(nvmlDeviceGetTopologyCommonAncestor), + /** 
nvmlDeviceGetTopologyNearestGpus */ + NVML_OVERRIDE_ENUM(nvmlDeviceGetTopologyNearestGpus), + /** nvmlDeviceGetTotalEccErrors */ + NVML_OVERRIDE_ENUM(nvmlDeviceGetTotalEccErrors), + /** nvmlDeviceGetTotalEnergyConsumption */ + NVML_OVERRIDE_ENUM(nvmlDeviceGetTotalEnergyConsumption), + /** nvmlDeviceGetUtilizationRates */ + NVML_OVERRIDE_ENUM(nvmlDeviceGetUtilizationRates), + /** nvmlDeviceGetUUID */ + NVML_OVERRIDE_ENUM(nvmlDeviceGetUUID), + /** nvmlDeviceGetVbiosVersion */ + NVML_OVERRIDE_ENUM(nvmlDeviceGetVbiosVersion), + /** nvmlDeviceGetVgpuMetadata */ + NVML_OVERRIDE_ENUM(nvmlDeviceGetVgpuMetadata), + /** nvmlDeviceGetVgpuProcessUtilization */ + NVML_OVERRIDE_ENUM(nvmlDeviceGetVgpuProcessUtilization), + /** nvmlDeviceGetVgpuUtilization */ + NVML_OVERRIDE_ENUM(nvmlDeviceGetVgpuUtilization), + /** nvmlDeviceGetViolationStatus */ + NVML_OVERRIDE_ENUM(nvmlDeviceGetViolationStatus), + /** nvmlDeviceGetVirtualizationMode */ + NVML_OVERRIDE_ENUM(nvmlDeviceGetVirtualizationMode), + /** nvmlDeviceModifyDrainState */ + NVML_OVERRIDE_ENUM(nvmlDeviceModifyDrainState), + /** nvmlDeviceOnSameBoard */ + NVML_OVERRIDE_ENUM(nvmlDeviceOnSameBoard), + /** nvmlDeviceQueryDrainState */ + NVML_OVERRIDE_ENUM(nvmlDeviceQueryDrainState), + /** nvmlDeviceRegisterEvents */ + NVML_OVERRIDE_ENUM(nvmlDeviceRegisterEvents), + /** nvmlDeviceRemoveGpu */ + NVML_OVERRIDE_ENUM(nvmlDeviceRemoveGpu), + /** nvmlDeviceRemoveGpu_v2 */ + NVML_OVERRIDE_ENUM(nvmlDeviceRemoveGpu_v2), + /** nvmlDeviceResetApplicationsClocks */ + NVML_OVERRIDE_ENUM(nvmlDeviceResetApplicationsClocks), + /** nvmlDeviceResetNvLinkErrorCounters */ + NVML_OVERRIDE_ENUM(nvmlDeviceResetNvLinkErrorCounters), + /** nvmlDeviceResetNvLinkUtilizationCounter */ + NVML_OVERRIDE_ENUM(nvmlDeviceResetNvLinkUtilizationCounter), + /** nvmlDeviceSetAccountingMode */ + NVML_OVERRIDE_ENUM(nvmlDeviceSetAccountingMode), + /** nvmlDeviceSetAPIRestriction */ + NVML_OVERRIDE_ENUM(nvmlDeviceSetAPIRestriction), + /** nvmlDeviceSetApplicationsClocks 
*/ + NVML_OVERRIDE_ENUM(nvmlDeviceSetApplicationsClocks), + /** nvmlDeviceSetAutoBoostedClocksEnabled */ + NVML_OVERRIDE_ENUM(nvmlDeviceSetAutoBoostedClocksEnabled), + /** nvmlDeviceSetComputeMode */ + NVML_OVERRIDE_ENUM(nvmlDeviceSetComputeMode), + /** nvmlDeviceSetCpuAffinity */ + NVML_OVERRIDE_ENUM(nvmlDeviceSetCpuAffinity), + /** nvmlDeviceSetDefaultAutoBoostedClocksEnabled */ + NVML_OVERRIDE_ENUM(nvmlDeviceSetDefaultAutoBoostedClocksEnabled), + /** nvmlDeviceSetDriverModel */ + NVML_OVERRIDE_ENUM(nvmlDeviceSetDriverModel), + /** nvmlDeviceSetEccMode */ + NVML_OVERRIDE_ENUM(nvmlDeviceSetEccMode), + /** nvmlDeviceSetGpuOperationMode */ + NVML_OVERRIDE_ENUM(nvmlDeviceSetGpuOperationMode), + /** nvmlDeviceSetNvLinkUtilizationControl */ + NVML_OVERRIDE_ENUM(nvmlDeviceSetNvLinkUtilizationControl), + /** nvmlDeviceSetPersistenceMode */ + NVML_OVERRIDE_ENUM(nvmlDeviceSetPersistenceMode), + /** nvmlDeviceSetPowerManagementLimit */ + NVML_OVERRIDE_ENUM(nvmlDeviceSetPowerManagementLimit), + /** nvmlDeviceSetVirtualizationMode */ + NVML_OVERRIDE_ENUM(nvmlDeviceSetVirtualizationMode), + /** nvmlDeviceValidateInforom */ + NVML_OVERRIDE_ENUM(nvmlDeviceValidateInforom), + /** nvmlEventSetCreate */ + NVML_OVERRIDE_ENUM(nvmlEventSetCreate), + /** nvmlEventSetFree */ + NVML_OVERRIDE_ENUM(nvmlEventSetFree), + /** nvmlEventSetWait */ + NVML_OVERRIDE_ENUM(nvmlEventSetWait), + /** nvmlGetVgpuCompatibility */ + NVML_OVERRIDE_ENUM(nvmlGetVgpuCompatibility), + /** nvmlInit_v2 */ + NVML_OVERRIDE_ENUM(nvmlInit_v2), + /** nvmlInitWithFlags */ + NVML_OVERRIDE_ENUM(nvmlInitWithFlags), + /** nvmlInternalGetExportTable */ + NVML_OVERRIDE_ENUM(nvmlInternalGetExportTable), + /** nvmlSystemGetCudaDriverVersion */ + NVML_OVERRIDE_ENUM(nvmlSystemGetCudaDriverVersion), + /** nvmlSystemGetCudaDriverVersion_v2 */ + NVML_OVERRIDE_ENUM(nvmlSystemGetCudaDriverVersion_v2), + /** nvmlSystemGetDriverVersion */ + NVML_OVERRIDE_ENUM(nvmlSystemGetDriverVersion), + /** nvmlSystemGetHicVersion */ + 
NVML_OVERRIDE_ENUM(nvmlSystemGetHicVersion), + /** nvmlSystemGetNVMLVersion */ + NVML_OVERRIDE_ENUM(nvmlSystemGetNVMLVersion), + /** nvmlSystemGetProcessName */ + NVML_OVERRIDE_ENUM(nvmlSystemGetProcessName), + /** nvmlSystemGetTopologyGpuSet */ + NVML_OVERRIDE_ENUM(nvmlSystemGetTopologyGpuSet), + /** nvmlUnitGetCount */ + NVML_OVERRIDE_ENUM(nvmlUnitGetCount), + /** nvmlUnitGetDevices */ + NVML_OVERRIDE_ENUM(nvmlUnitGetDevices), + /** nvmlUnitGetFanSpeedInfo */ + NVML_OVERRIDE_ENUM(nvmlUnitGetFanSpeedInfo), + /** nvmlUnitGetHandleByIndex */ + NVML_OVERRIDE_ENUM(nvmlUnitGetHandleByIndex), + /** nvmlUnitGetLedState */ + NVML_OVERRIDE_ENUM(nvmlUnitGetLedState), + /** nvmlUnitGetPsuInfo */ + NVML_OVERRIDE_ENUM(nvmlUnitGetPsuInfo), + /** nvmlUnitGetTemperature */ + NVML_OVERRIDE_ENUM(nvmlUnitGetTemperature), + /** nvmlUnitGetUnitInfo */ + NVML_OVERRIDE_ENUM(nvmlUnitGetUnitInfo), + /** nvmlUnitSetLedState */ + NVML_OVERRIDE_ENUM(nvmlUnitSetLedState), + /** nvmlVgpuInstanceGetEncoderCapacity */ + NVML_OVERRIDE_ENUM(nvmlVgpuInstanceGetEncoderCapacity), + /** nvmlVgpuInstanceGetEncoderSessions */ + NVML_OVERRIDE_ENUM(nvmlVgpuInstanceGetEncoderSessions), + /** nvmlVgpuInstanceGetEncoderStats */ + NVML_OVERRIDE_ENUM(nvmlVgpuInstanceGetEncoderStats), + /** nvmlVgpuInstanceGetFbUsage */ + NVML_OVERRIDE_ENUM(nvmlVgpuInstanceGetFbUsage), + /** nvmlVgpuInstanceGetFrameRateLimit */ + NVML_OVERRIDE_ENUM(nvmlVgpuInstanceGetFrameRateLimit), + /** nvmlVgpuInstanceGetLicenseStatus */ + NVML_OVERRIDE_ENUM(nvmlVgpuInstanceGetLicenseStatus), + /** nvmlVgpuInstanceGetMetadata */ + NVML_OVERRIDE_ENUM(nvmlVgpuInstanceGetMetadata), + /** nvmlVgpuInstanceGetType */ + NVML_OVERRIDE_ENUM(nvmlVgpuInstanceGetType), + /** nvmlVgpuInstanceGetUUID */ + NVML_OVERRIDE_ENUM(nvmlVgpuInstanceGetUUID), + /** nvmlVgpuInstanceGetVmDriverVersion */ + NVML_OVERRIDE_ENUM(nvmlVgpuInstanceGetVmDriverVersion), + /** nvmlVgpuInstanceGetVmID */ + NVML_OVERRIDE_ENUM(nvmlVgpuInstanceGetVmID), + /** 
nvmlVgpuInstanceSetEncoderCapacity */ + NVML_OVERRIDE_ENUM(nvmlVgpuInstanceSetEncoderCapacity), + /** nvmlVgpuTypeGetClass */ + NVML_OVERRIDE_ENUM(nvmlVgpuTypeGetClass), + /** nvmlVgpuTypeGetDeviceID */ + NVML_OVERRIDE_ENUM(nvmlVgpuTypeGetDeviceID), + /** nvmlVgpuTypeGetFramebufferSize */ + NVML_OVERRIDE_ENUM(nvmlVgpuTypeGetFramebufferSize), + /** nvmlVgpuTypeGetFrameRateLimit */ + NVML_OVERRIDE_ENUM(nvmlVgpuTypeGetFrameRateLimit), + /** nvmlVgpuTypeGetLicense */ + NVML_OVERRIDE_ENUM(nvmlVgpuTypeGetLicense), + /** nvmlVgpuTypeGetMaxInstances */ + NVML_OVERRIDE_ENUM(nvmlVgpuTypeGetMaxInstances), + /** nvmlVgpuTypeGetName */ + NVML_OVERRIDE_ENUM(nvmlVgpuTypeGetName), + /** nvmlVgpuTypeGetNumDisplayHeads */ + NVML_OVERRIDE_ENUM(nvmlVgpuTypeGetNumDisplayHeads), + /** nvmlVgpuTypeGetResolution */ + NVML_OVERRIDE_ENUM(nvmlVgpuTypeGetResolution), + /** nvmlDeviceGetFBCSessions */ + NVML_OVERRIDE_ENUM(nvmlDeviceGetFBCSessions), + /** nvmlDeviceGetFBCStats */ + NVML_OVERRIDE_ENUM(nvmlDeviceGetFBCStats), + /** nvmlDeviceGetGridLicensableFeatures_v2 */ + NVML_OVERRIDE_ENUM(nvmlDeviceGetGridLicensableFeatures_v2), + /** nvmlDeviceGetRetiredPages_v2 */ + NVML_OVERRIDE_ENUM(nvmlDeviceGetRetiredPages_v2), + /** nvmlDeviceResetGpuLockedClocks */ + NVML_OVERRIDE_ENUM(nvmlDeviceResetGpuLockedClocks), + /** nvmlDeviceSetGpuLockedClocks */ + NVML_OVERRIDE_ENUM(nvmlDeviceSetGpuLockedClocks), + /** nvmlVgpuInstanceGetAccountingMode */ + NVML_OVERRIDE_ENUM(nvmlVgpuInstanceGetAccountingMode), + /** nvmlVgpuInstanceGetAccountingPids */ + NVML_OVERRIDE_ENUM(nvmlVgpuInstanceGetAccountingPids), + /** nvmlVgpuInstanceGetAccountingStats */ + NVML_OVERRIDE_ENUM(nvmlVgpuInstanceGetAccountingStats), + /** nvmlVgpuInstanceGetFBCSessions */ + NVML_OVERRIDE_ENUM(nvmlVgpuInstanceGetFBCSessions), + /** nvmlVgpuInstanceGetFBCStats */ + NVML_OVERRIDE_ENUM(nvmlVgpuInstanceGetFBCStats), + /** nvmlVgpuTypeGetMaxInstancesPerVm */ + NVML_OVERRIDE_ENUM(nvmlVgpuTypeGetMaxInstancesPerVm), + /** nvmlGetVgpuVersion */ + 
NVML_OVERRIDE_ENUM(nvmlGetVgpuVersion), + /** nvmlSetVgpuVersion */ + NVML_OVERRIDE_ENUM(nvmlSetVgpuVersion), + /** nvmlDeviceGetGridLicensableFeatures_v3 */ + NVML_OVERRIDE_ENUM(nvmlDeviceGetGridLicensableFeatures_v3), + /** nvmlDeviceGetHostVgpuMode */ + NVML_OVERRIDE_ENUM(nvmlDeviceGetHostVgpuMode), + /** nvmlDeviceGetPgpuMetadataString */ + NVML_OVERRIDE_ENUM(nvmlDeviceGetPgpuMetadataString), + /** nvmlVgpuInstanceGetEccMode */ + NVML_OVERRIDE_ENUM(nvmlVgpuInstanceGetEccMode), + /** nvmlComputeInstanceDestroy */ + NVML_OVERRIDE_ENUM(nvmlComputeInstanceDestroy), + /** nvmlComputeInstanceGetInfo */ + NVML_OVERRIDE_ENUM(nvmlComputeInstanceGetInfo), + /** nvmlDeviceCreateGpuInstance */ + NVML_OVERRIDE_ENUM(nvmlDeviceCreateGpuInstance), + /** nvmlDeviceGetArchitecture */ + NVML_OVERRIDE_ENUM(nvmlDeviceGetArchitecture), + /** nvmlDeviceGetAttributes */ + NVML_OVERRIDE_ENUM(nvmlDeviceGetAttributes), + /** nvmlDeviceGetAttributes_v2 */ + NVML_OVERRIDE_ENUM(nvmlDeviceGetAttributes_v2), + /** nvmlDeviceGetComputeInstanceId */ + NVML_OVERRIDE_ENUM(nvmlDeviceGetComputeInstanceId), + /** nvmlDeviceGetCpuAffinityWithinScope */ + NVML_OVERRIDE_ENUM(nvmlDeviceGetCpuAffinityWithinScope), + /** nvmlDeviceGetDeviceHandleFromMigDeviceHandle */ + NVML_OVERRIDE_ENUM(nvmlDeviceGetDeviceHandleFromMigDeviceHandle), + /** nvmlDeviceGetGpuInstanceById */ + NVML_OVERRIDE_ENUM(nvmlDeviceGetGpuInstanceById), + /** nvmlDeviceGetGpuInstanceId */ + NVML_OVERRIDE_ENUM(nvmlDeviceGetGpuInstanceId), + /** nvmlDeviceGetGpuInstancePossiblePlacements */ + NVML_OVERRIDE_ENUM(nvmlDeviceGetGpuInstancePossiblePlacements), + /** nvmlDeviceGetGpuInstanceProfileInfo */ + NVML_OVERRIDE_ENUM(nvmlDeviceGetGpuInstanceProfileInfo), + /** nvmlDeviceGetGpuInstanceRemainingCapacity */ + NVML_OVERRIDE_ENUM(nvmlDeviceGetGpuInstanceRemainingCapacity), + /** nvmlDeviceGetGpuInstances */ + NVML_OVERRIDE_ENUM(nvmlDeviceGetGpuInstances), + /** nvmlDeviceGetMaxMigDeviceCount */ + 
NVML_OVERRIDE_ENUM(nvmlDeviceGetMaxMigDeviceCount), + /** nvmlDeviceGetMemoryAffinity */ + NVML_OVERRIDE_ENUM(nvmlDeviceGetMemoryAffinity), + /** nvmlDeviceGetMigDeviceHandleByIndex */ + NVML_OVERRIDE_ENUM(nvmlDeviceGetMigDeviceHandleByIndex), + /** nvmlDeviceGetMigMode */ + NVML_OVERRIDE_ENUM(nvmlDeviceGetMigMode), + /** nvmlDeviceGetRemappedRows */ + NVML_OVERRIDE_ENUM(nvmlDeviceGetRemappedRows), + /** nvmlDeviceGetRowRemapperHistogram */ + NVML_OVERRIDE_ENUM(nvmlDeviceGetRowRemapperHistogram), + /** nvmlDeviceIsMigDeviceHandle */ + NVML_OVERRIDE_ENUM(nvmlDeviceIsMigDeviceHandle), + /** nvmlDeviceSetMigMode */ + NVML_OVERRIDE_ENUM(nvmlDeviceSetMigMode), + /** nvmlEventSetWait_v2 */ + NVML_OVERRIDE_ENUM(nvmlEventSetWait_v2), + /** nvmlGpuInstanceCreateComputeInstance */ + NVML_OVERRIDE_ENUM(nvmlGpuInstanceCreateComputeInstance), + /** nvmlGpuInstanceDestroy */ + NVML_OVERRIDE_ENUM(nvmlGpuInstanceDestroy), + /** nvmlGpuInstanceGetComputeInstanceById */ + NVML_OVERRIDE_ENUM(nvmlGpuInstanceGetComputeInstanceById), + /** nvmlGpuInstanceGetComputeInstanceProfileInfo */ + NVML_OVERRIDE_ENUM(nvmlGpuInstanceGetComputeInstanceProfileInfo), + /** nvmlGpuInstanceGetComputeInstanceRemainingCapacity */ + NVML_OVERRIDE_ENUM(nvmlGpuInstanceGetComputeInstanceRemainingCapacity), + /** nvmlGpuInstanceGetComputeInstances */ + NVML_OVERRIDE_ENUM(nvmlGpuInstanceGetComputeInstances), + /** nvmlGpuInstanceGetInfo */ + NVML_OVERRIDE_ENUM(nvmlGpuInstanceGetInfo), + /** nvmlVgpuInstanceClearAccountingPids */ + NVML_OVERRIDE_ENUM(nvmlVgpuInstanceClearAccountingPids), + /** nvmlVgpuInstanceGetMdevUUID */ + NVML_OVERRIDE_ENUM(nvmlVgpuInstanceGetMdevUUID), + /** nvmlComputeInstanceGetInfo_v2 */ + NVML_OVERRIDE_ENUM(nvmlComputeInstanceGetInfo_v2), + /** nvmlDeviceGetComputeRunningProcesses_v2 */ + NVML_OVERRIDE_ENUM(nvmlDeviceGetComputeRunningProcesses_v2), + /** nvmlDeviceGetGraphicsRunningProcesses_v2 */ + NVML_OVERRIDE_ENUM(nvmlDeviceGetGraphicsRunningProcesses_v2), + /** 
nvmlDeviceSetTemperatureThreshold */ + NVML_OVERRIDE_ENUM(nvmlDeviceSetTemperatureThreshold), + /** nvmlRetry_NvRmControl */ + // NVML_OVERRIDE_ENUM(nvmlRetry_NvRmControl), + /** nvmlVgpuInstanceGetGpuInstanceId */ + NVML_OVERRIDE_ENUM(nvmlVgpuInstanceGetGpuInstanceId), + /** nvmlVgpuTypeGetGpuInstanceProfileId */ + NVML_OVERRIDE_ENUM(nvmlVgpuTypeGetGpuInstanceProfileId), + NVML_ENTRY_END } NVML_OVERRIDE_ENUM_t; #endif \ No newline at end of file diff --git a/src/include/libvgpu.h b/src/include/libvgpu.h old mode 100755 new mode 100644 index e596497e..2ab8dbf1 --- a/src/include/libvgpu.h +++ b/src/include/libvgpu.h @@ -1,55 +1,57 @@ #ifndef __LIBVGPU_H__ #define __LIBVGPU_H__ -#include #include -#include "include/nvml_prefix.h" +#include #include #include -#include -#include #include +#include +#include #include "include/log_utils.h" +#include "include/nvml_prefix.h" #include "static_config.h" -//#include "memory_limit.h" +// #include "memory_limit.h" -#define ENSURE_INITIALIZED() ensure_initialized(); \ +#define ENSURE_INITIALIZED() ensure_initialized(); extern void load_cuda_libraries(); #if defined(__GNUC__) && defined(__GLIBC__) -#define FUNC_ATTR_VISIBLE __attribute__((visibility("default"))) +#define FUNC_ATTR_VISIBLE __attribute__((visibility("default"))) #define FUNC_PTR_TYPE(fname) __func_ptr_type_##fname #define FUNC_PTR_NAME(fname) __func_ptr_origin_##fname -#define FUNC_PTR_ALIAS_ATTR(overrided) \ - __attribute__((alias(#overrided), used)) \ - FUNC_ATTR_VISIBLE; \ +#define FUNC_PTR_ALIAS_ATTR(overrided) __attribute__((alias(#overrided), used)) FUNC_ATTR_VISIBLE; #define FUNC_OVERRIDE_NAME(fname) overrided_##fname #if defined(DLSYM_HOOK_DEBUG) -#define DLSYM_HOOK_FUNC(f) \ - if (0 == strcmp(symbol, #f)) { \ - LOG_DEBUG("Detect dlsym for %s\n", #f); \ - return (void*) f; } \ +#define DLSYM_HOOK_FUNC(f) \ + if (0 == strcmp(symbol, #f)) { \ + LOG_DEBUG("Detect dlsym for %s\n", #f); \ + return (void*)f; \ + } -#define DLSYM_HOOK_FUNC_REPLACE(f) \ - if (0 == 
strcmp(symbol, hacked_#f)) { \ - return (void*) f; } \ +#define DLSYM_HOOK_FUNC_REPLACE(f) \ + if (0 == strcmp(symbol, hacked_ #f)) { \ + return (void*)f; \ + } -#else +#else -#define DLSYM_HOOK_FUNC(f) \ - if (0 == strcmp(symbol, #f)) { \ - return (void*) f; } \ +#define DLSYM_HOOK_FUNC(f) \ + if (0 == strcmp(symbol, #f)) { \ + return (void*)f; \ + } -#define DLSYM_HOOK_FUNC_REPLACE(f) \ - if (0 == strcmp(symbol, #f)) { \ - return (void*) hacked_##f; } \ +#define DLSYM_HOOK_FUNC_REPLACE(f) \ + if (0 == strcmp(symbol, #f)) { \ + return (void*)hacked_##f; \ + } -#endif +#endif void* __dlsym_hook_section(void* handle, const char* symbol); void* __dlsym_hook_section_nvml(void* handle, const char* symbol); @@ -62,8 +64,7 @@ typedef void* (*fp_dlsym)(void*, const char*); #endif /* Determine the return address. */ -#define RETURN_ADDRESS(nr) \ - __builtin_extract_return_addr (__builtin_return_address (nr)) +#define RETURN_ADDRESS(nr) __builtin_extract_return_addr(__builtin_return_address(nr)) nvmlReturn_t set_task_pid(); int map_cuda_visible_devices(); diff --git a/src/include/log_utils.h b/src/include/log_utils.h old mode 100755 new mode 100644 index 630ee5a9..7dfa512f --- a/src/include/log_utils.h +++ b/src/include/log_utils.h @@ -1,12 +1,12 @@ #ifndef __LOG_UTILS_H__ #define __LOG_UTILS_H__ -#include #include -#include +#include #include -#include #include +#include +#include extern FILE *fp1; @@ -23,92 +23,114 @@ extern int g_log_level; void log_utils_init(void); #ifdef FILEDEBUG -#define LOG_DEBUG(msg, ...) { \ - if (g_log_level >= 4) {\ - if (fp1==NULL) fp1 = fopen ("/tmp/vgpulog", "a"); \ - fprintf(fp1, "[HAMI-core Debug(%d:%ld:%s:%d)]: "msg"\n",getpid(),pthread_self(),basename(__FILE__),__LINE__,##__VA_ARGS__); \ - }\ +#define LOG_DEBUG(msg, ...) 
\ + { \ + if (g_log_level >= 4) { \ + if (fp1 == NULL) fp1 = fopen("/tmp/vgpulog", "a"); \ + fprintf(fp1, "[HAMI-core Debug(%d:%ld:%s:%d)]: " msg "\n", getpid(), pthread_self(), \ + basename(__FILE__), __LINE__, ##__VA_ARGS__); \ + } \ + } +#define LOG_INFO(msg, ...) \ + { \ + if (g_log_level >= 3) { \ + if (fp1 == NULL) fp1 = fopen("/tmp/vgpulog", "a"); \ + fprintf(fp1, "[HAMI-core Info(%d:%ld:%s:%d)]: " msg "\n", getpid(), pthread_self(), \ + basename(__FILE__), __LINE__, ##__VA_ARGS__); \ + } \ } -#define LOG_INFO(msg, ...) { \ - if (g_log_level >= 3) {\ - if (fp1==NULL) fp1 = fopen ("/tmp/vgpulog", "a"); \ - fprintf(fp1, "[HAMI-core Info(%d:%ld:%s:%d)]: "msg"\n", getpid(),pthread_self(),basename(__FILE__),__LINE__,##__VA_ARGS__); \ - }\ +#define LOG_WARN(msg, ...) \ + { \ + if (g_log_level >= 2) { \ + if (fp1 == NULL) fp1 = fopen("/tmp/vgpulog", "a"); \ + fprintf(fp1, "[HAMI-core Warn(%d:%ld:%s:%d)]: " msg "\n", getpid(), pthread_self(), \ + basename(__FILE__), __LINE__, ##__VA_ARGS__); \ + } \ } -#define LOG_WARN(msg, ...) { \ - if (g_log_level >= 2) {\ - if (fp1==NULL) fp1 = fopen ("/tmp/vgpulog", "a"); \ - fprintf(fp1, "[HAMI-core Warn(%d:%ld:%s:%d)]: "msg"\n", getpid(),pthread_self(),basename(__FILE__),__LINE__,##__VA_ARGS__); \ - }\ +#define LOG_MSG(msg, ...) \ + { \ + if (g_log_level >= 2) { \ + if (fp1 == NULL) fp1 = fopen("/tmp/vgpulog", "a"); \ + fprintf(fp1, "[HAMI-core Msg(%d:%ld:%s:%d)]: " msg "\n", getpid(), pthread_self(), \ + basename(__FILE__), __LINE__, ##__VA_ARGS__); \ + } \ } -#define LOG_MSG(msg, ...) { \ - if (g_log_level >= 2) {\ - if (fp1==NULL) fp1 = fopen ("/tmp/vgpulog", "a"); \ - fprintf(fp1, "[HAMI-core Msg(%d:%ld:%s:%d)]: "msg"\n", getpid(),pthread_self(),basename(__FILE__),__LINE__,##__VA_ARGS__); \ - }\ +#define LOG_ERROR(msg, ...) 
\ + { \ + if (fp1 == NULL) fp1 = fopen("/tmp/vgpulog", "a"); \ + fprintf(fp1, "[HAMI-core ERROR (pid:%d thread=%ld %s:%d)]: " msg "\n", getpid(), \ + pthread_self(), basename(__FILE__), __LINE__, ##__VA_ARGS__); \ } -#define LOG_ERROR(msg, ...) { \ - if (fp1==NULL) fp1 = fopen ("/tmp/vgpulog", "a"); \ - fprintf(fp1, "[HAMI-core ERROR (pid:%d thread=%ld %s:%d)]: "msg"\n", getpid(), pthread_self(), basename(__FILE__),__LINE__, ##__VA_ARGS__); \ -} #else -#define LOG_DEBUG(msg, ...) { \ - if (g_log_level >= 4) {\ - fprintf(stderr, "[HAMI-core Debug(%d:%ld:%s:%d)]: "msg"\n",getpid(),pthread_self(),basename(__FILE__),__LINE__,##__VA_ARGS__); \ - }\ +#define LOG_DEBUG(msg, ...) \ + { \ + if (g_log_level >= 4) { \ + fprintf(stderr, "[HAMI-core Debug(%d:%ld:%s:%d)]: " msg "\n", getpid(), \ + pthread_self(), basename(__FILE__), __LINE__, ##__VA_ARGS__); \ + } \ } -#define LOG_INFO(msg, ...) { \ - if (g_log_level >= 3) {\ - fprintf(stderr, "[HAMI-core Info(%d:%ld:%s:%d)]: "msg"\n", getpid(),pthread_self(),basename(__FILE__),__LINE__,##__VA_ARGS__); \ - }\ +#define LOG_INFO(msg, ...) \ + { \ + if (g_log_level >= 3) { \ + fprintf(stderr, "[HAMI-core Info(%d:%ld:%s:%d)]: " msg "\n", getpid(), pthread_self(), \ + basename(__FILE__), __LINE__, ##__VA_ARGS__); \ + } \ } -#define LOG_WARN(msg, ...) { \ - if (g_log_level >= 2) {\ - fprintf(stderr, "[HAMI-core Warn(%d:%ld:%s:%d)]: "msg"\n", getpid(),pthread_self(),basename(__FILE__),__LINE__,##__VA_ARGS__); \ - }\ +#define LOG_WARN(msg, ...) \ + { \ + if (g_log_level >= 2) { \ + fprintf(stderr, "[HAMI-core Warn(%d:%ld:%s:%d)]: " msg "\n", getpid(), pthread_self(), \ + basename(__FILE__), __LINE__, ##__VA_ARGS__); \ + } \ } -#define LOG_MSG(msg, ...) { \ - if (g_log_level >= 2) {\ - fprintf(stderr, "[HAMI-core Msg(%d:%ld:%s:%d)]: "msg"\n", getpid(),pthread_self(),basename(__FILE__),__LINE__,##__VA_ARGS__); \ - }\ +#define LOG_MSG(msg, ...) 
\ + { \ + if (g_log_level >= 2) { \ + fprintf(stderr, "[HAMI-core Msg(%d:%ld:%s:%d)]: " msg "\n", getpid(), pthread_self(), \ + basename(__FILE__), __LINE__, ##__VA_ARGS__); \ + } \ + } +#define LOG_ERROR(msg, ...) \ + { \ + fprintf(stderr, "[HAMI-core ERROR (pid:%d thread=%ld %s:%d)]: " msg "\n", getpid(), \ + pthread_self(), basename(__FILE__), __LINE__, ##__VA_ARGS__); \ } -#define LOG_ERROR(msg, ...) { \ - fprintf(stderr, "[HAMI-core ERROR (pid:%d thread=%ld %s:%d)]: "msg"\n", getpid(), pthread_self(), basename(__FILE__),__LINE__, ##__VA_ARGS__); \ -} #endif -#define CHECK_DRV_API(f) { \ - CUresult status = (f); \ - if (status != CUDA_SUCCESS) { \ - LOG_WARN("Driver error at %d: %d", \ - __LINE__, status); \ - return status; \ - } } \ - -#define CHECK_NVML_API(f) { \ - nvmlReturn_t status = (f); \ - if (status != NVML_SUCCESS) { \ - LOG_WARN("NVML error at line %d: %d", \ - __LINE__, status); \ - return status; \ - } } \ +#define CHECK_DRV_API(f) \ + { \ + CUresult status = (f); \ + if (status != CUDA_SUCCESS) { \ + LOG_WARN("Driver error at %d: %d", __LINE__, status); \ + return status; \ + } \ + } -#define CHECK_CU_RESULT(res) { \ - if (res != CUDA_SUCCESS) { \ - LOG_WARN("Driver error at %d: %d", \ - __LINE__, res); \ - return res; \ - } } \ +#define CHECK_NVML_API(f) \ + { \ + nvmlReturn_t status = (f); \ + if (status != NVML_SUCCESS) { \ + LOG_WARN("NVML error at line %d: %d", __LINE__, status); \ + return status; \ + } \ + } -#define CHECK_SUCCESS(res) { \ - if (res != CUDA_SUCCESS) \ - return res; \ -} +#define CHECK_CU_RESULT(res) \ + { \ + if (res != CUDA_SUCCESS) { \ + LOG_WARN("Driver error at %d: %d", __LINE__, res); \ + return res; \ + } \ + } -#define IF_CHECK_OOM(res) { \ - if (res < 0) \ - return CUDA_ERROR_OUT_OF_MEMORY; \ -} +#define CHECK_SUCCESS(res) \ + { \ + if (res != CUDA_SUCCESS) return res; \ + } +#define IF_CHECK_OOM(res) \ + { \ + if (res < 0) return CUDA_ERROR_OUT_OF_MEMORY; \ + } #endif diff --git a/src/include/memory_limit.h 
b/src/include/memory_limit.h old mode 100755 new mode 100644 index f9510d28..cafe3ae1 --- a/src/include/memory_limit.h +++ b/src/include/memory_limit.h @@ -1,72 +1,80 @@ #ifndef __MEMORY_LIMIT_H__ #define __MEMORY_LIMIT_H__ +#include #include -#include #include -#include +#include #include "static_config.h" - #define CUDA_DEVICE_MEMORY_LIMIT "CUDA_DEVICE_MEMORY_LIMIT" #define CUDA_DEVICE_MEMORY_LIMIT_KEY_LENGTH 32 #define CUDA_DEVICE_SM_LIMIT "CUDA_DEVICE_SM_LIMIT" #define CUDA_DEVICE_SM_LIMIT_KEY_LENGTH 32 -#define ENSURE_INITIALIZED() ensure_initialized(); \ +#define ENSURE_INITIALIZED() ensure_initialized(); extern int wait_status_self(int status); -#define ENSURE_RUNNING() { \ - /* LOG_DEBUG("Memory op at %d",__LINE__); */ \ - ensure_initialized(); \ - while(!wait_status_self(1)) { LOG_DEBUG("E1"); sleep(1); } \ -} \ +#define ENSURE_RUNNING() \ + { \ + /* LOG_DEBUG("Memory op at %d",__LINE__); */ \ + ensure_initialized(); \ + while (!wait_status_self(1)) { \ + LOG_DEBUG("E1"); \ + sleep(1); \ + } \ + } -#define INC_MEMORY_OR_RETURN_ERROR(bytes) { \ - CUdevice dev; \ - CHECK_DRV_API(cuCtxGetDevice(&dev)); \ - if (inc_current_device_memory_usage(dev, bytes) != \ - CUDA_DEVICE_MEMORY_UPDATE_SUCCESS) { \ - return CUDA_ERROR_OUT_OF_MEMORY; \ - } } \ +#define INC_MEMORY_OR_RETURN_ERROR(bytes) \ + { \ + CUdevice dev; \ + CHECK_DRV_API(cuCtxGetDevice(&dev)); \ + if (inc_current_device_memory_usage(dev, bytes) != CUDA_DEVICE_MEMORY_UPDATE_SUCCESS) { \ + return CUDA_ERROR_OUT_OF_MEMORY; \ + } \ + } -#define DECL_MEMORY_ON_ERROR(res, bytes) { \ - CUdevice dev; \ - CHECK_DRV_API(cuCtxGetDevice(&dev)); \ - if (res != CUDA_SUCCESS) { \ - decl_current_device_memory_usage(dev, bytes); \ - } } \ +#define DECL_MEMORY_ON_ERROR(res, bytes) \ + { \ + CUdevice dev; \ + CHECK_DRV_API(cuCtxGetDevice(&dev)); \ + if (res != CUDA_SUCCESS) { \ + decl_current_device_memory_usage(dev, bytes); \ + } \ + } -#define DECL_MEMORY_ON_SUCCESS(res, bytes) { \ - CUdevice dev; \ - 
CHECK_DRV_API(cuCtxGetDevice(&dev)); \ - if (res == CUDA_SUCCESS) { \ - decl_current_device_memory_usage(dev, bytes); \ - } } \ +#define DECL_MEMORY_ON_SUCCESS(res, bytes) \ + { \ + CUdevice dev; \ + CHECK_DRV_API(cuCtxGetDevice(&dev)); \ + if (res == CUDA_SUCCESS) { \ + decl_current_device_memory_usage(dev, bytes); \ + } \ + } -#define INC_MEMORY_OR_RETURN_ERROR_WITH_DEV(d, bytes) { \ - if (inc_current_device_memory_usage(d, bytes) != \ - CUDA_DEVICE_MEMORY_UPDATE_SUCCESS) { \ - return CUDA_ERROR_OUT_OF_MEMORY; \ - } \ +#define INC_MEMORY_OR_RETURN_ERROR_WITH_DEV(d, bytes) \ + { \ + if (inc_current_device_memory_usage(d, bytes) != CUDA_DEVICE_MEMORY_UPDATE_SUCCESS) { \ + return CUDA_ERROR_OUT_OF_MEMORY; \ + } -#define DECL_MEMORY_ON_ERROR_WITH_DEV(dev, res, bytes) \ - if (res != CUDA_SUCCESS) { \ - decl_current_device_memory_usage(dev, bytes); \ - } \ +#define DECL_MEMORY_ON_ERROR_WITH_DEV(dev, res, bytes) \ + if (res != CUDA_SUCCESS) { \ + decl_current_device_memory_usage(dev, bytes); \ + } -#define DECL_MEMORY_ON_SUCCESS_WITH_DEV(dev, res, bytes) \ - if (res == CUDA_SUCCESS) { \ - decl_current_device_memory_usage(dev, bytes); \ - } \ -/* -#define OOM_CHECK() \ - CUdevice dev; \ - CHECK_DRV_API(cuCtxGetDevice(&dev)); \ - oom_check(dev); -*/ +#define DECL_MEMORY_ON_SUCCESS_WITH_DEV(dev, res, bytes) \ + if (res == CUDA_SUCCESS) { \ + decl_current_device_memory_usage(dev, bytes); \ + } \ + /* \ + #define OOM_CHECK() \ + CUdevice dev; \ + CHECK_DRV_API(cuCtxGetDevice(&dev)); \ + oom_check(dev); \ + */ #include "multiprocess/multiprocess_memory_limit.h" diff --git a/src/include/multi_func_hook.h b/src/include/multi_func_hook.h index af32c189..d4a30929 100644 --- a/src/include/multi_func_hook.h +++ b/src/include/multi_func_hook.h @@ -6,12 +6,11 @@ #include - typedef struct { - const char *func_name; // base func name(like "cuGraphAddDependencies") - int min_ver; // adjust to low version - int max_ver; // adjust to high version - const char *real_name; // the real name( 
"cuGraphAddDependencies_v2") + const char *func_name; // base func name(like "cuGraphAddDependencies") + int min_ver; // adjust to low version + int max_ver; // adjust to high version + const char *real_name; // the real name( "cuGraphAddDependencies_v2") } CudaFuncMapEntry; // if multi func, we can add here @@ -25,8 +24,6 @@ static CudaFuncMapEntry g_func_map[] = { {"cuGraphKernelNodeGetParams", 12000, 99999, "cuGraphKernelNodeGetParams_v2"}, {"cuGraphKernelNodeSetParams", 10000, 11999, "cuGraphKernelNodeSetParams"}, - {"cuGraphKernelNodeSetParams", 12000, 99999, "cuGraphKernelNodeSetParams_v2"} -}; - + {"cuGraphKernelNodeSetParams", 12000, 99999, "cuGraphKernelNodeSetParams_v2"}}; #endif diff --git a/src/include/nvml-subset.h b/src/include/nvml-subset.h index cb0908e5..a259e9e5 100644 --- a/src/include/nvml-subset.h +++ b/src/include/nvml-subset.h @@ -74,88 +74,79 @@ extern "C" { typedef struct nvmlDevice_st *nvmlDevice_t; typedef struct nvmlProcessInfo_st { - unsigned int pid; //!< Process ID - unsigned long long usedGpuMemory; //!< Amount of used GPU memory in bytes. - //! Under WDDM, \ref NVML_VALUE_NOT_AVAILABLE is always reported - //! because Windows KMD manages all the memory and not the NVIDIA driver + unsigned int pid; //!< Process ID + unsigned long long usedGpuMemory; //!< Amount of used GPU memory in bytes. + //! Under WDDM, \ref NVML_VALUE_NOT_AVAILABLE is always reported + //! because Windows KMD manages all the memory and not the NVIDIA driver } nvmlProcessInfo_t; /** * Return values for NVML API calls. 
*/ typedef enum nvmlReturn_enum { - NVML_SUCCESS = 0, //!< The operation was successful - NVML_ERROR_UNINITIALIZED = - 1, //!< NVML was not first initialized with nvmlInit() - NVML_ERROR_INVALID_ARGUMENT = 2, //!< A supplied argument is invalid - NVML_ERROR_NOT_SUPPORTED = - 3, //!< The requested operation is not available on target device - NVML_ERROR_NO_PERMISSION = - 4, //!< The current user does not have permission for operation - NVML_ERROR_ALREADY_INITIALIZED = 5, //!< Deprecated: Multiple initializations - //! are now allowed through ref counting - NVML_ERROR_NOT_FOUND = 6, //!< A query to find an object was unsuccessful - NVML_ERROR_INSUFFICIENT_SIZE = 7, //!< An input argument is not large enough - NVML_ERROR_INSUFFICIENT_POWER = - 8, //!< A device's external power cables are not properly attached - NVML_ERROR_DRIVER_NOT_LOADED = 9, //!< NVIDIA driver is not loaded - NVML_ERROR_TIMEOUT = 10, //!< User provided timeout passed - NVML_ERROR_IRQ_ISSUE = - 11, //!< NVIDIA Kernel detected an interrupt issue with a GPU - NVML_ERROR_LIBRARY_NOT_FOUND = - 12, //!< NVML Shared Library couldn't be found or loaded - NVML_ERROR_FUNCTION_NOT_FOUND = - 13, //!< Local version of NVML doesn't implement this function - NVML_ERROR_CORRUPTED_INFOROM = 14, //!< infoROM is corrupted - NVML_ERROR_GPU_IS_LOST = 15, //!< The GPU has fallen off the bus or has - //! otherwise become inaccessible - NVML_ERROR_RESET_REQUIRED = - 16, //!< The GPU requires a reset before it can be used again - NVML_ERROR_OPERATING_SYSTEM = 17, //!< The GPU control device has been - //! blocked by the operating system/cgroups - NVML_ERROR_LIB_RM_VERSION_MISMATCH = - 18, //!< RM detects a driver/library version mismatch - NVML_ERROR_IN_USE = 19, //!< An operation cannot be performed because the GPU - //! 
is currently in use - NVML_ERROR_NO_DATA = 20, //!< No data - NVML_ERROR_UNKNOWN = 999 //!< An internal driver error occurred + NVML_SUCCESS = 0, //!< The operation was successful + NVML_ERROR_UNINITIALIZED = 1, //!< NVML was not first initialized with nvmlInit() + NVML_ERROR_INVALID_ARGUMENT = 2, //!< A supplied argument is invalid + NVML_ERROR_NOT_SUPPORTED = 3, //!< The requested operation is not available on target device + NVML_ERROR_NO_PERMISSION = 4, //!< The current user does not have permission for operation + NVML_ERROR_ALREADY_INITIALIZED = 5, //!< Deprecated: Multiple initializations + //! are now allowed through ref counting + NVML_ERROR_NOT_FOUND = 6, //!< A query to find an object was unsuccessful + NVML_ERROR_INSUFFICIENT_SIZE = 7, //!< An input argument is not large enough + NVML_ERROR_INSUFFICIENT_POWER = + 8, //!< A device's external power cables are not properly attached + NVML_ERROR_DRIVER_NOT_LOADED = 9, //!< NVIDIA driver is not loaded + NVML_ERROR_TIMEOUT = 10, //!< User provided timeout passed + NVML_ERROR_IRQ_ISSUE = 11, //!< NVIDIA Kernel detected an interrupt issue with a GPU + NVML_ERROR_LIBRARY_NOT_FOUND = 12, //!< NVML Shared Library couldn't be found or loaded + NVML_ERROR_FUNCTION_NOT_FOUND = 13, //!< Local version of NVML doesn't implement this function + NVML_ERROR_CORRUPTED_INFOROM = 14, //!< infoROM is corrupted + NVML_ERROR_GPU_IS_LOST = 15, //!< The GPU has fallen off the bus or has + //! otherwise become inaccessible + NVML_ERROR_RESET_REQUIRED = 16, //!< The GPU requires a reset before it can be used again + NVML_ERROR_OPERATING_SYSTEM = 17, //!< The GPU control device has been + //! blocked by the operating system/cgroups + NVML_ERROR_LIB_RM_VERSION_MISMATCH = 18, //!< RM detects a driver/library version mismatch + NVML_ERROR_IN_USE = 19, //!< An operation cannot be performed because the GPU + //! 
is currently in use + NVML_ERROR_NO_DATA = 20, //!< No data + NVML_ERROR_UNKNOWN = 999 //!< An internal driver error occurred } nvmlReturn_t; /** * PCI information about a GPU device. */ typedef struct nvmlPciInfo_st { - char busId[NVML_DEVICE_PCI_BUS_ID_BUFFER_SIZE]; //!< The tuple - //!< domain:bus:device.function - //!< PCI identifier (& - //!< NULL terminator) - unsigned int domain; //!< The PCI domain on which the device's bus resides, 0 - //!< to 0xffff - unsigned int bus; //!< The bus on which the device resides, 0 to 0xff - unsigned int device; //!< The device's id on the bus, 0 to 31 - unsigned int - pciDeviceId; //!< The combined 16-bit device id and 16-bit vendor id - - // Added in NVML 2.285 API - unsigned int pciSubSystemId; //!< The 32-bit Sub System Device ID - - // NVIDIA reserved for internal use only - unsigned int reserved0; - unsigned int reserved1; - unsigned int reserved2; - unsigned int reserved3; + char busId[NVML_DEVICE_PCI_BUS_ID_BUFFER_SIZE]; //!< The tuple + //!< domain:bus:device.function + //!< PCI identifier (& + //!< NULL terminator) + unsigned int domain; //!< The PCI domain on which the device's bus resides, 0 + //!< to 0xffff + unsigned int bus; //!< The bus on which the device resides, 0 to 0xff + unsigned int device; //!< The device's id on the bus, 0 to 31 + unsigned int pciDeviceId; //!< The combined 16-bit device id and 16-bit vendor id + + // Added in NVML 2.285 API + unsigned int pciSubSystemId; //!< The 32-bit Sub System Device ID + + // NVIDIA reserved for internal use only + unsigned int reserved0; + unsigned int reserved1; + unsigned int reserved2; + unsigned int reserved3; } nvmlPciInfo_t; /** * Structure to store utilization value and process Id */ typedef struct nvmlProcessUtilizationSample_st { - unsigned int pid; //!< PID of process - unsigned long long timeStamp; //!< CPU Timestamp in microseconds - unsigned int smUtil; //!< SM (3D/Compute) Util Value - unsigned int memUtil; //!< Frame Buffer Memory Util Value - 
unsigned int encUtil; //!< Encoder Util Value - unsigned int decUtil; //!< Decoder Util Value + unsigned int pid; //!< PID of process + unsigned long long timeStamp; //!< CPU Timestamp in microseconds + unsigned int smUtil; //!< SM (3D/Compute) Util Value + unsigned int memUtil; //!< Frame Buffer Memory Util Value + unsigned int encUtil; //!< Encoder Util Value + unsigned int decUtil; //!< Decoder Util Value } nvmlProcessUtilizationSample_t; /** @@ -168,57 +159,55 @@ typedef struct nvmlProcessUtilizationSample_st { * behavior. If not, volatile counts are reset each time a compute app is run. */ typedef enum nvmlEccCounterType_enum { - NVML_VOLATILE_ECC = - 0, //!< Volatile counts are reset each time the driver loads. - NVML_AGGREGATE_ECC = 1, //!< Aggregate counts persist across reboots (i.e. - //!< for the lifetime of the device) + NVML_VOLATILE_ECC = 0, //!< Volatile counts are reset each time the driver loads. + NVML_AGGREGATE_ECC = 1, //!< Aggregate counts persist across reboots (i.e. + //!< for the lifetime of the device) - // Keep this last - NVML_ECC_COUNTER_TYPE_COUNT //!< Count of memory counter types + // Keep this last + NVML_ECC_COUNTER_TYPE_COUNT //!< Count of memory counter types } nvmlEccCounterType_t; /** * Generic enable/disable enum. */ typedef enum nvmlEnableState_enum { - NVML_FEATURE_DISABLED = 0, //!< Feature disabled - NVML_FEATURE_ENABLED = 1 //!< Feature enabled + NVML_FEATURE_DISABLED = 0, //!< Feature disabled + NVML_FEATURE_ENABLED = 1 //!< Feature enabled } nvmlEnableState_t; /** * Describes accounting statistics of a process. */ typedef struct nvmlAccountingStats_st { - unsigned int - gpuUtilization; //!< Percent of time over the process's lifetime during - //!< which one or more kernels was executing on the GPU. - //! Utilization stats just like returned by \ref nvmlDeviceGetUtilizationRates - //! but for the life time of a process (not just the last sample period). Set - //! 
to NVML_VALUE_NOT_AVAILABLE if nvmlDeviceGetUtilizationRates is not - //! supported - - unsigned int memoryUtilization; //!< Percent of time over the process's - //!< lifetime during which global (device) - //!< memory was being read or written. - //! Set to NVML_VALUE_NOT_AVAILABLE if nvmlDeviceGetUtilizationRates is not - //! supported - - unsigned long long maxMemoryUsage; //!< Maximum total memory in bytes that - //!< was ever allocated by the process. - //! Set to NVML_VALUE_NOT_AVAILABLE if nvmlProcessInfo_t->usedGpuMemory is not - //! supported - - unsigned long long time; //!< Amount of time in ms during which the compute - //!< context was active. The time is reported as 0 if - //!< the process is not terminated - - unsigned long long startTime; //!< CPU Timestamp in usec representing start - //!< time for the process - - unsigned int isRunning; //!< Flag to represent if the process is running (1 - //!< for running, 0 for terminated) - - unsigned int reserved[5]; //!< Reserved for future use + unsigned int gpuUtilization; //!< Percent of time over the process's lifetime during + //!< which one or more kernels was executing on the GPU. + //! Utilization stats just like returned by \ref nvmlDeviceGetUtilizationRates + //! but for the life time of a process (not just the last sample period). Set + //! to NVML_VALUE_NOT_AVAILABLE if nvmlDeviceGetUtilizationRates is not + //! supported + + unsigned int memoryUtilization; //!< Percent of time over the process's + //!< lifetime during which global (device) + //!< memory was being read or written. + //! Set to NVML_VALUE_NOT_AVAILABLE if nvmlDeviceGetUtilizationRates is not + //! supported + + unsigned long long maxMemoryUsage; //!< Maximum total memory in bytes that + //!< was ever allocated by the process. + //! Set to NVML_VALUE_NOT_AVAILABLE if nvmlProcessInfo_t->usedGpuMemory is not + //! supported + + unsigned long long time; //!< Amount of time in ms during which the compute + //!< context was active. 
The time is reported as 0 if + //!< the process is not terminated + + unsigned long long startTime; //!< CPU Timestamp in usec representing start + //!< time for the process + + unsigned int isRunning; //!< Flag to represent if the process is running (1 + //!< for running, 0 for terminated) + + unsigned int reserved[5]; //!< Reserved for future use } nvmlAccountingStats_t; typedef unsigned int nvmlVgpuInstance_t; @@ -227,15 +216,14 @@ typedef unsigned int nvmlVgpuInstance_t; * API types that allow changes to default permission restrictions */ typedef enum nvmlRestrictedAPI_enum { - NVML_RESTRICTED_API_SET_APPLICATION_CLOCKS = - 0, //!< APIs that change application clocks, see - //!< nvmlDeviceSetApplicationsClocks - //!< and see nvmlDeviceResetApplicationsClocks - NVML_RESTRICTED_API_SET_AUTO_BOOSTED_CLOCKS = - 1, //!< APIs that enable/disable Auto Boosted clocks - //!< see nvmlDeviceSetAutoBoostedClocksEnabled - // Keep this last - NVML_RESTRICTED_API_COUNT + NVML_RESTRICTED_API_SET_APPLICATION_CLOCKS = 0, //!< APIs that change application clocks, see + //!< nvmlDeviceSetApplicationsClocks + //!< and see nvmlDeviceResetApplicationsClocks + NVML_RESTRICTED_API_SET_AUTO_BOOSTED_CLOCKS = + 1, //!< APIs that enable/disable Auto Boosted clocks + //!< see nvmlDeviceSetAutoBoostedClocksEnabled + // Keep this last + NVML_RESTRICTED_API_COUNT } nvmlRestrictedAPI_t; /** @@ -244,54 +232,54 @@ typedef enum nvmlRestrictedAPI_enum { * All speeds are in Mhz. 
*/ typedef enum nvmlClockType_enum { - NVML_CLOCK_GRAPHICS = 0, //!< Graphics clock domain - NVML_CLOCK_SM = 1, //!< SM clock domain - NVML_CLOCK_MEM = 2, //!< Memory clock domain - NVML_CLOCK_VIDEO = 3, //!< Video encoder/decoder clock domain + NVML_CLOCK_GRAPHICS = 0, //!< Graphics clock domain + NVML_CLOCK_SM = 1, //!< SM clock domain + NVML_CLOCK_MEM = 2, //!< Memory clock domain + NVML_CLOCK_VIDEO = 3, //!< Video encoder/decoder clock domain - // Keep this last - NVML_CLOCK_COUNT // #include -#include +#include #include +#include #define BUFFER_LENGTH 8192 // ensure larger than linux max filename length #define FILENAME_LENGTH 8192 @@ -13,13 +13,12 @@ #define PROC_STATE_NONALIVE 1 #define PROC_STATE_UNKNOWN 2 - int proc_alive(int32_t pid) { char filename[FILENAME_LENGTH] = {0}; sprintf(filename, "/proc/%d/stat", pid); FILE* fp; - if ((fp = fopen(filename, "r")) == NULL) { + if ((fp = fopen(filename, "r")) == NULL) { return PROC_STATE_NONALIVE; } @@ -39,5 +38,4 @@ int proc_alive(int32_t pid) { return res; } - #endif // __UTILS_PROCESS_UTILS_H__ diff --git a/src/include/utils.h b/src/include/utils.h index 8a40012e..f636cbf8 100644 --- a/src/include/utils.h +++ b/src/include/utils.h @@ -1,14 +1,13 @@ -#include -#include #include #include +#include +#include #include - int try_lock_unified_lock(); int try_unlock_unified_lock(); -//Nvml part utils +// Nvml part utils void sort(int vmap[16]); int initial_virtual_devices(); int parser(char *str); diff --git a/src/libvgpu.c b/src/libvgpu.c index befc16c3..934ef19e 100644 --- a/src/libvgpu.c +++ b/src/libvgpu.c @@ -1,21 +1,22 @@ -//#include "memory_limit.h" -#include +// #include "memory_limit.h" +#include "include/libvgpu.h" + #include -#include -#include "include/nvml_prefix.h" +#include #include -#include "include/nvml_prefix.h" -#include "include/log_utils.h" +#include + +#include "allocator/allocator.h" #include "include/libcuda_hook.h" -#include "include/libvgpu.h" -#include "include/utils.h" +#include 
"include/log_utils.h" #include "include/nvml_override.h" -#include "allocator/allocator.h" +#include "include/nvml_prefix.h" +#include "include/utils.h" #include "multiprocess/multiprocess_memory_limit.h" extern void init_utilization_watcher(void); extern void utilization_watcher(void); -extern void initial_virtual_map(void); +extern void initial_virtual_map(void); extern int set_host_pid(int hostpid); extern void allocator_init(void); void preInit(); @@ -26,8 +27,8 @@ pthread_once_t pre_cuinit_flag = PTHREAD_ONCE_INIT; pthread_once_t post_cuinit_flag = PTHREAD_ONCE_INIT; pthread_once_t dlsym_init_flag = PTHREAD_ONCE_INIT; -/* pidfound is to enable core utilization, if we don't find hostpid in container, then we have no - where to find its core utilization */ +/* pidfound is to enable core utilization, if we don't find hostpid in + container, then we have no where to find its core utilization */ extern int pidfound; /* used to switch on/off the core utilization limitation*/ @@ -43,25 +44,24 @@ pthread_mutex_t dlsym_lock; typedef struct { pthread_t tid; void *pointer; -}tid_dl_map; +} tid_dl_map; #define DLMAP_SIZE 100 tid_dl_map dlmap[DLMAP_SIZE]; -int dlmap_count=0; +int dlmap_count = 0; -void init_dlsym(){ +void init_dlsym() { LOG_DEBUG("init_dlsym\n"); - pthread_mutex_init(&dlsym_lock,NULL); - dlmap_count=0; - memset(dlmap, 0, sizeof(tid_dl_map)*DLMAP_SIZE); + pthread_mutex_init(&dlsym_lock, NULL); + dlmap_count = 0; + memset(dlmap, 0, sizeof(tid_dl_map) * DLMAP_SIZE); } -int check_dlmap(pthread_t tid, void *pointer){ +int check_dlmap(pthread_t tid, void *pointer) { int i; int cursor = (dlmap_count < DLMAP_SIZE) ? 
dlmap_count : DLMAP_SIZE; - for (i=cursor-1; i>=0; i--) { - if ((dlmap[i].pointer == pointer) && pthread_equal(dlmap[i].tid, tid)) - return 1; + for (i = cursor - 1; i >= 0; i--) { + if ((dlmap[i].pointer == pointer) && pthread_equal(dlmap[i].tid, tid)) return 1; } cursor = dlmap_count % DLMAP_SIZE; dlmap[cursor].tid = tid; @@ -70,20 +70,14 @@ int check_dlmap(pthread_t tid, void *pointer){ return 0; } -FUNC_ATTR_VISIBLE void* dlsym(void* handle, const char* symbol) { - LOG_DEBUG("into dlsym %s",symbol); - pthread_once(&dlsym_init_flag,init_dlsym); +FUNC_ATTR_VISIBLE void *dlsym(void *handle, const char *symbol) { + LOG_DEBUG("into dlsym %s", symbol); + pthread_once(&dlsym_init_flag, init_dlsym); if (real_dlsym == NULL) { - const char* glibc_versions[] = { - "GLIBC_2.2.5", // for amd64 - "GLIBC_2.17", // for arm64 - "GLIBC_2.3", - "GLIBC_2.4", - "GLIBC_2.10", - "GLIBC_2.18", - "GLIBC_2.22", - NULL - }; + const char *glibc_versions[] = {"GLIBC_2.2.5", // for amd64 + "GLIBC_2.17", // for arm64 + "GLIBC_2.3", "GLIBC_2.4", "GLIBC_2.10", + "GLIBC_2.18", "GLIBC_2.22", NULL}; for (int i = 0; glibc_versions[i] != NULL; i++) { real_dlsym = dlvsym(RTLD_NEXT, "dlsym", glibc_versions[i]); if (real_dlsym != NULL) { @@ -91,11 +85,11 @@ FUNC_ATTR_VISIBLE void* dlsym(void* handle, const char* symbol) { break; } } - char *path_search=getenv("CUDA_REDIRECT"); - if ((path_search!=NULL) && (strlen(path_search)>0)){ - vgpulib = dlopen(path_search,RTLD_LAZY); - }else{ - vgpulib = dlopen("/usr/local/vgpu/libvgpu.so",RTLD_LAZY); + char *path_search = getenv("CUDA_REDIRECT"); + if ((path_search != NULL) && (strlen(path_search) > 0)) { + vgpulib = dlopen(path_search, RTLD_LAZY); + } else { + vgpulib = dlopen("/usr/local/vgpu/libvgpu.so", RTLD_LAZY); } if (real_dlsym == NULL) { LOG_ERROR("real dlsym not found"); @@ -103,33 +97,30 @@ FUNC_ATTR_VISIBLE void* dlsym(void* handle, const char* symbol) { if (libc_handle) { real_dlsym = dlsym(libc_handle, "dlsym"); } - if (real_dlsym == NULL) - 
LOG_ERROR("real dlsym not found after trying libc.so.6"); + if (real_dlsym == NULL) LOG_ERROR("real dlsym not found after trying libc.so.6"); } } if (handle == RTLD_NEXT) { - void *h = real_dlsym(RTLD_NEXT,symbol); + void *h = real_dlsym(RTLD_NEXT, symbol); pthread_mutex_lock(&dlsym_lock); pthread_t tid = pthread_self(); - if (check_dlmap(tid,h)){ - LOG_WARN("recursive dlsym : %s\n",symbol); + if (check_dlmap(tid, h)) { + LOG_WARN("recursive dlsym : %s\n", symbol); h = NULL; } pthread_mutex_unlock(&dlsym_lock); return h; } if (symbol[0] == 'c' && symbol[1] == 'u') { - //Compatible with cuda 12.8+ fix - if (strcmp(symbol,"cuGetExportTable")!=0) - pthread_once(&pre_cuinit_flag,(void(*)(void))preInit); - void *f = real_dlsym(vgpulib,symbol); - if (f!=NULL) - return f; + // Compatible with cuda 12.8+ fix + if (strcmp(symbol, "cuGetExportTable") != 0) + pthread_once(&pre_cuinit_flag, (void (*)(void))preInit); + void *f = real_dlsym(vgpulib, symbol); + if (f != NULL) return f; } - #ifdef HOOK_NVML_ENABLE - if (symbol[0] == 'n' && symbol[1] == 'v' && - symbol[2] == 'm' && symbol[3] == 'l' ) { - void* f = __dlsym_hook_section_nvml(handle, symbol); +#ifdef HOOK_NVML_ENABLE + if (symbol[0] == 'n' && symbol[1] == 'v' && symbol[2] == 'm' && symbol[3] == 'l') { + void *f = __dlsym_hook_section_nvml(handle, symbol); if (f != NULL) { return f; } @@ -138,14 +129,14 @@ FUNC_ATTR_VISIBLE void* dlsym(void* handle, const char* symbol) { return real_dlsym(handle, symbol); } -void* __dlsym_hook_section(void* handle, const char* symbol) { +void *__dlsym_hook_section(void *handle, const char *symbol) { int it; - for (it=0;it -#include -#include -#include +#include "multiprocess/multiprocess_memory_limit.h" + +#include +#include +#include #include +#include +#include +#include +#include #include #include -#include -#include -#include -#include +#include +#include +#include +#include #include -#include +#include -#include -#include +#include "include/memory_limit.h" #include 
"include/nvml_prefix.h" -#include - #include "include/process_utils.h" -#include "include/memory_limit.h" -#include "multiprocess/multiprocess_memory_limit.h" - #ifndef SEM_WAIT_TIME #define SEM_WAIT_TIME 10 @@ -52,8 +51,8 @@ static shared_region_info_t region_info = {0, -1, PTHREAD_ONCE_INIT, NULL, 0, NU int env_utilization_switch; int enable_active_oom_killer; size_t context_size; -size_t initial_offset=0; -//lock for record kernel time +size_t initial_offset = 0; +// lock for record kernel time pthread_mutex_t _kernel_mutex; int _record_kernel_interval = 1; @@ -62,7 +61,7 @@ int _record_kernel_interval = 1; void do_init_device_memory_limits(uint64_t*, int); void exit_withlock(int exitcode); -void set_current_gpu_status(int status){ +void set_current_gpu_status(int status) { // Fast path: use cached slot if available if (region_info.my_slot != NULL) { atomic_store_explicit(®ion_info.my_slot->status, status, memory_order_release); @@ -74,22 +73,19 @@ void set_current_gpu_status(int status){ int i; int32_t my_pid = getpid(); for (i = 0; i < proc_num; i++) { - int32_t slot_pid = atomic_load_explicit(®ion_info.shared_region->procs[i].pid, memory_order_acquire); + int32_t slot_pid = + atomic_load_explicit(®ion_info.shared_region->procs[i].pid, memory_order_acquire); if (my_pid == slot_pid) { - atomic_store_explicit(®ion_info.shared_region->procs[i].status, status, memory_order_release); + atomic_store_explicit(®ion_info.shared_region->procs[i].status, status, + memory_order_release); return; } } } -void sig_restore_stub(int signo){ - set_current_gpu_status(1); -} - -void sig_swap_stub(int signo){ - set_current_gpu_status(2); -} +void sig_restore_stub(int signo) { set_current_gpu_status(1); } +void sig_swap_stub(int signo) { set_current_gpu_status(2); } // get device memory from env size_t get_limit_from_env(const char* env_name) { @@ -118,13 +114,13 @@ size_t get_limit_from_env(const char* env_name) { size_t res = strtoul(env_limit, &digit_end, 0); size_t scaled_res = 
res * scalar; if (scaled_res == 0) { - if (env_name[12]=='S'){ - LOG_INFO("device core util limit set to 0, which means no limit: %s=%s", - env_name, env_limit); - }else if (env_name[12]=='M'){ - LOG_WARN("invalid device memory limit %s=%s",env_name,env_limit); - }else{ - LOG_WARN("invalid env name:%s",env_name); + if (env_name[12] == 'S') { + LOG_INFO("device core util limit set to 0, which means no limit: %s=%s", env_name, + env_limit); + } else if (env_name[12] == 'M') { + LOG_WARN("invalid device memory limit %s=%s", env_name, env_limit); + } else { + LOG_WARN("invalid env name:%s", env_name); } return 0; } @@ -136,36 +132,33 @@ size_t get_limit_from_env(const char* env_name) { } int init_device_info() { - unsigned int i,nvmlDevicesCount; + unsigned int i, nvmlDevicesCount; CHECK_NVML_API(nvmlDeviceGetCount_v2(&nvmlDevicesCount)); - region_info.shared_region->device_num=nvmlDevicesCount; + region_info.shared_region->device_num = nvmlDevicesCount; nvmlDevice_t dev; - for(i=0;iuuids[i],NVML_DEVICE_UUID_V2_BUFFER_SIZE)); + CHECK_NVML_API(nvmlDeviceGetUUID(dev, region_info.shared_region->uuids[i], + NVML_DEVICE_UUID_V2_BUFFER_SIZE)); } - LOG_INFO("put_device_info finished %d",nvmlDevicesCount); + LOG_INFO("put_device_info finished %d", nvmlDevicesCount); return 0; } - -int load_env_from_file(char *filename) { - FILE *f=fopen(filename,"r"); - if (f==NULL) - return 0; +int load_env_from_file(char* filename) { + FILE* f = fopen(filename, "r"); + if (f == NULL) return 0; char tmp[10000]; - int cursor=0; - while (!feof(f)){ - fgets(tmp,10000,f); - if (strstr(tmp,"=")==NULL) - break; - if (tmp[strlen(tmp)-1]=='\n') - tmp[strlen(tmp)-1]='\0'; - for (cursor=0;cursorproc_num;i++) { - kill(region_info.shared_region->procs[i].pid,9); + for (i = 0; i < region_info.shared_region->proc_num; i++) { + kill(region_info.shared_region->procs[i].pid, 9); } return 0; } @@ -241,41 +234,36 @@ void pre_launch_kernel() { LOG_INFO("write last kernel time: %ld", now) // Lock-free update 
using atomic compare-exchange - uint64_t expected = atomic_load_explicit(®ion_info.shared_region->last_kernel_time, memory_order_acquire); + uint64_t expected = + atomic_load_explicit(®ion_info.shared_region->last_kernel_time, memory_order_acquire); while (expected < now) { if (atomic_compare_exchange_weak_explicit(®ion_info.shared_region->last_kernel_time, - &expected, now, memory_order_release, memory_order_acquire)) { + &expected, now, memory_order_release, + memory_order_acquire)) { break; } } } -int shrreg_major_version() { - return MAJOR_VERSION; -} - -int shrreg_minor_version() { - return MINOR_VERSION; -} +int shrreg_major_version() { return MAJOR_VERSION; } +int shrreg_minor_version() { return MINOR_VERSION; } // Lock-free memory monitor aggregation size_t get_gpu_memory_monitor(const int dev) { LOG_DEBUG("get_gpu_memory_monitor_lockfree dev=%d", dev); ensure_initialized(); - int i=0; - size_t total=0; + int i = 0; + size_t total = 0; int proc_num = atomic_load_explicit(®ion_info.shared_region->proc_num, memory_order_acquire); - for (i=0; i < proc_num; i++) { + for (i = 0; i < proc_num; i++) { uint64_t monitor = atomic_load_explicit( - ®ion_info.shared_region->procs[i].monitorused[dev], - memory_order_relaxed); + ®ion_info.shared_region->procs[i].monitorused[dev], memory_order_relaxed); uint64_t used_total = atomic_load_explicit( - ®ion_info.shared_region->procs[i].used[dev].total, - memory_order_relaxed); + ®ion_info.shared_region->procs[i].used[dev].total, memory_order_relaxed); LOG_DEBUG("dev=%d i=%lu,%lu\n", dev, monitor, used_total); - total+=monitor; + total += monitor; } return total; } @@ -284,13 +272,13 @@ size_t get_gpu_memory_monitor(const int dev) { size_t get_gpu_memory_usage(const int dev) { LOG_INFO("get_gpu_memory_usage_lockfree dev=%d", dev); ensure_initialized(); - int i=0; - size_t total=0; + int i = 0; + size_t total = 0; // Lock-free read with acquire semantics for proc_num int proc_num = 
atomic_load_explicit(®ion_info.shared_region->proc_num, memory_order_acquire); - for (i=0; i < proc_num; i++) { + for (i = 0; i < proc_num; i++) { shrreg_proc_slot_t* slot = ®ion_info.shared_region->procs[i]; uint64_t proc_usage; uint64_t seq1, seq2; @@ -306,12 +294,12 @@ size_t get_gpu_memory_usage(const int dev) { while (seq1 & 1) { // Exponential backoff to reduce contention if (retry_count < 5) { - // First 5 retries: just CPU pause (fast path) - #if defined(__x86_64__) || defined(__i386__) +// First 5 retries: just CPU pause (fast path) +#if defined(__x86_64__) || defined(__i386__) __asm__ __volatile__("pause" ::: "memory"); - #elif defined(__aarch64__) +#elif defined(__aarch64__) __asm__ __volatile__("yield" ::: "memory"); - #endif +#endif } else if (retry_count < 20) { // Next 15 retries: 1μs delay usleep(1); @@ -323,7 +311,8 @@ size_t get_gpu_memory_usage(const int dev) { usleep(100); // Log if we're spinning for a very long time if (retry_count % 100 == 0) { - LOG_DEBUG("Seqlock spinning for slot %d, retry %d (writer active)", i, retry_count); + LOG_DEBUG("Seqlock spinning for slot %d, retry %d (writer active)", i, + retry_count); } } @@ -348,28 +337,30 @@ size_t get_gpu_memory_usage(const int dev) { int32_t hostpid = atomic_load_explicit(&slot->hostpid, memory_order_relaxed); LOG_INFO("dev=%d pid=%d host pid=%d i=%lu", dev, pid, hostpid, proc_usage); - total+=proc_usage; + total += proc_usage; } - total+=initial_offset; + total += initial_offset; return total; } // Lock-free memory monitor update -int set_gpu_device_memory_monitor(int32_t pid,int dev,size_t monitor){ +int set_gpu_device_memory_monitor(int32_t pid, int dev, size_t monitor) { // LOG_WARN("set_gpu_device_memory_monitor_lockfree:%d %d %lu",pid,dev,monitor); int i; ensure_initialized(); int proc_num = atomic_load_explicit(®ion_info.shared_region->proc_num, memory_order_acquire); - for (i=0; i < proc_num; i++) { - int32_t hostpid = atomic_load_explicit(®ion_info.shared_region->procs[i].hostpid, 
memory_order_acquire); + for (i = 0; i < proc_num; i++) { + int32_t hostpid = atomic_load_explicit(®ion_info.shared_region->procs[i].hostpid, + memory_order_acquire); if (hostpid == pid) { uint64_t used_total = atomic_load_explicit( - ®ion_info.shared_region->procs[i].used[dev].total, - memory_order_relaxed); - LOG_INFO("set_gpu_device_memory_monitor_lockfree:%d %d %lu->%lu", pid, dev, used_total, monitor); - atomic_store_explicit(®ion_info.shared_region->procs[i].monitorused[dev], monitor, memory_order_relaxed); + ®ion_info.shared_region->procs[i].used[dev].total, memory_order_relaxed); + LOG_INFO("set_gpu_device_memory_monitor_lockfree:%d %d %lu->%lu", pid, dev, used_total, + monitor); + atomic_store_explicit(®ion_info.shared_region->procs[i].monitorused[dev], monitor, + memory_order_relaxed); return 1; } } @@ -383,15 +374,16 @@ int set_gpu_device_sm_utilization(int32_t pid, int dev, unsigned int smUtil) { int proc_num = atomic_load_explicit(®ion_info.shared_region->proc_num, memory_order_acquire); for (i = 0; i < proc_num; i++) { - int32_t hostpid = atomic_load_explicit(®ion_info.shared_region->procs[i].hostpid, memory_order_acquire); + int32_t hostpid = atomic_load_explicit(®ion_info.shared_region->procs[i].hostpid, + memory_order_acquire); if (hostpid == pid) { - uint64_t old_util = atomic_load_explicit( - ®ion_info.shared_region->procs[i].device_util[dev].sm_util, - memory_order_relaxed); - LOG_INFO("set_gpu_device_sm_utilization_lockfree:%d %d %lu->%u", pid, dev, old_util, smUtil); - atomic_store_explicit( - ®ion_info.shared_region->procs[i].device_util[dev].sm_util, smUtil, - memory_order_relaxed); + uint64_t old_util = + atomic_load_explicit(®ion_info.shared_region->procs[i].device_util[dev].sm_util, + memory_order_relaxed); + LOG_INFO("set_gpu_device_sm_utilization_lockfree:%d %d %lu->%u", pid, dev, old_util, + smUtil); + atomic_store_explicit(®ion_info.shared_region->procs[i].device_util[dev].sm_util, + smUtil, memory_order_relaxed); return 1; } } @@ 
-399,18 +391,17 @@ int set_gpu_device_sm_utilization(int32_t pid, int dev, unsigned int smUtil) { } // Lock-free utilization initialization -int init_gpu_device_utilization(){ - int i,dev; +int init_gpu_device_utilization() { + int i, dev; ensure_initialized(); int proc_num = atomic_load_explicit(®ion_info.shared_region->proc_num, memory_order_acquire); - for (i=0; i < proc_num; i++) { - for (dev=0;devprocs[i].device_util[dev].sm_util, - 0, - memory_order_relaxed); - atomic_store_explicit(®ion_info.shared_region->procs[i].monitorused[dev], 0, memory_order_relaxed); + for (i = 0; i < proc_num; i++) { + for (dev = 0; dev < CUDA_DEVICE_MAX_COUNT; dev++) { + atomic_store_explicit(®ion_info.shared_region->procs[i].device_util[dev].sm_util, 0, + memory_order_relaxed); + atomic_store_explicit(®ion_info.shared_region->procs[i].monitorused[dev], 0, + memory_order_relaxed); break; } } @@ -441,19 +432,18 @@ uint64_t nvml_get_device_memory_usage(const int dev) { int slot = 0; for (; slot < proc_num; slot++) { int32_t slot_pid = atomic_load_explicit(®ion->procs[slot].pid, memory_order_relaxed); - if (infos[i].pid != slot_pid) - continue; + if (infos[i].pid != slot_pid) continue; usage += infos[i].usedGpuMemory; } } - LOG_DEBUG("Device %d current memory %lu / %lu", - dev, usage, region->limit[dev]); + LOG_DEBUG("Device %d current memory %lu / %lu", dev, usage, region->limit[dev]); return usage; } // Lock-free memory add using atomics with seqlock for consistent reads int add_gpu_device_memory_usage(int32_t pid, int cudadev, size_t usage, int type) { - LOG_INFO("add_gpu_device_memory_lockfree:%d %d->%d %lu", pid, cudadev, cuda_to_nvml_map(cudadev), usage); + LOG_INFO("add_gpu_device_memory_lockfree:%d %d->%d %lu", pid, cudadev, + cuda_to_nvml_map(cudadev), usage); int dev = cuda_to_nvml_map(cudadev); ensure_initialized(); @@ -469,10 +459,12 @@ int add_gpu_device_memory_usage(int32_t pid, int cudadev, size_t usage, int type atomic_fetch_add_explicit(&slot->used[dev].total, usage, 
memory_order_release); switch (type) { case 0: - atomic_fetch_add_explicit(&slot->used[dev].context_size, usage, memory_order_release); + atomic_fetch_add_explicit(&slot->used[dev].context_size, usage, + memory_order_release); break; case 1: - atomic_fetch_add_explicit(&slot->used[dev].module_size, usage, memory_order_release); + atomic_fetch_add_explicit(&slot->used[dev].module_size, usage, + memory_order_release); break; case 2: atomic_fetch_add_explicit(&slot->used[dev].data_size, usage, memory_order_release); @@ -489,8 +481,9 @@ int add_gpu_device_memory_usage(int32_t pid, int cudadev, size_t usage, int type // Slow path: find slot for other process (still lock-free) int proc_num = atomic_load_explicit(®ion_info.shared_region->proc_num, memory_order_acquire); int i; - for (i=0; i < proc_num; i++) { - int32_t slot_pid = atomic_load_explicit(®ion_info.shared_region->procs[i].pid, memory_order_acquire); + for (i = 0; i < proc_num; i++) { + int32_t slot_pid = + atomic_load_explicit(®ion_info.shared_region->procs[i].pid, memory_order_acquire); if (slot_pid == pid) { shrreg_proc_slot_t* slot = ®ion_info.shared_region->procs[i]; @@ -501,13 +494,16 @@ int add_gpu_device_memory_usage(int32_t pid, int cudadev, size_t usage, int type atomic_fetch_add_explicit(&slot->used[dev].total, usage, memory_order_release); switch (type) { case 0: - atomic_fetch_add_explicit(&slot->used[dev].context_size, usage, memory_order_release); + atomic_fetch_add_explicit(&slot->used[dev].context_size, usage, + memory_order_release); break; case 1: - atomic_fetch_add_explicit(&slot->used[dev].module_size, usage, memory_order_release); + atomic_fetch_add_explicit(&slot->used[dev].module_size, usage, + memory_order_release); break; case 2: - atomic_fetch_add_explicit(&slot->used[dev].data_size, usage, memory_order_release); + atomic_fetch_add_explicit(&slot->used[dev].data_size, usage, + memory_order_release); break; } @@ -525,7 +521,8 @@ int add_gpu_device_memory_usage(int32_t pid, int cudadev, 
size_t usage, int type // Lock-free memory remove using atomics with seqlock for consistent reads int rm_gpu_device_memory_usage(int32_t pid, int cudadev, size_t usage, int type) { - LOG_INFO("rm_gpu_device_memory_lockfree:%d %d->%d %d:%lu", pid, cudadev, cuda_to_nvml_map(cudadev), type, usage); + LOG_INFO("rm_gpu_device_memory_lockfree:%d %d->%d %d:%lu", pid, cudadev, + cuda_to_nvml_map(cudadev), type, usage); int dev = cuda_to_nvml_map(cudadev); ensure_initialized(); @@ -540,10 +537,12 @@ int rm_gpu_device_memory_usage(int32_t pid, int cudadev, size_t usage, int type) atomic_fetch_sub_explicit(&slot->used[dev].total, usage, memory_order_release); switch (type) { case 0: - atomic_fetch_sub_explicit(&slot->used[dev].context_size, usage, memory_order_release); + atomic_fetch_sub_explicit(&slot->used[dev].context_size, usage, + memory_order_release); break; case 1: - atomic_fetch_sub_explicit(&slot->used[dev].module_size, usage, memory_order_release); + atomic_fetch_sub_explicit(&slot->used[dev].module_size, usage, + memory_order_release); break; case 2: atomic_fetch_sub_explicit(&slot->used[dev].data_size, usage, memory_order_release); @@ -562,7 +561,8 @@ int rm_gpu_device_memory_usage(int32_t pid, int cudadev, size_t usage, int type) int proc_num = atomic_load_explicit(®ion_info.shared_region->proc_num, memory_order_acquire); int i; for (i = 0; i < proc_num; i++) { - int32_t slot_pid = atomic_load_explicit(®ion_info.shared_region->procs[i].pid, memory_order_acquire); + int32_t slot_pid = + atomic_load_explicit(®ion_info.shared_region->procs[i].pid, memory_order_acquire); if (slot_pid == pid) { shrreg_proc_slot_t* slot = ®ion_info.shared_region->procs[i]; @@ -573,13 +573,16 @@ int rm_gpu_device_memory_usage(int32_t pid, int cudadev, size_t usage, int type) atomic_fetch_sub_explicit(&slot->used[dev].total, usage, memory_order_release); switch (type) { case 0: - atomic_fetch_sub_explicit(&slot->used[dev].context_size, usage, memory_order_release); + 
atomic_fetch_sub_explicit(&slot->used[dev].context_size, usage, + memory_order_release); break; case 1: - atomic_fetch_sub_explicit(&slot->used[dev].module_size, usage, memory_order_release); + atomic_fetch_sub_explicit(&slot->used[dev].module_size, usage, + memory_order_release); break; case 2: - atomic_fetch_sub_explicit(&slot->used[dev].data_size, usage, memory_order_release); + atomic_fetch_sub_explicit(&slot->used[dev].data_size, usage, + memory_order_release); break; } @@ -620,8 +623,10 @@ int fix_lock_shrreg() { if (current_owner != 0) { int flag = 0; if (current_owner == region_info.pid) { - LOG_INFO("Detect owner pid = self pid (%d), " - "indicates pid loopback or race condition", current_owner); + LOG_INFO( + "Detect owner pid = self pid (%d), " + "indicates pid loopback or race condition", + current_owner); flag = 1; } else { int proc_status = proc_alive(current_owner); @@ -634,7 +639,7 @@ int fix_lock_shrreg() { LOG_INFO("Take upgraded lock (%d)", region_info.pid); region->owner_pid = region_info.pid; SEQ_POINT_MARK(SEQ_FIX_SHRREG_UPDATE_OWNER_OK); - res = 0; + res = 0; } } @@ -657,36 +662,50 @@ void exit_withlock(int exitcode) { * member individually using atomic loads and stores. 
*/ static inline void copy_proc_slot_atomic(shrreg_proc_slot_t* dst, shrreg_proc_slot_t* src) { - atomic_store_explicit(&dst->pid, - atomic_load_explicit(&src->pid, memory_order_relaxed), memory_order_relaxed); - atomic_store_explicit(&dst->hostpid, - atomic_load_explicit(&src->hostpid, memory_order_relaxed), memory_order_relaxed); - atomic_store_explicit(&dst->seqlock, - atomic_load_explicit(&src->seqlock, memory_order_relaxed), memory_order_relaxed); - atomic_store_explicit(&dst->status, - atomic_load_explicit(&src->status, memory_order_relaxed), memory_order_relaxed); + atomic_store_explicit(&dst->pid, atomic_load_explicit(&src->pid, memory_order_relaxed), + memory_order_relaxed); + atomic_store_explicit(&dst->hostpid, atomic_load_explicit(&src->hostpid, memory_order_relaxed), + memory_order_relaxed); + atomic_store_explicit(&dst->seqlock, atomic_load_explicit(&src->seqlock, memory_order_relaxed), + memory_order_relaxed); + atomic_store_explicit(&dst->status, atomic_load_explicit(&src->status, memory_order_relaxed), + memory_order_relaxed); for (int dev = 0; dev < CUDA_DEVICE_MAX_COUNT; dev++) { atomic_store_explicit(&dst->used[dev].total, - atomic_load_explicit(&src->used[dev].total, memory_order_relaxed), memory_order_relaxed); - atomic_store_explicit(&dst->used[dev].context_size, - atomic_load_explicit(&src->used[dev].context_size, memory_order_relaxed), memory_order_relaxed); - atomic_store_explicit(&dst->used[dev].module_size, - atomic_load_explicit(&src->used[dev].module_size, memory_order_relaxed), memory_order_relaxed); + atomic_load_explicit(&src->used[dev].total, memory_order_relaxed), + memory_order_relaxed); + atomic_store_explicit( + &dst->used[dev].context_size, + atomic_load_explicit(&src->used[dev].context_size, memory_order_relaxed), + memory_order_relaxed); + atomic_store_explicit( + &dst->used[dev].module_size, + atomic_load_explicit(&src->used[dev].module_size, memory_order_relaxed), + memory_order_relaxed); 
atomic_store_explicit(&dst->used[dev].data_size, - atomic_load_explicit(&src->used[dev].data_size, memory_order_relaxed), memory_order_relaxed); + atomic_load_explicit(&src->used[dev].data_size, memory_order_relaxed), + memory_order_relaxed); atomic_store_explicit(&dst->used[dev].offset, - atomic_load_explicit(&src->used[dev].offset, memory_order_relaxed), memory_order_relaxed); + atomic_load_explicit(&src->used[dev].offset, memory_order_relaxed), + memory_order_relaxed); atomic_store_explicit(&dst->monitorused[dev], - atomic_load_explicit(&src->monitorused[dev], memory_order_relaxed), memory_order_relaxed); + atomic_load_explicit(&src->monitorused[dev], memory_order_relaxed), + memory_order_relaxed); - atomic_store_explicit(&dst->device_util[dev].dec_util, - atomic_load_explicit(&src->device_util[dev].dec_util, memory_order_relaxed), memory_order_relaxed); - atomic_store_explicit(&dst->device_util[dev].enc_util, - atomic_load_explicit(&src->device_util[dev].enc_util, memory_order_relaxed), memory_order_relaxed); - atomic_store_explicit(&dst->device_util[dev].sm_util, - atomic_load_explicit(&src->device_util[dev].sm_util, memory_order_relaxed), memory_order_relaxed); + atomic_store_explicit( + &dst->device_util[dev].dec_util, + atomic_load_explicit(&src->device_util[dev].dec_util, memory_order_relaxed), + memory_order_relaxed); + atomic_store_explicit( + &dst->device_util[dev].enc_util, + atomic_load_explicit(&src->device_util[dev].enc_util, memory_order_relaxed), + memory_order_relaxed); + atomic_store_explicit( + &dst->device_util[dev].sm_util, + atomic_load_explicit(&src->device_util[dev].sm_util, memory_order_relaxed), + memory_order_relaxed); } } @@ -739,7 +758,6 @@ void exit_handler() { LOG_MSG("Exit cleanup complete for PID %d", my_pid); } - void lock_shrreg() { shared_region_t* region = region_info.shared_region; int trials = 0; @@ -764,23 +782,25 @@ void lock_shrreg() { size_t current_owner = atomic_load_explicit(®ion->owner_pid, memory_order_acquire); if 
(trials <= 3 || trials % 5 == 0) { // Log first 3, then every 5th - LOG_WARN("Lock shrreg timeout (trial %d/%d), owner=%ld", - trials, SEM_WAIT_RETRY_TIMES, current_owner); + LOG_WARN("Lock shrreg timeout (trial %d/%d), owner=%ld", trials, + SEM_WAIT_RETRY_TIMES, current_owner); } // SIGKILL RECOVERY: Check if owner is dead (the ONLY case where exit cleanup fails) if (current_owner != 0) { int owner_status = proc_alive((int32_t)current_owner); if (owner_status == PROC_STATE_NONALIVE) { - LOG_WARN("Owner %ld is dead (was SIGKILL'd), cleaning up stale lock", current_owner); + LOG_WARN("Owner %ld is dead (was SIGKILL'd), cleaning up stale lock", + current_owner); // Use CAS so only one process does this size_t expected = current_owner; if (atomic_compare_exchange_strong_explicit(®ion->owner_pid, &expected, 0, - memory_order_release, memory_order_acquire)) { + memory_order_release, + memory_order_acquire)) { LOG_WARN("Cleared dead owner_pid and posting semaphore"); sem_post(®ion->sem); // Unlock - usleep(10000); // 10ms for semaphore to propagate - continue; // Retry immediately + usleep(10000); // 10ms for semaphore to propagate + continue; // Retry immediately } // Another process is handling it, wait a bit usleep(100000); // 100ms @@ -837,13 +857,14 @@ int lock_postinit() { return 1; // Success } else if (errno == ETIMEDOUT) { trials++; - LOG_MSG("Waiting for postinit lock (trial %d/%d, waited %ds, PID %d)", - trials, SEM_WAIT_RETRY_TIMES_POSTINIT, trials * SEM_WAIT_TIME_POSTINIT, getpid()); + LOG_MSG("Waiting for postinit lock (trial %d/%d, waited %ds, PID %d)", trials, + SEM_WAIT_RETRY_TIMES_POSTINIT, trials * SEM_WAIT_TIME_POSTINIT, getpid()); // After many retries, give up if (trials > SEM_WAIT_RETRY_TIMES_POSTINIT) { - LOG_ERROR("Postinit lock timeout after %d seconds - another process may have crashed", - SEM_WAIT_RETRY_TIMES_POSTINIT * SEM_WAIT_TIME_POSTINIT); + LOG_ERROR( + "Postinit lock timeout after %d seconds - another process may have crashed", + 
SEM_WAIT_RETRY_TIMES_POSTINIT * SEM_WAIT_TIME_POSTINIT); LOG_ERROR("Skipping host PID detection for this process (will use container PID)"); return 0; // Timeout - didn't acquire lock } @@ -862,10 +883,9 @@ void unlock_postinit() { sem_post(®ion->sem_postinit); } - int clear_proc_slot_nolock(int do_clear) { int slot = 0; - int res=0; + int res = 0; int cleaned_pid_zero = 0; int cleaned_dead = 0; shared_region_t* region = region_info.shared_region; @@ -876,27 +896,34 @@ int clear_proc_slot_nolock(int do_clear) { if (pid == 0) { LOG_DEBUG("Removing slot %d with PID=0 (marked dead by exit cleanup)", slot); cleaned_pid_zero++; - res=1; + res = 1; region->proc_num--; copy_proc_slot_atomic(®ion->procs[slot], ®ion->procs[region->proc_num]); - if (region_info.my_slot != NULL && region_info.my_slot == ®ion->procs[region->proc_num]) { + if (region_info.my_slot != NULL && + region_info.my_slot == ®ion->procs[region->proc_num]) { region_info.my_slot = ®ion->procs[slot]; - atomic_store_explicit(®ion->procs[region->proc_num].seqlock, 0, memory_order_relaxed); - atomic_store_explicit(®ion->procs[region->proc_num].pid, 0, memory_order_release); - atomic_store_explicit(®ion->procs[region->proc_num].hostpid, 0, memory_order_relaxed); - atomic_store_explicit(®ion->procs[region->proc_num].status, 0, memory_order_release); + atomic_store_explicit(®ion->procs[region->proc_num].seqlock, 0, + memory_order_relaxed); + atomic_store_explicit(®ion->procs[region->proc_num].pid, 0, + memory_order_release); + atomic_store_explicit(®ion->procs[region->proc_num].hostpid, 0, + memory_order_relaxed); + atomic_store_explicit(®ion->procs[region->proc_num].status, 0, + memory_order_release); for (int dev = 0; dev < CUDA_DEVICE_MAX_COUNT; dev++) { - atomic_store_explicit(®ion->procs[region->proc_num].used[dev].total, 0, memory_order_relaxed); - atomic_store_explicit( - ®ion->procs[region->proc_num].used[dev].context_size, 0, memory_order_relaxed); - atomic_store_explicit( - 
®ion->procs[region->proc_num].used[dev].module_size, 0, memory_order_relaxed); - atomic_store_explicit( - ®ion->procs[region->proc_num].used[dev].data_size, 0, memory_order_relaxed); - atomic_store_explicit( - ®ion->procs[region->proc_num].device_util[dev].sm_util, 0, memory_order_relaxed); - atomic_store_explicit(®ion->procs[region->proc_num].monitorused[dev], 0, memory_order_relaxed); + atomic_store_explicit(®ion->procs[region->proc_num].used[dev].total, 0, + memory_order_relaxed); + atomic_store_explicit(®ion->procs[region->proc_num].used[dev].context_size, + 0, memory_order_relaxed); + atomic_store_explicit(®ion->procs[region->proc_num].used[dev].module_size, 0, + memory_order_relaxed); + atomic_store_explicit(®ion->procs[region->proc_num].used[dev].data_size, 0, + memory_order_relaxed); + atomic_store_explicit(®ion->procs[region->proc_num].device_util[dev].sm_util, + 0, memory_order_relaxed); + atomic_store_explicit(®ion->procs[region->proc_num].monitorused[dev], 0, + memory_order_relaxed); } } __sync_synchronize(); @@ -913,24 +940,31 @@ int clear_proc_slot_nolock(int do_clear) { res = 1; region->proc_num--; copy_proc_slot_atomic(®ion->procs[slot], ®ion->procs[region->proc_num]); - if (region_info.my_slot != NULL && region_info.my_slot == ®ion->procs[region->proc_num]) { + if (region_info.my_slot != NULL && + region_info.my_slot == ®ion->procs[region->proc_num]) { region_info.my_slot = ®ion->procs[slot]; - atomic_store_explicit(®ion->procs[region->proc_num].seqlock, 0, memory_order_relaxed); - atomic_store_explicit(®ion->procs[region->proc_num].pid, 0, memory_order_release); - atomic_store_explicit(®ion->procs[region->proc_num].hostpid, 0, memory_order_relaxed); - atomic_store_explicit(®ion->procs[region->proc_num].status, 0, memory_order_release); + atomic_store_explicit(®ion->procs[region->proc_num].seqlock, 0, + memory_order_relaxed); + atomic_store_explicit(®ion->procs[region->proc_num].pid, 0, + memory_order_release); + 
atomic_store_explicit(®ion->procs[region->proc_num].hostpid, 0, + memory_order_relaxed); + atomic_store_explicit(®ion->procs[region->proc_num].status, 0, + memory_order_release); for (int dev = 0; dev < CUDA_DEVICE_MAX_COUNT; dev++) { - atomic_store_explicit(®ion->procs[region->proc_num].used[dev].total, 0, memory_order_relaxed); - atomic_store_explicit( - ®ion->procs[region->proc_num].used[dev].context_size, 0, memory_order_relaxed); - atomic_store_explicit( - ®ion->procs[region->proc_num].used[dev].module_size, 0, memory_order_relaxed); - atomic_store_explicit( - ®ion->procs[region->proc_num].used[dev].data_size, 0, memory_order_relaxed); - atomic_store_explicit( - ®ion->procs[region->proc_num].device_util[dev].sm_util, 0, memory_order_relaxed); - atomic_store_explicit(®ion->procs[region->proc_num].monitorused[dev], 0, memory_order_relaxed); + atomic_store_explicit(®ion->procs[region->proc_num].used[dev].total, 0, + memory_order_relaxed); + atomic_store_explicit(®ion->procs[region->proc_num].used[dev].context_size, + 0, memory_order_relaxed); + atomic_store_explicit(®ion->procs[region->proc_num].used[dev].module_size, 0, + memory_order_relaxed); + atomic_store_explicit(®ion->procs[region->proc_num].used[dev].data_size, 0, + memory_order_relaxed); + atomic_store_explicit(®ion->procs[region->proc_num].device_util[dev].sm_util, + 0, memory_order_relaxed); + atomic_store_explicit(®ion->procs[region->proc_num].monitorused[dev], 0, + memory_order_relaxed); } } __sync_synchronize(); @@ -942,8 +976,8 @@ int clear_proc_slot_nolock(int do_clear) { slot++; } if (cleaned_pid_zero > 0 || cleaned_dead > 0) { - LOG_INFO("Cleaned %d PID=0 slots, %d dead proc slots (proc_num now %d)", - cleaned_pid_zero, cleaned_dead, region->proc_num); + LOG_INFO("Cleaned %d PID=0 slots, %d dead proc slots (proc_num now %d)", cleaned_pid_zero, + cleaned_dead, region->proc_num); } return res; } @@ -957,25 +991,30 @@ void init_proc_slot_withlock() { if (proc_num >= SHARED_REGION_MAX_PROCESS_NUM) { 
exit_withlock(-1); } - signal(SIGUSR2,sig_swap_stub); - signal(SIGUSR1,sig_restore_stub); + signal(SIGUSR2, sig_swap_stub); + signal(SIGUSR1, sig_restore_stub); - // If, by any means a pid of itself is found in region->process, then it is probably caused by crashloop - // we need to reset it. - int i,found=0; - for (i=0; i < proc_num; i++) { + // If, by any means a pid of itself is found in region->process, then it is probably caused by + // crashloop we need to reset it. + int i, found = 0; + for (i = 0; i < proc_num; i++) { int32_t slot_pid = atomic_load_explicit(®ion->procs[i].pid, memory_order_acquire); if (slot_pid == current_pid) { - atomic_store_explicit(®ion->procs[i].seqlock, 0, memory_order_relaxed); // Reset seqlock + atomic_store_explicit(®ion->procs[i].seqlock, 0, + memory_order_relaxed); // Reset seqlock atomic_store_explicit(®ion->procs[i].status, 1, memory_order_release); // Zero out atomics for (int dev = 0; dev < CUDA_DEVICE_MAX_COUNT; dev++) { atomic_store_explicit(®ion->procs[i].used[dev].total, 0, memory_order_relaxed); - atomic_store_explicit(®ion->procs[i].used[dev].context_size, 0, memory_order_relaxed); - atomic_store_explicit(®ion->procs[i].used[dev].module_size, 0, memory_order_relaxed); - atomic_store_explicit(®ion->procs[i].used[dev].data_size, 0, memory_order_relaxed); - atomic_store_explicit(®ion->procs[i].device_util[dev].sm_util, 0, memory_order_relaxed); + atomic_store_explicit(®ion->procs[i].used[dev].context_size, 0, + memory_order_relaxed); + atomic_store_explicit(®ion->procs[i].used[dev].module_size, 0, + memory_order_relaxed); + atomic_store_explicit(®ion->procs[i].used[dev].data_size, 0, + memory_order_relaxed); + atomic_store_explicit(®ion->procs[i].device_util[dev].sm_util, 0, + memory_order_relaxed); atomic_store_explicit(®ion->procs[i].monitorused[dev], 0, memory_order_relaxed); } @@ -987,18 +1026,25 @@ void init_proc_slot_withlock() { if (!found) { // Initialize new slot with atomics - 
atomic_store_explicit(®ion->procs[proc_num].seqlock, 0, memory_order_relaxed); // Start with even (no write) + atomic_store_explicit(®ion->procs[proc_num].seqlock, 0, + memory_order_relaxed); // Start with even (no write) atomic_store_explicit(®ion->procs[proc_num].pid, current_pid, memory_order_release); atomic_store_explicit(®ion->procs[proc_num].hostpid, 0, memory_order_relaxed); atomic_store_explicit(®ion->procs[proc_num].status, 1, memory_order_release); for (int dev = 0; dev < CUDA_DEVICE_MAX_COUNT; dev++) { - atomic_store_explicit(®ion->procs[proc_num].used[dev].total, 0, memory_order_relaxed); - atomic_store_explicit(®ion->procs[proc_num].used[dev].context_size, 0, memory_order_relaxed); - atomic_store_explicit(®ion->procs[proc_num].used[dev].module_size, 0, memory_order_relaxed); - atomic_store_explicit(®ion->procs[proc_num].used[dev].data_size, 0, memory_order_relaxed); - atomic_store_explicit(®ion->procs[proc_num].device_util[dev].sm_util, 0, memory_order_relaxed); - atomic_store_explicit(®ion->procs[proc_num].monitorused[dev], 0, memory_order_relaxed); + atomic_store_explicit(®ion->procs[proc_num].used[dev].total, 0, + memory_order_relaxed); + atomic_store_explicit(®ion->procs[proc_num].used[dev].context_size, 0, + memory_order_relaxed); + atomic_store_explicit(®ion->procs[proc_num].used[dev].module_size, 0, + memory_order_relaxed); + atomic_store_explicit(®ion->procs[proc_num].used[dev].data_size, 0, + memory_order_relaxed); + atomic_store_explicit(®ion->procs[proc_num].device_util[dev].sm_util, 0, + memory_order_relaxed); + atomic_store_explicit(®ion->procs[proc_num].monitorused[dev], 0, + memory_order_relaxed); } region_info.my_slot = ®ion->procs[proc_num]; // Cache our slot pointer @@ -1011,47 +1057,43 @@ void init_proc_slot_withlock() { void print_all() { int i; - LOG_INFO("Total process: %d",region_info.shared_region->proc_num); - for (i=0;iproc_num;i++) { - for (int dev=0;devproc_num); + for (i = 0; i < region_info.shared_region->proc_num; i++) { 
+ for (int dev = 0; dev < CUDA_DEVICE_MAX_COUNT; dev++) { LOG_INFO("Process %d hostPid: %d, sm: %lu, memory: %lu, record: %lu", - region_info.shared_region->procs[i].pid, - region_info.shared_region->procs[i].hostpid, - region_info.shared_region->procs[i].device_util[dev].sm_util, - region_info.shared_region->procs[i].monitorused[dev], - region_info.shared_region->procs[i].used[dev].total); + region_info.shared_region->procs[i].pid, + region_info.shared_region->procs[i].hostpid, + region_info.shared_region->procs[i].device_util[dev].sm_util, + region_info.shared_region->procs[i].monitorused[dev], + region_info.shared_region->procs[i].used[dev].total); } } } void child_reinit_flag() { - LOG_DEBUG("Detect child pid: %d -> %d", region_info.pid, getpid()); + LOG_DEBUG("Detect child pid: %d -> %d", region_info.pid, getpid()); region_info.init_status = PTHREAD_ONCE_INIT; } int set_active_oom_killer() { - char *oom_killer_env; + char* oom_killer_env; oom_killer_env = getenv("ACTIVE_OOM_KILLER"); - if (oom_killer_env!=NULL){ - if (strcmp(oom_killer_env,"false") == 0) - return 0; - if (strcmp(oom_killer_env,"true") == 0) - return 1; - if (strcmp(oom_killer_env,"0")==0) - return 0; - if (strcmp(oom_killer_env,"1")==0) - return 1; + if (oom_killer_env != NULL) { + if (strcmp(oom_killer_env, "false") == 0) return 0; + if (strcmp(oom_killer_env, "true") == 0) return 1; + if (strcmp(oom_killer_env, "0") == 0) return 0; + if (strcmp(oom_killer_env, "1") == 0) return 1; } return 1; } int set_env_utilization_switch() { - char *utilization_env; + char* utilization_env; utilization_env = getenv("GPU_CORE_UTILIZATION_POLICY"); - if (utilization_env!=NULL){ - if ((strcmp(utilization_env,"FORCE") ==0 ) || (strcmp(utilization_env,"force") ==0)) + if (utilization_env != NULL) { + if ((strcmp(utilization_env, "FORCE") == 0) || (strcmp(utilization_env, "force") == 0)) return 1; - if ((strcmp(utilization_env,"DISABLE") ==0 ) || (strcmp(utilization_env,"disable") ==0 )) + if 
((strcmp(utilization_env, "DISABLE") == 0) || (strcmp(utilization_env, "disable") == 0)) return 2; } return 0; @@ -1082,7 +1124,7 @@ void try_create_shrreg() { shr_reg_file = MULTIPROCESS_SHARED_REGION_CACHE_DEFAULT; } // Initialize NVML BEFORE!! open it - //nvmlInit(); + // nvmlInit(); /* If you need sm modification, do it here */ /* ... set_sm_scale */ @@ -1103,9 +1145,8 @@ void try_create_shrreg() { if (lseek(fd, 0, SEEK_SET) != 0) { LOG_ERROR("Fail to reseek shrreg %s: errno=%d", shr_reg_file, errno); } - region_info.shared_region = (shared_region_t*) mmap( - NULL, SHARED_REGION_SIZE_MAGIC, - PROT_WRITE | PROT_READ, MAP_SHARED, fd, 0); + region_info.shared_region = (shared_region_t*)mmap(NULL, SHARED_REGION_SIZE_MAGIC, + PROT_WRITE | PROT_READ, MAP_SHARED, fd, 0); shared_region_t* region = region_info.shared_region; if (region == NULL) { LOG_ERROR("Fail to map shrreg %s: errno=%d", shr_reg_file, errno); @@ -1113,15 +1154,13 @@ void try_create_shrreg() { if (lockf(fd, F_LOCK, SHARED_REGION_SIZE_MAGIC) != 0) { LOG_ERROR("Fail to lock shrreg %s: errno=%d", shr_reg_file, errno); } - //put_device_info(); + // put_device_info(); int32_t init_flag = atomic_load_explicit(®ion->initialized_flag, memory_order_acquire); if (init_flag != MULTIPROCESS_SHARED_REGION_MAGIC_FLAG) { region->major_version = MAJOR_VERSION; region->minor_version = MINOR_VERSION; - do_init_device_memory_limits( - region->limit, CUDA_DEVICE_MAX_COUNT); - do_init_device_sm_limits( - region->sm_limit,CUDA_DEVICE_MAX_COUNT); + do_init_device_memory_limits(region->limit, CUDA_DEVICE_MAX_COUNT); + do_init_device_sm_limits(region->sm_limit, CUDA_DEVICE_MAX_COUNT); if (sem_init(®ion->sem, 1, 1) != 0) { LOG_ERROR("Fail to init sem %s: errno=%d", shr_reg_file, errno); } @@ -1134,37 +1173,39 @@ void try_create_shrreg() { atomic_store_explicit(®ion->recent_kernel, 2, memory_order_relaxed); atomic_store_explicit(®ion->proc_num, 0, memory_order_relaxed); region->priority = 1; - if 
(getenv(CUDA_TASK_PRIORITY_ENV)!=NULL) + if (getenv(CUDA_TASK_PRIORITY_ENV) != NULL) region->priority = atoi(getenv(CUDA_TASK_PRIORITY_ENV)); // Release barrier ensures all initialization is visible before flag is set atomic_thread_fence(memory_order_release); - atomic_store_explicit(®ion->initialized_flag, MULTIPROCESS_SHARED_REGION_MAGIC_FLAG, memory_order_release); + atomic_store_explicit(®ion->initialized_flag, MULTIPROCESS_SHARED_REGION_MAGIC_FLAG, + memory_order_release); } else { - if (region->major_version != MAJOR_VERSION || - region->minor_version != MINOR_VERSION) { - LOG_ERROR("The current version number %d.%d" - " is different from the file's version number %d.%d", - MAJOR_VERSION, MINOR_VERSION, - region->major_version, region->minor_version); + if (region->major_version != MAJOR_VERSION || region->minor_version != MINOR_VERSION) { + LOG_ERROR( + "The current version number %d.%d" + " is different from the file's version number %d.%d", + MAJOR_VERSION, MINOR_VERSION, region->major_version, region->minor_version); } uint64_t local_limits[CUDA_DEVICE_MAX_COUNT]; do_init_device_memory_limits(local_limits, CUDA_DEVICE_MAX_COUNT); int i; for (i = 0; i < CUDA_DEVICE_MAX_COUNT; ++i) { if (local_limits[i] != region->limit[i]) { - LOG_ERROR("Limit inconsistency detected for %dth device" - ", %lu expected, get %lu", + LOG_ERROR( + "Limit inconsistency detected for %dth device" + ", %lu expected, get %lu", i, local_limits[i], region->limit[i]); } } - do_init_device_sm_limits(local_limits,CUDA_DEVICE_MAX_COUNT); + do_init_device_sm_limits(local_limits, CUDA_DEVICE_MAX_COUNT); for (i = 0; i < CUDA_DEVICE_MAX_COUNT; ++i) { if (local_limits[i] != region->sm_limit[i]) { - LOG_INFO("SM limit inconsistency detected for %dth device" - ", %lu expected, get %lu", + LOG_INFO( + "SM limit inconsistency detected for %dth device" + ", %lu expected, get %lu", i, local_limits[i], region->sm_limit[i]); - // exit(1); + // exit(1); } } } @@ -1185,34 +1226,31 @@ void initialized() 
{ init_proc_slot_withlock(); } -void ensure_initialized() { - (void) pthread_once(®ion_info.init_status, initialized); -} +void ensure_initialized() { (void)pthread_once(®ion_info.init_status, initialized); } int update_host_pid() { int i; - for (i=0;iproc_num;i++){ - if (region_info.shared_region->procs[i].pid == getpid()){ - if (region_info.shared_region->procs[i].hostpid!=0) - pidfound=1; + for (i = 0; i < region_info.shared_region->proc_num; i++) { + if (region_info.shared_region->procs[i].pid == getpid()) { + if (region_info.shared_region->procs[i].hostpid != 0) pidfound = 1; } } return 0; } int set_host_pid(int hostpid) { - int i,j,found=0; - for (i=0;iproc_num;i++){ - if (region_info.shared_region->procs[i].pid == getpid()){ - LOG_INFO("SET PID= %d",hostpid); - found=1; + int i, j, found = 0; + for (i = 0; i < region_info.shared_region->proc_num; i++) { + if (region_info.shared_region->procs[i].pid == getpid()) { + LOG_INFO("SET PID= %d", hostpid); + found = 1; region_info.shared_region->procs[i].hostpid = hostpid; - for (j=0;jprocs[i].monitorused[j]=0; + for (j = 0; j < CUDA_DEVICE_MAX_COUNT; j++) + region_info.shared_region->procs[i].monitorused[j] = 0; } } if (!found) { - LOG_ERROR("HOST PID NOT FOUND. %d",hostpid); + LOG_ERROR("HOST PID NOT FOUND. 
%d", hostpid); return -1; } setspec(); @@ -1221,12 +1259,12 @@ int set_host_pid(int hostpid) { int set_current_device_sm_limit_scale(int dev, int scale) { ensure_initialized(); - if (region_info.shared_region->sm_init_flag==1) return 0; + if (region_info.shared_region->sm_init_flag == 1) return 0; if (dev < 0 || dev >= CUDA_DEVICE_MAX_COUNT) { LOG_ERROR("Illegal device id: %d", dev); } - LOG_INFO("dev %d new sm limit set mul by %d",dev,scale); - region_info.shared_region->sm_limit[dev]=region_info.shared_region->sm_limit[dev]*scale; + LOG_INFO("dev %d new sm limit set mul by %d", dev, scale); + region_info.shared_region->sm_limit[dev] = region_info.shared_region->sm_limit[dev] * scale; region_info.shared_region->sm_init_flag = 1; return 0; } @@ -1239,14 +1277,14 @@ int get_current_device_sm_limit(int dev) { return region_info.shared_region->sm_limit[dev]; } -int set_current_device_memory_limit(const int dev,size_t newlimit) { +int set_current_device_memory_limit(const int dev, size_t newlimit) { ensure_initialized(); if (dev < 0 || dev >= CUDA_DEVICE_MAX_COUNT) { LOG_ERROR("Illegal device id: %d", dev); } - LOG_INFO("dev %d new limit set to %ld",dev,newlimit); - region_info.shared_region->limit[dev]=newlimit; - return 0; + LOG_INFO("dev %d new limit set to %ld", dev, newlimit); + region_info.shared_region->limit[dev] = newlimit; + return 0; } uint64_t get_current_device_memory_limit(const int dev) { @@ -1254,7 +1292,7 @@ uint64_t get_current_device_memory_limit(const int dev) { if (dev < 0 || dev >= CUDA_DEVICE_MAX_COUNT) { LOG_ERROR("Illegal device id: %d", dev); } - return region_info.shared_region->limit[dev]; + return region_info.shared_region->limit[dev]; } uint64_t get_current_device_memory_monitor(const int dev) { @@ -1263,12 +1301,12 @@ uint64_t get_current_device_memory_monitor(const int dev) { LOG_ERROR("Illegal device id: %d", dev); } uint64_t result = get_gpu_memory_monitor(dev); -// result= nvml_get_device_memory_usage(dev); + // result= 
nvml_get_device_memory_usage(dev); return result; } uint64_t get_current_device_memory_usage(const int dev) { - clock_t start,finish; + clock_t start, finish; uint64_t result; start = clock(); ensure_initialized(); @@ -1276,50 +1314,44 @@ uint64_t get_current_device_memory_usage(const int dev) { LOG_ERROR("Illegal device id: %d", dev); } result = get_gpu_memory_usage(dev); -// result= nvml_get_device_memory_usage(dev); - finish=clock(); - LOG_DEBUG("get_current_device_memory_usage:tick=%lu result=%lu\n",finish-start,result); + // result= nvml_get_device_memory_usage(dev); + finish = clock(); + LOG_DEBUG("get_current_device_memory_usage:tick=%lu result=%lu\n", finish - start, result); return result; } -int get_current_priority() { - return region_info.shared_region->priority; -} +int get_current_priority() { return region_info.shared_region->priority; } -int get_recent_kernel(){ - return region_info.shared_region->recent_kernel; -} +int get_recent_kernel() { return region_info.shared_region->recent_kernel; } -int set_recent_kernel(int value){ - region_info.shared_region->recent_kernel=value; +int set_recent_kernel(int value) { + region_info.shared_region->recent_kernel = value; return 0; } int get_utilization_switch() { - if (env_utilization_switch==1) - return 1; - if (env_utilization_switch==2) - return 0; - return region_info.shared_region->utilization_switch; + if (env_utilization_switch == 1) return 1; + if (env_utilization_switch == 2) return 0; + return region_info.shared_region->utilization_switch; } -void suspend_all(){ +void suspend_all() { int i; - for (i=0;iproc_num;i++){ - LOG_INFO("Sending USR2 to %d",region_info.shared_region->procs[i].pid); - kill(region_info.shared_region->procs[i].pid,SIGUSR2); + for (i = 0; i < region_info.shared_region->proc_num; i++) { + LOG_INFO("Sending USR2 to %d", region_info.shared_region->procs[i].pid); + kill(region_info.shared_region->procs[i].pid, SIGUSR2); } } -void resume_all(){ +void resume_all() { int i; - for 
(i=0;iproc_num;i++){ - LOG_INFO("Sending USR1 to %d",region_info.shared_region->procs[i].pid); - kill(region_info.shared_region->procs[i].pid,SIGUSR1); + for (i = 0; i < region_info.shared_region->proc_num; i++) { + LOG_INFO("Sending USR1 to %d", region_info.shared_region->procs[i].pid); + kill(region_info.shared_region->procs[i].pid, SIGUSR1); } } -int wait_status_self(int status){ +int wait_status_self(int status) { // Fast path: use cached slot pointer (set during init_proc_slot_withlock) if (region_info.my_slot != NULL) { int32_t cur = atomic_load_explicit(®ion_info.my_slot->status, memory_order_acquire); @@ -1330,10 +1362,12 @@ int wait_status_self(int status){ int i; int proc_num = atomic_load_explicit(®ion_info.shared_region->proc_num, memory_order_acquire); int32_t my_pid = getpid(); - for (i=0; i < proc_num; i++) { - int32_t slot_pid = atomic_load_explicit(®ion_info.shared_region->procs[i].pid, memory_order_acquire); + for (i = 0; i < proc_num; i++) { + int32_t slot_pid = + atomic_load_explicit(®ion_info.shared_region->procs[i].pid, memory_order_acquire); if (slot_pid == my_pid) { - if (atomic_load_explicit(®ion_info.shared_region->procs[i].status, memory_order_acquire) == status) + if (atomic_load_explicit(®ion_info.shared_region->procs[i].status, + memory_order_acquire) == status) return 1; else return 0; @@ -1342,37 +1376,37 @@ int wait_status_self(int status){ return -1; } -int wait_status_all(int status){ +int wait_status_all(int status) { int i; int released = 1; - for (i=0;iproc_num;i++) { - LOG_INFO("i=%d pid=%d status=%d",i,region_info.shared_region->procs[i].pid,region_info.shared_region->procs[i].status); - if ((region_info.shared_region->procs[i].status!=status) && (region_info.shared_region->procs[i].pid!=getpid())) - released = 0; - } - LOG_INFO("Return released=%d",released); + for (i = 0; i < region_info.shared_region->proc_num; i++) { + LOG_INFO("i=%d pid=%d status=%d", i, region_info.shared_region->procs[i].pid, + 
region_info.shared_region->procs[i].status); + if ((region_info.shared_region->procs[i].status != status) && + (region_info.shared_region->procs[i].pid != getpid())) + released = 0; + } + LOG_INFO("Return released=%d", released); return released; } -shrreg_proc_slot_t *find_proc_by_hostpid(int hostpid) { +shrreg_proc_slot_t* find_proc_by_hostpid(int hostpid) { int i; - for (i=0;iproc_num;i++) { - if (region_info.shared_region->procs[i].hostpid == hostpid) + for (i = 0; i < region_info.shared_region->proc_num; i++) { + if (region_info.shared_region->procs[i].hostpid == hostpid) return ®ion_info.shared_region->procs[i]; } return NULL; } - -int comparelwr(const char *s1,char *s2){ - if ((s1==NULL) || (s2==NULL)) - return 1; - if (strlen(s1)!=strlen(s2)) { +int comparelwr(const char* s1, char* s2) { + if ((s1 == NULL) || (s2 == NULL)) return 1; + if (strlen(s1) != strlen(s2)) { return 1; } int i; - for (i=0;i -#include -#include -#include -#include +#include +#include #include +#include +#include +#include +#include #include #include -#include -#include -#include -#include #include +#include #include -#include -#include -#include +#include +#include +#include +#include +#include -#include "static_config.h" #include "include/log_utils.h" +#include "static_config.h" - -#define MULTIPROCESS_SHARED_REGION_MAGIC_FLAG 19920718 -#define MULTIPROCESS_SHARED_REGION_CACHE_ENV "CUDA_DEVICE_MEMORY_SHARED_CACHE" -#define MULTIPROCESS_SHARED_REGION_CACHE_DEFAULT "/tmp/cudevshr.cache" +#define MULTIPROCESS_SHARED_REGION_MAGIC_FLAG 19920718 +#define MULTIPROCESS_SHARED_REGION_CACHE_ENV "CUDA_DEVICE_MEMORY_SHARED_CACHE" +#define MULTIPROCESS_SHARED_REGION_CACHE_DEFAULT "/tmp/cudevshr.cache" #define ENV_OVERRIDE_FILE "/overrideEnv" #define CUDA_TASK_PRIORITY_ENV "CUDA_TASK_PRIORITY" @@ -34,7 +33,7 @@ #define CUDA_DEVICE_MEMORY_UPDATE_FAILURE 1 #define MEMORY_LIMIT_TOLERATION_RATE 1.1 -#define SHARED_REGION_SIZE_MAGIC sizeof(shared_region_t) +#define SHARED_REGION_SIZE_MAGIC 
sizeof(shared_region_t) #define SHARED_REGION_MAX_PROCESS_NUM 1024 // macros for debugging @@ -51,7 +50,7 @@ #define SEQ_AFTER_DEC 9 #ifndef SEQ_POINT_MARK - #define SEQ_POINT_MARK(s) +#define SEQ_POINT_MARK(s) #endif #define FACTOR 32 @@ -76,13 +75,13 @@ typedef struct { } device_util_t; typedef struct { - _Atomic int32_t pid; // Atomic to detect slot allocation + _Atomic int32_t pid; // Atomic to detect slot allocation _Atomic int32_t hostpid; device_memory_t used[CUDA_DEVICE_MAX_COUNT]; _Atomic uint64_t monitorused[CUDA_DEVICE_MAX_COUNT]; device_util_t device_util[CUDA_DEVICE_MAX_COUNT]; _Atomic int32_t status; - _Atomic uint64_t seqlock; // Sequence lock for consistent snapshots + _Atomic uint64_t seqlock; // Sequence lock for consistent snapshots uint64_t unused[2]; } shrreg_proc_slot_t; @@ -112,24 +111,23 @@ typedef struct { int32_t pid; int fd; pthread_once_t init_status; - shared_region_t* shared_region; - uint64_t last_kernel_time; // cache for current process - shrreg_proc_slot_t* my_slot; // Cached pointer to this process's slot (lock-free access) + shared_region_t *shared_region; + uint64_t last_kernel_time; // cache for current process + shrreg_proc_slot_t *my_slot; // Cached pointer to this process's slot (lock-free access) } shared_region_info_t; - typedef struct { - size_t tid; - CUcontext ctx; + size_t tid; + CUcontext ctx; } thread_context_map; void ensure_initialized(); int get_current_device_sm_limit(int dev); uint64_t get_current_device_memory_limit(const int dev); -int set_current_device_memory_limit(const int dev,size_t newlimit); -int set_current_device_sm_limit(int dev,int scale); -int set_current_device_sm_limit_scale(int dev,int scale); +int set_current_device_memory_limit(const int dev, size_t newlimit); +int set_current_device_sm_limit(int dev, int scale); +int set_current_device_sm_limit_scale(int dev, int scale); int update_host_pid(); int set_host_pid(int hostpid); @@ -144,11 +142,11 @@ int get_recent_kernel(); int 
get_utilization_switch(); int set_env_utilization_switch(); -int set_gpu_device_memory_monitor(int32_t pid,int dev,size_t monitor); -int set_gpu_device_sm_utilization(int32_t pid,int dev, unsigned int smUtil); +int set_gpu_device_memory_monitor(int32_t pid, int dev, size_t monitor); +int set_gpu_device_sm_utilization(int32_t pid, int dev, unsigned int smUtil); int init_gpu_device_utilization(); -int add_gpu_device_memory_usage(int32_t pid,int dev,size_t usage,int type); -int rm_gpu_device_memory_usage(int32_t pid,int dev,size_t usage,int type); +int add_gpu_device_memory_usage(int32_t pid, int dev, size_t usage, int type); +int rm_gpu_device_memory_usage(int32_t pid, int dev, size_t usage, int type); shrreg_proc_slot_t *find_proc_by_hostpid(int hostpid); int active_oom_killer(); @@ -158,10 +156,10 @@ int shrreg_major_version(); int shrreg_minor_version(); int init_device_info(); -//void inc_current_device_memory_usage(const int dev, const uint64_t usage); -//void decl_current_device_memory_usage(const int dev, const uint64_t usage); +// void inc_current_device_memory_usage(const int dev, const uint64_t usage); +// void decl_current_device_memory_usage(const int dev, const uint64_t usage); -//int oom_check(const int dev,int addon); +// int oom_check(const int dev,int addon); void lock_shrreg(); void unlock_shrreg(); @@ -169,9 +167,9 @@ void unlock_shrreg(); int lock_postinit(); // Returns 1 on success, 0 on timeout void unlock_postinit(); -//Setspec of the corresponding device +// Setspec of the corresponding device int setspec(); -//Remove quit process +// Remove quit process void suspend_all(); void resume_all(); @@ -180,7 +178,7 @@ int wait_status_all(int status); void print_all(); int load_env_from_file(char *filename); -int comparelwr(const char *s1,char *s2); +int comparelwr(const char *s1, char *s2); int put_device_info(); unsigned int nvml_to_cuda_map(unsigned int nvmldev); unsigned int cuda_to_nvml_map(unsigned int cudadev); diff --git 
a/src/multiprocess/multiprocess_utilization_watcher.c b/src/multiprocess/multiprocess_utilization_watcher.c index b4620d30..c80095be 100644 --- a/src/multiprocess/multiprocess_utilization_watcher.c +++ b/src/multiprocess/multiprocess_utilization_watcher.c @@ -1,29 +1,27 @@ -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include +#include "multiprocess/multiprocess_utilization_watcher.h" #include -#include "include/nvml_prefix.h" +#include +#include #include +#include +#include +#include +#include +#include +#include +#include +#include #include +#include #include +#include +#include -#include "multiprocess/multiprocess_memory_limit.h" -#include "multiprocess/multiprocess_utilization_watcher.h" #include "include/log_utils.h" #include "include/nvml_override.h" - +#include "include/nvml_prefix.h" +#include "multiprocess/multiprocess_memory_limit.h" static int g_sm_num[CUDA_DEVICE_MAX_COUNT]; static int g_max_thread_per_sm[CUDA_DEVICE_MAX_COUNT]; @@ -37,89 +35,83 @@ static int cached_sm_limit[CUDA_DEVICE_MAX_COUNT] = {0}; static int cached_util_switch = 0; void rate_limiter(int grids, int blocks) { - CUdevice current_device; - CUresult res = cuCtxGetDevice(¤t_device); - int device_id = (res == CUDA_SUCCESS) ? 
(int)current_device : 0; - - int64_t before_cuda_cores = 0; - int64_t after_cuda_cores = 0; - int64_t kernel_size = grids; - - /* Fast exit using cached values — no shared memory access needed */ - if (cached_sm_limit[device_id] >= 100 || cached_sm_limit[device_id] == 0) { - return; - } - if (cached_util_switch == 0) { - return; - } - - while (get_recent_kernel()<0) { - sleep(1); - } - set_recent_kernel(2); - - do { -CHECK: - before_cuda_cores = g_cur_cuda_cores[device_id]; - if (before_cuda_cores < 0) { - nanosleep(&g_cycle, NULL); - goto CHECK; - } - after_cuda_cores = before_cuda_cores - kernel_size; - } while (!CAS(&g_cur_cuda_cores[device_id], before_cuda_cores, after_cuda_cores)); + CUdevice current_device; + CUresult res = cuCtxGetDevice(¤t_device); + int device_id = (res == CUDA_SUCCESS) ? (int)current_device : 0; + + int64_t before_cuda_cores = 0; + int64_t after_cuda_cores = 0; + int64_t kernel_size = grids; + + /* Fast exit using cached values — no shared memory access needed */ + if (cached_sm_limit[device_id] >= 100 || cached_sm_limit[device_id] == 0) { + return; + } + if (cached_util_switch == 0) { + return; + } + + while (get_recent_kernel() < 0) { + sleep(1); + } + set_recent_kernel(2); + + do { + CHECK: + before_cuda_cores = g_cur_cuda_cores[device_id]; + if (before_cuda_cores < 0) { + nanosleep(&g_cycle, NULL); + goto CHECK; + } + after_cuda_cores = before_cuda_cores - kernel_size; + } while (!CAS(&g_cur_cuda_cores[device_id], before_cuda_cores, after_cuda_cores)); } static void change_token(int64_t delta, int device_id) { - int64_t cuda_cores_before = 0, cuda_cores_after = 0; + int64_t cuda_cores_before = 0, cuda_cores_after = 0; - LOG_DEBUG("device %d: delta: %ld, curr: %ld", device_id, delta, g_cur_cuda_cores[device_id]); - do { - cuda_cores_before = g_cur_cuda_cores[device_id]; - cuda_cores_after = cuda_cores_before + delta; + LOG_DEBUG("device %d: delta: %ld, curr: %ld", device_id, delta, g_cur_cuda_cores[device_id]); + do { + 
cuda_cores_before = g_cur_cuda_cores[device_id]; + cuda_cores_after = cuda_cores_before + delta; - if (cuda_cores_after > g_total_cuda_cores[device_id]) { - cuda_cores_after = g_total_cuda_cores[device_id]; - } - } while (!CAS(&g_cur_cuda_cores[device_id], cuda_cores_before, cuda_cores_after)); + if (cuda_cores_after > g_total_cuda_cores[device_id]) { + cuda_cores_after = g_total_cuda_cores[device_id]; + } + } while (!CAS(&g_cur_cuda_cores[device_id], cuda_cores_before, cuda_cores_after)); } static int64_t delta(int up_limit, int user_current, int64_t share, int device_id) { - int utilization_diff = - abs(up_limit - user_current) < 5 ? 5 : abs(up_limit - user_current); - int64_t increment = - (int64_t)g_sm_num[device_id] * (int64_t)g_sm_num[device_id] * - (int64_t)g_max_thread_per_sm[device_id] * (int64_t)utilization_diff / 2560; - - /* Accelerate cuda cores allocation when utilization vary widely */ - if (utilization_diff > up_limit / 2) { - increment = increment * utilization_diff * 2 / (up_limit + 1); - } - - if (user_current <= up_limit) { - share = (share + increment) > g_total_cuda_cores[device_id] - ? g_total_cuda_cores[device_id] - : (share + increment); - } else { - share = (share - increment) < 0 ? 0 : (share - increment); - } - - return share; + int utilization_diff = abs(up_limit - user_current) < 5 ? 5 : abs(up_limit - user_current); + int64_t increment = (int64_t)g_sm_num[device_id] * (int64_t)g_sm_num[device_id] * + (int64_t)g_max_thread_per_sm[device_id] * (int64_t)utilization_diff / 2560; + + /* Accelerate cuda cores allocation when utilization vary widely */ + if (utilization_diff > up_limit / 2) { + increment = increment * utilization_diff * 2 / (up_limit + 1); + } + + if (user_current <= up_limit) { + share = (share + increment) > g_total_cuda_cores[device_id] ? g_total_cuda_cores[device_id] + : (share + increment); + } else { + share = (share - increment) < 0 ? 
0 : (share - increment); + } + + return share; } -unsigned int nvml_to_cuda_map(unsigned int nvmldev){ +unsigned int nvml_to_cuda_map(unsigned int nvmldev) { unsigned int devcount; CHECK_NVML_API(nvmlDeviceGetCount_v2(&devcount)); - int i=0; - for (i=0;imonitorused[cudadev] = infos[i].usedGpuMemory; - } + int devi, cudadev; + for (devi = 0; devi < nvmlCounts; devi++) { + uint64_t sum = 0; + infcount = SHARED_REGION_MAX_PROCESS_NUM; + shrreg_proc_slot_t *proc; + cudadev = nvml_to_cuda_map((unsigned int)(devi)); + if (cudadev < 0) continue; + userutil[cudadev] = 0; + nvmlDevice_t device; + CHECK_NVML_API(nvmlDeviceGetHandleByIndex(cudadev, &device)); + + // OPTIMIZATION: Do slow NVML queries WITHOUT holding lock + // This prevents blocking memory allocation operations + + // Get Memory for container + nvmlReturn_t res = nvmlDeviceGetComputeRunningProcesses(device, &infcount, infos); + + // Get SM util for container + gettimeofday(&cur, NULL); + microsec = (cur.tv_sec - 1) * 1000UL * 1000UL + cur.tv_usec; + nvmlProcessUtilizationSample_t processes_sample[SHARED_REGION_MAX_PROCESS_NUM]; + unsigned int processes_num = SHARED_REGION_MAX_PROCESS_NUM; + nvmlReturn_t res2 = + nvmlDeviceGetProcessUtilization(device, processes_sample, &processes_num, microsec); + + // Now acquire lock only for the brief period needed to update shared memory + lock_shrreg(); + + if (res == NVML_SUCCESS) { + for (i = 0; i < infcount; i++) { + proc = find_proc_by_hostpid(infos[i].pid); + if (proc != NULL) { + proc->monitorused[cudadev] = infos[i].usedGpuMemory; + } + } } - } - - if (res2 == NVML_SUCCESS) { - for (i=0; idevice_util[cudadev].sm_util = processes_sample[i].smUtil; - } + + if (res2 == NVML_SUCCESS) { + for (i = 0; i < processes_num; i++) { + proc = find_proc_by_hostpid(processes_sample[i].pid); + if (proc != NULL) { + sum += processes_sample[i].smUtil; + proc->device_util[cudadev].sm_util = processes_sample[i].smUtil; + } + } } - } - unlock_shrreg(); + unlock_shrreg(); - if (sum < 0) 
- sum = 0; - userutil[cudadev] = sum; + if (sum < 0) sum = 0; + userutil[cudadev] = sum; } return 0; } -void* utilization_watcher() { +void *utilization_watcher() { nvmlInit(); int userutil[CUDA_DEVICE_MAX_COUNT]; int sysprocnum; @@ -222,17 +213,16 @@ void* utilization_watcher() { ensure_initialized(); - while (1){ + while (1) { nanosleep(&g_wait, NULL); - if (pidfound==0) { - update_host_pid(); - if (pidfound==0) - continue; + if (pidfound == 0) { + update_host_pid(); + if (pidfound == 0) continue; } cached_util_switch = get_utilization_switch(); LOG_INFO("init_utilization_watcher: util_switch=%d", cached_util_switch); init_gpu_device_utilization(); - get_used_gpu_utilization(userutil,&sysprocnum); + get_used_gpu_utilization(userutil, &sysprocnum); // Calculate independently for each device for (unsigned int dev = 0; dev < device_count && dev < CUDA_DEVICE_MAX_COUNT; dev++) { @@ -241,17 +231,17 @@ void* utilization_watcher() { } if ((share[dev] == g_total_cuda_cores[dev]) && (g_cur_cuda_cores[dev] < 0)) { - g_total_cuda_cores[dev] *= 2; - share[dev] = g_total_cuda_cores[dev]; + g_total_cuda_cores[dev] *= 2; + share[dev] = g_total_cuda_cores[dev]; } if ((userutil[dev] <= 100) && (userutil[dev] >= 0)) { - share[dev] = delta(cached_sm_limit[dev], userutil[dev], share[dev], dev); - change_token(share[dev], dev); + share[dev] = delta(cached_sm_limit[dev], userutil[dev], share[dev], dev); + change_token(share[dev], dev); } - LOG_INFO("device %d: userutil=%d currentcores=%ld total=%ld limit=%d share=%ld\n", - dev, userutil[dev], g_cur_cuda_cores[dev], g_total_cuda_cores[dev], + LOG_INFO("device %d: userutil=%d currentcores=%ld total=%ld limit=%d share=%ld\n", dev, + userutil[dev], g_cur_cuda_cores[dev], g_total_cuda_cores[dev], cached_sm_limit[dev], share[dev]); } } @@ -282,4 +272,3 @@ void init_utilization_watcher() { } return; } - diff --git a/src/multiprocess/multiprocess_utilization_watcher.h b/src/multiprocess/multiprocess_utilization_watcher.h index 
c7411f3b..791a907d 100644 --- a/src/multiprocess/multiprocess_utilization_watcher.h +++ b/src/multiprocess/multiprocess_utilization_watcher.h @@ -16,7 +16,6 @@ static const struct timespec g_wait = { .tv_nsec = 120 * MILLISEC, }; - void rate_limiter(int grids, int blocks); void init_utilization_watcher(); void* utilization_watcher(); diff --git a/src/multiprocess/shrreg_tool.c b/src/multiprocess/shrreg_tool.c old mode 100755 new mode 100644 index 78c14016..14f6caca --- a/src/multiprocess/shrreg_tool.c +++ b/src/multiprocess/shrreg_tool.c @@ -1,62 +1,55 @@ +#include #include -#include -#include #include -#include +#include #include +#include #include "include/memory_limit.h" - void create_new() { load_env_from_file(ENV_OVERRIDE_FILE); umask(000); - char* shrreg_file = getenv(MULTIPROCESS_SHARED_REGION_CACHE_ENV); + char* shrreg_file = getenv(MULTIPROCESS_SHARED_REGION_CACHE_ENV); if (shrreg_file == NULL) { shrreg_file = MULTIPROCESS_SHARED_REGION_CACHE_DEFAULT; } int fd = open(shrreg_file, O_RDWR | O_CREAT | O_TRUNC, 0666); if (fd < 0) { - LOG_ERROR("Fail to create new shrreg file\n"); + LOG_ERROR("Fail to create new shrreg file\n"); assert(0); } close(fd); ensure_initialized(); } - -void print_shared_region(){ +void print_shared_region() { ensure_initialized(); print_all(); } - -void send_stop_signal(){ +void send_stop_signal() { ensure_initialized(); suspend_all(); - while (1){ - if (wait_status_all(2)) - break; + while (1) { + if (wait_status_all(2)) break; sleep(1); } } -void send_resume_signal(){ +void send_resume_signal() { ensure_initialized(); resume_all(); - while (1){ - if (wait_status_all(1)) - break; + while (1) { + if (wait_status_all(1)) break; sleep(1); } } int main(int argc, char* argv[]) { - int k; + int k; if (argc > 1 && strcmp(argv[1], "--help") == 0) { - printf( - "--create_new Create new shared region file\n" - ); + printf("--create_new Create new shared region file\n"); return 0; } @@ -65,25 +58,22 @@ int main(int argc, char* argv[]) { if 
(strcmp(arg, "--create_new") == 0) { create_new(); } - if (strcmp(arg, "--suspend") == 0){ + if (strcmp(arg, "--suspend") == 0) { send_stop_signal(); } - if (strcmp(arg, "--print") == 0){ + if (strcmp(arg, "--print") == 0) { print_shared_region(); } - if (strcmp(arg, "--resume") == 0){ + if (strcmp(arg, "--resume") == 0) { send_resume_signal(); } - if (strcmp(arg, "--print") == 0){ + if (strcmp(arg, "--print") == 0) { print_shared_region(); } - if (strcmp(arg, "--version") == 0){ - printf("shrreg size: %ld, version %d.%d\n", - sizeof(shared_region_t), - shrreg_major_version(), - shrreg_minor_version()); + if (strcmp(arg, "--version") == 0) { + printf("shrreg size: %ld, version %d.%d\n", sizeof(shared_region_t), + shrreg_major_version(), shrreg_minor_version()); } } return 0; } - diff --git a/src/nvml/hook.c b/src/nvml/hook.c index 0bc16d31..67ab6046 100644 --- a/src/nvml/hook.c +++ b/src/nvml/hook.c @@ -1,6 +1,7 @@ -#include #include #include +#include + #include "include/libnvml_hook.h" #include "include/nvml-subset.h" #include "include/utils.h" @@ -262,24 +263,22 @@ extern fp_dlsym real_dlsym; extern int virtual_nvml_devices; extern int cuda_to_nvml_map_array[CUDA_DEVICE_MAX_COUNT]; -nvmlReturn_t nvmlDeviceGetIndex(nvmlDevice_t device, unsigned int *index) { +nvmlReturn_t nvmlDeviceGetIndex(nvmlDevice_t device, unsigned int* index) { return NVML_OVERRIDE_CALL(nvml_library_entry, nvmlDeviceGetIndex, device, index); } - void load_nvml_libraries() { - void *table = NULL; + void* table = NULL; char driver_filename[FILENAME_MAX]; if (real_dlsym == NULL) { - real_dlsym = dlvsym(RTLD_NEXT,"dlsym","GLIBC_2.2.5"); + real_dlsym = dlvsym(RTLD_NEXT, "dlsym", "GLIBC_2.2.5"); if (real_dlsym == NULL) { - void *libc_handle = dlopen("libc.so.6", RTLD_LAZY); + void* libc_handle = dlopen("libc.so.6", RTLD_LAZY); if (libc_handle) { real_dlsym = dlsym(libc_handle, "dlsym"); } - if (real_dlsym == NULL) - LOG_ERROR("real dlsym not found"); + if (real_dlsym == NULL) LOG_ERROR("real 
dlsym not found"); } } snprintf(driver_filename, FILENAME_MAX - 1, "%s", "libnvidia-ml.so.1"); @@ -287,15 +286,14 @@ void load_nvml_libraries() { table = dlopen(driver_filename, RTLD_NOW | RTLD_NODELETE); if (!table) { - LOG_WARN("can't find library %s", driver_filename); + LOG_WARN("can't find library %s", driver_filename); } int i; for (i = 0; i < NVML_ENTRY_END; i++) { - LOG_DEBUG("loading %s:%d",nvml_library_entry[i].name,i); + LOG_DEBUG("loading %s:%d", nvml_library_entry[i].name, i); nvml_library_entry[i].fn_ptr = real_dlsym(table, nvml_library_entry[i].name); if (!nvml_library_entry[i].fn_ptr) { - LOG_INFO("can't find function %s in %s", nvml_library_entry[i].name, - driver_filename); + LOG_INFO("can't find function %s in %s", nvml_library_entry[i].name, driver_filename); } } LOG_INFO("loaded nvml libraries"); @@ -308,14 +306,12 @@ void nvml_preInit() { load_nvml_libraries(); for (int i = 0; i < CUDA_DEVICE_MAX_COUNT; i++) { cuda_to_nvml_map_array[i] = i; - } + } } -void nvml_postInit() { - init_device_info(); -} +void nvml_postInit() { init_device_info(); } -nvmlReturn_t _nvmlDeviceGetMemoryInfo(nvmlDevice_t device,void* memory,int version) { +nvmlReturn_t _nvmlDeviceGetMemoryInfo(nvmlDevice_t device, void* memory, int version) { LOG_DEBUG("into nvmlDeviceGetMemoryInfo"); if (memory == NULL) { return NVML_SUCCESS; @@ -324,12 +320,16 @@ nvmlReturn_t _nvmlDeviceGetMemoryInfo(nvmlDevice_t device,void* memory,int versi switch (version) { case 1: - CHECK_NVML_API(NVML_OVERRIDE_CALL(nvml_library_entry,nvmlDeviceGetMemoryInfo, device, memory)); - LOG_DEBUG("origin_free=%lld total=%lld\n", ((nvmlMemory_t*)memory)->free, ((nvmlMemory_t*)memory)->total); + CHECK_NVML_API( + NVML_OVERRIDE_CALL(nvml_library_entry, nvmlDeviceGetMemoryInfo, device, memory)); + LOG_DEBUG("origin_free=%lld total=%lld\n", ((nvmlMemory_t*)memory)->free, + ((nvmlMemory_t*)memory)->total); break; case 2: - CHECK_NVML_API(NVML_OVERRIDE_CALL(nvml_library_entry,nvmlDeviceGetMemoryInfo_v2, device, 
(nvmlMemory_v2_t *)memory)); - LOG_DEBUG("origin_free=%lld total=%lld\n", ((nvmlMemory_v2_t*)memory)->free, ((nvmlMemory_v2_t*)memory)->total); + CHECK_NVML_API(NVML_OVERRIDE_CALL(nvml_library_entry, nvmlDeviceGetMemoryInfo_v2, + device, (nvmlMemory_v2_t*)memory)); + LOG_DEBUG("origin_free=%lld total=%lld\n", ((nvmlMemory_v2_t*)memory)->free, + ((nvmlMemory_v2_t*)memory)->total); break; default: return NVML_ERROR_INVALID_ARGUMENT; @@ -345,138 +345,138 @@ nvmlReturn_t _nvmlDeviceGetMemoryInfo(nvmlDevice_t device,void* memory,int versi LOG_DEBUG("usage=%ld limit=%ld monitor=%ld", usage, limit, monitor); if (limit == 0) { switch (version) { - case 1: - ((nvmlMemory_t*)memory)->used = usage; - return NVML_SUCCESS; - case 2: - ((nvmlMemory_v2_t *)memory)->used = usage; - return NVML_SUCCESS; + case 1: + ((nvmlMemory_t*)memory)->used = usage; + return NVML_SUCCESS; + case 2: + ((nvmlMemory_v2_t*)memory)->used = usage; + return NVML_SUCCESS; } } else { switch (version) { - case 1: - ((nvmlMemory_t*)memory)->free = (limit-usage); - ((nvmlMemory_t*)memory)->total = limit; - ((nvmlMemory_t*)memory)->used = usage; - return NVML_SUCCESS; - case 2: - ((nvmlMemory_v2_t *)memory)->free = (limit-usage); - ((nvmlMemory_v2_t *)memory)->total = limit; - ((nvmlMemory_v2_t *)memory)->used = usage; - return NVML_SUCCESS; - } + case 1: + ((nvmlMemory_t*)memory)->free = (limit - usage); + ((nvmlMemory_t*)memory)->total = limit; + ((nvmlMemory_t*)memory)->used = usage; + return NVML_SUCCESS; + case 2: + ((nvmlMemory_v2_t*)memory)->free = (limit - usage); + ((nvmlMemory_v2_t*)memory)->total = limit; + ((nvmlMemory_v2_t*)memory)->used = usage; + return NVML_SUCCESS; + } } return NVML_SUCCESS; } nvmlReturn_t nvmlDeviceGetMemoryInfo(nvmlDevice_t device, nvmlMemory_t* memory) { - return _nvmlDeviceGetMemoryInfo(device,memory,1); + return _nvmlDeviceGetMemoryInfo(device, memory, 1); } nvmlReturn_t nvmlDeviceGetMemoryInfo_v2(nvmlDevice_t device, nvmlMemory_v2_t* memory) { - return 
_nvmlDeviceGetMemoryInfo(device,memory,2); + return _nvmlDeviceGetMemoryInfo(device, memory, 2); } - -nvmlReturn_t nvmlDeviceGetNvLinkRemotePciInfo_v2 ( nvmlDevice_t device, unsigned int link, nvmlPciInfo_t* pci ) { - nvmlReturn_t res = NVML_OVERRIDE_CALL(nvml_library_entry,nvmlDeviceGetNvLinkRemotePciInfo_v2,device,link,pci); +nvmlReturn_t nvmlDeviceGetNvLinkRemotePciInfo_v2(nvmlDevice_t device, unsigned int link, + nvmlPciInfo_t* pci) { + nvmlReturn_t res = NVML_OVERRIDE_CALL(nvml_library_entry, nvmlDeviceGetNvLinkRemotePciInfo_v2, + device, link, pci); return res; } -nvmlReturn_t nvmlDeviceGetNvLinkRemotePciInfo ( nvmlDevice_t device, unsigned int link, nvmlPciInfo_t* pci ) { - nvmlReturn_t res = NVML_OVERRIDE_CALL(nvml_library_entry,nvmlDeviceGetNvLinkRemotePciInfo,device,link,pci); +nvmlReturn_t nvmlDeviceGetNvLinkRemotePciInfo(nvmlDevice_t device, unsigned int link, + nvmlPciInfo_t* pci) { + nvmlReturn_t res = + NVML_OVERRIDE_CALL(nvml_library_entry, nvmlDeviceGetNvLinkRemotePciInfo, device, link, pci); return res; } -nvmlReturn_t nvmlDeviceGetHandleByIndex ( unsigned int index, nvmlDevice_t* device ){ +nvmlReturn_t nvmlDeviceGetHandleByIndex(unsigned int index, nvmlDevice_t* device) { nvmlReturn_t res; - LOG_DEBUG("nvmlDeviceGetHandleByIndex index=%u",index); - res = NVML_OVERRIDE_CALL_NO_LOG(nvml_library_entry,nvmlDeviceGetHandleByIndex,index,device); + LOG_DEBUG("nvmlDeviceGetHandleByIndex index=%u", index); + res = NVML_OVERRIDE_CALL_NO_LOG(nvml_library_entry, nvmlDeviceGetHandleByIndex, index, device); return res; } -nvmlReturn_t nvmlDeviceGetHandleByIndex_v2 ( unsigned int index, nvmlDevice_t* device ){ +nvmlReturn_t nvmlDeviceGetHandleByIndex_v2(unsigned int index, nvmlDevice_t* device) { nvmlReturn_t res; - LOG_DEBUG("nvmlDeviceGetHandleByIndex_v2 index=%u",index); - res = NVML_OVERRIDE_CALL_NO_LOG(nvml_library_entry,nvmlDeviceGetHandleByIndex_v2,index,device); + LOG_DEBUG("nvmlDeviceGetHandleByIndex_v2 index=%u", index); + res = + 
NVML_OVERRIDE_CALL_NO_LOG(nvml_library_entry, nvmlDeviceGetHandleByIndex_v2, index, device); return res; } -nvmlReturn_t nvmlDeviceGetHandleByPciBusId_v2 ( const char* pciBusId, nvmlDevice_t* device ) { - LOG_INFO("NVML DeviceGetHandleByPciBusID_v2 %s",pciBusId); - return NVML_OVERRIDE_CALL(nvml_library_entry,nvmlDeviceGetHandleByPciBusId_v2,pciBusId,device); +nvmlReturn_t nvmlDeviceGetHandleByPciBusId_v2(const char* pciBusId, nvmlDevice_t* device) { + LOG_INFO("NVML DeviceGetHandleByPciBusID_v2 %s", pciBusId); + return NVML_OVERRIDE_CALL(nvml_library_entry, nvmlDeviceGetHandleByPciBusId_v2, pciBusId, + device); } - -nvmlReturn_t nvmlDeviceGetHandleByPciBusId(const char *pciBusId, - nvmlDevice_t *device) { - LOG_DEBUG("NVML DeviceGetHandleByPciBusId %s",pciBusId); - return NVML_OVERRIDE_CALL(nvml_library_entry, nvmlDeviceGetHandleByPciBusId, - pciBusId, device); +nvmlReturn_t nvmlDeviceGetHandleByPciBusId(const char* pciBusId, nvmlDevice_t* device) { + LOG_DEBUG("NVML DeviceGetHandleByPciBusId %s", pciBusId); + return NVML_OVERRIDE_CALL(nvml_library_entry, nvmlDeviceGetHandleByPciBusId, pciBusId, device); } -nvmlReturn_t nvmlDeviceGetHandleBySerial ( const char* serial, nvmlDevice_t* device ) { - LOG_INFO("NVML DeviceGetHandleBySerial Not supported %s",serial); - return NVML_OVERRIDE_CALL(nvml_library_entry,nvmlDeviceGetHandleBySerial,serial,device); +nvmlReturn_t nvmlDeviceGetHandleBySerial(const char* serial, nvmlDevice_t* device) { + LOG_INFO("NVML DeviceGetHandleBySerial Not supported %s", serial); + return NVML_OVERRIDE_CALL(nvml_library_entry, nvmlDeviceGetHandleBySerial, serial, device); } -nvmlReturn_t nvmlDeviceGetHandleByUUID ( const char* uuid, nvmlDevice_t* device ) { - nvmlReturn_t res = NVML_OVERRIDE_CALL(nvml_library_entry,nvmlDeviceGetHandleByUUID,uuid,device); +nvmlReturn_t nvmlDeviceGetHandleByUUID(const char* uuid, nvmlDevice_t* device) { + nvmlReturn_t res = + NVML_OVERRIDE_CALL(nvml_library_entry, nvmlDeviceGetHandleByUUID, uuid, device); 
return res; } -nvmlReturn_t nvmlDeviceGetCount ( unsigned int* deviceCount ) { - return NVML_OVERRIDE_CALL(nvml_library_entry,nvmlDeviceGetCount_v2,deviceCount); +nvmlReturn_t nvmlDeviceGetCount(unsigned int* deviceCount) { + return NVML_OVERRIDE_CALL(nvml_library_entry, nvmlDeviceGetCount_v2, deviceCount); } -nvmlReturn_t nvmlDeviceGetCount_v2 ( unsigned int* deviceCount ) { - return NVML_OVERRIDE_CALL(nvml_library_entry,nvmlDeviceGetCount_v2,deviceCount); +nvmlReturn_t nvmlDeviceGetCount_v2(unsigned int* deviceCount) { + return NVML_OVERRIDE_CALL(nvml_library_entry, nvmlDeviceGetCount_v2, deviceCount); } -nvmlReturn_t nvmlInitWithFlags( unsigned int flags ) { +nvmlReturn_t nvmlInitWithFlags(unsigned int flags) { LOG_DEBUG("nvmlInitWithFlags") - pthread_once(&init_virtual_map_pre_flag, (void(*) (void))nvml_preInit); - nvmlReturn_t res = NVML_OVERRIDE_CALL(nvml_library_entry, nvmlInitWithFlags,flags); - pthread_once(&init_virtual_map_post_flag,(void (*)(void))nvml_postInit); + pthread_once(&init_virtual_map_pre_flag, (void (*)(void))nvml_preInit); + nvmlReturn_t res = NVML_OVERRIDE_CALL(nvml_library_entry, nvmlInitWithFlags, flags); + pthread_once(&init_virtual_map_post_flag, (void (*)(void))nvml_postInit); return res; } nvmlReturn_t nvmlInit(void) { LOG_DEBUG("nvmlInit") - pthread_once(&init_virtual_map_pre_flag,(void (*)(void))nvml_preInit); + pthread_once(&init_virtual_map_pre_flag, (void (*)(void))nvml_preInit); nvmlReturn_t res = NVML_OVERRIDE_CALL(nvml_library_entry, nvmlInit_v2); - pthread_once(&init_virtual_map_post_flag,(void (*)(void))nvml_postInit); + pthread_once(&init_virtual_map_post_flag, (void (*)(void))nvml_postInit); return res; } nvmlReturn_t nvmlInit_v2(void) { LOG_DEBUG("nvmlInit_v2"); - pthread_once(&init_virtual_map_pre_flag,(void (*)(void))nvml_preInit); + pthread_once(&init_virtual_map_pre_flag, (void (*)(void))nvml_preInit); nvmlReturn_t res = NVML_OVERRIDE_CALL(nvml_library_entry, nvmlInit_v2); - 
pthread_once(&init_virtual_map_post_flag,(void (*)(void))nvml_postInit); + pthread_once(&init_virtual_map_post_flag, (void (*)(void))nvml_postInit); return res; } -nvmlReturn_t nvmlDeviceGetPciInfo_v3(nvmlDevice_t device, nvmlPciInfo_t *pci) { - nvmlReturn_t res = NVML_OVERRIDE_CALL(nvml_library_entry, nvmlDeviceGetPciInfo_v3, device, - pci); - return res; +nvmlReturn_t nvmlDeviceGetPciInfo_v3(nvmlDevice_t device, nvmlPciInfo_t* pci) { + nvmlReturn_t res = NVML_OVERRIDE_CALL(nvml_library_entry, nvmlDeviceGetPciInfo_v3, device, pci); + return res; } -nvmlReturn_t nvmlDeviceGetPciInfo_v2(nvmlDevice_t device, nvmlPciInfo_t *pci) { - nvmlReturn_t res = NVML_OVERRIDE_CALL(nvml_library_entry, nvmlDeviceGetPciInfo_v2, device, - pci); - return res; +nvmlReturn_t nvmlDeviceGetPciInfo_v2(nvmlDevice_t device, nvmlPciInfo_t* pci) { + nvmlReturn_t res = NVML_OVERRIDE_CALL(nvml_library_entry, nvmlDeviceGetPciInfo_v2, device, pci); + return res; } -nvmlReturn_t nvmlDeviceGetPciInfo(nvmlDevice_t device, nvmlPciInfo_t *pci) { - nvmlReturn_t res = NVML_OVERRIDE_CALL(nvml_library_entry, nvmlDeviceGetPciInfo, device, pci); - return res; +nvmlReturn_t nvmlDeviceGetPciInfo(nvmlDevice_t device, nvmlPciInfo_t* pci) { + nvmlReturn_t res = NVML_OVERRIDE_CALL(nvml_library_entry, nvmlDeviceGetPciInfo, device, pci); + return res; } -nvmlReturn_t nvmlDeviceGetUUID(nvmlDevice_t device, char *uuid, - unsigned int length) { - nvmlReturn_t res = NVML_OVERRIDE_CALL(nvml_library_entry, nvmlDeviceGetUUID, device, uuid, - length); +nvmlReturn_t nvmlDeviceGetUUID(nvmlDevice_t device, char* uuid, unsigned int length) { + nvmlReturn_t res = + NVML_OVERRIDE_CALL(nvml_library_entry, nvmlDeviceGetUUID, device, uuid, length); return res; } diff --git a/src/nvml/nvml_entry.c b/src/nvml/nvml_entry.c index 2019869e..e5a5ca62 100644 --- a/src/nvml/nvml_entry.c +++ b/src/nvml/nvml_entry.c @@ -1,21 +1,19 @@ #include -#include "include/nvml_prefix.h" + #include "include/libnvml_hook.h" +#include 
"include/nvml_prefix.h" #include "include/utils.h" extern entry_t cuda_library_entry[]; extern entry_t nvml_library_entry[]; -//extern resource_data_t g_vcuda_config; +// extern resource_data_t g_vcuda_config; -nvmlReturn_t nvmlShutdown(void) { - return NVML_OVERRIDE_CALL(nvml_library_entry, nvmlShutdown); -} +nvmlReturn_t nvmlShutdown(void) { return NVML_OVERRIDE_CALL(nvml_library_entry, nvmlShutdown); } const char *nvmlErrorString(nvmlReturn_t result) { - const char *(*_entry)(nvmlReturn_t) = - NVML_FIND_ENTRY(nvml_library_entry, nvmlErrorString); + const char *(*_entry)(nvmlReturn_t) = NVML_FIND_ENTRY(nvml_library_entry, nvmlErrorString); - return _entry(result); + return _entry(result); } /* @@ -31,12 +29,10 @@ nvmlReturn_t nvmlDeviceGetHandleByIndex(unsigned int index, } */ -nvmlReturn_t nvmlDeviceGetComputeRunningProcesses(nvmlDevice_t device, - unsigned int *infoCount, +nvmlReturn_t nvmlDeviceGetComputeRunningProcesses(nvmlDevice_t device, unsigned int *infoCount, nvmlProcessInfo_t *infos) { - return NVML_OVERRIDE_CALL_NO_LOG(nvml_library_entry, - nvmlDeviceGetComputeRunningProcesses, device, - infoCount, infos); + return NVML_OVERRIDE_CALL_NO_LOG(nvml_library_entry, nvmlDeviceGetComputeRunningProcesses, + device, infoCount, infos); } /* nvmlReturn_t nvmlDeviceGetPciInfo_v3(nvmlDevice_t device, nvmlPciInfo_t *pci) { @@ -53,12 +49,12 @@ nvmlReturn_t nvmlDeviceGetPciInfo(nvmlDevice_t device, nvmlPciInfo_t *pci) { return NVML_OVERRIDE_CALL(nvml_library_entry, nvmlDeviceGetPciInfo, device, pci); }*/ -nvmlReturn_t nvmlDeviceGetProcessUtilization( - nvmlDevice_t device, nvmlProcessUtilizationSample_t *utilization, - unsigned int *processSamplesCount, unsigned long long lastSeenTimeStamp) { - return NVML_OVERRIDE_CALL_NO_LOG(nvml_library_entry, nvmlDeviceGetProcessUtilization, - device, utilization, processSamplesCount, - lastSeenTimeStamp); +nvmlReturn_t nvmlDeviceGetProcessUtilization(nvmlDevice_t device, + nvmlProcessUtilizationSample_t *utilization, + unsigned 
int *processSamplesCount, + unsigned long long lastSeenTimeStamp) { + return NVML_OVERRIDE_CALL_NO_LOG(nvml_library_entry, nvmlDeviceGetProcessUtilization, device, + utilization, processSamplesCount, lastSeenTimeStamp); } /* nvmlReturn_t nvmlDeviceGetCount_v2(unsigned int *deviceCount) { @@ -71,301 +67,245 @@ nvmlReturn_t nvmlDeviceGetCount(unsigned int *deviceCount) { }*/ nvmlReturn_t nvmlDeviceClearAccountingPids(nvmlDevice_t device) { - return NVML_OVERRIDE_CALL(nvml_library_entry, nvmlDeviceClearAccountingPids, - device); + return NVML_OVERRIDE_CALL(nvml_library_entry, nvmlDeviceClearAccountingPids, device); } nvmlReturn_t nvmlDeviceClearCpuAffinity(nvmlDevice_t device) { - return NVML_OVERRIDE_CALL(nvml_library_entry, nvmlDeviceClearCpuAffinity, - device); + return NVML_OVERRIDE_CALL(nvml_library_entry, nvmlDeviceClearCpuAffinity, device); } -nvmlReturn_t nvmlDeviceClearEccErrorCounts(nvmlDevice_t device, - nvmlEccCounterType_t counterType) { - return NVML_OVERRIDE_CALL(nvml_library_entry, nvmlDeviceClearEccErrorCounts, - device, counterType); +nvmlReturn_t nvmlDeviceClearEccErrorCounts(nvmlDevice_t device, nvmlEccCounterType_t counterType) { + return NVML_OVERRIDE_CALL(nvml_library_entry, nvmlDeviceClearEccErrorCounts, device, + counterType); } nvmlReturn_t nvmlDeviceDiscoverGpus(nvmlPciInfo_t *pciInfo) { - return NVML_OVERRIDE_CALL(nvml_library_entry, nvmlDeviceDiscoverGpus, pciInfo); + return NVML_OVERRIDE_CALL(nvml_library_entry, nvmlDeviceDiscoverGpus, pciInfo); } -nvmlReturn_t -nvmlDeviceFreezeNvLinkUtilizationCounter(nvmlDevice_t device, unsigned int link, - unsigned int counter, - nvmlEnableState_t freeze) { - return NVML_OVERRIDE_CALL(nvml_library_entry, - nvmlDeviceFreezeNvLinkUtilizationCounter, device, link, - counter, freeze); +nvmlReturn_t nvmlDeviceFreezeNvLinkUtilizationCounter(nvmlDevice_t device, unsigned int link, + unsigned int counter, + nvmlEnableState_t freeze) { + return NVML_OVERRIDE_CALL(nvml_library_entry, 
nvmlDeviceFreezeNvLinkUtilizationCounter, device, + link, counter, freeze); } -nvmlReturn_t nvmlDeviceGetAccountingBufferSize(nvmlDevice_t device, - unsigned int *bufferSize) { - return NVML_OVERRIDE_CALL(nvml_library_entry, nvmlDeviceGetAccountingBufferSize, - device, bufferSize); +nvmlReturn_t nvmlDeviceGetAccountingBufferSize(nvmlDevice_t device, unsigned int *bufferSize) { + return NVML_OVERRIDE_CALL(nvml_library_entry, nvmlDeviceGetAccountingBufferSize, device, + bufferSize); } -nvmlReturn_t nvmlDeviceGetAccountingMode(nvmlDevice_t device, - nvmlEnableState_t *mode) { - return NVML_OVERRIDE_CALL(nvml_library_entry, nvmlDeviceGetAccountingMode, - device, mode); +nvmlReturn_t nvmlDeviceGetAccountingMode(nvmlDevice_t device, nvmlEnableState_t *mode) { + return NVML_OVERRIDE_CALL(nvml_library_entry, nvmlDeviceGetAccountingMode, device, mode); } -nvmlReturn_t nvmlDeviceGetAccountingPids(nvmlDevice_t device, - unsigned int *count, +nvmlReturn_t nvmlDeviceGetAccountingPids(nvmlDevice_t device, unsigned int *count, unsigned int *pids) { - return NVML_OVERRIDE_CALL(nvml_library_entry, nvmlDeviceGetAccountingPids, - device, count, pids); + return NVML_OVERRIDE_CALL(nvml_library_entry, nvmlDeviceGetAccountingPids, device, count, pids); } nvmlReturn_t nvmlDeviceGetAccountingStats(nvmlDevice_t device, unsigned int pid, nvmlAccountingStats_t *stats) { - return NVML_OVERRIDE_CALL(nvml_library_entry, nvmlDeviceGetAccountingStats, - device, pid, stats); + return NVML_OVERRIDE_CALL(nvml_library_entry, nvmlDeviceGetAccountingStats, device, pid, stats); } -nvmlReturn_t nvmlDeviceGetActiveVgpus(nvmlDevice_t device, - unsigned int *vgpuCount, +nvmlReturn_t nvmlDeviceGetActiveVgpus(nvmlDevice_t device, unsigned int *vgpuCount, nvmlVgpuInstance_t *vgpuInstances) { - return NVML_OVERRIDE_CALL(nvml_library_entry, nvmlDeviceGetActiveVgpus, device, - vgpuCount, vgpuInstances); + return NVML_OVERRIDE_CALL(nvml_library_entry, nvmlDeviceGetActiveVgpus, device, vgpuCount, + vgpuInstances); } 
-nvmlReturn_t nvmlDeviceGetAPIRestriction(nvmlDevice_t device, - nvmlRestrictedAPI_t apiType, +nvmlReturn_t nvmlDeviceGetAPIRestriction(nvmlDevice_t device, nvmlRestrictedAPI_t apiType, nvmlEnableState_t *isRestricted) { - return NVML_OVERRIDE_CALL(nvml_library_entry, nvmlDeviceGetAPIRestriction, - device, apiType, isRestricted); + return NVML_OVERRIDE_CALL(nvml_library_entry, nvmlDeviceGetAPIRestriction, device, apiType, + isRestricted); } -nvmlReturn_t nvmlDeviceGetApplicationsClock(nvmlDevice_t device, - nvmlClockType_t clockType, +nvmlReturn_t nvmlDeviceGetApplicationsClock(nvmlDevice_t device, nvmlClockType_t clockType, unsigned int *clockMHz) { - return NVML_OVERRIDE_CALL(nvml_library_entry, nvmlDeviceGetApplicationsClock, - device, clockType, clockMHz); + return NVML_OVERRIDE_CALL(nvml_library_entry, nvmlDeviceGetApplicationsClock, device, clockType, + clockMHz); } -nvmlReturn_t -nvmlDeviceGetAutoBoostedClocksEnabled(nvmlDevice_t device, - nvmlEnableState_t *isEnabled, - nvmlEnableState_t *defaultIsEnabled) { - return NVML_OVERRIDE_CALL(nvml_library_entry, - nvmlDeviceGetAutoBoostedClocksEnabled, device, - isEnabled, defaultIsEnabled); +nvmlReturn_t nvmlDeviceGetAutoBoostedClocksEnabled(nvmlDevice_t device, + nvmlEnableState_t *isEnabled, + nvmlEnableState_t *defaultIsEnabled) { + return NVML_OVERRIDE_CALL(nvml_library_entry, nvmlDeviceGetAutoBoostedClocksEnabled, device, + isEnabled, defaultIsEnabled); } -nvmlReturn_t nvmlDeviceGetBAR1MemoryInfo(nvmlDevice_t device, - nvmlBAR1Memory_t *bar1Memory) { - return NVML_OVERRIDE_CALL(nvml_library_entry, nvmlDeviceGetBAR1MemoryInfo, - device, bar1Memory); +nvmlReturn_t nvmlDeviceGetBAR1MemoryInfo(nvmlDevice_t device, nvmlBAR1Memory_t *bar1Memory) { + return NVML_OVERRIDE_CALL(nvml_library_entry, nvmlDeviceGetBAR1MemoryInfo, device, bar1Memory); } nvmlReturn_t nvmlDeviceGetBoardId(nvmlDevice_t device, unsigned int *boardId) { - return NVML_OVERRIDE_CALL(nvml_library_entry, nvmlDeviceGetBoardId, device, - boardId); + 
return NVML_OVERRIDE_CALL(nvml_library_entry, nvmlDeviceGetBoardId, device, boardId); } nvmlReturn_t nvmlDeviceGetBoardPartNumber(nvmlDevice_t device, char *partNumber, unsigned int length) { - return NVML_OVERRIDE_CALL(nvml_library_entry, nvmlDeviceGetBoardPartNumber, - device, partNumber, length); + return NVML_OVERRIDE_CALL(nvml_library_entry, nvmlDeviceGetBoardPartNumber, device, partNumber, + length); } nvmlReturn_t nvmlDeviceGetBrand(nvmlDevice_t device, nvmlBrandType_t *type) { - return NVML_OVERRIDE_CALL(nvml_library_entry, nvmlDeviceGetBrand, device, type); + return NVML_OVERRIDE_CALL(nvml_library_entry, nvmlDeviceGetBrand, device, type); } -nvmlReturn_t -nvmlDeviceGetBridgeChipInfo(nvmlDevice_t device, - nvmlBridgeChipHierarchy_t *bridgeHierarchy) { - return NVML_OVERRIDE_CALL(nvml_library_entry, nvmlDeviceGetBridgeChipInfo, - device, bridgeHierarchy); +nvmlReturn_t nvmlDeviceGetBridgeChipInfo(nvmlDevice_t device, + nvmlBridgeChipHierarchy_t *bridgeHierarchy) { + return NVML_OVERRIDE_CALL(nvml_library_entry, nvmlDeviceGetBridgeChipInfo, device, + bridgeHierarchy); } nvmlReturn_t nvmlDeviceGetClock(nvmlDevice_t device, nvmlClockType_t clockType, nvmlClockId_t clockId, unsigned int *clockMHz) { - return NVML_OVERRIDE_CALL(nvml_library_entry, nvmlDeviceGetClock, device, - clockType, clockId, clockMHz); + return NVML_OVERRIDE_CALL(nvml_library_entry, nvmlDeviceGetClock, device, clockType, clockId, + clockMHz); } nvmlReturn_t nvmlDeviceGetClockInfo(nvmlDevice_t device, nvmlClockType_t type, unsigned int *clock) { - return NVML_OVERRIDE_CALL(nvml_library_entry, nvmlDeviceGetClockInfo, device, - type, clock); + return NVML_OVERRIDE_CALL(nvml_library_entry, nvmlDeviceGetClockInfo, device, type, clock); } -nvmlReturn_t nvmlDeviceGetComputeMode(nvmlDevice_t device, - nvmlComputeMode_t *mode) { - return NVML_OVERRIDE_CALL(nvml_library_entry, nvmlDeviceGetComputeMode, device, - mode); +nvmlReturn_t nvmlDeviceGetComputeMode(nvmlDevice_t device, nvmlComputeMode_t 
*mode) { + return NVML_OVERRIDE_CALL(nvml_library_entry, nvmlDeviceGetComputeMode, device, mode); } -nvmlReturn_t nvmlDeviceGetCpuAffinity(nvmlDevice_t device, - unsigned int cpuSetSize, +nvmlReturn_t nvmlDeviceGetCpuAffinity(nvmlDevice_t device, unsigned int cpuSetSize, unsigned long *cpuSet) { - return NVML_OVERRIDE_CALL(nvml_library_entry, nvmlDeviceGetCpuAffinity, device, - cpuSetSize, cpuSet); + return NVML_OVERRIDE_CALL(nvml_library_entry, nvmlDeviceGetCpuAffinity, device, cpuSetSize, + cpuSet); } -nvmlReturn_t nvmlDeviceGetCreatableVgpus(nvmlDevice_t device, - unsigned int *vgpuCount, +nvmlReturn_t nvmlDeviceGetCreatableVgpus(nvmlDevice_t device, unsigned int *vgpuCount, nvmlVgpuTypeId_t *vgpuTypeIds) { - return NVML_OVERRIDE_CALL(nvml_library_entry, nvmlDeviceGetCreatableVgpus, - device, vgpuCount, vgpuTypeIds); + return NVML_OVERRIDE_CALL(nvml_library_entry, nvmlDeviceGetCreatableVgpus, device, vgpuCount, + vgpuTypeIds); } -nvmlReturn_t nvmlDeviceGetCudaComputeCapability(nvmlDevice_t device, int *major, - int *minor) { - return NVML_OVERRIDE_CALL(nvml_library_entry, nvmlDeviceGetCudaComputeCapability, - device, major, minor); +nvmlReturn_t nvmlDeviceGetCudaComputeCapability(nvmlDevice_t device, int *major, int *minor) { + return NVML_OVERRIDE_CALL(nvml_library_entry, nvmlDeviceGetCudaComputeCapability, device, major, + minor); } -nvmlReturn_t nvmlDeviceGetCurrentClocksThrottleReasons( - nvmlDevice_t device, unsigned long long *clocksThrottleReasons) { - return NVML_OVERRIDE_CALL(nvml_library_entry, - nvmlDeviceGetCurrentClocksThrottleReasons, device, - clocksThrottleReasons); +nvmlReturn_t nvmlDeviceGetCurrentClocksThrottleReasons(nvmlDevice_t device, + unsigned long long *clocksThrottleReasons) { + return NVML_OVERRIDE_CALL(nvml_library_entry, nvmlDeviceGetCurrentClocksThrottleReasons, device, + clocksThrottleReasons); } -nvmlReturn_t nvmlDeviceGetCurrPcieLinkGeneration(nvmlDevice_t device, - unsigned int *currLinkGen) { - return 
NVML_OVERRIDE_CALL(nvml_library_entry, - nvmlDeviceGetCurrPcieLinkGeneration, device, - currLinkGen); +nvmlReturn_t nvmlDeviceGetCurrPcieLinkGeneration(nvmlDevice_t device, unsigned int *currLinkGen) { + return NVML_OVERRIDE_CALL(nvml_library_entry, nvmlDeviceGetCurrPcieLinkGeneration, device, + currLinkGen); } -nvmlReturn_t nvmlDeviceGetCurrPcieLinkWidth(nvmlDevice_t device, - unsigned int *currLinkWidth) { - return NVML_OVERRIDE_CALL(nvml_library_entry, nvmlDeviceGetCurrPcieLinkWidth, - device, currLinkWidth); +nvmlReturn_t nvmlDeviceGetCurrPcieLinkWidth(nvmlDevice_t device, unsigned int *currLinkWidth) { + return NVML_OVERRIDE_CALL(nvml_library_entry, nvmlDeviceGetCurrPcieLinkWidth, device, + currLinkWidth); } -nvmlReturn_t nvmlDeviceGetDecoderUtilization(nvmlDevice_t device, - unsigned int *utilization, +nvmlReturn_t nvmlDeviceGetDecoderUtilization(nvmlDevice_t device, unsigned int *utilization, unsigned int *samplingPeriodUs) { - return NVML_OVERRIDE_CALL(nvml_library_entry, nvmlDeviceGetDecoderUtilization, - device, utilization, samplingPeriodUs); + return NVML_OVERRIDE_CALL(nvml_library_entry, nvmlDeviceGetDecoderUtilization, device, + utilization, samplingPeriodUs); } -nvmlReturn_t nvmlDeviceGetDefaultApplicationsClock(nvmlDevice_t device, - nvmlClockType_t clockType, +nvmlReturn_t nvmlDeviceGetDefaultApplicationsClock(nvmlDevice_t device, nvmlClockType_t clockType, unsigned int *clockMHz) { - return NVML_OVERRIDE_CALL(nvml_library_entry, - nvmlDeviceGetDefaultApplicationsClock, device, - clockType, clockMHz); + return NVML_OVERRIDE_CALL(nvml_library_entry, nvmlDeviceGetDefaultApplicationsClock, device, + clockType, clockMHz); } -nvmlReturn_t nvmlDeviceGetDetailedEccErrors(nvmlDevice_t device, - nvmlMemoryErrorType_t errorType, +nvmlReturn_t nvmlDeviceGetDetailedEccErrors(nvmlDevice_t device, nvmlMemoryErrorType_t errorType, nvmlEccCounterType_t counterType, nvmlEccErrorCounts_t *eccCounts) { - return NVML_OVERRIDE_CALL(nvml_library_entry, 
nvmlDeviceGetDetailedEccErrors, - device, errorType, counterType, eccCounts); + return NVML_OVERRIDE_CALL(nvml_library_entry, nvmlDeviceGetDetailedEccErrors, device, errorType, + counterType, eccCounts); } -nvmlReturn_t nvmlDeviceGetDisplayActive(nvmlDevice_t device, - nvmlEnableState_t *isActive) { - return NVML_OVERRIDE_CALL(nvml_library_entry, nvmlDeviceGetDisplayActive, device, - isActive); +nvmlReturn_t nvmlDeviceGetDisplayActive(nvmlDevice_t device, nvmlEnableState_t *isActive) { + return NVML_OVERRIDE_CALL(nvml_library_entry, nvmlDeviceGetDisplayActive, device, isActive); } -nvmlReturn_t nvmlDeviceGetDisplayMode(nvmlDevice_t device, - nvmlEnableState_t *display) { - return NVML_OVERRIDE_CALL(nvml_library_entry, nvmlDeviceGetDisplayMode, device, - display); +nvmlReturn_t nvmlDeviceGetDisplayMode(nvmlDevice_t device, nvmlEnableState_t *display) { + return NVML_OVERRIDE_CALL(nvml_library_entry, nvmlDeviceGetDisplayMode, device, display); } -nvmlReturn_t nvmlDeviceGetDriverModel(nvmlDevice_t device, - nvmlDriverModel_t *current, +nvmlReturn_t nvmlDeviceGetDriverModel(nvmlDevice_t device, nvmlDriverModel_t *current, nvmlDriverModel_t *pending) { - return NVML_OVERRIDE_CALL(nvml_library_entry, nvmlDeviceGetDriverModel, device, - current, pending); + return NVML_OVERRIDE_CALL(nvml_library_entry, nvmlDeviceGetDriverModel, device, current, + pending); } -nvmlReturn_t nvmlDeviceGetEccMode(nvmlDevice_t device, - nvmlEnableState_t *current, +nvmlReturn_t nvmlDeviceGetEccMode(nvmlDevice_t device, nvmlEnableState_t *current, nvmlEnableState_t *pending) { - return NVML_OVERRIDE_CALL(nvml_library_entry, nvmlDeviceGetEccMode, device, - current, pending); + return NVML_OVERRIDE_CALL(nvml_library_entry, nvmlDeviceGetEccMode, device, current, pending); } nvmlReturn_t nvmlDeviceSetEccMode(nvmlDevice_t device, nvmlEnableState_t ecc) { - return NVML_OVERRIDE_CALL(nvml_library_entry, nvmlDeviceSetEccMode, device, ecc); + return NVML_OVERRIDE_CALL(nvml_library_entry, 
nvmlDeviceSetEccMode, device, ecc); } -nvmlReturn_t nvmlDeviceGetEncoderCapacity(nvmlDevice_t device, - nvmlEncoderType_t encoderQueryType, +nvmlReturn_t nvmlDeviceGetEncoderCapacity(nvmlDevice_t device, nvmlEncoderType_t encoderQueryType, unsigned int *encoderCapacity) { - return NVML_OVERRIDE_CALL(nvml_library_entry, nvmlDeviceGetEncoderCapacity, - device, encoderQueryType, encoderCapacity); + return NVML_OVERRIDE_CALL(nvml_library_entry, nvmlDeviceGetEncoderCapacity, device, + encoderQueryType, encoderCapacity); } -nvmlReturn_t -nvmlDeviceGetEncoderSessions(nvmlDevice_t device, unsigned int *sessionCount, - nvmlEncoderSessionInfo_t *sessionInfos) { - return NVML_OVERRIDE_CALL(nvml_library_entry, nvmlDeviceGetEncoderSessions, - device, sessionCount, sessionInfos); +nvmlReturn_t nvmlDeviceGetEncoderSessions(nvmlDevice_t device, unsigned int *sessionCount, + nvmlEncoderSessionInfo_t *sessionInfos) { + return NVML_OVERRIDE_CALL(nvml_library_entry, nvmlDeviceGetEncoderSessions, device, + sessionCount, sessionInfos); } -nvmlReturn_t nvmlDeviceGetEncoderStats(nvmlDevice_t device, - unsigned int *sessionCount, - unsigned int *averageFps, - unsigned int *averageLatency) { - return NVML_OVERRIDE_CALL(nvml_library_entry, nvmlDeviceGetEncoderStats, device, - sessionCount, averageFps, averageLatency); +nvmlReturn_t nvmlDeviceGetEncoderStats(nvmlDevice_t device, unsigned int *sessionCount, + unsigned int *averageFps, unsigned int *averageLatency) { + return NVML_OVERRIDE_CALL(nvml_library_entry, nvmlDeviceGetEncoderStats, device, sessionCount, + averageFps, averageLatency); } -nvmlReturn_t nvmlDeviceGetEncoderUtilization(nvmlDevice_t device, - unsigned int *utilization, +nvmlReturn_t nvmlDeviceGetEncoderUtilization(nvmlDevice_t device, unsigned int *utilization, unsigned int *samplingPeriodUs) { - return NVML_OVERRIDE_CALL(nvml_library_entry, nvmlDeviceGetEncoderUtilization, - device, utilization, samplingPeriodUs); + return NVML_OVERRIDE_CALL(nvml_library_entry, 
nvmlDeviceGetEncoderUtilization, device, + utilization, samplingPeriodUs); } -nvmlReturn_t nvmlDeviceGetEnforcedPowerLimit(nvmlDevice_t device, - unsigned int *limit) { - return NVML_OVERRIDE_CALL(nvml_library_entry, nvmlDeviceGetEnforcedPowerLimit, - device, limit); +nvmlReturn_t nvmlDeviceGetEnforcedPowerLimit(nvmlDevice_t device, unsigned int *limit) { + return NVML_OVERRIDE_CALL(nvml_library_entry, nvmlDeviceGetEnforcedPowerLimit, device, limit); } nvmlReturn_t nvmlDeviceGetFanSpeed(nvmlDevice_t device, unsigned int *speed) { - return NVML_OVERRIDE_CALL(nvml_library_entry, nvmlDeviceGetFanSpeed, device, - speed); + return NVML_OVERRIDE_CALL(nvml_library_entry, nvmlDeviceGetFanSpeed, device, speed); } -nvmlReturn_t nvmlDeviceGetFanSpeed_v2(nvmlDevice_t device, - unsigned int *speed) { - return NVML_OVERRIDE_CALL(nvml_library_entry, nvmlDeviceGetFanSpeed_v2, device, - speed); +nvmlReturn_t nvmlDeviceGetFanSpeed_v2(nvmlDevice_t device, unsigned int *speed) { + return NVML_OVERRIDE_CALL(nvml_library_entry, nvmlDeviceGetFanSpeed_v2, device, speed); } nvmlReturn_t nvmlDeviceGetFieldValues(nvmlDevice_t device, int valuesCount, nvmlFieldValue_t *values) { - return NVML_OVERRIDE_CALL(nvml_library_entry, nvmlDeviceGetFieldValues, device, - valuesCount, values); + return NVML_OVERRIDE_CALL(nvml_library_entry, nvmlDeviceGetFieldValues, device, valuesCount, + values); } -nvmlReturn_t nvmlDeviceGetGpuOperationMode(nvmlDevice_t device, - nvmlGpuOperationMode_t *current, +nvmlReturn_t nvmlDeviceGetGpuOperationMode(nvmlDevice_t device, nvmlGpuOperationMode_t *current, nvmlGpuOperationMode_t *pending) { - return NVML_OVERRIDE_CALL(nvml_library_entry, nvmlDeviceGetGpuOperationMode, - device, current, pending); + return NVML_OVERRIDE_CALL(nvml_library_entry, nvmlDeviceGetGpuOperationMode, device, current, + pending); } -nvmlReturn_t nvmlDeviceGetGraphicsRunningProcesses(nvmlDevice_t device, - unsigned int *infoCount, +nvmlReturn_t nvmlDeviceGetGraphicsRunningProcesses(nvmlDevice_t 
device, unsigned int *infoCount, nvmlProcessInfo_t *infos) { - return NVML_OVERRIDE_CALL(nvml_library_entry, - nvmlDeviceGetGraphicsRunningProcesses, device, - infoCount, infos); + return NVML_OVERRIDE_CALL(nvml_library_entry, nvmlDeviceGetGraphicsRunningProcesses, device, + infoCount, infos); } nvmlReturn_t nvmlDeviceGetGridLicensableFeatures( - nvmlDevice_t device, - nvmlGridLicensableFeatures_t *pGridLicensableFeatures) { - return NVML_OVERRIDE_CALL(nvml_library_entry, - nvmlDeviceGetGridLicensableFeatures, device, - pGridLicensableFeatures); + nvmlDevice_t device, nvmlGridLicensableFeatures_t *pGridLicensableFeatures) { + return NVML_OVERRIDE_CALL(nvml_library_entry, nvmlDeviceGetGridLicensableFeatures, device, + pGridLicensableFeatures); } /* nvmlReturn_t nvmlDeviceGetHandleByPciBusId_v2(const char *pciBusId, @@ -391,61 +331,51 @@ nvmlReturn_t nvmlDeviceGetHandleByUUID(const char *uuid, nvmlDevice_t *device) { device); }*/ - nvmlReturn_t nvmlDeviceGetInforomConfigurationChecksum(nvmlDevice_t device, unsigned int *checksum) { - return NVML_OVERRIDE_CALL(nvml_library_entry, - nvmlDeviceGetInforomConfigurationChecksum, device, - checksum); + return NVML_OVERRIDE_CALL(nvml_library_entry, nvmlDeviceGetInforomConfigurationChecksum, device, + checksum); } -nvmlReturn_t nvmlDeviceGetInforomImageVersion(nvmlDevice_t device, - char *version, +nvmlReturn_t nvmlDeviceGetInforomImageVersion(nvmlDevice_t device, char *version, unsigned int length) { - return NVML_OVERRIDE_CALL(nvml_library_entry, nvmlDeviceGetInforomImageVersion, - device, version, length); + return NVML_OVERRIDE_CALL(nvml_library_entry, nvmlDeviceGetInforomImageVersion, device, version, + length); } -nvmlReturn_t nvmlDeviceGetInforomVersion(nvmlDevice_t device, - nvmlInforomObject_t object, +nvmlReturn_t nvmlDeviceGetInforomVersion(nvmlDevice_t device, nvmlInforomObject_t object, char *version, unsigned int length) { - return NVML_OVERRIDE_CALL(nvml_library_entry, nvmlDeviceGetInforomVersion, - device, 
object, version, length); + return NVML_OVERRIDE_CALL(nvml_library_entry, nvmlDeviceGetInforomVersion, device, object, + version, length); } -nvmlReturn_t nvmlDeviceGetMaxClockInfo(nvmlDevice_t device, - nvmlClockType_t type, +nvmlReturn_t nvmlDeviceGetMaxClockInfo(nvmlDevice_t device, nvmlClockType_t type, unsigned int *clock) { - return NVML_OVERRIDE_CALL(nvml_library_entry, nvmlDeviceGetMaxClockInfo, device, - type, clock); + return NVML_OVERRIDE_CALL(nvml_library_entry, nvmlDeviceGetMaxClockInfo, device, type, clock); } -nvmlReturn_t nvmlDeviceGetMaxCustomerBoostClock(nvmlDevice_t device, - nvmlClockType_t clockType, +nvmlReturn_t nvmlDeviceGetMaxCustomerBoostClock(nvmlDevice_t device, nvmlClockType_t clockType, unsigned int *clockMHz) { - return NVML_OVERRIDE_CALL(nvml_library_entry, nvmlDeviceGetMaxCustomerBoostClock, - device, clockType, clockMHz); + return NVML_OVERRIDE_CALL(nvml_library_entry, nvmlDeviceGetMaxCustomerBoostClock, device, + clockType, clockMHz); } -nvmlReturn_t nvmlDeviceGetMaxPcieLinkGeneration(nvmlDevice_t device, - unsigned int *maxLinkGen) { - return NVML_OVERRIDE_CALL(nvml_library_entry, nvmlDeviceGetMaxPcieLinkGeneration, - device, maxLinkGen); +nvmlReturn_t nvmlDeviceGetMaxPcieLinkGeneration(nvmlDevice_t device, unsigned int *maxLinkGen) { + return NVML_OVERRIDE_CALL(nvml_library_entry, nvmlDeviceGetMaxPcieLinkGeneration, device, + maxLinkGen); } -nvmlReturn_t nvmlDeviceGetMaxPcieLinkWidth(nvmlDevice_t device, - unsigned int *maxLinkWidth) { - return NVML_OVERRIDE_CALL(nvml_library_entry, nvmlDeviceGetMaxPcieLinkWidth, - device, maxLinkWidth); +nvmlReturn_t nvmlDeviceGetMaxPcieLinkWidth(nvmlDevice_t device, unsigned int *maxLinkWidth) { + return NVML_OVERRIDE_CALL(nvml_library_entry, nvmlDeviceGetMaxPcieLinkWidth, device, + maxLinkWidth); } -nvmlReturn_t nvmlDeviceGetMemoryErrorCounter(nvmlDevice_t device, - nvmlMemoryErrorType_t errorType, +nvmlReturn_t nvmlDeviceGetMemoryErrorCounter(nvmlDevice_t device, nvmlMemoryErrorType_t 
errorType, nvmlEccCounterType_t counterType, nvmlMemoryLocation_t locationType, unsigned long long *count) { - return NVML_OVERRIDE_CALL(nvml_library_entry, nvmlDeviceGetMemoryErrorCounter, - device, errorType, counterType, locationType, count); + return NVML_OVERRIDE_CALL(nvml_library_entry, nvmlDeviceGetMemoryErrorCounter, device, + errorType, counterType, locationType, count); } /* nvmlReturn_t nvmlDeviceGetMemoryInfo(nvmlDevice_t device, @@ -454,47 +384,37 @@ nvmlReturn_t nvmlDeviceGetMemoryInfo(nvmlDevice_t device, memory); } */ -nvmlReturn_t nvmlDeviceGetMinorNumber(nvmlDevice_t device, - unsigned int *minorNumber) { - return NVML_OVERRIDE_CALL(nvml_library_entry, nvmlDeviceGetMinorNumber, device, - minorNumber); +nvmlReturn_t nvmlDeviceGetMinorNumber(nvmlDevice_t device, unsigned int *minorNumber) { + return NVML_OVERRIDE_CALL(nvml_library_entry, nvmlDeviceGetMinorNumber, device, minorNumber); } // Guessed function proto type -nvmlReturn_t nvmlDeviceGetMPSComputeRunningProcesses(nvmlDevice_t device, - unsigned int *infoCount, +nvmlReturn_t nvmlDeviceGetMPSComputeRunningProcesses(nvmlDevice_t device, unsigned int *infoCount, void *infos) { - return NVML_OVERRIDE_CALL(nvml_library_entry, - nvmlDeviceGetMPSComputeRunningProcesses, device, - infoCount, infos); + return NVML_OVERRIDE_CALL(nvml_library_entry, nvmlDeviceGetMPSComputeRunningProcesses, device, + infoCount, infos); } -nvmlReturn_t nvmlDeviceGetMultiGpuBoard(nvmlDevice_t device, - unsigned int *multiGpuBool) { - return NVML_OVERRIDE_CALL(nvml_library_entry, nvmlDeviceGetMultiGpuBoard, device, - multiGpuBool); +nvmlReturn_t nvmlDeviceGetMultiGpuBoard(nvmlDevice_t device, unsigned int *multiGpuBool) { + return NVML_OVERRIDE_CALL(nvml_library_entry, nvmlDeviceGetMultiGpuBoard, device, multiGpuBool); } -nvmlReturn_t nvmlDeviceGetName(nvmlDevice_t device, char *name, - unsigned int length) { - return NVML_OVERRIDE_CALL(nvml_library_entry, nvmlDeviceGetName, device, name, - length); +nvmlReturn_t 
nvmlDeviceGetName(nvmlDevice_t device, char *name, unsigned int length) { + return NVML_OVERRIDE_CALL(nvml_library_entry, nvmlDeviceGetName, device, name, length); } -nvmlReturn_t nvmlDeviceGetNvLinkCapability(nvmlDevice_t device, - unsigned int link, +nvmlReturn_t nvmlDeviceGetNvLinkCapability(nvmlDevice_t device, unsigned int link, nvmlNvLinkCapability_t capability, unsigned int *capResult) { - return NVML_OVERRIDE_CALL(nvml_library_entry, nvmlDeviceGetNvLinkCapability, - device, link, capability, capResult); + return NVML_OVERRIDE_CALL(nvml_library_entry, nvmlDeviceGetNvLinkCapability, device, link, + capability, capResult); } -nvmlReturn_t nvmlDeviceGetNvLinkErrorCounter(nvmlDevice_t device, - unsigned int link, +nvmlReturn_t nvmlDeviceGetNvLinkErrorCounter(nvmlDevice_t device, unsigned int link, nvmlNvLinkErrorCounter_t counter, unsigned long long *counterValue) { - return NVML_OVERRIDE_CALL(nvml_library_entry, nvmlDeviceGetNvLinkErrorCounter, - device, link, counter, counterValue); + return NVML_OVERRIDE_CALL(nvml_library_entry, nvmlDeviceGetNvLinkErrorCounter, device, link, + counter, counterValue); } /* nvmlReturn_t nvmlDeviceGetNvLinkRemotePciInfo_v2(nvmlDevice_t device, @@ -514,1039 +434,867 @@ nvmlReturn_t nvmlDeviceGetNvLinkRemotePciInfo(nvmlDevice_t device, nvmlReturn_t nvmlDeviceGetNvLinkState(nvmlDevice_t device, unsigned int link, nvmlEnableState_t *isActive) { - return NVML_OVERRIDE_CALL(nvml_library_entry, nvmlDeviceGetNvLinkState, device, - link, isActive); + return NVML_OVERRIDE_CALL(nvml_library_entry, nvmlDeviceGetNvLinkState, device, link, isActive); } -nvmlReturn_t -nvmlDeviceGetNvLinkUtilizationControl(nvmlDevice_t device, unsigned int link, - unsigned int counter, - nvmlNvLinkUtilizationControl_t *control) { - return NVML_OVERRIDE_CALL(nvml_library_entry, - nvmlDeviceGetNvLinkUtilizationControl, device, link, - counter, control); +nvmlReturn_t nvmlDeviceGetNvLinkUtilizationControl(nvmlDevice_t device, unsigned int link, + unsigned int 
counter, + nvmlNvLinkUtilizationControl_t *control) { + return NVML_OVERRIDE_CALL(nvml_library_entry, nvmlDeviceGetNvLinkUtilizationControl, device, + link, counter, control); } -nvmlReturn_t nvmlDeviceGetNvLinkUtilizationCounter( - nvmlDevice_t device, unsigned int link, unsigned int counter, - unsigned long long *rxcounter, unsigned long long *txcounter) { - return NVML_OVERRIDE_CALL(nvml_library_entry, - nvmlDeviceGetNvLinkUtilizationCounter, device, link, - counter, rxcounter, txcounter); +nvmlReturn_t nvmlDeviceGetNvLinkUtilizationCounter(nvmlDevice_t device, unsigned int link, + unsigned int counter, + unsigned long long *rxcounter, + unsigned long long *txcounter) { + return NVML_OVERRIDE_CALL(nvml_library_entry, nvmlDeviceGetNvLinkUtilizationCounter, device, + link, counter, rxcounter, txcounter); } nvmlReturn_t nvmlDeviceGetNvLinkVersion(nvmlDevice_t device, unsigned int link, unsigned int *version) { - return NVML_OVERRIDE_CALL(nvml_library_entry, nvmlDeviceGetNvLinkVersion, device, - link, version); + return NVML_OVERRIDE_CALL(nvml_library_entry, nvmlDeviceGetNvLinkVersion, device, link, + version); } nvmlReturn_t nvmlDeviceGetP2PStatus(nvmlDevice_t device1, nvmlDevice_t device2, - nvmlGpuP2PCapsIndex_t p2pIndex, - nvmlGpuP2PStatus_t *p2pStatus) { - return NVML_OVERRIDE_CALL(nvml_library_entry, nvmlDeviceGetP2PStatus, device1, - device2, p2pIndex, p2pStatus); + nvmlGpuP2PCapsIndex_t p2pIndex, nvmlGpuP2PStatus_t *p2pStatus) { + return NVML_OVERRIDE_CALL(nvml_library_entry, nvmlDeviceGetP2PStatus, device1, device2, + p2pIndex, p2pStatus); } -nvmlReturn_t nvmlDeviceGetPcieReplayCounter(nvmlDevice_t device, - unsigned int *value) { - return NVML_OVERRIDE_CALL(nvml_library_entry, nvmlDeviceGetPcieReplayCounter, - device, value); +nvmlReturn_t nvmlDeviceGetPcieReplayCounter(nvmlDevice_t device, unsigned int *value) { + return NVML_OVERRIDE_CALL(nvml_library_entry, nvmlDeviceGetPcieReplayCounter, device, value); } -nvmlReturn_t 
nvmlDeviceGetPcieThroughput(nvmlDevice_t device, - nvmlPcieUtilCounter_t counter, +nvmlReturn_t nvmlDeviceGetPcieThroughput(nvmlDevice_t device, nvmlPcieUtilCounter_t counter, unsigned int *value) { - return NVML_OVERRIDE_CALL(nvml_library_entry, nvmlDeviceGetPcieThroughput, - device, counter, value); + return NVML_OVERRIDE_CALL(nvml_library_entry, nvmlDeviceGetPcieThroughput, device, counter, + value); } -nvmlReturn_t nvmlDeviceGetPerformanceState(nvmlDevice_t device, - nvmlPstates_t *pState) { - return NVML_OVERRIDE_CALL(nvml_library_entry, nvmlDeviceGetPerformanceState, - device, pState); +nvmlReturn_t nvmlDeviceGetPerformanceState(nvmlDevice_t device, nvmlPstates_t *pState) { + return NVML_OVERRIDE_CALL(nvml_library_entry, nvmlDeviceGetPerformanceState, device, pState); } -nvmlReturn_t nvmlDeviceGetPersistenceMode(nvmlDevice_t device, - nvmlEnableState_t *mode) { - return NVML_OVERRIDE_CALL(nvml_library_entry, nvmlDeviceGetPersistenceMode, - device, mode); +nvmlReturn_t nvmlDeviceGetPersistenceMode(nvmlDevice_t device, nvmlEnableState_t *mode) { + return NVML_OVERRIDE_CALL(nvml_library_entry, nvmlDeviceGetPersistenceMode, device, mode); } -nvmlReturn_t -nvmlDeviceGetPowerManagementDefaultLimit(nvmlDevice_t device, - unsigned int *defaultLimit) { - return NVML_OVERRIDE_CALL(nvml_library_entry, - nvmlDeviceGetPowerManagementDefaultLimit, device, - defaultLimit); +nvmlReturn_t nvmlDeviceGetPowerManagementDefaultLimit(nvmlDevice_t device, + unsigned int *defaultLimit) { + return NVML_OVERRIDE_CALL(nvml_library_entry, nvmlDeviceGetPowerManagementDefaultLimit, device, + defaultLimit); } -nvmlReturn_t nvmlDeviceGetPowerManagementLimit(nvmlDevice_t device, - unsigned int *limit) { - return NVML_OVERRIDE_CALL(nvml_library_entry, nvmlDeviceGetPowerManagementLimit, - device, limit); +nvmlReturn_t nvmlDeviceGetPowerManagementLimit(nvmlDevice_t device, unsigned int *limit) { + return NVML_OVERRIDE_CALL(nvml_library_entry, nvmlDeviceGetPowerManagementLimit, device, limit); } 
-nvmlReturn_t nvmlDeviceGetPowerManagementLimitConstraints( - nvmlDevice_t device, unsigned int *minLimit, unsigned int *maxLimit) { - return NVML_OVERRIDE_CALL(nvml_library_entry, - nvmlDeviceGetPowerManagementLimitConstraints, device, - minLimit, maxLimit); +nvmlReturn_t nvmlDeviceGetPowerManagementLimitConstraints(nvmlDevice_t device, + unsigned int *minLimit, + unsigned int *maxLimit) { + return NVML_OVERRIDE_CALL(nvml_library_entry, nvmlDeviceGetPowerManagementLimitConstraints, + device, minLimit, maxLimit); } -nvmlReturn_t nvmlDeviceGetPowerManagementMode(nvmlDevice_t device, - nvmlEnableState_t *mode) { - return NVML_OVERRIDE_CALL(nvml_library_entry, nvmlDeviceGetPowerManagementMode, - device, mode); +nvmlReturn_t nvmlDeviceGetPowerManagementMode(nvmlDevice_t device, nvmlEnableState_t *mode) { + return NVML_OVERRIDE_CALL(nvml_library_entry, nvmlDeviceGetPowerManagementMode, device, mode); } -nvmlReturn_t nvmlDeviceGetPowerState(nvmlDevice_t device, - nvmlPstates_t *pState) { - return NVML_OVERRIDE_CALL(nvml_library_entry, nvmlDeviceGetPowerState, device, - pState); +nvmlReturn_t nvmlDeviceGetPowerState(nvmlDevice_t device, nvmlPstates_t *pState) { + return NVML_OVERRIDE_CALL(nvml_library_entry, nvmlDeviceGetPowerState, device, pState); } nvmlReturn_t nvmlDeviceGetPowerUsage(nvmlDevice_t device, unsigned int *power) { - return NVML_OVERRIDE_CALL(nvml_library_entry, nvmlDeviceGetPowerUsage, device, - power); + return NVML_OVERRIDE_CALL(nvml_library_entry, nvmlDeviceGetPowerUsage, device, power); } -nvmlReturn_t nvmlDeviceGetRetiredPages(nvmlDevice_t device, - nvmlPageRetirementCause_t cause, - unsigned int *pageCount, - unsigned long long *addresses) { - return NVML_OVERRIDE_CALL(nvml_library_entry, nvmlDeviceGetRetiredPages, device, - cause, pageCount, addresses); +nvmlReturn_t nvmlDeviceGetRetiredPages(nvmlDevice_t device, nvmlPageRetirementCause_t cause, + unsigned int *pageCount, unsigned long long *addresses) { + return 
NVML_OVERRIDE_CALL(nvml_library_entry, nvmlDeviceGetRetiredPages, device, cause, + pageCount, addresses); } -nvmlReturn_t -nvmlDeviceGetRetiredPagesPendingStatus(nvmlDevice_t device, - nvmlEnableState_t *isPending) { - return NVML_OVERRIDE_CALL(nvml_library_entry, - nvmlDeviceGetRetiredPagesPendingStatus, device, - isPending); +nvmlReturn_t nvmlDeviceGetRetiredPagesPendingStatus(nvmlDevice_t device, + nvmlEnableState_t *isPending) { + return NVML_OVERRIDE_CALL(nvml_library_entry, nvmlDeviceGetRetiredPagesPendingStatus, device, + isPending); } nvmlReturn_t nvmlDeviceGetSamples(nvmlDevice_t device, nvmlSamplingType_t type, unsigned long long lastSeenTimeStamp, - nvmlValueType_t *sampleValType, - unsigned int *sampleCount, + nvmlValueType_t *sampleValType, unsigned int *sampleCount, nvmlSample_t *samples) { - return NVML_OVERRIDE_CALL(nvml_library_entry, nvmlDeviceGetSamples, device, type, - lastSeenTimeStamp, sampleValType, sampleCount, - samples); + return NVML_OVERRIDE_CALL(nvml_library_entry, nvmlDeviceGetSamples, device, type, + lastSeenTimeStamp, sampleValType, sampleCount, samples); } -nvmlReturn_t nvmlDeviceGetSerial(nvmlDevice_t device, char *serial, - unsigned int length) { - return NVML_OVERRIDE_CALL(nvml_library_entry, nvmlDeviceGetSerial, device, - serial, length); +nvmlReturn_t nvmlDeviceGetSerial(nvmlDevice_t device, char *serial, unsigned int length) { + return NVML_OVERRIDE_CALL(nvml_library_entry, nvmlDeviceGetSerial, device, serial, length); } nvmlReturn_t nvmlDeviceGetSupportedClocksThrottleReasons( nvmlDevice_t device, unsigned long long *supportedClocksThrottleReasons) { - return NVML_OVERRIDE_CALL(nvml_library_entry, - nvmlDeviceGetSupportedClocksThrottleReasons, device, - supportedClocksThrottleReasons); + return NVML_OVERRIDE_CALL(nvml_library_entry, nvmlDeviceGetSupportedClocksThrottleReasons, + device, supportedClocksThrottleReasons); } -nvmlReturn_t nvmlDeviceGetSupportedEventTypes(nvmlDevice_t device, - unsigned long long *eventTypes) { - 
nvmlReturn_t res = NVML_OVERRIDE_CALL(nvml_library_entry, nvmlDeviceGetSupportedEventTypes, - device, eventTypes); - // nvmlReturn_t res = NVML_OVERRIDE_CALL(nvml_library_entry, nvmlDeviceGetSupportedEventTypes, - // device, eventTypes); - return res; +nvmlReturn_t nvmlDeviceGetSupportedEventTypes(nvmlDevice_t device, unsigned long long *eventTypes) { + nvmlReturn_t res = NVML_OVERRIDE_CALL(nvml_library_entry, nvmlDeviceGetSupportedEventTypes, + device, eventTypes); + // nvmlReturn_t res = NVML_OVERRIDE_CALL(nvml_library_entry, nvmlDeviceGetSupportedEventTypes, + // device, eventTypes); + return res; } -nvmlReturn_t nvmlDeviceGetSupportedGraphicsClocks(nvmlDevice_t device, - unsigned int memoryClockMHz, - unsigned int *count, - unsigned int *clocksMHz) { - return NVML_OVERRIDE_CALL(nvml_library_entry, - nvmlDeviceGetSupportedGraphicsClocks, device, - memoryClockMHz, count, clocksMHz); +nvmlReturn_t nvmlDeviceGetSupportedGraphicsClocks(nvmlDevice_t device, unsigned int memoryClockMHz, + unsigned int *count, unsigned int *clocksMHz) { + return NVML_OVERRIDE_CALL(nvml_library_entry, nvmlDeviceGetSupportedGraphicsClocks, device, + memoryClockMHz, count, clocksMHz); } -nvmlReturn_t nvmlDeviceGetSupportedMemoryClocks(nvmlDevice_t device, - unsigned int *count, +nvmlReturn_t nvmlDeviceGetSupportedMemoryClocks(nvmlDevice_t device, unsigned int *count, unsigned int *clocksMHz) { - return NVML_OVERRIDE_CALL(nvml_library_entry, nvmlDeviceGetSupportedMemoryClocks, - device, count, clocksMHz); + return NVML_OVERRIDE_CALL(nvml_library_entry, nvmlDeviceGetSupportedMemoryClocks, device, count, + clocksMHz); } -nvmlReturn_t nvmlDeviceGetSupportedVgpus(nvmlDevice_t device, - unsigned int *vgpuCount, +nvmlReturn_t nvmlDeviceGetSupportedVgpus(nvmlDevice_t device, unsigned int *vgpuCount, nvmlVgpuTypeId_t *vgpuTypeIds) { - return NVML_OVERRIDE_CALL(nvml_library_entry, nvmlDeviceGetSupportedVgpus, - device, vgpuCount, vgpuTypeIds); + return NVML_OVERRIDE_CALL(nvml_library_entry, 
nvmlDeviceGetSupportedVgpus, device, vgpuCount, + vgpuTypeIds); } -nvmlReturn_t nvmlDeviceGetTemperature(nvmlDevice_t device, - nvmlTemperatureSensors_t sensorType, +nvmlReturn_t nvmlDeviceGetTemperature(nvmlDevice_t device, nvmlTemperatureSensors_t sensorType, unsigned int *temp) { - return NVML_OVERRIDE_CALL(nvml_library_entry, nvmlDeviceGetTemperature, device, - sensorType, temp); + return NVML_OVERRIDE_CALL(nvml_library_entry, nvmlDeviceGetTemperature, device, sensorType, + temp); } -nvmlReturn_t -nvmlDeviceGetTemperatureThreshold(nvmlDevice_t device, - nvmlTemperatureThresholds_t thresholdType, - unsigned int *temp) { - return NVML_OVERRIDE_CALL(nvml_library_entry, nvmlDeviceGetTemperatureThreshold, - device, thresholdType, temp); +nvmlReturn_t nvmlDeviceGetTemperatureThreshold(nvmlDevice_t device, + nvmlTemperatureThresholds_t thresholdType, + unsigned int *temp) { + return NVML_OVERRIDE_CALL(nvml_library_entry, nvmlDeviceGetTemperatureThreshold, device, + thresholdType, temp); } -nvmlReturn_t -nvmlDeviceGetTopologyCommonAncestor(nvmlDevice_t device1, nvmlDevice_t device2, - nvmlGpuTopologyLevel_t *pathInfo) { - return NVML_OVERRIDE_CALL(nvml_library_entry, - nvmlDeviceGetTopologyCommonAncestor, device1, device2, - pathInfo); +nvmlReturn_t nvmlDeviceGetTopologyCommonAncestor(nvmlDevice_t device1, nvmlDevice_t device2, + nvmlGpuTopologyLevel_t *pathInfo) { + return NVML_OVERRIDE_CALL(nvml_library_entry, nvmlDeviceGetTopologyCommonAncestor, device1, + device2, pathInfo); } -nvmlReturn_t nvmlDeviceGetTopologyNearestGpus(nvmlDevice_t device, - nvmlGpuTopologyLevel_t level, - unsigned int *count, - nvmlDevice_t *deviceArray) { - return NVML_OVERRIDE_CALL(nvml_library_entry, nvmlDeviceGetTopologyNearestGpus, - device, level, count, deviceArray); +nvmlReturn_t nvmlDeviceGetTopologyNearestGpus(nvmlDevice_t device, nvmlGpuTopologyLevel_t level, + unsigned int *count, nvmlDevice_t *deviceArray) { + return NVML_OVERRIDE_CALL(nvml_library_entry, 
nvmlDeviceGetTopologyNearestGpus, device, level, + count, deviceArray); } -nvmlReturn_t nvmlDeviceGetTotalEccErrors(nvmlDevice_t device, - nvmlMemoryErrorType_t errorType, +nvmlReturn_t nvmlDeviceGetTotalEccErrors(nvmlDevice_t device, nvmlMemoryErrorType_t errorType, nvmlEccCounterType_t counterType, unsigned long long *eccCounts) { - return NVML_OVERRIDE_CALL(nvml_library_entry, nvmlDeviceGetTotalEccErrors, - device, errorType, counterType, eccCounts); + return NVML_OVERRIDE_CALL(nvml_library_entry, nvmlDeviceGetTotalEccErrors, device, errorType, + counterType, eccCounts); } -nvmlReturn_t nvmlDeviceGetTotalEnergyConsumption(nvmlDevice_t device, - unsigned long long *energy) { - return NVML_OVERRIDE_CALL(nvml_library_entry, - nvmlDeviceGetTotalEnergyConsumption, device, energy); +nvmlReturn_t nvmlDeviceGetTotalEnergyConsumption(nvmlDevice_t device, unsigned long long *energy) { + return NVML_OVERRIDE_CALL(nvml_library_entry, nvmlDeviceGetTotalEnergyConsumption, device, + energy); } -nvmlReturn_t nvmlDeviceGetUtilizationRates(nvmlDevice_t device, - nvmlUtilization_t *utilization) { - return NVML_OVERRIDE_CALL(nvml_library_entry, nvmlDeviceGetUtilizationRates, - device, utilization); +nvmlReturn_t nvmlDeviceGetUtilizationRates(nvmlDevice_t device, nvmlUtilization_t *utilization) { + return NVML_OVERRIDE_CALL(nvml_library_entry, nvmlDeviceGetUtilizationRates, device, + utilization); } -nvmlReturn_t nvmlDeviceGetVbiosVersion(nvmlDevice_t device, char *version, - unsigned int length) { - return NVML_OVERRIDE_CALL(nvml_library_entry, nvmlDeviceGetVbiosVersion, device, - version, length); +nvmlReturn_t nvmlDeviceGetVbiosVersion(nvmlDevice_t device, char *version, unsigned int length) { + return NVML_OVERRIDE_CALL(nvml_library_entry, nvmlDeviceGetVbiosVersion, device, version, + length); } -nvmlReturn_t nvmlDeviceGetVgpuMetadata(nvmlDevice_t device, - nvmlVgpuPgpuMetadata_t *pgpuMetadata, +nvmlReturn_t nvmlDeviceGetVgpuMetadata(nvmlDevice_t device, nvmlVgpuPgpuMetadata_t 
*pgpuMetadata, unsigned int *bufferSize) { - return NVML_OVERRIDE_CALL(nvml_library_entry, nvmlDeviceGetVgpuMetadata, device, - pgpuMetadata, bufferSize); + return NVML_OVERRIDE_CALL(nvml_library_entry, nvmlDeviceGetVgpuMetadata, device, pgpuMetadata, + bufferSize); } nvmlReturn_t nvmlDeviceGetVgpuProcessUtilization( nvmlDevice_t device, unsigned long long lastSeenTimeStamp, - unsigned int *vgpuProcessSamplesCount, - nvmlVgpuProcessUtilizationSample_t *utilizationSamples) { - return NVML_OVERRIDE_CALL( - nvml_library_entry, nvmlDeviceGetVgpuProcessUtilization, device, - lastSeenTimeStamp, vgpuProcessSamplesCount, utilizationSamples); + unsigned int *vgpuProcessSamplesCount, nvmlVgpuProcessUtilizationSample_t *utilizationSamples) { + return NVML_OVERRIDE_CALL(nvml_library_entry, nvmlDeviceGetVgpuProcessUtilization, device, + lastSeenTimeStamp, vgpuProcessSamplesCount, utilizationSamples); } -nvmlReturn_t nvmlDeviceGetVgpuUtilization( - nvmlDevice_t device, unsigned long long lastSeenTimeStamp, - nvmlValueType_t *sampleValType, unsigned int *vgpuInstanceSamplesCount, - nvmlVgpuInstanceUtilizationSample_t *utilizationSamples) { - return NVML_OVERRIDE_CALL(nvml_library_entry, nvmlDeviceGetVgpuUtilization, - device, lastSeenTimeStamp, sampleValType, - vgpuInstanceSamplesCount, utilizationSamples); +nvmlReturn_t nvmlDeviceGetVgpuUtilization(nvmlDevice_t device, unsigned long long lastSeenTimeStamp, + nvmlValueType_t *sampleValType, + unsigned int *vgpuInstanceSamplesCount, + nvmlVgpuInstanceUtilizationSample_t *utilizationSamples) { + return NVML_OVERRIDE_CALL(nvml_library_entry, nvmlDeviceGetVgpuUtilization, device, + lastSeenTimeStamp, sampleValType, vgpuInstanceSamplesCount, + utilizationSamples); } -nvmlReturn_t nvmlDeviceGetViolationStatus(nvmlDevice_t device, - nvmlPerfPolicyType_t perfPolicyType, +nvmlReturn_t nvmlDeviceGetViolationStatus(nvmlDevice_t device, nvmlPerfPolicyType_t perfPolicyType, nvmlViolationTime_t *violTime) { - return 
NVML_OVERRIDE_CALL(nvml_library_entry, nvmlDeviceGetViolationStatus, - device, perfPolicyType, violTime); + return NVML_OVERRIDE_CALL(nvml_library_entry, nvmlDeviceGetViolationStatus, device, + perfPolicyType, violTime); } -nvmlReturn_t -nvmlDeviceGetVirtualizationMode(nvmlDevice_t device, - nvmlGpuVirtualizationMode_t *pVirtualMode) { - return NVML_OVERRIDE_CALL(nvml_library_entry, nvmlDeviceGetVirtualizationMode, - device, pVirtualMode); +nvmlReturn_t nvmlDeviceGetVirtualizationMode(nvmlDevice_t device, + nvmlGpuVirtualizationMode_t *pVirtualMode) { + return NVML_OVERRIDE_CALL(nvml_library_entry, nvmlDeviceGetVirtualizationMode, device, + pVirtualMode); } -nvmlReturn_t nvmlDeviceModifyDrainState(nvmlPciInfo_t *pciInfo, - nvmlEnableState_t newState) { - return NVML_OVERRIDE_CALL(nvml_library_entry, nvmlDeviceModifyDrainState, - pciInfo, newState); +nvmlReturn_t nvmlDeviceModifyDrainState(nvmlPciInfo_t *pciInfo, nvmlEnableState_t newState) { + return NVML_OVERRIDE_CALL(nvml_library_entry, nvmlDeviceModifyDrainState, pciInfo, newState); } -nvmlReturn_t nvmlDeviceOnSameBoard(nvmlDevice_t device1, nvmlDevice_t device2, - int *onSameBoard) { - return NVML_OVERRIDE_CALL(nvml_library_entry, nvmlDeviceOnSameBoard, device1, - device2, onSameBoard); +nvmlReturn_t nvmlDeviceOnSameBoard(nvmlDevice_t device1, nvmlDevice_t device2, int *onSameBoard) { + return NVML_OVERRIDE_CALL(nvml_library_entry, nvmlDeviceOnSameBoard, device1, device2, + onSameBoard); } -nvmlReturn_t nvmlDeviceQueryDrainState(nvmlPciInfo_t *pciInfo, - nvmlEnableState_t *currentState) { - return NVML_OVERRIDE_CALL(nvml_library_entry, nvmlDeviceQueryDrainState, pciInfo, - currentState); +nvmlReturn_t nvmlDeviceQueryDrainState(nvmlPciInfo_t *pciInfo, nvmlEnableState_t *currentState) { + return NVML_OVERRIDE_CALL(nvml_library_entry, nvmlDeviceQueryDrainState, pciInfo, currentState); } -nvmlReturn_t nvmlDeviceRegisterEvents(nvmlDevice_t device, - unsigned long long eventTypes, +nvmlReturn_t 
nvmlDeviceRegisterEvents(nvmlDevice_t device, unsigned long long eventTypes, nvmlEventSet_t set) { - return NVML_OVERRIDE_CALL(nvml_library_entry, nvmlDeviceRegisterEvents, device, - eventTypes, set); + return NVML_OVERRIDE_CALL(nvml_library_entry, nvmlDeviceRegisterEvents, device, eventTypes, + set); } -nvmlReturn_t nvmlDeviceRemoveGpu_v2(nvmlPciInfo_t *pciInfo, - nvmlDetachGpuState_t gpuState, +nvmlReturn_t nvmlDeviceRemoveGpu_v2(nvmlPciInfo_t *pciInfo, nvmlDetachGpuState_t gpuState, nvmlPcieLinkState_t linkState) { - return NVML_OVERRIDE_CALL(nvml_library_entry, nvmlDeviceRemoveGpu_v2, pciInfo, - gpuState, linkState); + return NVML_OVERRIDE_CALL(nvml_library_entry, nvmlDeviceRemoveGpu_v2, pciInfo, gpuState, + linkState); } -nvmlReturn_t nvmlDeviceRemoveGpu(nvmlPciInfo_t *pciInfo, - nvmlDetachGpuState_t gpuState, +nvmlReturn_t nvmlDeviceRemoveGpu(nvmlPciInfo_t *pciInfo, nvmlDetachGpuState_t gpuState, nvmlPcieLinkState_t linkState) { - return NVML_OVERRIDE_CALL(nvml_library_entry, nvmlDeviceRemoveGpu, pciInfo, - gpuState, linkState); + return NVML_OVERRIDE_CALL(nvml_library_entry, nvmlDeviceRemoveGpu, pciInfo, gpuState, + linkState); } nvmlReturn_t nvmlDeviceResetApplicationsClocks(nvmlDevice_t device) { - return NVML_OVERRIDE_CALL(nvml_library_entry, nvmlDeviceResetApplicationsClocks, - device); + return NVML_OVERRIDE_CALL(nvml_library_entry, nvmlDeviceResetApplicationsClocks, device); } -nvmlReturn_t nvmlDeviceResetNvLinkErrorCounters(nvmlDevice_t device, - unsigned int link) { - return NVML_OVERRIDE_CALL(nvml_library_entry, nvmlDeviceResetNvLinkErrorCounters, - device, link); +nvmlReturn_t nvmlDeviceResetNvLinkErrorCounters(nvmlDevice_t device, unsigned int link) { + return NVML_OVERRIDE_CALL(nvml_library_entry, nvmlDeviceResetNvLinkErrorCounters, device, link); } -nvmlReturn_t nvmlDeviceResetNvLinkUtilizationCounter(nvmlDevice_t device, - unsigned int link, +nvmlReturn_t nvmlDeviceResetNvLinkUtilizationCounter(nvmlDevice_t device, unsigned int link, unsigned 
int counter) { - return NVML_OVERRIDE_CALL(nvml_library_entry, - nvmlDeviceResetNvLinkUtilizationCounter, device, link, - counter); + return NVML_OVERRIDE_CALL(nvml_library_entry, nvmlDeviceResetNvLinkUtilizationCounter, device, + link, counter); } -nvmlReturn_t nvmlDeviceSetAccountingMode(nvmlDevice_t device, - nvmlEnableState_t mode) { - return NVML_OVERRIDE_CALL(nvml_library_entry, nvmlDeviceSetAccountingMode, - device, mode); +nvmlReturn_t nvmlDeviceSetAccountingMode(nvmlDevice_t device, nvmlEnableState_t mode) { + return NVML_OVERRIDE_CALL(nvml_library_entry, nvmlDeviceSetAccountingMode, device, mode); } -nvmlReturn_t nvmlDeviceSetAPIRestriction(nvmlDevice_t device, - nvmlRestrictedAPI_t apiType, +nvmlReturn_t nvmlDeviceSetAPIRestriction(nvmlDevice_t device, nvmlRestrictedAPI_t apiType, nvmlEnableState_t isRestricted) { - return NVML_OVERRIDE_CALL(nvml_library_entry, nvmlDeviceSetAPIRestriction, - device, apiType, isRestricted); + return NVML_OVERRIDE_CALL(nvml_library_entry, nvmlDeviceSetAPIRestriction, device, apiType, + isRestricted); } -nvmlReturn_t nvmlDeviceSetApplicationsClocks(nvmlDevice_t device, - unsigned int memClockMHz, +nvmlReturn_t nvmlDeviceSetApplicationsClocks(nvmlDevice_t device, unsigned int memClockMHz, unsigned int graphicsClockMHz) { - return NVML_OVERRIDE_CALL(nvml_library_entry, nvmlDeviceSetApplicationsClocks, - device, memClockMHz, graphicsClockMHz); + return NVML_OVERRIDE_CALL(nvml_library_entry, nvmlDeviceSetApplicationsClocks, device, + memClockMHz, graphicsClockMHz); } -nvmlReturn_t nvmlDeviceSetAutoBoostedClocksEnabled(nvmlDevice_t device, - nvmlEnableState_t enabled) { - return NVML_OVERRIDE_CALL(nvml_library_entry, - nvmlDeviceSetAutoBoostedClocksEnabled, device, - enabled); +nvmlReturn_t nvmlDeviceSetAutoBoostedClocksEnabled(nvmlDevice_t device, nvmlEnableState_t enabled) { + return NVML_OVERRIDE_CALL(nvml_library_entry, nvmlDeviceSetAutoBoostedClocksEnabled, device, + enabled); } -nvmlReturn_t 
nvmlDeviceSetComputeMode(nvmlDevice_t device, - nvmlComputeMode_t mode) { - //if (g_vcuda_config.enable) { - // return NVML_ERROR_NOT_SUPPORTED; - //} +nvmlReturn_t nvmlDeviceSetComputeMode(nvmlDevice_t device, nvmlComputeMode_t mode) { + // if (g_vcuda_config.enable) { + // return NVML_ERROR_NOT_SUPPORTED; + // } - return NVML_OVERRIDE_CALL(nvml_library_entry, nvmlDeviceSetComputeMode, device, - mode); + return NVML_OVERRIDE_CALL(nvml_library_entry, nvmlDeviceSetComputeMode, device, mode); } nvmlReturn_t nvmlDeviceSetCpuAffinity(nvmlDevice_t device) { - return NVML_OVERRIDE_CALL(nvml_library_entry, nvmlDeviceSetCpuAffinity, device); + return NVML_OVERRIDE_CALL(nvml_library_entry, nvmlDeviceSetCpuAffinity, device); } -nvmlReturn_t nvmlDeviceSetDefaultAutoBoostedClocksEnabled( - nvmlDevice_t device, nvmlEnableState_t enabled, unsigned int flags) { - return NVML_OVERRIDE_CALL(nvml_library_entry, - nvmlDeviceSetDefaultAutoBoostedClocksEnabled, device, - enabled, flags); +nvmlReturn_t nvmlDeviceSetDefaultAutoBoostedClocksEnabled(nvmlDevice_t device, + nvmlEnableState_t enabled, + unsigned int flags) { + return NVML_OVERRIDE_CALL(nvml_library_entry, nvmlDeviceSetDefaultAutoBoostedClocksEnabled, + device, enabled, flags); } -nvmlReturn_t nvmlDeviceSetDriverModel(nvmlDevice_t device, - nvmlDriverModel_t driverModel, +nvmlReturn_t nvmlDeviceSetDriverModel(nvmlDevice_t device, nvmlDriverModel_t driverModel, unsigned int flags) { - return NVML_OVERRIDE_CALL(nvml_library_entry, nvmlDeviceSetDriverModel, device, - driverModel, flags); + return NVML_OVERRIDE_CALL(nvml_library_entry, nvmlDeviceSetDriverModel, device, driverModel, + flags); } -nvmlReturn_t nvmlDeviceSetGpuOperationMode(nvmlDevice_t device, - nvmlGpuOperationMode_t mode) { - return NVML_OVERRIDE_CALL(nvml_library_entry, nvmlDeviceSetGpuOperationMode, - device, mode); +nvmlReturn_t nvmlDeviceSetGpuOperationMode(nvmlDevice_t device, nvmlGpuOperationMode_t mode) { + return NVML_OVERRIDE_CALL(nvml_library_entry, 
nvmlDeviceSetGpuOperationMode, device, mode); } -nvmlReturn_t nvmlDeviceSetNvLinkUtilizationControl( - nvmlDevice_t device, unsigned int link, unsigned int counter, - nvmlNvLinkUtilizationControl_t *control, unsigned int reset) { - return NVML_OVERRIDE_CALL(nvml_library_entry, - nvmlDeviceSetNvLinkUtilizationControl, device, link, - counter, control, reset); +nvmlReturn_t nvmlDeviceSetNvLinkUtilizationControl(nvmlDevice_t device, unsigned int link, + unsigned int counter, + nvmlNvLinkUtilizationControl_t *control, + unsigned int reset) { + return NVML_OVERRIDE_CALL(nvml_library_entry, nvmlDeviceSetNvLinkUtilizationControl, device, + link, counter, control, reset); } -nvmlReturn_t nvmlDeviceSetPersistenceMode(nvmlDevice_t device, - nvmlEnableState_t mode) { - return NVML_OVERRIDE_CALL(nvml_library_entry, nvmlDeviceSetPersistenceMode, - device, mode); +nvmlReturn_t nvmlDeviceSetPersistenceMode(nvmlDevice_t device, nvmlEnableState_t mode) { + return NVML_OVERRIDE_CALL(nvml_library_entry, nvmlDeviceSetPersistenceMode, device, mode); } -nvmlReturn_t nvmlDeviceSetPowerManagementLimit(nvmlDevice_t device, - unsigned int limit) { - return NVML_OVERRIDE_CALL(nvml_library_entry, nvmlDeviceSetPowerManagementLimit, - device, limit); +nvmlReturn_t nvmlDeviceSetPowerManagementLimit(nvmlDevice_t device, unsigned int limit) { + return NVML_OVERRIDE_CALL(nvml_library_entry, nvmlDeviceSetPowerManagementLimit, device, limit); } -nvmlReturn_t -nvmlDeviceSetVirtualizationMode(nvmlDevice_t device, - nvmlGpuVirtualizationMode_t virtualMode) { - return NVML_OVERRIDE_CALL(nvml_library_entry, nvmlDeviceSetVirtualizationMode, - device, virtualMode); +nvmlReturn_t nvmlDeviceSetVirtualizationMode(nvmlDevice_t device, + nvmlGpuVirtualizationMode_t virtualMode) { + return NVML_OVERRIDE_CALL(nvml_library_entry, nvmlDeviceSetVirtualizationMode, device, + virtualMode); } nvmlReturn_t nvmlDeviceValidateInforom(nvmlDevice_t device) { - return NVML_OVERRIDE_CALL(nvml_library_entry, 
nvmlDeviceValidateInforom, device); + return NVML_OVERRIDE_CALL(nvml_library_entry, nvmlDeviceValidateInforom, device); } nvmlReturn_t nvmlEventSetCreate(nvmlEventSet_t *set) { - return NVML_OVERRIDE_CALL(nvml_library_entry, nvmlEventSetCreate, set); + return NVML_OVERRIDE_CALL(nvml_library_entry, nvmlEventSetCreate, set); } nvmlReturn_t nvmlEventSetFree(nvmlEventSet_t set) { - return NVML_OVERRIDE_CALL(nvml_library_entry, nvmlEventSetFree, set); + return NVML_OVERRIDE_CALL(nvml_library_entry, nvmlEventSetFree, set); } -nvmlReturn_t nvmlEventSetWait(nvmlEventSet_t set, nvmlEventData_t *data, - unsigned int timeoutms) { - return NVML_OVERRIDE_CALL(nvml_library_entry, nvmlEventSetWait, set, data, - timeoutms); +nvmlReturn_t nvmlEventSetWait(nvmlEventSet_t set, nvmlEventData_t *data, unsigned int timeoutms) { + return NVML_OVERRIDE_CALL(nvml_library_entry, nvmlEventSetWait, set, data, timeoutms); } -nvmlReturn_t -nvmlGetVgpuCompatibility(nvmlVgpuMetadata_t *vgpuMetadata, - nvmlVgpuPgpuMetadata_t *pgpuMetadata, - nvmlVgpuPgpuCompatibility_t *compatibilityInfo) { - return NVML_OVERRIDE_CALL(nvml_library_entry, nvmlGetVgpuCompatibility, - vgpuMetadata, pgpuMetadata, compatibilityInfo); +nvmlReturn_t nvmlGetVgpuCompatibility(nvmlVgpuMetadata_t *vgpuMetadata, + nvmlVgpuPgpuMetadata_t *pgpuMetadata, + nvmlVgpuPgpuCompatibility_t *compatibilityInfo) { + return NVML_OVERRIDE_CALL(nvml_library_entry, nvmlGetVgpuCompatibility, vgpuMetadata, + pgpuMetadata, compatibilityInfo); } -nvmlReturn_t nvmlInternalGetExportTable(const void **ppExportTable, - void *pExportTableId) { - return NVML_OVERRIDE_CALL(nvml_library_entry, nvmlInternalGetExportTable, - ppExportTable, pExportTableId); +nvmlReturn_t nvmlInternalGetExportTable(const void **ppExportTable, void *pExportTableId) { + return NVML_OVERRIDE_CALL(nvml_library_entry, nvmlInternalGetExportTable, ppExportTable, + pExportTableId); } nvmlReturn_t nvmlSystemGetCudaDriverVersion(int *cudaDriverVersion) { - return 
NVML_OVERRIDE_CALL(nvml_library_entry, nvmlSystemGetCudaDriverVersion, - cudaDriverVersion); + return NVML_OVERRIDE_CALL(nvml_library_entry, nvmlSystemGetCudaDriverVersion, + cudaDriverVersion); } nvmlReturn_t nvmlSystemGetCudaDriverVersion_v2(int *cudaDriverVersion) { - return NVML_OVERRIDE_CALL(nvml_library_entry, nvmlSystemGetCudaDriverVersion_v2, - cudaDriverVersion); + return NVML_OVERRIDE_CALL(nvml_library_entry, nvmlSystemGetCudaDriverVersion_v2, + cudaDriverVersion); } nvmlReturn_t nvmlSystemGetDriverVersion(char *version, unsigned int length) { - return NVML_OVERRIDE_CALL(nvml_library_entry, nvmlSystemGetDriverVersion, - version, length); + return NVML_OVERRIDE_CALL(nvml_library_entry, nvmlSystemGetDriverVersion, version, length); } -nvmlReturn_t nvmlSystemGetHicVersion(unsigned int *hwbcCount, - nvmlHwbcEntry_t *hwbcEntries) { - return NVML_OVERRIDE_CALL(nvml_library_entry, nvmlSystemGetHicVersion, hwbcCount, - hwbcEntries); +nvmlReturn_t nvmlSystemGetHicVersion(unsigned int *hwbcCount, nvmlHwbcEntry_t *hwbcEntries) { + return NVML_OVERRIDE_CALL(nvml_library_entry, nvmlSystemGetHicVersion, hwbcCount, hwbcEntries); } nvmlReturn_t nvmlSystemGetNVMLVersion(char *version, unsigned int length) { - return NVML_OVERRIDE_CALL(nvml_library_entry, nvmlSystemGetNVMLVersion, version, - length); + return NVML_OVERRIDE_CALL(nvml_library_entry, nvmlSystemGetNVMLVersion, version, length); } -nvmlReturn_t nvmlSystemGetProcessName(unsigned int pid, char *name, - unsigned int length) { - return NVML_OVERRIDE_CALL(nvml_library_entry, nvmlSystemGetProcessName, pid, - name, length); +nvmlReturn_t nvmlSystemGetProcessName(unsigned int pid, char *name, unsigned int length) { + return NVML_OVERRIDE_CALL(nvml_library_entry, nvmlSystemGetProcessName, pid, name, length); } -nvmlReturn_t nvmlSystemGetTopologyGpuSet(unsigned int cpuNumber, - unsigned int *count, +nvmlReturn_t nvmlSystemGetTopologyGpuSet(unsigned int cpuNumber, unsigned int *count, nvmlDevice_t *deviceArray) { - return 
NVML_OVERRIDE_CALL(nvml_library_entry, nvmlSystemGetTopologyGpuSet, - cpuNumber, count, deviceArray); + return NVML_OVERRIDE_CALL(nvml_library_entry, nvmlSystemGetTopologyGpuSet, cpuNumber, count, + deviceArray); } nvmlReturn_t nvmlUnitGetCount(unsigned int *unitCount) { - return NVML_OVERRIDE_CALL(nvml_library_entry, nvmlUnitGetCount, unitCount); + return NVML_OVERRIDE_CALL(nvml_library_entry, nvmlUnitGetCount, unitCount); } -nvmlReturn_t nvmlUnitGetDevices(nvmlUnit_t unit, unsigned int *deviceCount, - nvmlDevice_t *devices) { - return NVML_OVERRIDE_CALL(nvml_library_entry, nvmlUnitGetDevices, unit, - deviceCount, devices); +nvmlReturn_t nvmlUnitGetDevices(nvmlUnit_t unit, unsigned int *deviceCount, nvmlDevice_t *devices) { + return NVML_OVERRIDE_CALL(nvml_library_entry, nvmlUnitGetDevices, unit, deviceCount, devices); } -nvmlReturn_t nvmlUnitGetFanSpeedInfo(nvmlUnit_t unit, - nvmlUnitFanSpeeds_t *fanSpeeds) { - return NVML_OVERRIDE_CALL(nvml_library_entry, nvmlUnitGetFanSpeedInfo, unit, - fanSpeeds); +nvmlReturn_t nvmlUnitGetFanSpeedInfo(nvmlUnit_t unit, nvmlUnitFanSpeeds_t *fanSpeeds) { + return NVML_OVERRIDE_CALL(nvml_library_entry, nvmlUnitGetFanSpeedInfo, unit, fanSpeeds); } nvmlReturn_t nvmlUnitGetHandleByIndex(unsigned int index, nvmlUnit_t *unit) { - return NVML_OVERRIDE_CALL(nvml_library_entry, nvmlUnitGetHandleByIndex, index, - unit); + return NVML_OVERRIDE_CALL(nvml_library_entry, nvmlUnitGetHandleByIndex, index, unit); } nvmlReturn_t nvmlUnitGetLedState(nvmlUnit_t unit, nvmlLedState_t *state) { - return NVML_OVERRIDE_CALL(nvml_library_entry, nvmlUnitGetLedState, unit, state); + return NVML_OVERRIDE_CALL(nvml_library_entry, nvmlUnitGetLedState, unit, state); } nvmlReturn_t nvmlUnitGetPsuInfo(nvmlUnit_t unit, nvmlPSUInfo_t *psu) { - return NVML_OVERRIDE_CALL(nvml_library_entry, nvmlUnitGetPsuInfo, unit, psu); + return NVML_OVERRIDE_CALL(nvml_library_entry, nvmlUnitGetPsuInfo, unit, psu); } -nvmlReturn_t nvmlUnitGetTemperature(nvmlUnit_t unit, unsigned 
int type, - unsigned int *temp) { - return NVML_OVERRIDE_CALL(nvml_library_entry, nvmlUnitGetTemperature, unit, type, - temp); +nvmlReturn_t nvmlUnitGetTemperature(nvmlUnit_t unit, unsigned int type, unsigned int *temp) { + return NVML_OVERRIDE_CALL(nvml_library_entry, nvmlUnitGetTemperature, unit, type, temp); } nvmlReturn_t nvmlUnitGetUnitInfo(nvmlUnit_t unit, nvmlUnitInfo_t *info) { - return NVML_OVERRIDE_CALL(nvml_library_entry, nvmlUnitGetUnitInfo, unit, info); + return NVML_OVERRIDE_CALL(nvml_library_entry, nvmlUnitGetUnitInfo, unit, info); } nvmlReturn_t nvmlUnitSetLedState(nvmlUnit_t unit, nvmlLedColor_t color) { - return NVML_OVERRIDE_CALL(nvml_library_entry, nvmlUnitSetLedState, unit, color); + return NVML_OVERRIDE_CALL(nvml_library_entry, nvmlUnitSetLedState, unit, color); } nvmlReturn_t nvmlVgpuInstanceGetEncoderCapacity(nvmlVgpuInstance_t vgpuInstance, unsigned int *encoderCapacity) { - return NVML_OVERRIDE_CALL(nvml_library_entry, nvmlVgpuInstanceGetEncoderCapacity, - vgpuInstance, encoderCapacity); + return NVML_OVERRIDE_CALL(nvml_library_entry, nvmlVgpuInstanceGetEncoderCapacity, vgpuInstance, + encoderCapacity); } -nvmlReturn_t -nvmlVgpuInstanceGetEncoderSessions(nvmlVgpuInstance_t vgpuInstance, - unsigned int *sessionCount, - nvmlEncoderSessionInfo_t *sessionInfo) { - return NVML_OVERRIDE_CALL(nvml_library_entry, nvmlVgpuInstanceGetEncoderSessions, - vgpuInstance, sessionCount, sessionInfo); +nvmlReturn_t nvmlVgpuInstanceGetEncoderSessions(nvmlVgpuInstance_t vgpuInstance, + unsigned int *sessionCount, + nvmlEncoderSessionInfo_t *sessionInfo) { + return NVML_OVERRIDE_CALL(nvml_library_entry, nvmlVgpuInstanceGetEncoderSessions, vgpuInstance, + sessionCount, sessionInfo); } nvmlReturn_t nvmlVgpuInstanceGetEncoderStats(nvmlVgpuInstance_t vgpuInstance, - unsigned int *sessionCount, - unsigned int *averageFps, + unsigned int *sessionCount, unsigned int *averageFps, unsigned int *averageLatency) { - return NVML_OVERRIDE_CALL(nvml_library_entry, 
nvmlVgpuInstanceGetEncoderStats, - vgpuInstance, sessionCount, averageFps, - averageLatency); + return NVML_OVERRIDE_CALL(nvml_library_entry, nvmlVgpuInstanceGetEncoderStats, vgpuInstance, + sessionCount, averageFps, averageLatency); } nvmlReturn_t nvmlVgpuInstanceGetFbUsage(nvmlVgpuInstance_t vgpuInstance, unsigned long long *fbUsage) { - return NVML_OVERRIDE_CALL(nvml_library_entry, nvmlVgpuInstanceGetFbUsage, - vgpuInstance, fbUsage); + return NVML_OVERRIDE_CALL(nvml_library_entry, nvmlVgpuInstanceGetFbUsage, vgpuInstance, + fbUsage); } nvmlReturn_t nvmlVgpuInstanceGetFrameRateLimit(nvmlVgpuInstance_t vgpuInstance, unsigned int *frameRateLimit) { - return NVML_OVERRIDE_CALL(nvml_library_entry, nvmlVgpuInstanceGetFrameRateLimit, - vgpuInstance, frameRateLimit); + return NVML_OVERRIDE_CALL(nvml_library_entry, nvmlVgpuInstanceGetFrameRateLimit, vgpuInstance, + frameRateLimit); } nvmlReturn_t nvmlVgpuInstanceGetLicenseStatus(nvmlVgpuInstance_t vgpuInstance, unsigned int *licensed) { - return NVML_OVERRIDE_CALL(nvml_library_entry, nvmlVgpuInstanceGetLicenseStatus, - vgpuInstance, licensed); + return NVML_OVERRIDE_CALL(nvml_library_entry, nvmlVgpuInstanceGetLicenseStatus, vgpuInstance, + licensed); } nvmlReturn_t nvmlVgpuInstanceGetMetadata(nvmlVgpuInstance_t vgpuInstance, nvmlVgpuMetadata_t *vgpuMetadata, unsigned int *bufferSize) { - return NVML_OVERRIDE_CALL(nvml_library_entry, nvmlVgpuInstanceGetMetadata, - vgpuInstance, vgpuMetadata, bufferSize); + return NVML_OVERRIDE_CALL(nvml_library_entry, nvmlVgpuInstanceGetMetadata, vgpuInstance, + vgpuMetadata, bufferSize); } nvmlReturn_t nvmlVgpuInstanceGetType(nvmlVgpuInstance_t vgpuInstance, nvmlVgpuTypeId_t *vgpuTypeId) { - return NVML_OVERRIDE_CALL(nvml_library_entry, nvmlVgpuInstanceGetType, - vgpuInstance, vgpuTypeId); + return NVML_OVERRIDE_CALL(nvml_library_entry, nvmlVgpuInstanceGetType, vgpuInstance, + vgpuTypeId); } -nvmlReturn_t nvmlVgpuInstanceGetUUID(nvmlVgpuInstance_t vgpuInstance, - char *uuid, unsigned 
int size) { - return NVML_OVERRIDE_CALL(nvml_library_entry, nvmlVgpuInstanceGetUUID, - vgpuInstance, uuid, size); +nvmlReturn_t nvmlVgpuInstanceGetUUID(nvmlVgpuInstance_t vgpuInstance, char *uuid, + unsigned int size) { + return NVML_OVERRIDE_CALL(nvml_library_entry, nvmlVgpuInstanceGetUUID, vgpuInstance, uuid, + size); } -nvmlReturn_t nvmlVgpuInstanceGetVmDriverVersion(nvmlVgpuInstance_t vgpuInstance, - char *version, +nvmlReturn_t nvmlVgpuInstanceGetVmDriverVersion(nvmlVgpuInstance_t vgpuInstance, char *version, unsigned int length) { - return NVML_OVERRIDE_CALL(nvml_library_entry, nvmlVgpuInstanceGetVmDriverVersion, - vgpuInstance, version, length); + return NVML_OVERRIDE_CALL(nvml_library_entry, nvmlVgpuInstanceGetVmDriverVersion, vgpuInstance, + version, length); } -nvmlReturn_t nvmlVgpuInstanceGetVmID(nvmlVgpuInstance_t vgpuInstance, - char *vmId, unsigned int size, +nvmlReturn_t nvmlVgpuInstanceGetVmID(nvmlVgpuInstance_t vgpuInstance, char *vmId, unsigned int size, nvmlVgpuVmIdType_t *vmIdType) { - return NVML_OVERRIDE_CALL(nvml_library_entry, nvmlVgpuInstanceGetVmID, - vgpuInstance, vmId, size, vmIdType); + return NVML_OVERRIDE_CALL(nvml_library_entry, nvmlVgpuInstanceGetVmID, vgpuInstance, vmId, size, + vmIdType); } nvmlReturn_t nvmlVgpuInstanceSetEncoderCapacity(nvmlVgpuInstance_t vgpuInstance, unsigned int encoderCapacity) { - return NVML_OVERRIDE_CALL(nvml_library_entry, nvmlVgpuInstanceSetEncoderCapacity, - vgpuInstance, encoderCapacity); + return NVML_OVERRIDE_CALL(nvml_library_entry, nvmlVgpuInstanceSetEncoderCapacity, vgpuInstance, + encoderCapacity); } -nvmlReturn_t nvmlVgpuTypeGetClass(nvmlVgpuTypeId_t vgpuTypeId, - char *vgpuTypeClass, unsigned int *size) { - return NVML_OVERRIDE_CALL(nvml_library_entry, nvmlVgpuTypeGetClass, vgpuTypeId, - vgpuTypeClass, size); +nvmlReturn_t nvmlVgpuTypeGetClass(nvmlVgpuTypeId_t vgpuTypeId, char *vgpuTypeClass, + unsigned int *size) { + return NVML_OVERRIDE_CALL(nvml_library_entry, nvmlVgpuTypeGetClass, 
vgpuTypeId, vgpuTypeClass, + size); } -nvmlReturn_t nvmlVgpuTypeGetDeviceID(nvmlVgpuTypeId_t vgpuTypeId, - unsigned long long *deviceID, +nvmlReturn_t nvmlVgpuTypeGetDeviceID(nvmlVgpuTypeId_t vgpuTypeId, unsigned long long *deviceID, unsigned long long *subsystemID) { - return NVML_OVERRIDE_CALL(nvml_library_entry, nvmlVgpuTypeGetDeviceID, - vgpuTypeId, deviceID, subsystemID); + return NVML_OVERRIDE_CALL(nvml_library_entry, nvmlVgpuTypeGetDeviceID, vgpuTypeId, deviceID, + subsystemID); } nvmlReturn_t nvmlVgpuTypeGetFramebufferSize(nvmlVgpuTypeId_t vgpuTypeId, unsigned long long *fbSize) { - return NVML_OVERRIDE_CALL(nvml_library_entry, nvmlVgpuTypeGetFramebufferSize, - vgpuTypeId, fbSize); + return NVML_OVERRIDE_CALL(nvml_library_entry, nvmlVgpuTypeGetFramebufferSize, vgpuTypeId, + fbSize); } nvmlReturn_t nvmlVgpuTypeGetFrameRateLimit(nvmlVgpuTypeId_t vgpuTypeId, unsigned int *frameRateLimit) { - return NVML_OVERRIDE_CALL(nvml_library_entry, nvmlVgpuTypeGetFrameRateLimit, - vgpuTypeId, frameRateLimit); + return NVML_OVERRIDE_CALL(nvml_library_entry, nvmlVgpuTypeGetFrameRateLimit, vgpuTypeId, + frameRateLimit); } -nvmlReturn_t nvmlVgpuTypeGetLicense(nvmlVgpuTypeId_t vgpuTypeId, - char *vgpuTypeLicenseString, +nvmlReturn_t nvmlVgpuTypeGetLicense(nvmlVgpuTypeId_t vgpuTypeId, char *vgpuTypeLicenseString, unsigned int size) { - return NVML_OVERRIDE_CALL(nvml_library_entry, nvmlVgpuTypeGetLicense, vgpuTypeId, - vgpuTypeLicenseString, size); + return NVML_OVERRIDE_CALL(nvml_library_entry, nvmlVgpuTypeGetLicense, vgpuTypeId, + vgpuTypeLicenseString, size); } -nvmlReturn_t nvmlVgpuTypeGetMaxInstances(nvmlDevice_t device, - nvmlVgpuTypeId_t vgpuTypeId, +nvmlReturn_t nvmlVgpuTypeGetMaxInstances(nvmlDevice_t device, nvmlVgpuTypeId_t vgpuTypeId, unsigned int *vgpuInstanceCount) { - return NVML_OVERRIDE_CALL(nvml_library_entry, nvmlVgpuTypeGetMaxInstances, - device, vgpuTypeId, vgpuInstanceCount); + return NVML_OVERRIDE_CALL(nvml_library_entry, nvmlVgpuTypeGetMaxInstances, 
device, vgpuTypeId, + vgpuInstanceCount); } -nvmlReturn_t nvmlVgpuTypeGetName(nvmlVgpuTypeId_t vgpuTypeId, - char *vgpuTypeName, unsigned int *size) { - return NVML_OVERRIDE_CALL(nvml_library_entry, nvmlVgpuTypeGetName, vgpuTypeId, - vgpuTypeName, size); +nvmlReturn_t nvmlVgpuTypeGetName(nvmlVgpuTypeId_t vgpuTypeId, char *vgpuTypeName, + unsigned int *size) { + return NVML_OVERRIDE_CALL(nvml_library_entry, nvmlVgpuTypeGetName, vgpuTypeId, vgpuTypeName, + size); } nvmlReturn_t nvmlVgpuTypeGetNumDisplayHeads(nvmlVgpuTypeId_t vgpuTypeId, unsigned int *numDisplayHeads) { - return NVML_OVERRIDE_CALL(nvml_library_entry, nvmlVgpuTypeGetNumDisplayHeads, - vgpuTypeId, numDisplayHeads); + return NVML_OVERRIDE_CALL(nvml_library_entry, nvmlVgpuTypeGetNumDisplayHeads, vgpuTypeId, + numDisplayHeads); } -nvmlReturn_t nvmlVgpuTypeGetResolution(nvmlVgpuTypeId_t vgpuTypeId, - unsigned int displayIndex, +nvmlReturn_t nvmlVgpuTypeGetResolution(nvmlVgpuTypeId_t vgpuTypeId, unsigned int displayIndex, unsigned int *xdim, unsigned int *ydim) { - return NVML_OVERRIDE_CALL(nvml_library_entry, nvmlVgpuTypeGetResolution, - vgpuTypeId, displayIndex, xdim, ydim); + return NVML_OVERRIDE_CALL(nvml_library_entry, nvmlVgpuTypeGetResolution, vgpuTypeId, + displayIndex, xdim, ydim); } -nvmlReturn_t nvmlDeviceGetFBCSessions(nvmlDevice_t device, - unsigned int *sessionCount, +nvmlReturn_t nvmlDeviceGetFBCSessions(nvmlDevice_t device, unsigned int *sessionCount, nvmlFBCSessionInfo_t *sessionInfo) { - return NVML_OVERRIDE_CALL(nvml_library_entry, nvmlDeviceGetFBCSessions, device, - sessionCount, sessionInfo); + return NVML_OVERRIDE_CALL(nvml_library_entry, nvmlDeviceGetFBCSessions, device, sessionCount, + sessionInfo); } -nvmlReturn_t nvmlDeviceGetFBCStats(nvmlDevice_t device, - nvmlFBCStats_t *fbcStats) { - return NVML_OVERRIDE_CALL(nvml_library_entry, nvmlDeviceGetFBCStats, device, - fbcStats); +nvmlReturn_t nvmlDeviceGetFBCStats(nvmlDevice_t device, nvmlFBCStats_t *fbcStats) { + return 
NVML_OVERRIDE_CALL(nvml_library_entry, nvmlDeviceGetFBCStats, device, fbcStats); } nvmlReturn_t nvmlDeviceGetGridLicensableFeatures_v2( - nvmlDevice_t device, - nvmlGridLicensableFeatures_t *pGridLicensableFeatures) { - return NVML_OVERRIDE_CALL(nvml_library_entry, - nvmlDeviceGetGridLicensableFeatures_v2, device, - pGridLicensableFeatures); + nvmlDevice_t device, nvmlGridLicensableFeatures_t *pGridLicensableFeatures) { + return NVML_OVERRIDE_CALL(nvml_library_entry, nvmlDeviceGetGridLicensableFeatures_v2, device, + pGridLicensableFeatures); } -nvmlReturn_t nvmlDeviceGetRetiredPages_v2(nvmlDevice_t device, - nvmlPageRetirementCause_t cause, - unsigned int *pageCount, - unsigned long long *addresses) { - return NVML_OVERRIDE_CALL(nvml_library_entry, nvmlDeviceGetRetiredPages_v2, - device, cause, pageCount, addresses); +nvmlReturn_t nvmlDeviceGetRetiredPages_v2(nvmlDevice_t device, nvmlPageRetirementCause_t cause, + unsigned int *pageCount, unsigned long long *addresses) { + return NVML_OVERRIDE_CALL(nvml_library_entry, nvmlDeviceGetRetiredPages_v2, device, cause, + pageCount, addresses); } nvmlReturn_t nvmlDeviceResetGpuLockedClocks(nvmlDevice_t device) { - return NVML_OVERRIDE_CALL(nvml_library_entry, nvmlDeviceResetGpuLockedClocks, - device); + return NVML_OVERRIDE_CALL(nvml_library_entry, nvmlDeviceResetGpuLockedClocks, device); } -nvmlReturn_t nvmlDeviceSetGpuLockedClocks(nvmlDevice_t device, - unsigned int minGpuClockMHz, +nvmlReturn_t nvmlDeviceSetGpuLockedClocks(nvmlDevice_t device, unsigned int minGpuClockMHz, unsigned int maxGpuClockMHz) { - return NVML_OVERRIDE_CALL(nvml_library_entry, nvmlDeviceSetGpuLockedClocks, - device, minGpuClockMHz, maxGpuClockMHz); + return NVML_OVERRIDE_CALL(nvml_library_entry, nvmlDeviceSetGpuLockedClocks, device, + minGpuClockMHz, maxGpuClockMHz); } nvmlReturn_t nvmlVgpuInstanceGetAccountingMode(nvmlVgpuInstance_t vgpuInstance, nvmlEnableState_t *mode) { - return NVML_OVERRIDE_CALL(nvml_library_entry, 
nvmlVgpuInstanceGetAccountingMode, - vgpuInstance, mode); + return NVML_OVERRIDE_CALL(nvml_library_entry, nvmlVgpuInstanceGetAccountingMode, vgpuInstance, + mode); } -nvmlReturn_t nvmlVgpuInstanceGetAccountingPids(nvmlVgpuInstance_t vgpuInstance, - unsigned int *count, +nvmlReturn_t nvmlVgpuInstanceGetAccountingPids(nvmlVgpuInstance_t vgpuInstance, unsigned int *count, unsigned int *pids) { - return NVML_OVERRIDE_CALL(nvml_library_entry, nvmlVgpuInstanceGetAccountingPids, - vgpuInstance, count, pids); + return NVML_OVERRIDE_CALL(nvml_library_entry, nvmlVgpuInstanceGetAccountingPids, vgpuInstance, + count, pids); } -nvmlReturn_t nvmlVgpuInstanceGetAccountingStats(nvmlVgpuInstance_t vgpuInstance, - unsigned int pid, +nvmlReturn_t nvmlVgpuInstanceGetAccountingStats(nvmlVgpuInstance_t vgpuInstance, unsigned int pid, nvmlAccountingStats_t *stats) { - return NVML_OVERRIDE_CALL(nvml_library_entry, nvmlVgpuInstanceGetAccountingStats, - vgpuInstance, pid, stats); + return NVML_OVERRIDE_CALL(nvml_library_entry, nvmlVgpuInstanceGetAccountingStats, vgpuInstance, + pid, stats); } nvmlReturn_t nvmlVgpuInstanceGetFBCSessions(nvmlVgpuInstance_t vgpuInstance, unsigned int *sessionCount, nvmlFBCSessionInfo_t *sessionInfo) { - return NVML_OVERRIDE_CALL(nvml_library_entry, nvmlVgpuInstanceGetFBCSessions, - vgpuInstance, sessionCount, sessionInfo); + return NVML_OVERRIDE_CALL(nvml_library_entry, nvmlVgpuInstanceGetFBCSessions, vgpuInstance, + sessionCount, sessionInfo); } nvmlReturn_t nvmlVgpuInstanceGetFBCStats(nvmlVgpuInstance_t vgpuInstance, nvmlFBCStats_t *fbcStats) { - return NVML_OVERRIDE_CALL(nvml_library_entry, nvmlVgpuInstanceGetFBCStats, - vgpuInstance, fbcStats); + return NVML_OVERRIDE_CALL(nvml_library_entry, nvmlVgpuInstanceGetFBCStats, vgpuInstance, + fbcStats); } -nvmlReturn_t -nvmlVgpuTypeGetMaxInstancesPerVm(nvmlVgpuTypeId_t vgpuTypeId, - unsigned int *vgpuInstanceCountPerVm) { - return NVML_OVERRIDE_CALL(nvml_library_entry, nvmlVgpuTypeGetMaxInstancesPerVm, - 
vgpuTypeId, vgpuInstanceCountPerVm); +nvmlReturn_t nvmlVgpuTypeGetMaxInstancesPerVm(nvmlVgpuTypeId_t vgpuTypeId, + unsigned int *vgpuInstanceCountPerVm) { + return NVML_OVERRIDE_CALL(nvml_library_entry, nvmlVgpuTypeGetMaxInstancesPerVm, vgpuTypeId, + vgpuInstanceCountPerVm); } -nvmlReturn_t nvmlGetVgpuVersion(nvmlVgpuVersion_t *supported, - nvmlVgpuVersion_t *current) { - return NVML_OVERRIDE_CALL(nvml_library_entry, nvmlGetVgpuVersion, supported, - current); +nvmlReturn_t nvmlGetVgpuVersion(nvmlVgpuVersion_t *supported, nvmlVgpuVersion_t *current) { + return NVML_OVERRIDE_CALL(nvml_library_entry, nvmlGetVgpuVersion, supported, current); } nvmlReturn_t nvmlSetVgpuVersion(nvmlVgpuVersion_t *vgpuVersion) { - return NVML_OVERRIDE_CALL(nvml_library_entry, nvmlSetVgpuVersion, vgpuVersion); + return NVML_OVERRIDE_CALL(nvml_library_entry, nvmlSetVgpuVersion, vgpuVersion); } nvmlReturn_t nvmlDeviceGetGridLicensableFeatures_v3( - nvmlDevice_t device, - nvmlGridLicensableFeatures_t *pGridLicensableFeatures) { - return NVML_OVERRIDE_CALL(nvml_library_entry, - nvmlDeviceGetGridLicensableFeatures_v3, device, - pGridLicensableFeatures); + nvmlDevice_t device, nvmlGridLicensableFeatures_t *pGridLicensableFeatures) { + return NVML_OVERRIDE_CALL(nvml_library_entry, nvmlDeviceGetGridLicensableFeatures_v3, device, + pGridLicensableFeatures); } -nvmlReturn_t nvmlDeviceGetHostVgpuMode(nvmlDevice_t device, - nvmlHostVgpuMode_t *pHostVgpuMode) { - return NVML_OVERRIDE_CALL(nvml_library_entry, nvmlDeviceGetHostVgpuMode, device, - pHostVgpuMode); +nvmlReturn_t nvmlDeviceGetHostVgpuMode(nvmlDevice_t device, nvmlHostVgpuMode_t *pHostVgpuMode) { + return NVML_OVERRIDE_CALL(nvml_library_entry, nvmlDeviceGetHostVgpuMode, device, pHostVgpuMode); } -nvmlReturn_t nvmlDeviceGetPgpuMetadataString(nvmlDevice_t device, - char *pgpuMetadata, +nvmlReturn_t nvmlDeviceGetPgpuMetadataString(nvmlDevice_t device, char *pgpuMetadata, unsigned int *bufferSize) { - return NVML_OVERRIDE_CALL(nvml_library_entry, 
nvmlDeviceGetPgpuMetadataString, - device, pgpuMetadata, bufferSize); + return NVML_OVERRIDE_CALL(nvml_library_entry, nvmlDeviceGetPgpuMetadataString, device, + pgpuMetadata, bufferSize); } nvmlReturn_t nvmlVgpuInstanceGetEccMode(nvmlVgpuInstance_t vgpuInstance, nvmlEnableState_t *eccMode) { - return NVML_OVERRIDE_CALL(nvml_library_entry, nvmlVgpuInstanceGetEccMode, - vgpuInstance, eccMode); + return NVML_OVERRIDE_CALL(nvml_library_entry, nvmlVgpuInstanceGetEccMode, vgpuInstance, + eccMode); } nvmlReturn_t nvmlComputeInstanceDestroy(nvmlComputeInstance_t computeInstance) { - return NVML_OVERRIDE_CALL(nvml_library_entry, nvmlComputeInstanceDestroy, - computeInstance); + return NVML_OVERRIDE_CALL(nvml_library_entry, nvmlComputeInstanceDestroy, computeInstance); } nvmlReturn_t nvmlComputeInstanceGetInfo(nvmlComputeInstance_t computeInstance, nvmlComputeInstanceInfo_t *info) { - return NVML_OVERRIDE_CALL(nvml_library_entry, nvmlComputeInstanceGetInfo, - computeInstance, info); + return NVML_OVERRIDE_CALL(nvml_library_entry, nvmlComputeInstanceGetInfo, computeInstance, + info); } -nvmlReturn_t nvmlDeviceCreateGpuInstance(nvmlDevice_t device, - unsigned int profileId, +nvmlReturn_t nvmlDeviceCreateGpuInstance(nvmlDevice_t device, unsigned int profileId, nvmlGpuInstance_t *gpuInstance) { - return NVML_OVERRIDE_CALL(nvml_library_entry, nvmlDeviceCreateGpuInstance, - device, profileId, gpuInstance); + return NVML_OVERRIDE_CALL(nvml_library_entry, nvmlDeviceCreateGpuInstance, device, profileId, + gpuInstance); } -nvmlReturn_t nvmlDeviceGetArchitecture(nvmlDevice_t device, - nvmlDeviceArchitecture_t *arch) { - return NVML_OVERRIDE_CALL(nvml_library_entry, nvmlDeviceGetArchitecture, device, - arch); +nvmlReturn_t nvmlDeviceGetArchitecture(nvmlDevice_t device, nvmlDeviceArchitecture_t *arch) { + return NVML_OVERRIDE_CALL(nvml_library_entry, nvmlDeviceGetArchitecture, device, arch); } -nvmlReturn_t nvmlDeviceGetAttributes(nvmlDevice_t device, - nvmlDeviceAttributes_t 
*attributes) { - return NVML_OVERRIDE_CALL(nvml_library_entry, nvmlDeviceGetAttributes, device, - attributes); +nvmlReturn_t nvmlDeviceGetAttributes(nvmlDevice_t device, nvmlDeviceAttributes_t *attributes) { + return NVML_OVERRIDE_CALL(nvml_library_entry, nvmlDeviceGetAttributes, device, attributes); } -nvmlReturn_t nvmlDeviceGetAttributes_v2(nvmlDevice_t device, - nvmlDeviceAttributes_t *attributes) { - return NVML_OVERRIDE_CALL(nvml_library_entry, nvmlDeviceGetAttributes_v2, device, - attributes); +nvmlReturn_t nvmlDeviceGetAttributes_v2(nvmlDevice_t device, nvmlDeviceAttributes_t *attributes) { + return NVML_OVERRIDE_CALL(nvml_library_entry, nvmlDeviceGetAttributes_v2, device, attributes); } -nvmlReturn_t nvmlDeviceGetComputeInstanceId(nvmlDevice_t device, - unsigned int *id) { - return NVML_OVERRIDE_CALL(nvml_library_entry, nvmlDeviceGetComputeInstanceId, - device, id); +nvmlReturn_t nvmlDeviceGetComputeInstanceId(nvmlDevice_t device, unsigned int *id) { + return NVML_OVERRIDE_CALL(nvml_library_entry, nvmlDeviceGetComputeInstanceId, device, id); } -nvmlReturn_t nvmlDeviceGetCpuAffinityWithinScope(nvmlDevice_t device, - unsigned int cpuSetSize, - unsigned long *cpuSet, - nvmlAffinityScope_t scope) { - return NVML_OVERRIDE_CALL(nvml_library_entry, - nvmlDeviceGetCpuAffinityWithinScope, device, - cpuSetSize, cpuSet, scope); +nvmlReturn_t nvmlDeviceGetCpuAffinityWithinScope(nvmlDevice_t device, unsigned int cpuSetSize, + unsigned long *cpuSet, nvmlAffinityScope_t scope) { + return NVML_OVERRIDE_CALL(nvml_library_entry, nvmlDeviceGetCpuAffinityWithinScope, device, + cpuSetSize, cpuSet, scope); } -nvmlReturn_t -nvmlDeviceGetDeviceHandleFromMigDeviceHandle(nvmlDevice_t migDevice, - nvmlDevice_t *device) { - return NVML_OVERRIDE_CALL(nvml_library_entry, - nvmlDeviceGetDeviceHandleFromMigDeviceHandle, - migDevice, device); +nvmlReturn_t nvmlDeviceGetDeviceHandleFromMigDeviceHandle(nvmlDevice_t migDevice, + nvmlDevice_t *device) { + return 
NVML_OVERRIDE_CALL(nvml_library_entry, nvmlDeviceGetDeviceHandleFromMigDeviceHandle, + migDevice, device); } nvmlReturn_t nvmlDeviceGetGpuInstanceById(nvmlDevice_t device, unsigned int id, nvmlGpuInstance_t *gpuInstance) { - return NVML_OVERRIDE_CALL(nvml_library_entry, nvmlDeviceGetGpuInstanceById, - device, id, gpuInstance); + return NVML_OVERRIDE_CALL(nvml_library_entry, nvmlDeviceGetGpuInstanceById, device, id, + gpuInstance); } nvmlReturn_t nvmlDeviceGetGpuInstanceId(nvmlDevice_t device, unsigned int *id) { - return NVML_OVERRIDE_CALL(nvml_library_entry, nvmlDeviceGetGpuInstanceId, device, - id); + return NVML_OVERRIDE_CALL(nvml_library_entry, nvmlDeviceGetGpuInstanceId, device, id); } -nvmlReturn_t nvmlDeviceGetGpuInstancePossiblePlacements( - nvmlDevice_t device, unsigned int profileId, - nvmlGpuInstancePlacement_t *placements, unsigned int *count) { - return NVML_OVERRIDE_CALL(nvml_library_entry, - nvmlDeviceGetGpuInstancePossiblePlacements, device, - profileId, placements, count); +nvmlReturn_t nvmlDeviceGetGpuInstancePossiblePlacements(nvmlDevice_t device, unsigned int profileId, + nvmlGpuInstancePlacement_t *placements, + unsigned int *count) { + return NVML_OVERRIDE_CALL(nvml_library_entry, nvmlDeviceGetGpuInstancePossiblePlacements, + device, profileId, placements, count); } -nvmlReturn_t -nvmlDeviceGetGpuInstanceProfileInfo(nvmlDevice_t device, unsigned int profile, - nvmlGpuInstanceProfileInfo_t *info) { - return NVML_OVERRIDE_CALL(nvml_library_entry, - nvmlDeviceGetGpuInstanceProfileInfo, device, profile, - info); +nvmlReturn_t nvmlDeviceGetGpuInstanceProfileInfo(nvmlDevice_t device, unsigned int profile, + nvmlGpuInstanceProfileInfo_t *info) { + return NVML_OVERRIDE_CALL(nvml_library_entry, nvmlDeviceGetGpuInstanceProfileInfo, device, + profile, info); } -nvmlReturn_t nvmlDeviceGetGpuInstanceRemainingCapacity(nvmlDevice_t device, - unsigned int profileId, +nvmlReturn_t nvmlDeviceGetGpuInstanceRemainingCapacity(nvmlDevice_t device, unsigned int 
profileId, unsigned int *count) { - return NVML_OVERRIDE_CALL(nvml_library_entry, - nvmlDeviceGetGpuInstanceRemainingCapacity, device, - profileId, count); + return NVML_OVERRIDE_CALL(nvml_library_entry, nvmlDeviceGetGpuInstanceRemainingCapacity, device, + profileId, count); } -nvmlReturn_t nvmlDeviceGetGpuInstances(nvmlDevice_t device, - unsigned int profileId, - nvmlGpuInstance_t *gpuInstances, - unsigned int *count) { - return NVML_OVERRIDE_CALL(nvml_library_entry, nvmlDeviceGetGpuInstances, device, - profileId, gpuInstances, count); +nvmlReturn_t nvmlDeviceGetGpuInstances(nvmlDevice_t device, unsigned int profileId, + nvmlGpuInstance_t *gpuInstances, unsigned int *count) { + return NVML_OVERRIDE_CALL(nvml_library_entry, nvmlDeviceGetGpuInstances, device, profileId, + gpuInstances, count); } -nvmlReturn_t nvmlDeviceGetMaxMigDeviceCount(nvmlDevice_t device, - unsigned int *count) { - return NVML_OVERRIDE_CALL(nvml_library_entry, nvmlDeviceGetMaxMigDeviceCount, - device, count); +nvmlReturn_t nvmlDeviceGetMaxMigDeviceCount(nvmlDevice_t device, unsigned int *count) { + return NVML_OVERRIDE_CALL(nvml_library_entry, nvmlDeviceGetMaxMigDeviceCount, device, count); } -nvmlReturn_t nvmlDeviceGetMemoryAffinity(nvmlDevice_t device, - unsigned int nodeSetSize, - unsigned long *nodeSet, - nvmlAffinityScope_t scope) { - return NVML_OVERRIDE_CALL(nvml_library_entry, nvmlDeviceGetMemoryAffinity, - device, nodeSetSize, nodeSet, scope); +nvmlReturn_t nvmlDeviceGetMemoryAffinity(nvmlDevice_t device, unsigned int nodeSetSize, + unsigned long *nodeSet, nvmlAffinityScope_t scope) { + return NVML_OVERRIDE_CALL(nvml_library_entry, nvmlDeviceGetMemoryAffinity, device, nodeSetSize, + nodeSet, scope); } -nvmlReturn_t nvmlDeviceGetMigDeviceHandleByIndex(nvmlDevice_t device, - unsigned int index, +nvmlReturn_t nvmlDeviceGetMigDeviceHandleByIndex(nvmlDevice_t device, unsigned int index, nvmlDevice_t *migDevice) { - return NVML_OVERRIDE_CALL(nvml_library_entry, - 
nvmlDeviceGetMigDeviceHandleByIndex, device, index, - migDevice); + return NVML_OVERRIDE_CALL(nvml_library_entry, nvmlDeviceGetMigDeviceHandleByIndex, device, + index, migDevice); } -nvmlReturn_t nvmlDeviceGetMigMode(nvmlDevice_t device, - unsigned int *currentMode, +nvmlReturn_t nvmlDeviceGetMigMode(nvmlDevice_t device, unsigned int *currentMode, unsigned int *pendingMode) { - return NVML_OVERRIDE_CALL(nvml_library_entry, nvmlDeviceGetMigMode, device, - currentMode, pendingMode); + return NVML_OVERRIDE_CALL(nvml_library_entry, nvmlDeviceGetMigMode, device, currentMode, + pendingMode); } -nvmlReturn_t nvmlDeviceGetRemappedRows(nvmlDevice_t device, - unsigned int *corrRows, - unsigned int *uncRows, - unsigned int *isPending, +nvmlReturn_t nvmlDeviceGetRemappedRows(nvmlDevice_t device, unsigned int *corrRows, + unsigned int *uncRows, unsigned int *isPending, unsigned int *failureOccurred) { - return NVML_OVERRIDE_CALL(nvml_library_entry, nvmlDeviceGetRemappedRows, device, - corrRows, uncRows, isPending, failureOccurred); + return NVML_OVERRIDE_CALL(nvml_library_entry, nvmlDeviceGetRemappedRows, device, corrRows, + uncRows, isPending, failureOccurred); } -nvmlReturn_t -nvmlDeviceGetRowRemapperHistogram(nvmlDevice_t device, - nvmlRowRemapperHistogramValues_t *values) { - return NVML_OVERRIDE_CALL(nvml_library_entry, nvmlDeviceGetRowRemapperHistogram, - device, values); +nvmlReturn_t nvmlDeviceGetRowRemapperHistogram(nvmlDevice_t device, + nvmlRowRemapperHistogramValues_t *values) { + return NVML_OVERRIDE_CALL(nvml_library_entry, nvmlDeviceGetRowRemapperHistogram, device, + values); } -nvmlReturn_t nvmlDeviceIsMigDeviceHandle(nvmlDevice_t device, - unsigned int *isMigDevice) { - return NVML_OVERRIDE_CALL(nvml_library_entry, nvmlDeviceIsMigDeviceHandle, - device, isMigDevice); +nvmlReturn_t nvmlDeviceIsMigDeviceHandle(nvmlDevice_t device, unsigned int *isMigDevice) { + return NVML_OVERRIDE_CALL(nvml_library_entry, nvmlDeviceIsMigDeviceHandle, device, isMigDevice); } 
nvmlReturn_t nvmlDeviceSetMigMode(nvmlDevice_t device, unsigned int mode, nvmlReturn_t *activationStatus) { - return NVML_OVERRIDE_CALL(nvml_library_entry, nvmlDeviceSetMigMode, device, mode, - activationStatus); + return NVML_OVERRIDE_CALL(nvml_library_entry, nvmlDeviceSetMigMode, device, mode, + activationStatus); } nvmlReturn_t nvmlEventSetWait_v2(nvmlEventSet_t set, nvmlEventData_t *data, unsigned int timeoutms) { - return NVML_OVERRIDE_CALL(nvml_library_entry, nvmlEventSetWait_v2, set, data, - timeoutms); + return NVML_OVERRIDE_CALL(nvml_library_entry, nvmlEventSetWait_v2, set, data, timeoutms); } -nvmlReturn_t -nvmlGpuInstanceCreateComputeInstance(nvmlGpuInstance_t gpuInstance, - unsigned int profileId, - nvmlComputeInstance_t *computeInstance) { - return NVML_OVERRIDE_CALL(nvml_library_entry, - nvmlGpuInstanceCreateComputeInstance, gpuInstance, - profileId, computeInstance); +nvmlReturn_t nvmlGpuInstanceCreateComputeInstance(nvmlGpuInstance_t gpuInstance, + unsigned int profileId, + nvmlComputeInstance_t *computeInstance) { + return NVML_OVERRIDE_CALL(nvml_library_entry, nvmlGpuInstanceCreateComputeInstance, gpuInstance, + profileId, computeInstance); } nvmlReturn_t nvmlGpuInstanceDestroy(nvmlGpuInstance_t gpuInstance) { - return NVML_OVERRIDE_CALL(nvml_library_entry, nvmlGpuInstanceDestroy, - gpuInstance); + return NVML_OVERRIDE_CALL(nvml_library_entry, nvmlGpuInstanceDestroy, gpuInstance); } -nvmlReturn_t -nvmlGpuInstanceGetComputeInstanceById(nvmlGpuInstance_t gpuInstance, - unsigned int id, - nvmlComputeInstance_t *computeInstance) { - return NVML_OVERRIDE_CALL(nvml_library_entry, - nvmlGpuInstanceGetComputeInstanceById, gpuInstance, id, - computeInstance); +nvmlReturn_t nvmlGpuInstanceGetComputeInstanceById(nvmlGpuInstance_t gpuInstance, unsigned int id, + nvmlComputeInstance_t *computeInstance) { + return NVML_OVERRIDE_CALL(nvml_library_entry, nvmlGpuInstanceGetComputeInstanceById, + gpuInstance, id, computeInstance); } -nvmlReturn_t 
nvmlGpuInstanceGetComputeInstanceProfileInfo( - nvmlGpuInstance_t gpuInstance, unsigned int profile, - unsigned int engProfile, nvmlComputeInstanceProfileInfo_t *info) { - return NVML_OVERRIDE_CALL(nvml_library_entry, - nvmlGpuInstanceGetComputeInstanceProfileInfo, - gpuInstance, profile, engProfile, info); +nvmlReturn_t nvmlGpuInstanceGetComputeInstanceProfileInfo(nvmlGpuInstance_t gpuInstance, + unsigned int profile, + unsigned int engProfile, + nvmlComputeInstanceProfileInfo_t *info) { + return NVML_OVERRIDE_CALL(nvml_library_entry, nvmlGpuInstanceGetComputeInstanceProfileInfo, + gpuInstance, profile, engProfile, info); } -nvmlReturn_t nvmlGpuInstanceGetComputeInstanceRemainingCapacity( - nvmlGpuInstance_t gpuInstance, unsigned int profileId, - unsigned int *count) { - return NVML_OVERRIDE_CALL(nvml_library_entry, - nvmlGpuInstanceGetComputeInstanceRemainingCapacity, - gpuInstance, profileId, count); +nvmlReturn_t nvmlGpuInstanceGetComputeInstanceRemainingCapacity(nvmlGpuInstance_t gpuInstance, + unsigned int profileId, + unsigned int *count) { + return NVML_OVERRIDE_CALL(nvml_library_entry, + nvmlGpuInstanceGetComputeInstanceRemainingCapacity, gpuInstance, + profileId, count); } -nvmlReturn_t nvmlGpuInstanceGetComputeInstances( - nvmlGpuInstance_t gpuInstance, unsigned int profileId, - nvmlComputeInstance_t *computeInstances, unsigned int *count) { - return NVML_OVERRIDE_CALL(nvml_library_entry, nvmlGpuInstanceGetComputeInstances, - gpuInstance, profileId, computeInstances, count); +nvmlReturn_t nvmlGpuInstanceGetComputeInstances(nvmlGpuInstance_t gpuInstance, + unsigned int profileId, + nvmlComputeInstance_t *computeInstances, + unsigned int *count) { + return NVML_OVERRIDE_CALL(nvml_library_entry, nvmlGpuInstanceGetComputeInstances, gpuInstance, + profileId, computeInstances, count); } -nvmlReturn_t nvmlGpuInstanceGetInfo(nvmlGpuInstance_t gpuInstance, - nvmlGpuInstanceInfo_t *info) { - return NVML_OVERRIDE_CALL(nvml_library_entry, nvmlGpuInstanceGetInfo, - 
gpuInstance, info); +nvmlReturn_t nvmlGpuInstanceGetInfo(nvmlGpuInstance_t gpuInstance, nvmlGpuInstanceInfo_t *info) { + return NVML_OVERRIDE_CALL(nvml_library_entry, nvmlGpuInstanceGetInfo, gpuInstance, info); } -nvmlReturn_t -nvmlVgpuInstanceClearAccountingPids(nvmlVgpuInstance_t vgpuInstance) { - return NVML_OVERRIDE_CALL(nvml_library_entry, - nvmlVgpuInstanceClearAccountingPids, vgpuInstance); +nvmlReturn_t nvmlVgpuInstanceClearAccountingPids(nvmlVgpuInstance_t vgpuInstance) { + return NVML_OVERRIDE_CALL(nvml_library_entry, nvmlVgpuInstanceClearAccountingPids, + vgpuInstance); } -nvmlReturn_t nvmlVgpuInstanceGetMdevUUID(nvmlVgpuInstance_t vgpuInstance, - char *mdevUuid, unsigned int size) { - return NVML_OVERRIDE_CALL(nvml_library_entry, nvmlVgpuInstanceGetMdevUUID, - vgpuInstance, mdevUuid, size); +nvmlReturn_t nvmlVgpuInstanceGetMdevUUID(nvmlVgpuInstance_t vgpuInstance, char *mdevUuid, + unsigned int size) { + return NVML_OVERRIDE_CALL(nvml_library_entry, nvmlVgpuInstanceGetMdevUUID, vgpuInstance, + mdevUuid, size); } -nvmlReturn_t -nvmlComputeInstanceGetInfo_v2(nvmlComputeInstance_t computeInstance, - nvmlComputeInstanceInfo_t *info) { - return NVML_OVERRIDE_CALL(nvml_library_entry, nvmlComputeInstanceGetInfo_v2, - computeInstance, info); +nvmlReturn_t nvmlComputeInstanceGetInfo_v2(nvmlComputeInstance_t computeInstance, + nvmlComputeInstanceInfo_t *info) { + return NVML_OVERRIDE_CALL(nvml_library_entry, nvmlComputeInstanceGetInfo_v2, computeInstance, + info); } -nvmlReturn_t nvmlDeviceGetComputeRunningProcesses_v2(nvmlDevice_t device, - unsigned int *infoCount, +nvmlReturn_t nvmlDeviceGetComputeRunningProcesses_v2(nvmlDevice_t device, unsigned int *infoCount, nvmlProcessInfo_t *infos) { - return NVML_OVERRIDE_CALL(nvml_library_entry, - nvmlDeviceGetComputeRunningProcesses_v2, device, - infoCount, infos); + return NVML_OVERRIDE_CALL(nvml_library_entry, nvmlDeviceGetComputeRunningProcesses_v2, device, + infoCount, infos); } -nvmlReturn_t 
nvmlDeviceGetGraphicsRunningProcesses_v2( - nvmlDevice_t device, unsigned int *infoCount, nvmlProcessInfo_t *infos) { - return NVML_OVERRIDE_CALL(nvml_library_entry, - nvmlDeviceGetGraphicsRunningProcesses_v2, device, - infoCount, infos); +nvmlReturn_t nvmlDeviceGetGraphicsRunningProcesses_v2(nvmlDevice_t device, unsigned int *infoCount, + nvmlProcessInfo_t *infos) { + return NVML_OVERRIDE_CALL(nvml_library_entry, nvmlDeviceGetGraphicsRunningProcesses_v2, device, + infoCount, infos); } -nvmlReturn_t nvmlDeviceSetTemperatureThreshold( - nvmlDevice_t device, nvmlTemperatureThresholds_t thresholdType, int *temp) { - return NVML_OVERRIDE_CALL(nvml_library_entry, nvmlDeviceSetTemperatureThreshold, - device, thresholdType, temp); +nvmlReturn_t nvmlDeviceSetTemperatureThreshold(nvmlDevice_t device, + nvmlTemperatureThresholds_t thresholdType, + int *temp) { + return NVML_OVERRIDE_CALL(nvml_library_entry, nvmlDeviceSetTemperatureThreshold, device, + thresholdType, temp); } /** no prototype @@ -1555,13 +1303,11 @@ nvmlReturn_t nvmlRetry_NvRmControl() {} nvmlReturn_t nvmlVgpuInstanceGetGpuInstanceId(nvmlVgpuInstance_t vgpuInstance, unsigned int *gpuInstanceId) { - return NVML_OVERRIDE_CALL(nvml_library_entry, nvmlVgpuInstanceGetGpuInstanceId, - vgpuInstance, gpuInstanceId); + return NVML_OVERRIDE_CALL(nvml_library_entry, nvmlVgpuInstanceGetGpuInstanceId, vgpuInstance, + gpuInstanceId); } -nvmlReturn_t -nvmlVgpuTypeGetGpuInstanceProfileId(nvmlVgpuTypeId_t vgpuTypeId, - unsigned int *gpuInstanceProfileId) { - return NVML_OVERRIDE_CALL(nvml_library_entry, - nvmlVgpuTypeGetGpuInstanceProfileId, vgpuTypeId, - gpuInstanceProfileId); +nvmlReturn_t nvmlVgpuTypeGetGpuInstanceProfileId(nvmlVgpuTypeId_t vgpuTypeId, + unsigned int *gpuInstanceProfileId) { + return NVML_OVERRIDE_CALL(nvml_library_entry, nvmlVgpuTypeGetGpuInstanceProfileId, vgpuTypeId, + gpuInstanceProfileId); } \ No newline at end of file diff --git a/src/utils.c b/src/utils.c old mode 100755 new mode 100644 index 
449fa714..2eedbb8b --- a/src/utils.c +++ b/src/utils.c @@ -1,17 +1,19 @@ +#include "include/utils.h" + +#include +#include +#include #include #include -#include -#include #include -#include "include/utils.h" + +#include "include/libcuda_hook.h" #include "include/log_utils.h" -#include "include/nvml_prefix.h" -#include #include "include/nvml_override.h" -#include "include/libcuda_hook.h" +#include "include/nvml_prefix.h" #include "multiprocess/multiprocess_memory_limit.h" -const char* unified_lock="/tmp/vgpulock/lock"; +const char *unified_lock = "/tmp/vgpulock/lock"; static int lock_fd = -1; extern size_t context_size; extern int cuda_to_nvml_map_array[CUDA_DEVICE_MAX_COUNT]; @@ -48,20 +50,21 @@ int try_unlock_unified_lock() { return res == 0 ? 0 : -1; } -int mergepid(unsigned int *prev, unsigned int *current, nvmlProcessInfo_t1 *sub, nvmlProcessInfo_t1 *merged) { - int i,j; - int found=0; - for (i=0;i<*prev;i++){ - found=0; - for (j=0;j<*current;j++) { - LOG_INFO("merge pid=%d",sub[i].pid); +int mergepid(unsigned int *prev, unsigned int *current, nvmlProcessInfo_t1 *sub, + nvmlProcessInfo_t1 *merged) { + int i, j; + int found = 0; + for (i = 0; i < *prev; i++) { + found = 0; + for (j = 0; j < *current; j++) { + LOG_INFO("merge pid=%d", sub[i].pid); if (sub[i].pid == merged[j].pid) { found = 1; break; - } + } } if (!found) { - LOG_DEBUG("merged pid=%d\n",sub[i].pid); + LOG_DEBUG("merged pid=%d\n", sub[i].pid); merged[*current].pid = sub[i].pid; (*current)++; } @@ -69,33 +72,32 @@ int mergepid(unsigned int *prev, unsigned int *current, nvmlProcessInfo_t1 *sub, return 0; } -int getextrapid(unsigned int prev, unsigned int current, nvmlProcessInfo_t1 *pre_pids_on_device, nvmlProcessInfo_t1 *pids_on_device) { - int i,j; +int getextrapid(unsigned int prev, unsigned int current, nvmlProcessInfo_t1 *pre_pids_on_device, + nvmlProcessInfo_t1 *pids_on_device) { + int i, j; int found = 0; - for (i=0; i %d",i,cuda_to_nvml_map(i)); + LOG_INFO("device %d -> %d", i, 
cuda_to_nvml_map(i)); } LOG_INFO("get default cuda from %s", getenv("CUDA_VISIBLE_DEVICES")); return count; @@ -201,31 +207,30 @@ int map_cuda_visible_devices() { int getenvcount() { char *s = getenv("CUDA_VISIBLE_DEVICES"); - if ((s == NULL) || (strlen(s)==0)){ + if ((s == NULL) || (strlen(s) == 0)) { return -1; } - LOG_DEBUG("get from env %s",s); - int i,count=0; - for (i=0;i