diff --git a/.github/workflows/style.yaml b/.github/workflows/style.yaml
index 87e39337..d346da5d 100644
--- a/.github/workflows/style.yaml
+++ b/.github/workflows/style.yaml
@@ -6,11 +6,12 @@ on:
   pull_request:
 
 jobs:
-  cpplint:
+  lint:
     runs-on: ubuntu-latest
     steps:
-    - uses: actions/checkout@master
-    - uses: reviewdog/action-cpplint@master
+    - uses: actions/checkout@v3
+    - name: Run clang-format style check for C/C++ programs.
+      uses: jidicula/clang-format-action@v4.18.0
       with:
-        github_token: ${{ secrets.github_token }}
-        args: --linelength=120
+        clang-format-version: '18'
+        check-path: 'src'
diff --git a/src/.clang-format b/src/.clang-format
new file mode 100644
index 00000000..4ca6bcb7
--- /dev/null
+++ b/src/.clang-format
@@ -0,0 +1,3 @@
+BasedOnStyle: Google
+IndentWidth: 4
+ColumnLimit: 100
diff --git a/src/allocator/allocator.c b/src/allocator/allocator.c
old mode 100755
new mode 100644
index b990129f..0ce5eb87
--- a/src/allocator/allocator.c
+++ b/src/allocator/allocator.c
@@ -1,44 +1,42 @@
 #include "allocator.h"
-#include "include/log_utils.h"
+
 #include "include/libcuda_hook.h"
+#include "include/log_utils.h"
 #include "multiprocess/multiprocess_memory_limit.h"
 
-
 size_t BITSIZE = 512;
 size_t IPCSIZE = 2097152;
 size_t OVERSIZE = 134217728;
-//int pidfound;
+// int pidfound;
 
 region_list *r_list;
 allocated_list *device_overallocated;
 allocated_list *device_allocasync;
 
-#define ALIGN       2097152
+#define ALIGN 2097152
 #define MULTI_PARAM 1
 
-#define CHUNK_SIZE  (OVERSIZE/BITSIZE)
-#define __CHUNK_SIZE__  CHUNK_SIZE
+#define CHUNK_SIZE (OVERSIZE / BITSIZE)
+#define __CHUNK_SIZE__ CHUNK_SIZE
 
 extern size_t initial_offset;
-extern CUresult
-    cuMemoryAllocate(CUdeviceptr* dptr, size_t bytesize, void* data);
+extern CUresult cuMemoryAllocate(CUdeviceptr *dptr, size_t bytesize, void *data);
 extern CUresult cuMemoryFree(CUdeviceptr dptr);
 
 pthread_once_t allocator_allocate_flag = PTHREAD_ONCE_INIT;
 pthread_mutex_t mutex = PTHREAD_MUTEX_INITIALIZER;
 
 size_t round_up(size_t size, size_t unit) {
-    if (size & (unit-1))
-        return ((size / unit) + 1 ) * unit;
+    if (size & (unit - 1)) return ((size / unit) + 1) * unit;
     return size;
 }
 
 int oom_check(const int dev, size_t addon) {
     CUdevice d;
-    if (dev==-1)
+    if (dev == -1)
         cuCtxGetDevice(&d);
     else
-        d=dev;
+        d = dev;
     uint64_t limit = get_current_device_memory_limit(d);
     size_t _usage = get_gpu_memory_usage(d);
 
@@ -47,12 +45,11 @@ int oom_check(const int dev, size_t addon) {
     }
 
     size_t new_allocated = _usage + addon;
-    LOG_INFO("_usage=%lu limit=%lu new_allocated=%lu",_usage,limit,new_allocated);
+    LOG_INFO("_usage=%lu limit=%lu new_allocated=%lu", _usage, limit, new_allocated);
     if (new_allocated > limit) {
         LOG_ERROR("Device %d OOM %lu / %lu", d, new_allocated, limit);
 
-        if (clear_proc_slot_nolock(1) > 0)
-            return oom_check(dev,addon);
+        if (clear_proc_slot_nolock(1) > 0) return oom_check(dev, addon);
         return 1;
     }
     return 0;
@@ -61,27 +58,27 @@ int oom_check(const int dev, size_t addon) {
 CUresult view_vgpu_allocator() {
     allocated_list_entry *al;
     size_t total;
-    total=0;
+    total = 0;
     LOG_INFO("[view1]:overallocated:");
-    for (al=device_overallocated->head;al!=NULL;al=al->next){
-        LOG_INFO("(%p %lu)\t",(void *)al->entry->address,al->entry->length);
-        total+=al->entry->length;
+    for (al = device_overallocated->head; al != NULL; al = al->next) {
+        LOG_INFO("(%p %lu)\t", (void *)al->entry->address, al->entry->length);
+        total += al->entry->length;
     }
-    LOG_INFO("total=%lu",total);
+    LOG_INFO("total=%lu", total);
     size_t t = get_current_device_memory_usage(0);
-    LOG_INFO("current_device_memory_usage:%lu",t);
+    LOG_INFO("current_device_memory_usage:%lu", t);
     return 0;
 }
 
 CUresult get_listsize(allocated_list *al, size_t *size) {
-    if (al->length == 0){
+    if (al->length == 0) {
         *size = 0;
         return CUDA_SUCCESS;
     }
-    size_t count=0;
+    size_t count = 0;
     allocated_list_entry *val;
-    for (val=al->head;val!=NULL;val=val->next){
-        count+=val->entry->length;
+    for (val = al->head; val != NULL; val = val->next) {
+        count += val->entry->length;
     }
     *size = count;
     return CUDA_SUCCESS;
@@ -92,10 +89,10 @@ void allocator_init() {
 
     device_overallocated = malloc(sizeof(allocated_list));
     LIST_INIT(device_overallocated);
-    device_allocasync=malloc(sizeof(allocated_list));
+    device_allocasync = malloc(sizeof(allocated_list));
     LIST_INIT(device_allocasync);
 
-    pthread_mutex_init(&mutex,NULL);
+    pthread_mutex_init(&mutex, NULL);
 }
 
 int add_chunk(CUdeviceptr *address, size_t size) {
@@ -105,8 +102,7 @@ int add_chunk(CUdeviceptr *address, size_t size) {
     cuCtxGetDevice(&dev);
 
     /* OOM pre-check without lock */
-    if (oom_check(dev, size))
-        return CUDA_ERROR_OUT_OF_MEMORY;
+    if (oom_check(dev, size)) return CUDA_ERROR_OUT_OF_MEMORY;
 
     /* GPU allocation outside lock — the expensive part */
     if (size <= IPCSIZE) {
@@ -141,17 +137,17 @@ int add_chunk(CUdeviceptr *address, size_t size) {
 
 int add_chunk_only(CUdeviceptr address, size_t size, CUdevice dev) {
     pthread_mutex_lock(&mutex);
-    size_t addr=0;
+    size_t addr = 0;
     size_t allocsize;
-    if (oom_check(dev,size)){
+    if (oom_check(dev, size)) {
         pthread_mutex_unlock(&mutex);
         return -1;
     }
     allocated_list_entry *e;
     INIT_ALLOCATED_LIST_ENTRY(e, addr, size, dev);
-    LIST_ADD(device_overallocated,e);
-    //uint64_t t_size;
-    e->entry->address=address;
+    LIST_ADD(device_overallocated, e);
+    // uint64_t t_size;
+    e->entry->address = address;
     allocsize = size;
     add_gpu_device_memory_usage(getpid(), dev, allocsize, 2);
     pthread_mutex_unlock(&mutex);
@@ -161,8 +157,9 @@ int add_chunk_only(CUdeviceptr address, size_t size, CUdevice dev) {
 int check_memory_type(CUdeviceptr address) {
     allocated_list_entry *cursor;
     cursor = device_overallocated->head;
-    for (cursor=device_overallocated->head;cursor!=NULL;cursor=cursor->next){
-        if ((cursor->entry->address <= address) && (cursor->entry->address+cursor->entry->length>=address))
+    for (cursor = device_overallocated->head; cursor != NULL; cursor = cursor->next) {
+        if ((cursor->entry->address <= address) &&
+            (cursor->entry->address + cursor->entry->length >= address))
             return CU_MEMORYTYPE_DEVICE;
     }
     return CU_MEMORYTYPE_HOST;
@@ -218,16 +215,11 @@ int remove_chunk_only(CUdeviceptr dptr) {
     return -1;
 }
 
-int allocate_raw(CUdeviceptr *dptr, size_t size) {
-    return add_chunk(dptr, size);
-}
+int allocate_raw(CUdeviceptr *dptr, size_t size) { return add_chunk(dptr, size); }
 
-int free_raw(CUdeviceptr dptr) {
-    return remove_chunk(device_overallocated, dptr);
-}
+int free_raw(CUdeviceptr dptr) { return remove_chunk(device_overallocated, dptr); }
 
-int remove_chunk_async(
-    allocated_list *a_list, CUdeviceptr dptr, CUstream hStream) {
+int remove_chunk_async(allocated_list *a_list, CUdeviceptr dptr, CUstream hStream) {
     size_t t_size;
     if (a_list->length == 0) {
         return -1;
@@ -235,13 +227,13 @@ int remove_chunk_async(
     allocated_list_entry *val;
     for (val = a_list->head; val != NULL; val = val->next) {
         if (val->entry->address == dptr) {
-            t_size=val->entry->length;
-            CUDA_OVERRIDE_CALL(cuda_library_entry,cuMemFreeAsync,dptr,hStream);
-            LIST_REMOVE(a_list,val);
-            a_list->limit-=t_size;
+            t_size = val->entry->length;
+            CUDA_OVERRIDE_CALL(cuda_library_entry, cuMemFreeAsync, dptr, hStream);
+            LIST_REMOVE(a_list, val);
+            a_list->limit -= t_size;
             CUdevice dev;
             cuCtxGetDevice(&dev);
-            rm_gpu_device_memory_usage(getpid(),dev,t_size,2);
+            rm_gpu_device_memory_usage(getpid(), dev, t_size, 2);
             return 0;
         }
     }
@@ -256,53 +248,56 @@ int free_raw_async(CUdeviceptr dptr, CUstream hStream) {
 }
 
 int add_chunk_async(CUdeviceptr *address, size_t size, CUstream hStream) {
-    size_t addr=0;
+    size_t addr = 0;
     size_t allocsize;
     CUresult res = CUDA_SUCCESS;
     CUdevice dev;
     cuCtxGetDevice(&dev);
-    if (oom_check(dev,size))
-        return -1;
+    if (oom_check(dev, size)) return -1;
 
     allocated_list_entry *e;
     INIT_ALLOCATED_LIST_ENTRY(e, addr, size, dev);
-    res = CUDA_OVERRIDE_CALL(cuda_library_entry,cuMemAllocAsync,&e->entry->address,size,hStream);
+    res =
+        CUDA_OVERRIDE_CALL(cuda_library_entry, cuMemAllocAsync, &e->entry->address, size, hStream);
     if (res != CUDA_SUCCESS) {
-        LOG_ERROR("cuMemoryAllocate failed res=%d",res);
+        LOG_ERROR("cuMemoryAllocate failed res=%d", res);
         return res;
     }
     *address = e->entry->address;
     CUmemoryPool pool;
-    res = CUDA_OVERRIDE_CALL(cuda_library_entry,cuDeviceGetMemPool,&pool,dev);
+    res = CUDA_OVERRIDE_CALL(cuda_library_entry, cuDeviceGetMemPool, &pool, dev);
     if (res != CUDA_SUCCESS) {
-        LOG_ERROR("cuDeviceGetMemPool failed res=%d",res);
+        LOG_ERROR("cuDeviceGetMemPool failed res=%d", res);
         return res;
     }
     size_t poollimit;
-    res = CUDA_OVERRIDE_CALL(cuda_library_entry,cuMemPoolGetAttribute,pool,CU_MEMPOOL_ATTR_RESERVED_MEM_HIGH,&poollimit);
+    res = CUDA_OVERRIDE_CALL(cuda_library_entry, cuMemPoolGetAttribute, pool,
+                             CU_MEMPOOL_ATTR_RESERVED_MEM_HIGH, &poollimit);
     if (res != CUDA_SUCCESS) {
-        LOG_ERROR("cuMemPoolGetAttribute failed res=%d",res);
+        LOG_ERROR("cuMemPoolGetAttribute failed res=%d", res);
         return res;
     }
     if (poollimit != 0) {
-        if (poollimit> device_allocasync->limit) {
-            allocsize = (poollimit-device_allocasync->limit < size)? poollimit-device_allocasync->limit : size;
+        if (poollimit > device_allocasync->limit) {
+            allocsize = (poollimit - device_allocasync->limit < size)
+                            ? poollimit - device_allocasync->limit
+                            : size;
             cuCtxGetDevice(&dev);
             add_gpu_device_memory_usage(getpid(), dev, allocsize, 2);
-            device_allocasync->limit=device_allocasync->limit+allocsize;
-            e->entry->length=allocsize;
-        }else{
-            e->entry->length=0;
+            device_allocasync->limit = device_allocasync->limit + allocsize;
+            e->entry->length = allocsize;
+        } else {
+            e->entry->length = 0;
         }
     }
-    LIST_ADD(device_allocasync,e);
+    LIST_ADD(device_allocasync, e);
     return 0;
 }
 
 int allocate_async_raw(CUdeviceptr *dptr, size_t size, CUstream hStream) {
     int tmp;
     pthread_mutex_lock(&mutex);
-    tmp = add_chunk_async(dptr,size,hStream);
+    tmp = add_chunk_async(dptr, size, hStream);
     pthread_mutex_unlock(&mutex);
     return tmp;
 }
diff --git a/src/allocator/allocator.h b/src/allocator/allocator.h
old mode 100755
new mode 100644
index 38933287..518a4022
--- a/src/allocator/allocator.h
+++ b/src/allocator/allocator.h
@@ -1,16 +1,16 @@
-#include <stdio.h>
-#include <cuda.h>
 #include <assert.h>
-#include <memory.h>
+#include <cuda.h>
 #include <dlfcn.h>
+#include <errno.h>
 #include <fcntl.h>
+#include <memory.h>
 #include <pthread.h>
-#include <errno.h>
+#include <stdio.h>
 
 #define CUMALLOC 0
 #define CUCREATE 1
 
-struct allocated_device_memory_struct{
+struct allocated_device_memory_struct {
     CUdeviceptr address;
     size_t length;
     CUcontext ctx;
@@ -19,13 +19,13 @@ struct allocated_device_memory_struct{
 };
 typedef struct allocated_device_memory_struct allocated_device_memory;
 
-struct allocated_list_entry_struct{
+struct allocated_list_entry_struct {
     allocated_device_memory *entry;
-    struct allocated_list_entry_struct *next,*prev;
+    struct allocated_list_entry_struct *next, *prev;
 };
 typedef struct allocated_list_entry_struct allocated_list_entry;
 
-struct allocated_list_struct{
+struct allocated_list_struct {
     allocated_list_entry *head;
     allocated_list_entry *tail;
     size_t length;
@@ -33,7 +33,7 @@ struct allocated_list_struct{
 };
 typedef struct allocated_list_struct allocated_list;
 
-struct region_struct{
+struct region_struct {
     size_t start;
     size_t freemark;
     size_t freed_map;
@@ -45,15 +45,15 @@ struct region_struct{
 };
 typedef struct region_struct region;
 
-struct region_list_entry_struct{
+struct region_list_entry_struct {
     region *entry;
-    struct region_list_entry_struct *next,*prev;
+    struct region_list_entry_struct *next, *prev;
 };
 typedef struct region_list_entry_struct region_list_entry;
 
-struct region_list_struct{
-    region_list_entry   *head;
-    region_list_entry   *tail;
+struct region_list_struct {
+    region_list_entry *head;
+    region_list_entry *tail;
     size_t length;
 };
 typedef struct region_list_struct region_list;
@@ -63,90 +63,90 @@ extern allocated_list *device_overallocated;
 extern allocated_list *device_allocasync;
 extern pthread_mutex_t mutex;
 
-#define LIST_INIT(list) {   \
-    list->head=NULL;         \
-    list->tail=NULL;         \
-    list->length=0;          \
-    list->limit=0;           \
+#define LIST_INIT(list)    \
+    {                      \
+        list->head = NULL; \
+        list->tail = NULL; \
+        list->length = 0;  \
+        list->limit = 0;   \
     }
 #define __LIST_INIT(list) LIST_INIT(list)
 
-#define QUIT_WITH_ERROR(__message) {    \
-    LOG_ERROR("%s\n",#__message);  \
-    return -1;                          \
-}
-
-#define LIST_REMOVE(list,val) {             \
-    if (val->prev!=NULL)                    \
-        val->prev->next=val->next;          \
-    if (val->next!=NULL)                    \
-        val->next->prev=val->prev;          \
-    if (val == list->tail)                  \
-        list->tail = val->prev;             \
-    if (val == list->head)                  \
-        list->head = val->next;             \
-    free(val->entry->allocHandle);          \
-    free(val->entry);                       \
-    free(val);                              \
-    list->length--;                         \
-}   
-
-#define INIT_ALLOCATED_LIST_ENTRY(__list_entry, __address, __size, __dev) {             \
-    CUcontext __ctx;                                                           \
-    CUresult __res=cuCtxGetCurrent(&__ctx);                                    \
-    if (__res!=CUDA_SUCCESS) QUIT_WITH_ERROR("cuCtxGetCurrent failed");        \
-    __list_entry = malloc(sizeof(allocated_list_entry));                       \
-    if (__list_entry == NULL) QUIT_WITH_ERROR("malloc failed");                \
-    __list_entry->entry = malloc(sizeof(allocated_device_memory));             \
-    if (__list_entry->entry == NULL) QUIT_WITH_ERROR("malloc failed");         \
-    __list_entry->entry->address=__address;                                    \
-    __list_entry->entry->length=__size;                                        \
-    __list_entry->entry->dev = __dev;                                            \
-    __list_entry->entry->allocHandle=malloc(sizeof(CUmemGenericAllocationHandle)); \
-    __list_entry->entry->ctx=__ctx;                                            \
-    __list_entry->next=NULL;                                                   \
-    __list_entry->prev=NULL;                                                   \
-}
-
-#define INIT_REGION_LIST_ENTRY(__list_entry,__address,__size)                      \
-    do{                                                                            \
-        CUcontext __ctx;                                                           \
-        CUresult __res;                                                            \
-        __res = cuCtxGetCurrent(&__ctx);                                           \
-        if (__res!=CUDA_SUCCESS) QUIT_WITH_ERROR("cuCtxGetCurrent failed");        \
-        __list_entry = malloc(sizeof(region_list_entry));                          \
-        if (__list_entry == NULL) QUIT_WITH_ERROR("malloc failed");                \
-        __list_entry->entry = malloc(sizeof(region));                              \
-        if (__list_entry->entry == NULL) QUIT_WITH_ERROR("malloc failed")          \
-        __list_entry->entry->region_allocs = malloc(sizeof(allocated_list));       \
-        if (__list_entry->entry->region_allocs == NULL) QUIT_WITH_ERROR("malloc failed") \
-        __list_entry->entry->start=__address;                                      \
-        __list_entry->entry->freed_map=__CHUNK_SIZE__;                             \
-        __list_entry->entry->freemark=0;                                           \
-        __list_entry->entry->length=0;                                             \
-        __list_entry->entry->ctx=__ctx;                                            \
-        __list_entry->entry->allocHandle=malloc(sizeof(CUmemGenericAllocationHandle)); \
-        __list_entry->entry->bitmap=malloc(__CHUNK_SIZE__);                        \
-        memset(__list_entry->entry->bitmap,0,__CHUNK_SIZE__);                      \
-        __LIST_INIT(__list_entry->entry->region_allocs);                           \
-        region_fill(__list_entry->entry,0,__size);                                 \
-        __list_entry->next=NULL;                                                   \
-        __list_entry->prev=NULL;                                                   \
-    }while(0);                                                                     
-
-#define LIST_ADD(list,__entry) { \
-    if (list->head == NULL) {    \
-        list->head = __entry;    \
-        list->tail = __entry;    \
-        list->length=1;          \
-    }else{                      \
-        __entry->prev = list->tail; \
-        list->tail->next=__entry;   \
-        list->tail = __entry;       \
-        list->length++;             \
-    }                               \
-}                                   
+#define QUIT_WITH_ERROR(__message)     \
+    {                                  \
+        LOG_ERROR("%s\n", #__message); \
+        return -1;                     \
+    }
+
+#define LIST_REMOVE(list, val)                              \
+    {                                                       \
+        if (val->prev != NULL) val->prev->next = val->next; \
+        if (val->next != NULL) val->next->prev = val->prev; \
+        if (val == list->tail) list->tail = val->prev;      \
+        if (val == list->head) list->head = val->next;      \
+        free(val->entry->allocHandle);                      \
+        free(val->entry);                                   \
+        free(val);                                          \
+        list->length--;                                     \
+    }
+
+#define INIT_ALLOCATED_LIST_ENTRY(__list_entry, __address, __size, __dev)                \
+    {                                                                                    \
+        CUcontext __ctx;                                                                 \
+        CUresult __res = cuCtxGetCurrent(&__ctx);                                        \
+        if (__res != CUDA_SUCCESS) QUIT_WITH_ERROR("cuCtxGetCurrent failed");            \
+        __list_entry = malloc(sizeof(allocated_list_entry));                             \
+        if (__list_entry == NULL) QUIT_WITH_ERROR("malloc failed");                      \
+        __list_entry->entry = malloc(sizeof(allocated_device_memory));                   \
+        if (__list_entry->entry == NULL) QUIT_WITH_ERROR("malloc failed");               \
+        __list_entry->entry->address = __address;                                        \
+        __list_entry->entry->length = __size;                                            \
+        __list_entry->entry->dev = __dev;                                                \
+        __list_entry->entry->allocHandle = malloc(sizeof(CUmemGenericAllocationHandle)); \
+        __list_entry->entry->ctx = __ctx;                                                \
+        __list_entry->next = NULL;                                                       \
+        __list_entry->prev = NULL;                                                       \
+    }
 
+#define INIT_REGION_LIST_ENTRY(__list_entry, __address, __size)                          \
+    do {                                                                                 \
+        CUcontext __ctx;                                                                 \
+        CUresult __res;                                                                  \
+        __res = cuCtxGetCurrent(&__ctx);                                                 \
+        if (__res != CUDA_SUCCESS) QUIT_WITH_ERROR("cuCtxGetCurrent failed");            \
+        __list_entry = malloc(sizeof(region_list_entry));                                \
+        if (__list_entry == NULL) QUIT_WITH_ERROR("malloc failed");                      \
+        __list_entry->entry = malloc(sizeof(region));                                    \
+        if (__list_entry->entry == NULL) QUIT_WITH_ERROR("malloc failed")                \
+        __list_entry->entry->region_allocs = malloc(sizeof(allocated_list));             \
+        if (__list_entry->entry->region_allocs == NULL) QUIT_WITH_ERROR("malloc failed") \
+        __list_entry->entry->start = __address;                                          \
+        __list_entry->entry->freed_map = __CHUNK_SIZE__;                                 \
+        __list_entry->entry->freemark = 0;                                               \
+        __list_entry->entry->length = 0;                                                 \
+        __list_entry->entry->ctx = __ctx;                                                \
+        __list_entry->entry->allocHandle = malloc(sizeof(CUmemGenericAllocationHandle)); \
+        __list_entry->entry->bitmap = malloc(__CHUNK_SIZE__);                            \
+        memset(__list_entry->entry->bitmap, 0, __CHUNK_SIZE__);                          \
+        __LIST_INIT(__list_entry->entry->region_allocs);                                 \
+        region_fill(__list_entry->entry, 0, __size);                                     \
+        __list_entry->next = NULL;                                                       \
+        __list_entry->prev = NULL;                                                       \
+    } while (0);
+
+#define LIST_ADD(list, __entry)         \
+    {                                   \
+        if (list->head == NULL) {       \
+            list->head = __entry;       \
+            list->tail = __entry;       \
+            list->length = 1;           \
+        } else {                        \
+            __entry->prev = list->tail; \
+            list->tail->next = __entry; \
+            list->tail = __entry;       \
+            list->length++;             \
+        }                               \
+    }
 
 int getallochandle(CUmemGenericAllocationHandle *handle, size_t size, size_t *allocsize);
 
@@ -154,7 +154,7 @@ int getallochandle(CUmemGenericAllocationHandle *handle, size_t size, size_t *al
 CUresult view_vgpu_allocator();
 
 // Checks if oom
-int oom_check(const int dev,size_t addon);
+int oom_check(const int dev, size_t addon);
 
 // Allocate and free device memory
 int allocate_raw(CUdeviceptr *dptr, size_t size);
@@ -166,4 +166,3 @@ int free_raw_async(CUdeviceptr dptr, CUstream hStream);
 
 // Checks memory type
 int check_memory_type(CUdeviceptr address);
-
diff --git a/src/cuda/context.c b/src/cuda/context.c
old mode 100755
new mode 100644
index 3838970e..9e33fc13
--- a/src/cuda/context.c
+++ b/src/cuda/context.c
@@ -4,149 +4,154 @@
 extern size_t context_size;
 extern int ctx_activate[16];
 
-
-CUresult cuDevicePrimaryCtxGetState( CUdevice dev, unsigned int* flags, int* active ){
-    LOG_DEBUG("into cuDevicePrimaryCtxGetState dev=%d",dev);
-    CUresult res = CUDA_OVERRIDE_CALL(cuda_library_entry,cuDevicePrimaryCtxGetState,dev,flags,active);
+CUresult cuDevicePrimaryCtxGetState(CUdevice dev, unsigned int* flags, int* active) {
+    LOG_DEBUG("into cuDevicePrimaryCtxGetState dev=%d", dev);
+    CUresult res =
+        CUDA_OVERRIDE_CALL(cuda_library_entry, cuDevicePrimaryCtxGetState, dev, flags, active);
     return res;
 }
 
-CUresult cuDevicePrimaryCtxRetain(CUcontext *pctx, CUdevice dev){
-    LOG_INFO("dev=%d context_size=%ld",dev,context_size);
-    //for Initialization only
-    CUresult res = CUDA_OVERRIDE_CALL(cuda_library_entry,cuDevicePrimaryCtxRetain,pctx,dev);
+CUresult cuDevicePrimaryCtxRetain(CUcontext* pctx, CUdevice dev) {
+    LOG_INFO("dev=%d context_size=%ld", dev, context_size);
+    // for Initialization only
+    CUresult res = CUDA_OVERRIDE_CALL(cuda_library_entry, cuDevicePrimaryCtxRetain, pctx, dev);
     if (ctx_activate[dev] == 0) {
-        add_gpu_device_memory_usage(getpid(),dev,context_size,0); 
+        add_gpu_device_memory_usage(getpid(), dev, context_size, 0);
     }
-    if (context_size>0) {
+    if (context_size > 0) {
         ctx_activate[dev] = 1;
     }
     return res;
 }
 
-
-CUresult cuDevicePrimaryCtxSetFlags_v2( CUdevice dev, unsigned int  flags ){
-    LOG_DEBUG("into cuDevicePrimaryCtxSetFlags dev=%d flags=%d",dev,flags);
-    return CUDA_OVERRIDE_CALL(cuda_library_entry,cuDevicePrimaryCtxSetFlags_v2,dev,flags);
+CUresult cuDevicePrimaryCtxSetFlags_v2(CUdevice dev, unsigned int flags) {
+    LOG_DEBUG("into cuDevicePrimaryCtxSetFlags dev=%d flags=%d", dev, flags);
+    return CUDA_OVERRIDE_CALL(cuda_library_entry, cuDevicePrimaryCtxSetFlags_v2, dev, flags);
 }
 
-CUresult cuDevicePrimaryCtxRelease_v2( CUdevice dev ){
+CUresult cuDevicePrimaryCtxRelease_v2(CUdevice dev) {
     if (ctx_activate[dev] == 1) {
-        rm_gpu_device_memory_usage(getpid(),dev,context_size,0);
+        rm_gpu_device_memory_usage(getpid(), dev, context_size, 0);
     }
     ctx_activate[dev] = 0;
-    CUresult res = CUDA_OVERRIDE_CALL(cuda_library_entry,cuDevicePrimaryCtxRelease_v2,dev);
+    CUresult res = CUDA_OVERRIDE_CALL(cuda_library_entry, cuDevicePrimaryCtxRelease_v2, dev);
     return res;
 }
 
 CUresult cuCtxGetDevice(CUdevice* device) {
-    CUresult res = CUDA_OVERRIDE_CALL(cuda_library_entry,cuCtxGetDevice,device);
+    CUresult res = CUDA_OVERRIDE_CALL(cuda_library_entry, cuCtxGetDevice, device);
     return res;
 }
 
 #if CUDA_VERSION < 13000
-CUresult cuCtxCreate_v2 ( CUcontext* pctx, unsigned int  flags, CUdevice dev ){
-    LOG_DEBUG("into cuCtxCreate pctx=%p flags=%d dev=%d",pctx,flags,dev);
-    CUresult res = CUDA_OVERRIDE_CALL(cuda_library_entry,cuCtxCreate_v2,pctx,flags,dev);
+CUresult cuCtxCreate_v2(CUcontext* pctx, unsigned int flags, CUdevice dev) {
+    LOG_DEBUG("into cuCtxCreate pctx=%p flags=%d dev=%d", pctx, flags, dev);
+    CUresult res = CUDA_OVERRIDE_CALL(cuda_library_entry, cuCtxCreate_v2, pctx, flags, dev);
     return res;
 }
 
-CUresult cuCtxCreate_v3 ( CUcontext* pctx, CUexecAffinityParam* paramsArray, int  numParams, unsigned int  flags, CUdevice dev ){
-    LOG_DEBUG("into cuCtxCreate_v3 pctx=%p paramsArray=%p numParams=%d flags=%d dev=%d",pctx,paramsArray,numParams,flags,dev);
-    CUresult res = CUDA_OVERRIDE_CALL(cuda_library_entry,cuCtxCreate_v3,pctx,paramsArray,numParams,flags,dev);
+CUresult cuCtxCreate_v3(CUcontext* pctx, CUexecAffinityParam* paramsArray, int numParams,
+                        unsigned int flags, CUdevice dev) {
+    LOG_DEBUG("into cuCtxCreate_v3 pctx=%p paramsArray=%p numParams=%d flags=%d dev=%d", pctx,
+              paramsArray, numParams, flags, dev);
+    CUresult res = CUDA_OVERRIDE_CALL(cuda_library_entry, cuCtxCreate_v3, pctx, paramsArray,
+                                      numParams, flags, dev);
     return res;
 }
 #endif
 
-CUresult cuCtxCreate_v4(CUcontext* pctx, CUctxCreateParams* ctxCreateParams, unsigned int flags, CUdevice dev) {
-    LOG_DEBUG("into cuCtxCreate_v4 pctx=%p ctxCreateParams=%p flags=%d dev=%d", pctx, ctxCreateParams, flags, dev);
-    CUresult res = CUDA_OVERRIDE_CALL(cuda_library_entry, cuCtxCreate_v4, pctx, ctxCreateParams, flags, dev);
+CUresult cuCtxCreate_v4(CUcontext* pctx, CUctxCreateParams* ctxCreateParams, unsigned int flags,
+                        CUdevice dev) {
+    LOG_DEBUG("into cuCtxCreate_v4 pctx=%p ctxCreateParams=%p flags=%d dev=%d", pctx,
+              ctxCreateParams, flags, dev);
+    CUresult res =
+        CUDA_OVERRIDE_CALL(cuda_library_entry, cuCtxCreate_v4, pctx, ctxCreateParams, flags, dev);
     return res;
 }
 
-CUresult cuCtxDestroy_v2 ( CUcontext ctx ){
-    LOG_DEBUG("into cuCtxDestroy_v2 ctx=%p",ctx);
-    return CUDA_OVERRIDE_CALL(cuda_library_entry,cuCtxDestroy_v2,ctx);
+CUresult cuCtxDestroy_v2(CUcontext ctx) {
+    LOG_DEBUG("into cuCtxDestroy_v2 ctx=%p", ctx);
+    return CUDA_OVERRIDE_CALL(cuda_library_entry, cuCtxDestroy_v2, ctx);
 }
 
-CUresult cuCtxGetApiVersion ( CUcontext ctx, unsigned int* version ){
-    LOG_INFO("into cuCtxGetApiVersion ctx=%p",ctx);
-    CUresult res =  CUDA_OVERRIDE_CALL(cuda_library_entry,cuCtxGetApiVersion,ctx,version);
-    if (res!=CUDA_SUCCESS){
-        LOG_ERROR("cuCtxGetApiVersion res=%d",res);
+CUresult cuCtxGetApiVersion(CUcontext ctx, unsigned int* version) {
+    LOG_INFO("into cuCtxGetApiVersion ctx=%p", ctx);
+    CUresult res = CUDA_OVERRIDE_CALL(cuda_library_entry, cuCtxGetApiVersion, ctx, version);
+    if (res != CUDA_SUCCESS) {
+        LOG_ERROR("cuCtxGetApiVersion res=%d", res);
     }
     return res;
 }
 
-CUresult cuCtxGetCacheConfig ( CUfunc_cache* pconfig ){
+CUresult cuCtxGetCacheConfig(CUfunc_cache* pconfig) {
     LOG_DEBUG("into cuCtxGetCacheConfig");
-    return CUDA_OVERRIDE_CALL(cuda_library_entry,cuCtxGetCacheConfig,pconfig);
+    return CUDA_OVERRIDE_CALL(cuda_library_entry, cuCtxGetCacheConfig, pconfig);
 }
 
-CUresult cuCtxGetCurrent ( CUcontext* pctx ){
-    CUresult res = CUDA_OVERRIDE_CALL(cuda_library_entry,cuCtxGetCurrent,pctx);
+CUresult cuCtxGetCurrent(CUcontext* pctx) {
+    CUresult res = CUDA_OVERRIDE_CALL(cuda_library_entry, cuCtxGetCurrent, pctx);
     return res;
 }
 
-CUresult cuCtxGetFlags ( unsigned int* flags ){
-    LOG_DEBUG("into cuCtxGetFlags flags=%p",flags);
-    return CUDA_OVERRIDE_CALL(cuda_library_entry,cuCtxGetFlags,flags);
+CUresult cuCtxGetFlags(unsigned int* flags) {
+    LOG_DEBUG("into cuCtxGetFlags flags=%p", flags);
+    return CUDA_OVERRIDE_CALL(cuda_library_entry, cuCtxGetFlags, flags);
 }
 
-CUresult cuCtxGetLimit ( size_t* pvalue, CUlimit limit ){
-    LOG_DEBUG("into cuCtxGetLimit pvalue=%p",pvalue);
-    return CUDA_OVERRIDE_CALL(cuda_library_entry,cuCtxGetLimit,pvalue,limit);
+CUresult cuCtxGetLimit(size_t* pvalue, CUlimit limit) {
+    LOG_DEBUG("into cuCtxGetLimit pvalue=%p", pvalue);
+    return CUDA_OVERRIDE_CALL(cuda_library_entry, cuCtxGetLimit, pvalue, limit);
 }
 
-CUresult cuCtxGetSharedMemConfig ( CUsharedconfig* pConfig ){
-    LOG_DEBUG("cuCtxGetSharedMemConfig pConfig=%p",pConfig);
-    return CUDA_OVERRIDE_CALL(cuda_library_entry,cuCtxGetSharedMemConfig,pConfig);
+CUresult cuCtxGetSharedMemConfig(CUsharedconfig* pConfig) {
+    LOG_DEBUG("cuCtxGetSharedMemConfig pConfig=%p", pConfig);
+    return CUDA_OVERRIDE_CALL(cuda_library_entry, cuCtxGetSharedMemConfig, pConfig);
 }
 
-CUresult cuCtxGetStreamPriorityRange ( int* leastPriority, int* greatestPriority ){
-    CUresult res = CUDA_OVERRIDE_CALL(cuda_library_entry,cuCtxGetStreamPriorityRange,leastPriority,greatestPriority);
-    if (res!=CUDA_SUCCESS){
-        LOG_ERROR("cuCtxGetStreamPriorityRange err=%d",res);
+CUresult cuCtxGetStreamPriorityRange(int* leastPriority, int* greatestPriority) {
+    CUresult res = CUDA_OVERRIDE_CALL(cuda_library_entry, cuCtxGetStreamPriorityRange,
+                                      leastPriority, greatestPriority);
+    if (res != CUDA_SUCCESS) {
+        LOG_ERROR("cuCtxGetStreamPriorityRange err=%d", res);
     }
     return res;
 }
 
-CUresult cuCtxPopCurrent_v2 ( CUcontext* pctx ){
-    LOG_INFO("cuCtxPopCurrent pctx=%p",pctx);
-    return CUDA_OVERRIDE_CALL(cuda_library_entry,cuCtxPopCurrent_v2,pctx);
+CUresult cuCtxPopCurrent_v2(CUcontext* pctx) {
+    LOG_INFO("cuCtxPopCurrent pctx=%p", pctx);
+    return CUDA_OVERRIDE_CALL(cuda_library_entry, cuCtxPopCurrent_v2, pctx);
 }
 
-CUresult cuCtxPushCurrent_v2 ( CUcontext ctx ){
-    LOG_INFO("cuCtxPushCurrent ctx=%p",ctx);
-    return CUDA_OVERRIDE_CALL(cuda_library_entry,cuCtxPushCurrent_v2,ctx);
+CUresult cuCtxPushCurrent_v2(CUcontext ctx) {
+    LOG_INFO("cuCtxPushCurrent ctx=%p", ctx);
+    return CUDA_OVERRIDE_CALL(cuda_library_entry, cuCtxPushCurrent_v2, ctx);
 }
 
-CUresult cuCtxSetCacheConfig ( CUfunc_cache config ){
-    LOG_DEBUG("cuCtxSetCacheConfig config=%d",config);
-    return CUDA_OVERRIDE_CALL(cuda_library_entry,cuCtxSetCacheConfig,config);
+CUresult cuCtxSetCacheConfig(CUfunc_cache config) {
+    LOG_DEBUG("cuCtxSetCacheConfig config=%d", config);
+    return CUDA_OVERRIDE_CALL(cuda_library_entry, cuCtxSetCacheConfig, config);
 }
 
-CUresult cuCtxSetCurrent ( CUcontext ctx ){
-    CUresult res = CUDA_OVERRIDE_CALL(cuda_library_entry,cuCtxSetCurrent,ctx);
-    if (res!=CUDA_SUCCESS){
-        LOG_ERROR("cuCtxSetCurrent111 failed res=%d ctx=%p",res,ctx);
+CUresult cuCtxSetCurrent(CUcontext ctx) {
+    CUresult res = CUDA_OVERRIDE_CALL(cuda_library_entry, cuCtxSetCurrent, ctx);
+    if (res != CUDA_SUCCESS) {
+        LOG_ERROR("cuCtxSetCurrent111 failed res=%d ctx=%p", res, ctx);
     }
     return res;
 }
 
-CUresult cuCtxSetLimit ( CUlimit limit, size_t value ){
+CUresult cuCtxSetLimit(CUlimit limit, size_t value) {
     LOG_DEBUG("cuCtxSetLimit");
-    return CUDA_OVERRIDE_CALL(cuda_library_entry,cuCtxSetLimit,limit,value);
+    return CUDA_OVERRIDE_CALL(cuda_library_entry, cuCtxSetLimit, limit, value);
 }
 
-CUresult cuCtxSetSharedMemConfig ( CUsharedconfig config ){
+CUresult cuCtxSetSharedMemConfig(CUsharedconfig config) {
     LOG_DEBUG("cuCtxSetSharedMemConfig");
-    return CUDA_OVERRIDE_CALL(cuda_library_entry,cuCtxSetSharedMemConfig,config);
+    return CUDA_OVERRIDE_CALL(cuda_library_entry, cuCtxSetSharedMemConfig, config);
 }
 
-CUresult cuCtxSynchronize ( void ){
+CUresult cuCtxSynchronize(void) {
     LOG_DEBUG("INTO CtxSync");
-    CUresult res = CUDA_OVERRIDE_CALL(cuda_library_entry,cuCtxSynchronize);
+    CUresult res = CUDA_OVERRIDE_CALL(cuda_library_entry, cuCtxSynchronize);
     return res;
 }
-
diff --git a/src/cuda/device.c b/src/cuda/device.c
old mode 100755
new mode 100644
index ac1833ab..bd7efad2
--- a/src/cuda/device.c
+++ b/src/cuda/device.c
@@ -1,27 +1,26 @@
+#include "allocator/allocator.h"
 #include "include/libcuda_hook.h"
-#include "multiprocess/multiprocess_memory_limit.h"
-#include "include/nvml_prefix.h"
 #include "include/libnvml_hook.h"
-
-#include "allocator/allocator.h"
 #include "include/memory_limit.h"
+#include "include/nvml_prefix.h"
+#include "multiprocess/multiprocess_memory_limit.h"
 
-CUresult CUDAAPI cuDeviceGetAttribute ( int* pi, CUdevice_attribute attrib, CUdevice dev ) {
-    CUresult res = CUDA_OVERRIDE_CALL(cuda_library_entry,cuDeviceGetAttribute,pi,attrib,dev);
-    //LOG_DEBUG("[%d]cuDeviceGetAttribute dev=%d attrib=%d %d",res,dev,(int)attrib,*pi);
+CUresult CUDAAPI cuDeviceGetAttribute(int *pi, CUdevice_attribute attrib, CUdevice dev) {
+    CUresult res = CUDA_OVERRIDE_CALL(cuda_library_entry, cuDeviceGetAttribute, pi, attrib, dev);
+    // LOG_DEBUG("[%d]cuDeviceGetAttribute dev=%d attrib=%d %d",res,dev,(int)attrib,*pi);
     return res;
 }
 
-CUresult cuDeviceGet(CUdevice *device,int ordinal){
-    LOG_DEBUG("into cuDeviceGet ordinal=%d\n",ordinal);
-    CUresult res = CUDA_OVERRIDE_CALL(cuda_library_entry,cuDeviceGet,device,ordinal);
+CUresult cuDeviceGet(CUdevice *device, int ordinal) {
+    LOG_DEBUG("into cuDeviceGet ordinal=%d\n", ordinal);
+    CUresult res = CUDA_OVERRIDE_CALL(cuda_library_entry, cuDeviceGet, device, ordinal);
     return res;
 }
 
-CUresult cuDeviceGetCount( int* count ) {
+CUresult cuDeviceGetCount(int *count) {
     LOG_DEBUG("into cuDeviceGetCount");
-    CUresult res = CUDA_OVERRIDE_CALL(cuda_library_entry,cuDeviceGetCount,count);
-    LOG_DEBUG("cuDeviceGetCount res=%d count=%d",res,*count);
+    CUresult res = CUDA_OVERRIDE_CALL(cuda_library_entry, cuDeviceGetCount, count);
+    LOG_DEBUG("cuDeviceGetCount res=%d count=%d", res, *count);
     return res;
 }
 
@@ -31,39 +30,38 @@ CUresult cuDeviceGetName(char *name, int len, CUdevice dev) {
     return res;
 }
 
-CUresult cuDeviceCanAccessPeer( int* canAccessPeer, CUdevice dev, CUdevice peerDev ) {
-    LOG_INFO("into cuDeviceCanAccessPeer %d %d",dev,peerDev);
-    return CUDA_OVERRIDE_CALL(cuda_library_entry,cuDeviceCanAccessPeer,canAccessPeer,dev,peerDev);
+CUresult cuDeviceCanAccessPeer(int *canAccessPeer, CUdevice dev, CUdevice peerDev) {
+    LOG_INFO("into cuDeviceCanAccessPeer %d %d", dev, peerDev);
+    return CUDA_OVERRIDE_CALL(cuda_library_entry, cuDeviceCanAccessPeer, canAccessPeer, dev,
+                              peerDev);
 }
 
-CUresult cuDeviceGetP2PAttribute(int *value, CUdevice_P2PAttribute attrib,
-                                 CUdevice srcDevice, CUdevice dstDevice) {
+CUresult cuDeviceGetP2PAttribute(int *value, CUdevice_P2PAttribute attrib, CUdevice srcDevice,
+                                 CUdevice dstDevice) {
     LOG_DEBUG("into cuDeviceGetP2PAttribute\n");
-    return CUDA_OVERRIDE_CALL(cuda_library_entry, cuDeviceGetP2PAttribute, value,
-                         attrib, srcDevice, dstDevice);
+    return CUDA_OVERRIDE_CALL(cuda_library_entry, cuDeviceGetP2PAttribute, value, attrib, srcDevice,
+                              dstDevice);
 }
 
 CUresult cuDeviceGetByPCIBusId(CUdevice *dev, const char *pciBusId) {
-    return CUDA_OVERRIDE_CALL(cuda_library_entry, cuDeviceGetByPCIBusId, dev,
-                         pciBusId);
+    return CUDA_OVERRIDE_CALL(cuda_library_entry, cuDeviceGetByPCIBusId, dev, pciBusId);
 }
 
 CUresult cuDeviceGetPCIBusId(char *pciBusId, int len, CUdevice dev) {
-    LOG_INFO("into cuDeviceGetPCIBusId dev=%d len=%d",dev,len);
-    CUresult res = CUDA_OVERRIDE_CALL(cuda_library_entry, cuDeviceGetPCIBusId, pciBusId, len,
-                        dev);
+    LOG_INFO("into cuDeviceGetPCIBusId dev=%d len=%d", dev, len);
+    CUresult res = CUDA_OVERRIDE_CALL(cuda_library_entry, cuDeviceGetPCIBusId, pciBusId, len, dev);
     return res;
 }
 
 #if CUDA_VERSION < 13000
-CUresult cuDeviceGetUuid(CUuuid* uuid,CUdevice dev) {
-    LOG_DEBUG("into cuDeviceGetUuid dev=%d",dev);
-    CUresult res = CUDA_OVERRIDE_CALL(cuda_library_entry,cuDeviceGetUuid,uuid,dev);
+CUresult cuDeviceGetUuid(CUuuid *uuid, CUdevice dev) {
+    LOG_DEBUG("into cuDeviceGetUuid dev=%d", dev);
+    CUresult res = CUDA_OVERRIDE_CALL(cuda_library_entry, cuDeviceGetUuid, uuid, dev);
     return res;
 }
 #endif
 
-CUresult cuDeviceGetUuid_v2(CUuuid* uuid, CUdevice dev) {
+CUresult cuDeviceGetUuid_v2(CUuuid *uuid, CUdevice dev) {
     LOG_DEBUG("into cuDeviceGetUuid_v2 dev=%d", dev);
     CUresult res = CUDA_OVERRIDE_CALL(cuda_library_entry, cuDeviceGetUuid_v2, uuid, dev);
     return res;
@@ -71,23 +69,20 @@ CUresult cuDeviceGetUuid_v2(CUuuid* uuid, CUdevice dev) {
 
 CUresult cuDeviceGetDefaultMemPool(CUmemoryPool *pool_out, CUdevice dev) {
     LOG_DEBUG("cuDeviceGetDefaultMemPool");
-    return CUDA_OVERRIDE_CALL(cuda_library_entry, cuDeviceGetDefaultMemPool,
-                         pool_out, dev);
+    return CUDA_OVERRIDE_CALL(cuda_library_entry, cuDeviceGetDefaultMemPool, pool_out, dev);
 }
 
-CUresult cuDeviceGetMemPool(CUmemoryPool *pool, CUdevice dev){
+CUresult cuDeviceGetMemPool(CUmemoryPool *pool, CUdevice dev) {
     LOG_DEBUG("cuDeviceGetMemPool");
     return CUDA_OVERRIDE_CALL(cuda_library_entry, cuDeviceGetMemPool, pool, dev);
 }
 
-CUresult cuDeviceGetLuid(char *luid, unsigned int *deviceNodeMask,
-                         CUdevice dev) {
-  LOG_DEBUG("cuDeviceGetLuid");
-  return CUDA_OVERRIDE_CALL(cuda_library_entry, cuDeviceGetLuid, luid,
-                         deviceNodeMask, dev);
+CUresult cuDeviceGetLuid(char *luid, unsigned int *deviceNodeMask, CUdevice dev) {
+    LOG_DEBUG("cuDeviceGetLuid");
+    return CUDA_OVERRIDE_CALL(cuda_library_entry, cuDeviceGetLuid, luid, deviceNodeMask, dev);
 }
 
-CUresult cuDeviceTotalMem_v2 ( size_t* bytes, CUdevice dev ) {
+CUresult cuDeviceTotalMem_v2(size_t *bytes, CUdevice dev) {
     LOG_DEBUG("into cuDeviceTotalMem");
     ENSURE_INITIALIZED();
     size_t limit = get_current_device_memory_limit(dev);
@@ -97,29 +92,32 @@ CUresult cuDeviceTotalMem_v2 ( size_t* bytes, CUdevice dev ) {
 
 CUresult cuDriverGetVersion(int *driverVersion) {
     LOG_DEBUG("into cuDriverGetVersion__");
-    
-    //stub dlsym to prelaod cuda functions
-    dlsym(RTLD_DEFAULT,"cuDriverGetVersion");
 
-    CUresult res = CUDA_OVERRIDE_CALL(cuda_library_entry,cuDriverGetVersion,driverVersion);
+    // stub dlsym to preload cuda functions
+    dlsym(RTLD_DEFAULT, "cuDriverGetVersion");
+
+    CUresult res = CUDA_OVERRIDE_CALL(cuda_library_entry, cuDriverGetVersion, driverVersion);
     //*driverVersion=11030;
-    if ((res==CUDA_SUCCESS) && (driverVersion!=NULL)) {
-        LOG_INFO("driver version=%d",*driverVersion);
+    if ((res == CUDA_SUCCESS) && (driverVersion != NULL)) {
+        LOG_INFO("driver version=%d", *driverVersion);
     }
     return res;
 }
 
-CUresult cuDeviceGetTexture1DLinearMaxWidth(size_t *maxWidthInElements, CUarray_format format, unsigned numChannels, CUdevice dev){
+CUresult cuDeviceGetTexture1DLinearMaxWidth(size_t *maxWidthInElements, CUarray_format format,
+                                            unsigned numChannels, CUdevice dev) {
     LOG_DEBUG("cuDeviceGetTexture1DLinearMaxWidth");
-    return CUDA_OVERRIDE_CALL(cuda_library_entry,cuDeviceGetTexture1DLinearMaxWidth,maxWidthInElements,format,numChannels,dev);
+    return CUDA_OVERRIDE_CALL(cuda_library_entry, cuDeviceGetTexture1DLinearMaxWidth,
+                              maxWidthInElements, format, numChannels, dev);
 }
 
 CUresult cuDeviceSetMemPool(CUdevice dev, CUmemoryPool pool) {
     LOG_DEBUG("cuDeviceSetMemPool");
-    return CUDA_OVERRIDE_CALL(cuda_library_entry,cuDeviceSetMemPool,dev,pool);
+    return CUDA_OVERRIDE_CALL(cuda_library_entry, cuDeviceSetMemPool, dev, pool);
 }
 
-CUresult cuFlushGPUDirectRDMAWrites(CUflushGPUDirectRDMAWritesTarget target, CUflushGPUDirectRDMAWritesScope scope) {
-   LOG_DEBUG("cuFlushGPUDirectRDMAWrites");
-   return CUDA_OVERRIDE_CALL(cuda_library_entry,cuFlushGPUDirectRDMAWrites,target,scope);
+CUresult cuFlushGPUDirectRDMAWrites(CUflushGPUDirectRDMAWritesTarget target,
+                                    CUflushGPUDirectRDMAWritesScope scope) {
+    LOG_DEBUG("cuFlushGPUDirectRDMAWrites");
+    return CUDA_OVERRIDE_CALL(cuda_library_entry, cuFlushGPUDirectRDMAWrites, target, scope);
 }
diff --git a/src/cuda/event.c b/src/cuda/event.c
index 469c199b..9ca5648b 100644
--- a/src/cuda/event.c
+++ b/src/cuda/event.c
@@ -1,103 +1,113 @@
-#include "include/libcuda_hook.h"
 #include <nvml.h>
 
-CUresult cuEventCreate ( CUevent* phEvent, unsigned int  Flags ){
-    LOG_DEBUG("cuEventCreate Event=%p",phEvent);
-    return CUDA_OVERRIDE_CALL(cuda_library_entry,cuEventCreate,phEvent,Flags);
+#include "include/libcuda_hook.h"
+
+CUresult cuEventCreate(CUevent* phEvent, unsigned int Flags) {
+    LOG_DEBUG("cuEventCreate Event=%p", phEvent);
+    return CUDA_OVERRIDE_CALL(cuda_library_entry, cuEventCreate, phEvent, Flags);
 }
 
-CUresult cuEventDestroy_v2 ( CUevent hEvent ){
-    LOG_DEBUG("cuEventDestroy_v2 hEvent=%p",hEvent);
-    return CUDA_OVERRIDE_CALL(cuda_library_entry,cuEventDestroy_v2,hEvent);
+CUresult cuEventDestroy_v2(CUevent hEvent) {
+    LOG_DEBUG("cuEventDestroy_v2 hEvent=%p", hEvent);
+    return CUDA_OVERRIDE_CALL(cuda_library_entry, cuEventDestroy_v2, hEvent);
 }
 
-CUresult cuModuleLoad ( CUmodule* module, const char* fname ){
-    LOG_DEBUG(" cuModuleLoad fname=%s",fname);
-    return CUDA_OVERRIDE_CALL(cuda_library_entry,cuModuleLoad,module,fname);
+CUresult cuModuleLoad(CUmodule* module, const char* fname) {
+    LOG_DEBUG(" cuModuleLoad fname=%s", fname);
+    return CUDA_OVERRIDE_CALL(cuda_library_entry, cuModuleLoad, module, fname);
 }
 
-CUresult cuModuleLoadData( CUmodule* module, const void* image){
-    LOG_DEBUG("cuModuleLoadData module=%p",module);
-    return CUDA_OVERRIDE_CALL(cuda_library_entry,cuModuleLoadData,module,image);
+CUresult cuModuleLoadData(CUmodule* module, const void* image) {
+    LOG_DEBUG("cuModuleLoadData module=%p", module);
+    return CUDA_OVERRIDE_CALL(cuda_library_entry, cuModuleLoadData, module, image);
 }
 
-CUresult cuModuleLoadDataEx ( CUmodule* module, const void* image, unsigned int  numOptions, CUjit_option* options, void** optionValues ){
-    LOG_DEBUG("cuModuleLoadDataEx module=%p",module);
-    return CUDA_OVERRIDE_CALL(cuda_library_entry,cuModuleLoadDataEx,module,image,numOptions,options,optionValues);
+CUresult cuModuleLoadDataEx(CUmodule* module, const void* image, unsigned int numOptions,
+                            CUjit_option* options, void** optionValues) {
+    LOG_DEBUG("cuModuleLoadDataEx module=%p", module);
+    return CUDA_OVERRIDE_CALL(cuda_library_entry, cuModuleLoadDataEx, module, image, numOptions,
+                              options, optionValues);
 }
 
-CUresult cuModuleLoadFatBinary ( CUmodule* module, const void* fatCubin ){
-    LOG_DEBUG("cuModuleLoadFatBinary module=%p",module);
-    return CUDA_OVERRIDE_CALL(cuda_library_entry,cuModuleLoadFatBinary,module,fatCubin);
+CUresult cuModuleLoadFatBinary(CUmodule* module, const void* fatCubin) {
+    LOG_DEBUG("cuModuleLoadFatBinary module=%p", module);
+    return CUDA_OVERRIDE_CALL(cuda_library_entry, cuModuleLoadFatBinary, module, fatCubin);
 }
 
-CUresult cuModuleGetFunction ( CUfunction* hfunc, CUmodule hmod, const char* name ){
-    CUresult res = CUDA_OVERRIDE_CALL(cuda_library_entry,cuModuleGetFunction,hfunc,hmod,name);
+CUresult cuModuleGetFunction(CUfunction* hfunc, CUmodule hmod, const char* name) {
+    CUresult res = CUDA_OVERRIDE_CALL(cuda_library_entry, cuModuleGetFunction, hfunc, hmod, name);
     return res;
 }
 
 CUresult cuModuleUnload(CUmodule hmod) {
     LOG_DEBUG("cuModuleUnload");
-    return CUDA_OVERRIDE_CALL(cuda_library_entry,cuModuleUnload,hmod);
+    return CUDA_OVERRIDE_CALL(cuda_library_entry, cuModuleUnload, hmod);
 }
 
-CUresult cuModuleGetGlobal_v2(CUdeviceptr *dptr, size_t *bytes, CUmodule hmod, const char *name) {
-    CUresult res = CUDA_OVERRIDE_CALL(cuda_library_entry,cuModuleGetGlobal_v2,dptr,bytes,hmod,name);
+CUresult cuModuleGetGlobal_v2(CUdeviceptr* dptr, size_t* bytes, CUmodule hmod, const char* name) {
+    CUresult res =
+        CUDA_OVERRIDE_CALL(cuda_library_entry, cuModuleGetGlobal_v2, dptr, bytes, hmod, name);
     return res;
 }
 
-CUresult cuModuleGetTexRef(CUtexref *pTexRef, CUmodule hmod, const char *name) {
+CUresult cuModuleGetTexRef(CUtexref* pTexRef, CUmodule hmod, const char* name) {
     LOG_INFO("cuModuleGetTexRef");
-    return CUDA_OVERRIDE_CALL(cuda_library_entry,cuModuleGetTexRef,pTexRef,hmod,name);
+    return CUDA_OVERRIDE_CALL(cuda_library_entry, cuModuleGetTexRef, pTexRef, hmod, name);
 }
 
-CUresult cuModuleGetSurfRef(CUsurfref *pSurfRef, CUmodule hmod, const char *name) {
+CUresult cuModuleGetSurfRef(CUsurfref* pSurfRef, CUmodule hmod, const char* name) {
     LOG_INFO("cuModuleGetSurfRef");
-    return CUDA_OVERRIDE_CALL(cuda_library_entry,cuModuleGetSurfRef,pSurfRef,hmod,name);
+    return CUDA_OVERRIDE_CALL(cuda_library_entry, cuModuleGetSurfRef, pSurfRef, hmod, name);
 }
 
-CUresult cuLinkAddData_v2 ( CUlinkState state, CUjitInputType type, void* data, size_t size, const char* name, unsigned int  numOptions, CUjit_option* options, void** optionValues ) {
+CUresult cuLinkAddData_v2(CUlinkState state, CUjitInputType type, void* data, size_t size,
+                          const char* name, unsigned int numOptions, CUjit_option* options,
+                          void** optionValues) {
     LOG_DEBUG("into cuLinkAddData_v2");
-    return CUDA_OVERRIDE_CALL(cuda_library_entry,cuLinkAddData_v2,state,type,data,size,name,numOptions,options,optionValues);
+    return CUDA_OVERRIDE_CALL(cuda_library_entry, cuLinkAddData_v2, state, type, data, size, name,
+                              numOptions, options, optionValues);
 }
 
-CUresult cuLinkCreate_v2 ( unsigned int  numOptions, CUjit_option* options, void** optionValues, CUlinkState* stateOut ) {
+CUresult cuLinkCreate_v2(unsigned int numOptions, CUjit_option* options, void** optionValues,
+                         CUlinkState* stateOut) {
     LOG_DEBUG("into cuLinkCreate_v2");
-    return CUDA_OVERRIDE_CALL(cuda_library_entry,cuLinkCreate_v2,numOptions,options,optionValues,stateOut);
+    return CUDA_OVERRIDE_CALL(cuda_library_entry, cuLinkCreate_v2, numOptions, options,
+                              optionValues, stateOut);
 }
 
-CUresult cuLinkAddFile_v2(CUlinkState state, CUjitInputType type, const char *path,
-    unsigned int numOptions, CUjit_option *options, void **optionValues) {
+CUresult cuLinkAddFile_v2(CUlinkState state, CUjitInputType type, const char* path,
+                          unsigned int numOptions, CUjit_option* options, void** optionValues) {
     LOG_DEBUG("cuLinkAddFile_v2");
-    return CUDA_OVERRIDE_CALL(cuda_library_entry,cuLinkAddFile_v2,state,type,path,numOptions,options,optionValues);
-    }
+    return CUDA_OVERRIDE_CALL(cuda_library_entry, cuLinkAddFile_v2, state, type, path, numOptions,
+                              options, optionValues);
+}
 
-CUresult cuLinkComplete(CUlinkState state, void **cubinOut, size_t *sizeOut) {
+CUresult cuLinkComplete(CUlinkState state, void** cubinOut, size_t* sizeOut) {
     LOG_DEBUG("cuLinkComplete");
-    return CUDA_OVERRIDE_CALL(cuda_library_entry,cuLinkComplete,state,cubinOut,sizeOut);
+    return CUDA_OVERRIDE_CALL(cuda_library_entry, cuLinkComplete, state, cubinOut, sizeOut);
 }
 
 CUresult cuLinkDestroy(CUlinkState state) {
     LOG_DEBUG("cuLinkDestroy");
-    return CUDA_OVERRIDE_CALL(cuda_library_entry,cuLinkDestroy,state);
+    return CUDA_OVERRIDE_CALL(cuda_library_entry, cuLinkDestroy, state);
 }
 
-CUresult cuFuncSetCacheConfig ( CUfunction hfunc, CUfunc_cache config ){
+CUresult cuFuncSetCacheConfig(CUfunction hfunc, CUfunc_cache config) {
     LOG_INFO("cuFUncSetCacheConfig");
-    return CUDA_OVERRIDE_CALL(cuda_library_entry,cuFuncSetCacheConfig,hfunc,config);
+    return CUDA_OVERRIDE_CALL(cuda_library_entry, cuFuncSetCacheConfig, hfunc, config);
 }
 
 CUresult cuFuncSetSharedMemConfig(CUfunction hfunc, CUsharedconfig config) {
     LOG_INFO("cuFuncSetSharedMemConfig");
-    return CUDA_OVERRIDE_CALL(cuda_library_entry,cuFuncSetSharedMemConfig,hfunc,config);
+    return CUDA_OVERRIDE_CALL(cuda_library_entry, cuFuncSetSharedMemConfig, hfunc, config);
 }
 
-CUresult cuFuncGetAttribute(int *pi, CUfunction_attribute attrib, CUfunction hfunc) {
+CUresult cuFuncGetAttribute(int* pi, CUfunction_attribute attrib, CUfunction hfunc) {
     LOG_DEBUG("cuFuncGetAttribute");
-    return CUDA_OVERRIDE_CALL(cuda_library_entry,cuFuncGetAttribute,pi,attrib,hfunc);
+    return CUDA_OVERRIDE_CALL(cuda_library_entry, cuFuncGetAttribute, pi, attrib, hfunc);
 }
 
 CUresult cuFuncSetAttribute(CUfunction hfunc, CUfunction_attribute attrib, int value) {
     LOG_DEBUG("cuFuncSetAttribute");
-    return CUDA_OVERRIDE_CALL(cuda_library_entry,cuFuncSetAttribute,hfunc,attrib,value);
+    return CUDA_OVERRIDE_CALL(cuda_library_entry, cuFuncSetAttribute, hfunc, attrib, value);
 }
diff --git a/src/cuda/graph.c b/src/cuda/graph.c
index 412743fb..04472692 100644
--- a/src/cuda/graph.c
+++ b/src/cuda/graph.c
@@ -1,280 +1,341 @@
 #include "include/libcuda_hook.h"
 
-CUresult cuGraphCreate(CUgraph *phGraph, unsigned int flags){
-	LOG_DEBUG("cuGraphCreate");
-	return CUDA_OVERRIDE_CALL(cuda_library_entry,cuGraphCreate,phGraph,flags);
+CUresult cuGraphCreate(CUgraph *phGraph, unsigned int flags) {
+    LOG_DEBUG("cuGraphCreate");
+    return CUDA_OVERRIDE_CALL(cuda_library_entry, cuGraphCreate, phGraph, flags);
 }
 
-CUresult cuGraphAddKernelNode_v2(CUgraphNode *phGraphNode, CUgraph hGraph, const CUgraphNode *dependencies, size_t numDependencies, const CUDA_KERNEL_NODE_PARAMS *nodeParams) {
-	LOG_DEBUG("cuGraphAddKernelNode_v2");
-	return CUDA_OVERRIDE_CALL(cuda_library_entry,cuGraphAddKernelNode_v2,phGraphNode,hGraph,dependencies,numDependencies,nodeParams);
+CUresult cuGraphAddKernelNode_v2(CUgraphNode *phGraphNode, CUgraph hGraph,
+                                 const CUgraphNode *dependencies, size_t numDependencies,
+                                 const CUDA_KERNEL_NODE_PARAMS *nodeParams) {
+    LOG_DEBUG("cuGraphAddKernelNode_v2");
+    return CUDA_OVERRIDE_CALL(cuda_library_entry, cuGraphAddKernelNode_v2, phGraphNode, hGraph,
+                              dependencies, numDependencies, nodeParams);
 }
 
 CUresult cuGraphKernelNodeGetParams_v2(CUgraphNode hNode, CUDA_KERNEL_NODE_PARAMS *nodeParams) {
-	LOG_DEBUG("cuGraphKernelNodeGetParams_v2");
-	return CUDA_OVERRIDE_CALL(cuda_library_entry,cuGraphKernelNodeGetParams_v2,hNode,nodeParams);
+    LOG_DEBUG("cuGraphKernelNodeGetParams_v2");
+    return CUDA_OVERRIDE_CALL(cuda_library_entry, cuGraphKernelNodeGetParams_v2, hNode, nodeParams);
 }
 
-CUresult cuGraphKernelNodeSetParams_v2(CUgraphNode hNode, const CUDA_KERNEL_NODE_PARAMS *nodeParams) {
-	LOG_DEBUG("cuGraphKernelNodeSetParams_v2");
-	return CUDA_OVERRIDE_CALL(cuda_library_entry,cuGraphKernelNodeSetParams_v2,hNode,nodeParams);
+CUresult cuGraphKernelNodeSetParams_v2(CUgraphNode hNode,
+                                       const CUDA_KERNEL_NODE_PARAMS *nodeParams) {
+    LOG_DEBUG("cuGraphKernelNodeSetParams_v2");
+    return CUDA_OVERRIDE_CALL(cuda_library_entry, cuGraphKernelNodeSetParams_v2, hNode, nodeParams);
 }
 
-CUresult cuGraphAddMemcpyNode(CUgraphNode *phGraphNode, CUgraph hGraph, const CUgraphNode *dependencies, size_t numDependencies, const CUDA_MEMCPY3D *copyParams, CUcontext ctx) {
-	LOG_DEBUG("cuGraphAddMemcpyNode");
-	return CUDA_OVERRIDE_CALL(cuda_library_entry,cuGraphAddMemcpyNode,phGraphNode,hGraph,dependencies,numDependencies,copyParams,ctx);
+CUresult cuGraphAddMemcpyNode(CUgraphNode *phGraphNode, CUgraph hGraph,
+                              const CUgraphNode *dependencies, size_t numDependencies,
+                              const CUDA_MEMCPY3D *copyParams, CUcontext ctx) {
+    LOG_DEBUG("cuGraphAddMemcpyNode");
+    return CUDA_OVERRIDE_CALL(cuda_library_entry, cuGraphAddMemcpyNode, phGraphNode, hGraph,
+                              dependencies, numDependencies, copyParams, ctx);
 }
 
 CUresult cuGraphMemcpyNodeGetParams(CUgraphNode hNode, CUDA_MEMCPY3D *nodeParams) {
-	LOG_DEBUG("cuGraphMemcpyNodeGetParams");
-	return CUDA_OVERRIDE_CALL(cuda_library_entry,cuGraphMemcpyNodeGetParams,hNode,nodeParams);
+    LOG_DEBUG("cuGraphMemcpyNodeGetParams");
+    return CUDA_OVERRIDE_CALL(cuda_library_entry, cuGraphMemcpyNodeGetParams, hNode, nodeParams);
 }
 
 CUresult cuGraphMemcpyNodeSetParams(CUgraphNode hNode, const CUDA_MEMCPY3D *nodeParams) {
-	LOG_DEBUG("cuGraphMemcpyNodeSetParams");
-	return CUDA_OVERRIDE_CALL(cuda_library_entry,cuGraphMemcpyNodeSetParams,hNode,nodeParams);
+    LOG_DEBUG("cuGraphMemcpyNodeSetParams");
+    return CUDA_OVERRIDE_CALL(cuda_library_entry, cuGraphMemcpyNodeSetParams, hNode, nodeParams);
 }
 
-CUresult cuGraphAddMemsetNode(CUgraphNode *phGraphNode, CUgraph hGraph, const CUgraphNode *dependencies, size_t numDependencies, const CUDA_MEMSET_NODE_PARAMS *memsetParams, CUcontext ctx) {
-	LOG_DEBUG("cuGraphAddMemsetNode");
-	return CUDA_OVERRIDE_CALL(cuda_library_entry,cuGraphAddMemsetNode,phGraphNode,hGraph,dependencies,numDependencies,memsetParams,ctx);
+CUresult cuGraphAddMemsetNode(CUgraphNode *phGraphNode, CUgraph hGraph,
+                              const CUgraphNode *dependencies, size_t numDependencies,
+                              const CUDA_MEMSET_NODE_PARAMS *memsetParams, CUcontext ctx) {
+    LOG_DEBUG("cuGraphAddMemsetNode");
+    return CUDA_OVERRIDE_CALL(cuda_library_entry, cuGraphAddMemsetNode, phGraphNode, hGraph,
+                              dependencies, numDependencies, memsetParams, ctx);
 }
 
 CUresult cuGraphMemsetNodeGetParams(CUgraphNode hNode, CUDA_MEMSET_NODE_PARAMS *nodeParams) {
-	LOG_DEBUG("cuGraphMemsetNodeGetParams");
-	return CUDA_OVERRIDE_CALL(cuda_library_entry,cuGraphMemsetNodeGetParams,hNode,nodeParams);
+    LOG_DEBUG("cuGraphMemsetNodeGetParams");
+    return CUDA_OVERRIDE_CALL(cuda_library_entry, cuGraphMemsetNodeGetParams, hNode, nodeParams);
 }
 
 CUresult cuGraphMemsetNodeSetParams(CUgraphNode hNode, const CUDA_MEMSET_NODE_PARAMS *nodeParams) {
-	LOG_DEBUG("cuGraphMemsetNodeSetParams");
-	return CUDA_OVERRIDE_CALL(cuda_library_entry,cuGraphMemsetNodeSetParams,hNode,nodeParams);
+    LOG_DEBUG("cuGraphMemsetNodeSetParams");
+    return CUDA_OVERRIDE_CALL(cuda_library_entry, cuGraphMemsetNodeSetParams, hNode, nodeParams);
 }
 
-CUresult cuGraphAddHostNode(CUgraphNode *phGraphNode, CUgraph hGraph, const CUgraphNode *dependencies, size_t numDependencies, const CUDA_HOST_NODE_PARAMS *nodeParams) {
-	LOG_DEBUG("cuGraphAddHostNode");
-	return CUDA_OVERRIDE_CALL(cuda_library_entry,cuGraphAddHostNode,phGraphNode,hGraph,dependencies,numDependencies,nodeParams);
+CUresult cuGraphAddHostNode(CUgraphNode *phGraphNode, CUgraph hGraph,
+                            const CUgraphNode *dependencies, size_t numDependencies,
+                            const CUDA_HOST_NODE_PARAMS *nodeParams) {
+    LOG_DEBUG("cuGraphAddHostNode");
+    return CUDA_OVERRIDE_CALL(cuda_library_entry, cuGraphAddHostNode, phGraphNode, hGraph,
+                              dependencies, numDependencies, nodeParams);
 }
 
 CUresult cuGraphHostNodeGetParams(CUgraphNode hNode, CUDA_HOST_NODE_PARAMS *nodeParams) {
-	LOG_DEBUG("cuGraphHostNodeGetParams");
-	return CUDA_OVERRIDE_CALL(cuda_library_entry,cuGraphHostNodeGetParams,hNode,nodeParams);
+    LOG_DEBUG("cuGraphHostNodeGetParams");
+    return CUDA_OVERRIDE_CALL(cuda_library_entry, cuGraphHostNodeGetParams, hNode, nodeParams);
 }
 
 CUresult cuGraphHostNodeSetParams(CUgraphNode hNode, const CUDA_HOST_NODE_PARAMS *nodeParams) {
-	LOG_DEBUG("cuGraphHostNodeSetParams");
-	return CUDA_OVERRIDE_CALL(cuda_library_entry,cuGraphHostNodeSetParams,hNode,nodeParams);
+    LOG_DEBUG("cuGraphHostNodeSetParams");
+    return CUDA_OVERRIDE_CALL(cuda_library_entry, cuGraphHostNodeSetParams, hNode, nodeParams);
 }
 
-CUresult cuGraphAddChildGraphNode(CUgraphNode *phGraphNode, CUgraph hGraph, const CUgraphNode *dependencies, size_t numDependencies, CUgraph childGraph) {
-	LOG_DEBUG("cuGraphAddChildGraphNode");
-	return CUDA_OVERRIDE_CALL(cuda_library_entry,cuGraphAddChildGraphNode,phGraphNode,hGraph,dependencies,numDependencies,childGraph);
+CUresult cuGraphAddChildGraphNode(CUgraphNode *phGraphNode, CUgraph hGraph,
+                                  const CUgraphNode *dependencies, size_t numDependencies,
+                                  CUgraph childGraph) {
+    LOG_DEBUG("cuGraphAddChildGraphNode");
+    return CUDA_OVERRIDE_CALL(cuda_library_entry, cuGraphAddChildGraphNode, phGraphNode, hGraph,
+                              dependencies, numDependencies, childGraph);
 }
 
 CUresult cuGraphChildGraphNodeGetGraph(CUgraphNode hNode, CUgraph *phGraph) {
-	LOG_DEBUG("cuGraphChildGraphNodeGetGraph");
-	return CUDA_OVERRIDE_CALL(cuda_library_entry,cuGraphChildGraphNodeGetGraph,hNode,phGraph);
+    LOG_DEBUG("cuGraphChildGraphNodeGetGraph");
+    return CUDA_OVERRIDE_CALL(cuda_library_entry, cuGraphChildGraphNodeGetGraph, hNode, phGraph);
 }
 
-CUresult cuGraphAddEmptyNode(CUgraphNode *phGraphNode, CUgraph hGraph, const CUgraphNode *dependencies, size_t numDependencies) {
-	LOG_DEBUG("cuGraphAddEmptyNode");
-	return CUDA_OVERRIDE_CALL(cuda_library_entry,cuGraphAddEmptyNode,phGraphNode,hGraph,dependencies,numDependencies);
+CUresult cuGraphAddEmptyNode(CUgraphNode *phGraphNode, CUgraph hGraph,
+                             const CUgraphNode *dependencies, size_t numDependencies) {
+    LOG_DEBUG("cuGraphAddEmptyNode");
+    return CUDA_OVERRIDE_CALL(cuda_library_entry, cuGraphAddEmptyNode, phGraphNode, hGraph,
+                              dependencies, numDependencies);
 }
 
-CUresult cuGraphAddEventRecordNode(CUgraphNode *phGraphNode, CUgraph hGraph, const CUgraphNode *dependencies, size_t numDependencies, CUevent event) {
-	LOG_DEBUG("cuGraphAddEventRecordNode");
-	return CUDA_OVERRIDE_CALL(cuda_library_entry,cuGraphAddEventRecordNode,phGraphNode,hGraph,dependencies,numDependencies,event);
+CUresult cuGraphAddEventRecordNode(CUgraphNode *phGraphNode, CUgraph hGraph,
+                                   const CUgraphNode *dependencies, size_t numDependencies,
+                                   CUevent event) {
+    LOG_DEBUG("cuGraphAddEventRecordNode");
+    return CUDA_OVERRIDE_CALL(cuda_library_entry, cuGraphAddEventRecordNode, phGraphNode, hGraph,
+                              dependencies, numDependencies, event);
 }
 
 CUresult cuGraphEventRecordNodeGetEvent(CUgraphNode hNode, CUevent *event_out) {
-	LOG_DEBUG("cuGraphEventRecordNodeGetEvent");
-	return CUDA_OVERRIDE_CALL(cuda_library_entry,cuGraphEventRecordNodeGetEvent,hNode,event_out);
+    LOG_DEBUG("cuGraphEventRecordNodeGetEvent");
+    return CUDA_OVERRIDE_CALL(cuda_library_entry, cuGraphEventRecordNodeGetEvent, hNode, event_out);
 }
 
 CUresult cuGraphEventRecordNodeSetEvent(CUgraphNode hNode, CUevent event) {
-	LOG_DEBUG("cuGraphEventRecordNodeSetEvent");
-	return CUDA_OVERRIDE_CALL(cuda_library_entry,cuGraphEventRecordNodeSetEvent,hNode,event);
+    LOG_DEBUG("cuGraphEventRecordNodeSetEvent");
+    return CUDA_OVERRIDE_CALL(cuda_library_entry, cuGraphEventRecordNodeSetEvent, hNode, event);
 }
 
-CUresult cuGraphAddEventWaitNode(CUgraphNode *phGraphNode, CUgraph hGraph, const CUgraphNode *dependencies, size_t numDependencies, CUevent event) {
-	LOG_DEBUG("cuGraphAddEventWaitNode");
-	return CUDA_OVERRIDE_CALL(cuda_library_entry,cuGraphAddEventWaitNode,phGraphNode,hGraph,dependencies,numDependencies,event);
+CUresult cuGraphAddEventWaitNode(CUgraphNode *phGraphNode, CUgraph hGraph,
+                                 const CUgraphNode *dependencies, size_t numDependencies,
+                                 CUevent event) {
+    LOG_DEBUG("cuGraphAddEventWaitNode");
+    return CUDA_OVERRIDE_CALL(cuda_library_entry, cuGraphAddEventWaitNode, phGraphNode, hGraph,
+                              dependencies, numDependencies, event);
 }
 
 CUresult cuGraphEventWaitNodeGetEvent(CUgraphNode hNode, CUevent *event_out) {
-	LOG_DEBUG("cuGraphEventWaitNodeGetEvent");
-	return CUDA_OVERRIDE_CALL(cuda_library_entry,cuGraphEventWaitNodeGetEvent,hNode,event_out);
+    LOG_DEBUG("cuGraphEventWaitNodeGetEvent");
+    return CUDA_OVERRIDE_CALL(cuda_library_entry, cuGraphEventWaitNodeGetEvent, hNode, event_out);
 }
 
 CUresult cuGraphEventWaitNodeSetEvent(CUgraphNode hNode, CUevent event) {
-	LOG_DEBUG("cuGraphEventWaitNodeSetEvent");
-	return CUDA_OVERRIDE_CALL(cuda_library_entry,cuGraphEventWaitNodeSetEvent,hNode,event);
+    LOG_DEBUG("cuGraphEventWaitNodeSetEvent");
+    return CUDA_OVERRIDE_CALL(cuda_library_entry, cuGraphEventWaitNodeSetEvent, hNode, event);
 }
 
-CUresult cuGraphAddExternalSemaphoresSignalNode(CUgraphNode *phGraphNode, CUgraph hGraph, const CUgraphNode *dependencies, size_t numDependencies, const CUDA_EXT_SEM_SIGNAL_NODE_PARAMS *nodeParams) {
-	LOG_DEBUG("cuGraphAddExternalSemaphoresSignalNode");
-	return CUDA_OVERRIDE_CALL(cuda_library_entry,cuGraphAddExternalSemaphoresSignalNode,phGraphNode,hGraph,dependencies,numDependencies,nodeParams);
+CUresult cuGraphAddExternalSemaphoresSignalNode(CUgraphNode *phGraphNode, CUgraph hGraph,
+                                                const CUgraphNode *dependencies,
+                                                size_t numDependencies,
+                                                const CUDA_EXT_SEM_SIGNAL_NODE_PARAMS *nodeParams) {
+    LOG_DEBUG("cuGraphAddExternalSemaphoresSignalNode");
+    return CUDA_OVERRIDE_CALL(cuda_library_entry, cuGraphAddExternalSemaphoresSignalNode,
+                              phGraphNode, hGraph, dependencies, numDependencies, nodeParams);
 }
 
-CUresult cuGraphExternalSemaphoresSignalNodeGetParams(CUgraphNode hNode, CUDA_EXT_SEM_SIGNAL_NODE_PARAMS *params_out) {
-	LOG_DEBUG("cuGraphExternalSemaphoresSignalNodeGetParams");
-	return CUDA_OVERRIDE_CALL(cuda_library_entry,cuGraphExternalSemaphoresSignalNodeGetParams,hNode,params_out);
+CUresult cuGraphExternalSemaphoresSignalNodeGetParams(CUgraphNode hNode,
+                                                      CUDA_EXT_SEM_SIGNAL_NODE_PARAMS *params_out) {
+    LOG_DEBUG("cuGraphExternalSemaphoresSignalNodeGetParams");
+    return CUDA_OVERRIDE_CALL(cuda_library_entry, cuGraphExternalSemaphoresSignalNodeGetParams,
+                              hNode, params_out);
 }
 
-CUresult cuGraphExternalSemaphoresSignalNodeSetParams(CUgraphNode hNode, const CUDA_EXT_SEM_SIGNAL_NODE_PARAMS *nodeParams) {
-	LOG_DEBUG("cuGraphExternalSemaphoresSignalNodeSetParams");
-	return CUDA_OVERRIDE_CALL(cuda_library_entry,cuGraphExternalSemaphoresSignalNodeSetParams,hNode,nodeParams);
+CUresult cuGraphExternalSemaphoresSignalNodeSetParams(
+    CUgraphNode hNode, const CUDA_EXT_SEM_SIGNAL_NODE_PARAMS *nodeParams) {
+    LOG_DEBUG("cuGraphExternalSemaphoresSignalNodeSetParams");
+    return CUDA_OVERRIDE_CALL(cuda_library_entry, cuGraphExternalSemaphoresSignalNodeSetParams,
+                              hNode, nodeParams);
 }
 
-CUresult cuGraphAddExternalSemaphoresWaitNode(CUgraphNode *phGraphNode, CUgraph hGraph, const CUgraphNode *dependencies, size_t numDependencies, const CUDA_EXT_SEM_WAIT_NODE_PARAMS *nodeParams) {
-	LOG_DEBUG("cuGraphAddExternalSemaphoresWaitNode");
-	return CUDA_OVERRIDE_CALL(cuda_library_entry,cuGraphAddExternalSemaphoresWaitNode,phGraphNode,hGraph,dependencies,numDependencies,nodeParams);
+CUresult cuGraphAddExternalSemaphoresWaitNode(CUgraphNode *phGraphNode, CUgraph hGraph,
+                                              const CUgraphNode *dependencies,
+                                              size_t numDependencies,
+                                              const CUDA_EXT_SEM_WAIT_NODE_PARAMS *nodeParams) {
+    LOG_DEBUG("cuGraphAddExternalSemaphoresWaitNode");
+    return CUDA_OVERRIDE_CALL(cuda_library_entry, cuGraphAddExternalSemaphoresWaitNode, phGraphNode,
+                              hGraph, dependencies, numDependencies, nodeParams);
 }
 
-CUresult cuGraphExternalSemaphoresWaitNodeGetParams(CUgraphNode hNode, CUDA_EXT_SEM_WAIT_NODE_PARAMS *params_out) {
-	LOG_DEBUG("cuGraphExternalSemaphoresWaitNodeGetParams");
-	return CUDA_OVERRIDE_CALL(cuda_library_entry,cuGraphExternalSemaphoresWaitNodeGetParams,hNode,params_out);
+CUresult cuGraphExternalSemaphoresWaitNodeGetParams(CUgraphNode hNode,
+                                                    CUDA_EXT_SEM_WAIT_NODE_PARAMS *params_out) {
+    LOG_DEBUG("cuGraphExternalSemaphoresWaitNodeGetParams");
+    return CUDA_OVERRIDE_CALL(cuda_library_entry, cuGraphExternalSemaphoresWaitNodeGetParams, hNode,
+                              params_out);
 }
 
-CUresult cuGraphExternalSemaphoresWaitNodeSetParams(CUgraphNode hNode, const CUDA_EXT_SEM_WAIT_NODE_PARAMS *nodeParams) {
-	LOG_DEBUG("cuGraphExternalSemaphoresWaitNodeSetParams");
-	return CUDA_OVERRIDE_CALL(cuda_library_entry,cuGraphExternalSemaphoresWaitNodeSetParams,hNode,nodeParams);
+CUresult cuGraphExternalSemaphoresWaitNodeSetParams(
+    CUgraphNode hNode, const CUDA_EXT_SEM_WAIT_NODE_PARAMS *nodeParams) {
+    LOG_DEBUG("cuGraphExternalSemaphoresWaitNodeSetParams");
+    return CUDA_OVERRIDE_CALL(cuda_library_entry, cuGraphExternalSemaphoresWaitNodeSetParams, hNode,
+                              nodeParams);
 }
 
-CUresult cuGraphExecExternalSemaphoresSignalNodeSetParams(CUgraphExec hGraphExec, CUgraphNode hNode, const CUDA_EXT_SEM_SIGNAL_NODE_PARAMS *nodeParams) {
-	LOG_DEBUG("cuGraphExecExternalSemaphoresSignalNodeSetParams");
-	return CUDA_OVERRIDE_CALL(cuda_library_entry,cuGraphExecExternalSemaphoresSignalNodeSetParams,hGraphExec,hNode,nodeParams);
+CUresult cuGraphExecExternalSemaphoresSignalNodeSetParams(
+    CUgraphExec hGraphExec, CUgraphNode hNode, const CUDA_EXT_SEM_SIGNAL_NODE_PARAMS *nodeParams) {
+    LOG_DEBUG("cuGraphExecExternalSemaphoresSignalNodeSetParams");
+    return CUDA_OVERRIDE_CALL(cuda_library_entry, cuGraphExecExternalSemaphoresSignalNodeSetParams,
+                              hGraphExec, hNode, nodeParams);
 }
 
-CUresult cuGraphExecExternalSemaphoresWaitNodeSetParams(CUgraphExec hGraphExec, CUgraphNode hNode, const CUDA_EXT_SEM_WAIT_NODE_PARAMS *nodeParams) {
-	LOG_DEBUG("cuGraphExecExternalSemaphoresWaitNodeSetParams");
-	return CUDA_OVERRIDE_CALL(cuda_library_entry,cuGraphExecExternalSemaphoresWaitNodeSetParams,hGraphExec,hNode,nodeParams);
+CUresult cuGraphExecExternalSemaphoresWaitNodeSetParams(
+    CUgraphExec hGraphExec, CUgraphNode hNode, const CUDA_EXT_SEM_WAIT_NODE_PARAMS *nodeParams) {
+    LOG_DEBUG("cuGraphExecExternalSemaphoresWaitNodeSetParams");
+    return CUDA_OVERRIDE_CALL(cuda_library_entry, cuGraphExecExternalSemaphoresWaitNodeSetParams,
+                              hGraphExec, hNode, nodeParams);
 }
 
 CUresult cuGraphClone(CUgraph *phGraphClone, CUgraph originalGraph) {
-	LOG_DEBUG("cuGraphClone");
-	return CUDA_OVERRIDE_CALL(cuda_library_entry,cuGraphClone,phGraphClone,originalGraph);
+    LOG_DEBUG("cuGraphClone");
+    return CUDA_OVERRIDE_CALL(cuda_library_entry, cuGraphClone, phGraphClone, originalGraph);
 }
 
-CUresult cuGraphNodeFindInClone(CUgraphNode *phNode, CUgraphNode hOriginalNode, CUgraph hClonedGraph) {
-	LOG_DEBUG("cuGraphNodeFindInClone");
-	return CUDA_OVERRIDE_CALL(cuda_library_entry,cuGraphNodeFindInClone,phNode,hOriginalNode,hClonedGraph);
+CUresult cuGraphNodeFindInClone(CUgraphNode *phNode, CUgraphNode hOriginalNode,
+                                CUgraph hClonedGraph) {
+    LOG_DEBUG("cuGraphNodeFindInClone");
+    return CUDA_OVERRIDE_CALL(cuda_library_entry, cuGraphNodeFindInClone, phNode, hOriginalNode,
+                              hClonedGraph);
 }
 
 CUresult cuGraphNodeGetType(CUgraphNode hNode, CUgraphNodeType *type) {
-	LOG_DEBUG("cuGraphNodeGetType");
-	return CUDA_OVERRIDE_CALL(cuda_library_entry,cuGraphNodeGetType,hNode,type);
+    LOG_DEBUG("cuGraphNodeGetType");
+    return CUDA_OVERRIDE_CALL(cuda_library_entry, cuGraphNodeGetType, hNode, type);
 }
 
-CUresult cuGraphGetNodes(CUgraph hGraph, CUgraphNode *nodes, size_t *numNodes){
-	LOG_DEBUG("cuGraphGetNodes");
-	return CUDA_OVERRIDE_CALL(cuda_library_entry,cuGraphGetNodes,hGraph,nodes,numNodes);
+CUresult cuGraphGetNodes(CUgraph hGraph, CUgraphNode *nodes, size_t *numNodes) {
+    LOG_DEBUG("cuGraphGetNodes");
+    return CUDA_OVERRIDE_CALL(cuda_library_entry, cuGraphGetNodes, hGraph, nodes, numNodes);
 }
 
 CUresult cuGraphGetRootNodes(CUgraph hGraph, CUgraphNode *rootNodes, size_t *numRootNodes) {
-	LOG_DEBUG("cuGraphGetRootNodes");
-	return CUDA_OVERRIDE_CALL(cuda_library_entry,cuGraphGetRootNodes,hGraph,rootNodes,numRootNodes);
+    LOG_DEBUG("cuGraphGetRootNodes");
+    return CUDA_OVERRIDE_CALL(cuda_library_entry, cuGraphGetRootNodes, hGraph, rootNodes,
+                              numRootNodes);
 }
 
 #if CUDA_VERSION < 13000
 CUresult cuGraphGetEdges(CUgraph hGraph, CUgraphNode *from, CUgraphNode *to, size_t *numEdges) {
-	LOG_DEBUG("cuGraphGetEdges");
-	return CUDA_OVERRIDE_CALL(cuda_library_entry,cuGraphGetEdges,hGraph,from,to,numEdges);
+    LOG_DEBUG("cuGraphGetEdges");
+    return CUDA_OVERRIDE_CALL(cuda_library_entry, cuGraphGetEdges, hGraph, from, to, numEdges);
 }
 #endif
 
-CUresult cuGraphGetEdges_v2(CUgraph hGraph, CUgraphNode *from, CUgraphNode *to, CUgraphEdgeData *edgeData,
-                            size_t *numEdges) {
+CUresult cuGraphGetEdges_v2(CUgraph hGraph, CUgraphNode *from, CUgraphNode *to,
+                            CUgraphEdgeData *edgeData, size_t *numEdges) {
     LOG_DEBUG("cuGraphGetEdges_v2");
-    return CUDA_OVERRIDE_CALL(cuda_library_entry, cuGraphGetEdges_v2, hGraph, from, to, edgeData, numEdges);
+    return CUDA_OVERRIDE_CALL(cuda_library_entry, cuGraphGetEdges_v2, hGraph, from, to, edgeData,
+                              numEdges);
 }
 
 #if CUDA_VERSION < 13000
-CUresult cuGraphNodeGetDependencies(CUgraphNode hNode, CUgraphNode *dependencies, size_t *numDependencies) {
-	LOG_DEBUG("cuGraphNodeGetDependencies");
-	return CUDA_OVERRIDE_CALL(cuda_library_entry,cuGraphNodeGetDependencies,hNode,dependencies,numDependencies);
+CUresult cuGraphNodeGetDependencies(CUgraphNode hNode, CUgraphNode *dependencies,
+                                    size_t *numDependencies) {
+    LOG_DEBUG("cuGraphNodeGetDependencies");
+    return CUDA_OVERRIDE_CALL(cuda_library_entry, cuGraphNodeGetDependencies, hNode, dependencies,
+                              numDependencies);
 }
 #endif
 
-CUresult cuGraphNodeGetDependencies_v2(CUgraphNode hNode, CUgraphNode *dependencies, CUgraphEdgeData *edgeData,
-                                       size_t *numDependencies) {
+CUresult cuGraphNodeGetDependencies_v2(CUgraphNode hNode, CUgraphNode *dependencies,
+                                       CUgraphEdgeData *edgeData, size_t *numDependencies) {
     LOG_DEBUG("cuGraphNodeGetDependencies_v2");
-    return CUDA_OVERRIDE_CALL(cuda_library_entry, cuGraphNodeGetDependencies_v2, hNode, dependencies, edgeData,
-                              numDependencies);
+    return CUDA_OVERRIDE_CALL(cuda_library_entry, cuGraphNodeGetDependencies_v2, hNode,
+                              dependencies, edgeData, numDependencies);
 }
 
 #if CUDA_VERSION < 13000
-CUresult cuGraphNodeGetDependentNodes(CUgraphNode hNode, CUgraphNode *dependentNodes, size_t *numDependentNodes) {
-	LOG_DEBUG("cuGraphNodeGetDependentNodes");
-	return CUDA_OVERRIDE_CALL(cuda_library_entry,cuGraphNodeGetDependentNodes,hNode,dependentNodes,numDependentNodes);
+CUresult cuGraphNodeGetDependentNodes(CUgraphNode hNode, CUgraphNode *dependentNodes,
+                                      size_t *numDependentNodes) {
+    LOG_DEBUG("cuGraphNodeGetDependentNodes");
+    return CUDA_OVERRIDE_CALL(cuda_library_entry, cuGraphNodeGetDependentNodes, hNode,
+                              dependentNodes, numDependentNodes);
 }
 #endif
 
-CUresult cuGraphNodeGetDependentNodes_v2(CUgraphNode hNode, CUgraphNode *dependentNodes, CUgraphEdgeData *edgeData,
-                                         size_t *numDependentNodes) {
+CUresult cuGraphNodeGetDependentNodes_v2(CUgraphNode hNode, CUgraphNode *dependentNodes,
+                                         CUgraphEdgeData *edgeData, size_t *numDependentNodes) {
     LOG_DEBUG("cuGraphNodeGetDependentNodes_v2");
-    return CUDA_OVERRIDE_CALL(cuda_library_entry, cuGraphNodeGetDependentNodes_v2, hNode, dependentNodes, edgeData,
-                              numDependentNodes);
+    return CUDA_OVERRIDE_CALL(cuda_library_entry, cuGraphNodeGetDependentNodes_v2, hNode,
+                              dependentNodes, edgeData, numDependentNodes);
 }
 
 #if CUDA_VERSION < 13000
-CUresult cuGraphAddDependencies(CUgraph hGraph, const CUgraphNode *from, const CUgraphNode *to, size_t numDependencies) {
-	LOG_DEBUG("cuGraphAddDependencies");
-	return CUDA_OVERRIDE_CALL(cuda_library_entry,cuGraphAddDependencies,hGraph,from,to,numDependencies);
+CUresult cuGraphAddDependencies(CUgraph hGraph, const CUgraphNode *from, const CUgraphNode *to,
+                                size_t numDependencies) {
+    LOG_DEBUG("cuGraphAddDependencies");
+    return CUDA_OVERRIDE_CALL(cuda_library_entry, cuGraphAddDependencies, hGraph, from, to,
+                              numDependencies);
 }
 #endif
 
 CUresult cuGraphAddDependencies_v2(CUgraph hGraph, const CUgraphNode *from, const CUgraphNode *to,
                                    const CUgraphEdgeData *edgeData, size_t numDependencies) {
     LOG_DEBUG("cuGraphAddDependencies_v2");
-    return CUDA_OVERRIDE_CALL(cuda_library_entry, cuGraphAddDependencies_v2, hGraph, from, to, edgeData,
-                              numDependencies);
+    return CUDA_OVERRIDE_CALL(cuda_library_entry, cuGraphAddDependencies_v2, hGraph, from, to,
+                              edgeData, numDependencies);
 }
 
 #if CUDA_VERSION < 13000
-CUresult cuGraphRemoveDependencies(CUgraph hGraph, const CUgraphNode *from, const CUgraphNode *to, size_t numDependencies) {
-	LOG_DEBUG("cuGraphRemoveDependencies");
-	return CUDA_OVERRIDE_CALL(cuda_library_entry,cuGraphRemoveDependencies,hGraph,from,to,numDependencies);
+CUresult cuGraphRemoveDependencies(CUgraph hGraph, const CUgraphNode *from, const CUgraphNode *to,
+                                   size_t numDependencies) {
+    LOG_DEBUG("cuGraphRemoveDependencies");
+    return CUDA_OVERRIDE_CALL(cuda_library_entry, cuGraphRemoveDependencies, hGraph, from, to,
+                              numDependencies);
 }
 #endif
 
-CUresult cuGraphRemoveDependencies_v2(CUgraph hGraph, const CUgraphNode *from, const CUgraphNode *to,
-                                      const CUgraphEdgeData *edgeData, size_t numDependencies) {
+CUresult cuGraphRemoveDependencies_v2(CUgraph hGraph, const CUgraphNode *from,
+                                      const CUgraphNode *to, const CUgraphEdgeData *edgeData,
+                                      size_t numDependencies) {
     LOG_DEBUG("cuGraphRemoveDependencies_v2");
-    return CUDA_OVERRIDE_CALL(cuda_library_entry, cuGraphRemoveDependencies_v2, hGraph, from, to, edgeData,
-                              numDependencies);
+    return CUDA_OVERRIDE_CALL(cuda_library_entry, cuGraphRemoveDependencies_v2, hGraph, from, to,
+                              edgeData, numDependencies);
 }
 
 CUresult cuGraphDestroyNode(CUgraphNode hNode) {
-	LOG_DEBUG("cuGraphDestroyNode");
-	return CUDA_OVERRIDE_CALL(cuda_library_entry,cuGraphDestroyNode,hNode);
+    LOG_DEBUG("cuGraphDestroyNode");
+    return CUDA_OVERRIDE_CALL(cuda_library_entry, cuGraphDestroyNode, hNode);
 }
 
-CUresult cuGraphInstantiate(CUgraphExec *phGraphExec, CUgraph hGraph, CUgraphNode *phErrorNode, char *logBuffer, size_t bufferSize) {
-	LOG_DEBUG("cuGraphInstantiate");
-	return CUDA_OVERRIDE_CALL(cuda_library_entry,cuGraphInstantiate,phGraphExec,hGraph,phErrorNode,logBuffer,bufferSize);
+CUresult cuGraphInstantiate(CUgraphExec *phGraphExec, CUgraph hGraph, CUgraphNode *phErrorNode,
+                            char *logBuffer, size_t bufferSize) {
+    LOG_DEBUG("cuGraphInstantiate");
+    return CUDA_OVERRIDE_CALL(cuda_library_entry, cuGraphInstantiate, phGraphExec, hGraph,
+                              phErrorNode, logBuffer, bufferSize);
 }
 
-CUresult cuGraphInstantiateWithFlags(CUgraphExec *phGraphExec, CUgraph hGraph, unsigned long long flags) {
-	LOG_DEBUG("cuGraphInstantiateWithFlags");
-	return CUDA_OVERRIDE_CALL(cuda_library_entry,cuGraphInstantiateWithFlags,phGraphExec,hGraph,flags);
+CUresult cuGraphInstantiateWithFlags(CUgraphExec *phGraphExec, CUgraph hGraph,
+                                     unsigned long long flags) {
+    LOG_DEBUG("cuGraphInstantiateWithFlags");
+    return CUDA_OVERRIDE_CALL(cuda_library_entry, cuGraphInstantiateWithFlags, phGraphExec, hGraph,
+                              flags);
 }
 
 CUresult cuGraphUpload(CUgraphExec hGraphExec, CUstream hStream) {
-	LOG_DEBUG("cuGraphUpload");
-	return CUDA_OVERRIDE_CALL(cuda_library_entry,cuGraphUpload,hGraphExec,hStream);
+    LOG_DEBUG("cuGraphUpload");
+    return CUDA_OVERRIDE_CALL(cuda_library_entry, cuGraphUpload, hGraphExec, hStream);
 }
 
 CUresult cuGraphLaunch(CUgraphExec hGraphExec, CUstream hStream) {
-	LOG_DEBUG("cuGraphLaunch");
-	return CUDA_OVERRIDE_CALL(cuda_library_entry,cuGraphLaunch,hGraphExec,hStream);
+    LOG_DEBUG("cuGraphLaunch");
+    return CUDA_OVERRIDE_CALL(cuda_library_entry, cuGraphLaunch, hGraphExec, hStream);
 }
 
 CUresult cuGraphExecDestroy(CUgraphExec hGraphExec) {
-	LOG_DEBUG("cuGraphExecDestroy");
-	return CUDA_OVERRIDE_CALL(cuda_library_entry,cuGraphExecDestroy,hGraphExec);
+    LOG_DEBUG("cuGraphExecDestroy");
+    return CUDA_OVERRIDE_CALL(cuda_library_entry, cuGraphExecDestroy, hGraphExec);
 }
 
 CUresult cuGraphDestroy(CUgraph hGraph) {
-	LOG_DEBUG("cuGraphDestroy");
-	return CUDA_OVERRIDE_CALL(cuda_library_entry,cuGraphDestroy,hGraph);
+    LOG_DEBUG("cuGraphDestroy");
+    return CUDA_OVERRIDE_CALL(cuda_library_entry, cuGraphDestroy, hGraph);
 }
diff --git a/src/cuda/hook.c b/src/cuda/hook.c
index d2b143a9..3a81e770 100644
--- a/src/cuda/hook.c
+++ b/src/cuda/hook.c
@@ -1,14 +1,14 @@
-#include "include/libcuda_hook.h"
 #include <string.h>
+
+#include "include/libcuda_hook.h"
 #include "include/libvgpu.h"
 #include "include/multi_func_hook.h"
 
-
-typedef void* (*fp_dlsym)(void*, const char*);
+typedef void *(*fp_dlsym)(void *, const char *);
 extern fp_dlsym real_dlsym;
 
 cuda_entry_t cuda_library_entry[] = {
-    /* Init Part    */ 
+    /* Init Part    */
     {.name = "cuInit"},
     /* Device Part */
     {.name = "cuDeviceGetAttribute"},
@@ -236,9 +236,9 @@ cuda_entry_t cuda_library_entry[] = {
 
 int prior_function(char tmp[500]) {
     char *pos = tmp + strlen(tmp) - 3;
-    if (pos[0]=='_' && pos[1]=='v') {
-        if (pos[2]=='2')
-            pos[0]='\0';
+    if (pos[0] == '_' && pos[1] == 'v') {
+        if (pos[2] == '2')
+            pos[0] = '\0';
         else
             pos[2]--;
         return 1;
@@ -254,7 +254,7 @@ void load_cuda_libraries() {
 
     LOG_INFO("Start hijacking");
 
-    snprintf(cuda_filename, FILENAME_MAX - 1, "%s","libcuda.so.1");
+    snprintf(cuda_filename, FILENAME_MAX - 1, "%s", "libcuda.so.1");
     cuda_filename[FILENAME_MAX - 1] = '\0';
 
     table = dlopen(cuda_filename, RTLD_NOW | RTLD_NODELETE);
@@ -263,55 +263,54 @@ void load_cuda_libraries() {
     }
 
     for (i = 0; i < CUDA_ENTRY_END; i++) {
-        LOG_DEBUG("LOADING %s %d",cuda_library_entry[i].name,i);
+        LOG_DEBUG("LOADING %s %d", cuda_library_entry[i].name, i);
         cuda_library_entry[i].fn_ptr = real_dlsym(table, cuda_library_entry[i].name);
         if (!cuda_library_entry[i].fn_ptr) {
-            cuda_library_entry[i].fn_ptr=real_dlsym(RTLD_NEXT,cuda_library_entry[i].name);
-            if (!cuda_library_entry[i].fn_ptr){
-                LOG_INFO("can't find function %s in %s", cuda_library_entry[i].name,cuda_filename);
-                memset(tmpfunc,0,500);
-                strcpy(tmpfunc,cuda_library_entry[i].name);
+            cuda_library_entry[i].fn_ptr = real_dlsym(RTLD_NEXT, cuda_library_entry[i].name);
+            if (!cuda_library_entry[i].fn_ptr) {
+                LOG_INFO("can't find function %s in %s", cuda_library_entry[i].name, cuda_filename);
+                memset(tmpfunc, 0, 500);
+                strcpy(tmpfunc, cuda_library_entry[i].name);
                 while (prior_function(tmpfunc)) {
-                    cuda_library_entry[i].fn_ptr=real_dlsym(RTLD_NEXT,tmpfunc);
+                    cuda_library_entry[i].fn_ptr = real_dlsym(RTLD_NEXT, tmpfunc);
                     if (cuda_library_entry[i].fn_ptr) {
-                        LOG_INFO("found prior function %s",tmpfunc);
+                        LOG_INFO("found prior function %s", tmpfunc);
                         break;
-                    } 
+                    }
                 }
             }
         }
     }
     LOG_INFO("loaded_cuda_libraries");
-    if (cuda_library_entry[0].fn_ptr==NULL){
+    if (cuda_library_entry[0].fn_ptr == NULL) {
         LOG_WARN("is NULL");
     }
     dlclose(table);
 }
 
-
 // find func by cuda version
-const char* get_real_func_name(const char* base_name,int cuda_version) {
-  int i = 0;
-  for (i = 0; i < sizeof(g_func_map)/sizeof(g_func_map[0]); ++i) {
-    CudaFuncMapEntry *entry = &g_func_map[i];
-    // check fun name
-    if (strcmp(entry->func_name, base_name) != 0) continue;
-    // check cuda version
-    if (cuda_version >= entry->min_ver && cuda_version <= entry->max_ver) {
-      return entry->real_name;
+const char *get_real_func_name(const char *base_name, int cuda_version) {
+    size_t i;  // unsigned index: matches the size_t bound computed from sizeof below
+    for (i = 0; i < sizeof(g_func_map) / sizeof(g_func_map[0]); ++i) {
+        CudaFuncMapEntry *entry = &g_func_map[i];
+        // skip entries registered under a different base function name
+        if (strcmp(entry->func_name, base_name) != 0) continue;
+        // accept the entry only if cuda_version lies within [min_ver, max_ver]
+        if (cuda_version >= entry->min_ver && cuda_version <= entry->max_ver) {
+            return entry->real_name;
+        }
     }
-  }
-  return NULL; // if not found
+    return NULL;  // if not found
 }
 
-void* find_real_symbols_in_table(const char *symbol) {
-  void *pfn;
-  //this symbol always has suffix like _v2,_v3
-  pfn = __dlsym_hook_section(NULL,symbol);
-  if (pfn!=NULL) {
-    return pfn;
-  }
-  return NULL;
+void *find_real_symbols_in_table(const char *symbol) {
+    // `symbol` is expected to already carry its version suffix (e.g. _v2, _v3),
+    // so it is looked up verbatim, with no suffix probing.
+    void *resolved = __dlsym_hook_section(NULL, symbol);
+    if (resolved == NULL) {
+        return NULL;
+    }
+    return resolved;
 }
 
 void *find_symbols_in_table(const char *symbol) {
@@ -321,101 +320,110 @@ void *find_symbols_in_table(const char *symbol) {
     if (strncmp(symbol, "cuGraph", 7) == 0) {
         return NULL;
     }
-    strcpy(symbol_v,symbol);
-    strcat(symbol_v,"_v3");
-    pfn = __dlsym_hook_section(NULL,symbol_v);
-    if (pfn!=NULL) {
+    strcpy(symbol_v, symbol);
+    strcat(symbol_v, "_v3");
+    pfn = __dlsym_hook_section(NULL, symbol_v);
+    if (pfn != NULL) {
         return pfn;
     }
-    symbol_v[strlen(symbol_v)-1]='2';
-    pfn = __dlsym_hook_section(NULL,symbol_v);
-    if (pfn!=NULL) {
+    symbol_v[strlen(symbol_v) - 1] = '2';
+    pfn = __dlsym_hook_section(NULL, symbol_v);
+    if (pfn != NULL) {
         return pfn;
     }
-    pfn = __dlsym_hook_section(NULL,symbol);
-    if (pfn!=NULL) {
+    pfn = __dlsym_hook_section(NULL, symbol);
+    if (pfn != NULL) {
         return pfn;
     }
     return NULL;
 }
 
-void *find_symbols_in_table_by_cudaversion(const char *symbol,int  cudaVersion) {
-  void *pfn;
-  const char *real_symbol;
-  real_symbol = get_real_func_name(symbol,cudaVersion);
-  if (real_symbol == NULL) {
-    // if not find in multi func version def, use origin logic
-    pfn = find_symbols_in_table(symbol);
-  } else {
-    pfn = find_real_symbols_in_table(real_symbol);
-  }
-  return pfn;
+void *find_symbols_in_table_by_cudaversion(const char *symbol, int cudaVersion) {
+    void *pfn;
+    const char *real_symbol;
+    real_symbol = get_real_func_name(symbol, cudaVersion);
+    if (real_symbol == NULL) {
+        // not in the multi-version function map: fall back to the generic suffix lookup
+        pfn = find_symbols_in_table(symbol);
+    } else {
+        pfn = find_real_symbols_in_table(real_symbol);
+    }
+    return pfn;
 }
 
+CUresult (*cuGetProcAddress_real)(const char *symbol, void **pfn, int cudaVersion,
+                                  cuuint64_t flags);
 
-CUresult (*cuGetProcAddress_real) ( const char* symbol, void** pfn, int  cudaVersion, cuuint64_t flags ); 
-
-CUresult _cuGetProcAddress ( const char* symbol, void** pfn, int  cudaVersion, cuuint64_t flags ) {
-    LOG_INFO("into _cuGetProcAddress symbol=%s:%d",symbol,cudaVersion);
+CUresult _cuGetProcAddress(const char *symbol, void **pfn, int cudaVersion, cuuint64_t flags) {
+    LOG_INFO("into _cuGetProcAddress symbol=%s:%d", symbol, cudaVersion);
     *pfn = find_symbols_in_table_by_cudaversion(symbol, cudaVersion);
-    if (*pfn==NULL){
-        CUresult res = CUDA_OVERRIDE_CALL(cuda_library_entry,cuGetProcAddress,symbol,pfn,cudaVersion,flags);
+    if (*pfn == NULL) {
+        CUresult res = CUDA_OVERRIDE_CALL(cuda_library_entry, cuGetProcAddress, symbol, pfn,
+                                          cudaVersion, flags);
         return res;
-    }else{
-        LOG_DEBUG("found symbol %s",symbol);
+    } else {
+        LOG_DEBUG("found symbol %s", symbol);
         return CUDA_SUCCESS;
     }
 }
 
-CUresult cuGetProcAddress ( const char* symbol, void** pfn, int  cudaVersion, cuuint64_t flags ) {
-    LOG_INFO("into cuGetProcAddress symbol=%s:%d",symbol,cudaVersion);
+CUresult cuGetProcAddress(const char *symbol, void **pfn, int cudaVersion, cuuint64_t flags) {
+    LOG_INFO("into cuGetProcAddress symbol=%s:%d", symbol, cudaVersion);
     *pfn = find_symbols_in_table_by_cudaversion(symbol, cudaVersion);
-    if (strcmp(symbol,"cuGetProcAddress")==0) {
-        CUresult res = CUDA_OVERRIDE_CALL(cuda_library_entry,cuGetProcAddress,symbol,pfn,cudaVersion,flags); 
-        if (res==CUDA_SUCCESS) {
-            cuGetProcAddress_real=*pfn;
-            *pfn=_cuGetProcAddress;
+    if (strcmp(symbol, "cuGetProcAddress") == 0) {
+        CUresult res = CUDA_OVERRIDE_CALL(cuda_library_entry, cuGetProcAddress, symbol, pfn,
+                                          cudaVersion, flags);
+        if (res == CUDA_SUCCESS) {
+            cuGetProcAddress_real = *pfn;
+            *pfn = _cuGetProcAddress;
         }
         return res;
     }
-    if (*pfn==NULL){
-        CUresult res = CUDA_OVERRIDE_CALL(cuda_library_entry,cuGetProcAddress,symbol,pfn,cudaVersion,flags);
+    if (*pfn == NULL) {
+        CUresult res = CUDA_OVERRIDE_CALL(cuda_library_entry, cuGetProcAddress, symbol, pfn,
+                                          cudaVersion, flags);
         return res;
-    }else{
-        LOG_DEBUG("found symbol %s",symbol);
+    } else {
+        LOG_DEBUG("found symbol %s", symbol);
         return CUDA_SUCCESS;
     }
 }
 
-CUresult _cuGetProcAddress_v2(const char *symbol, void **pfn, int cudaVersion, cuuint64_t flags, CUdriverProcAddressQueryResult *symbolStatus){
-    LOG_INFO("into _cuGetProcAddress_v2 symbol=%s:%d",symbol,cudaVersion);
+CUresult _cuGetProcAddress_v2(const char *symbol, void **pfn, int cudaVersion, cuuint64_t flags,
+                              CUdriverProcAddressQueryResult *symbolStatus) {
+    LOG_INFO("into _cuGetProcAddress_v2 symbol=%s:%d", symbol, cudaVersion);
     *pfn = find_symbols_in_table_by_cudaversion(symbol, cudaVersion);
-    if (*pfn==NULL){
-        CUresult res = CUDA_OVERRIDE_CALL(cuda_library_entry,cuGetProcAddress_v2,symbol,pfn,cudaVersion,flags,symbolStatus);
+    if (*pfn == NULL) {
+        CUresult res = CUDA_OVERRIDE_CALL(cuda_library_entry, cuGetProcAddress_v2, symbol, pfn,
+                                          cudaVersion, flags, symbolStatus);
         return res;
-    }else{
-        LOG_DEBUG("found symbol %s",symbol);
+    } else {
+        LOG_DEBUG("found symbol %s", symbol);
         return CUDA_SUCCESS;
-    } 
+    }
 }
 
-CUresult cuGetProcAddress_v2(const char *symbol, void **pfn, int cudaVersion, cuuint64_t flags, CUdriverProcAddressQueryResult *symbolStatus){
-    LOG_INFO("into cuGetProcAddress_v2 symbol=%s:%d",symbol,cudaVersion);
+CUresult cuGetProcAddress_v2(const char *symbol, void **pfn, int cudaVersion, cuuint64_t flags,
+                             CUdriverProcAddressQueryResult *symbolStatus) {
+    LOG_INFO("into cuGetProcAddress_v2 symbol=%s:%d", symbol, cudaVersion);
     *pfn = find_symbols_in_table_by_cudaversion(symbol, cudaVersion);
-    if (strcmp(symbol,"cuGetProcAddress_v2")==0) {
-        CUresult res = CUDA_OVERRIDE_CALL(cuda_library_entry,cuGetProcAddress_v2,symbol,pfn,cudaVersion,flags,symbolStatus); 
-        if (res==CUDA_SUCCESS) {
-            cuGetProcAddress_real=*pfn;
-            *pfn=_cuGetProcAddress_v2;
+    if (strcmp(symbol, "cuGetProcAddress_v2") == 0) {
+        CUresult res = CUDA_OVERRIDE_CALL(cuda_library_entry, cuGetProcAddress_v2, symbol, pfn,
+                                          cudaVersion, flags, symbolStatus);
+        if (res == CUDA_SUCCESS) {
+            cuGetProcAddress_real = *pfn;
+            *pfn = _cuGetProcAddress_v2;
         }
         return res;
     }
-    if (*pfn==NULL){
-        CUresult res = CUDA_OVERRIDE_CALL(cuda_library_entry,cuGetProcAddress_v2,symbol,pfn,cudaVersion,flags,symbolStatus);
+    if (*pfn == NULL) {
+        CUresult res = CUDA_OVERRIDE_CALL(cuda_library_entry, cuGetProcAddress_v2, symbol, pfn,
+                                          cudaVersion, flags, symbolStatus);
         return res;
-    }else{
-        LOG_DEBUG("found symbol %s",symbol);
+    } else {
+        LOG_DEBUG("found symbol %s", symbol);
         void *optr;
-        return CUDA_OVERRIDE_CALL(cuda_library_entry,cuGetProcAddress_v2,symbol,&optr,cudaVersion,flags,symbolStatus);
-    } 
+        return CUDA_OVERRIDE_CALL(cuda_library_entry, cuGetProcAddress_v2, symbol, &optr,
+                                  cudaVersion, flags, symbolStatus);
+    }
 }
diff --git a/src/cuda/memory.c b/src/cuda/memory.c
old mode 100755
new mode 100644
index 00857f30..3375de49
--- a/src/cuda/memory.c
+++ b/src/cuda/memory.c
@@ -39,25 +39,25 @@ const size_t cuarray_format_bytes[33] = {
     0,  // 0x1c
     0,  // 0x1d
     0,  // 0x1e
-    0,  // 0x1f       
+    0,  // 0x1f
     4   // CU_AD_FORMAT_FLOAT = 0x20
 };
 
-extern size_t round_up(size_t size,size_t align);
+extern size_t round_up(size_t size, size_t align);
 extern void rate_limiter(int grids, int blocks);
 
 int check_oom() {
-//    return 0;
+    //    return 0;
     CUdevice dev;
     CHECK_DRV_API(cuCtxGetDevice(&dev));
-    return oom_check(dev,0);
+    return oom_check(dev, 0);
 }
 
-uint64_t compute_3d_array_alloc_bytes(const CUDA_ARRAY3D_DESCRIPTOR* desc) {
-    if (desc==NULL) {
+uint64_t compute_3d_array_alloc_bytes(const CUDA_ARRAY3D_DESCRIPTOR *desc) {
+    if (desc == NULL) {
         LOG_WARN("compute_3d_array_alloc_bytes desc is null");
-    }else{
-        LOG_DEBUG("compute_3d_array_alloc_bytes height=%ld width=%ld",desc->Height,desc->Width);
+    } else {
+        LOG_DEBUG("compute_3d_array_alloc_bytes height=%ld width=%ld", desc->Height, desc->Width);
     }
     uint64_t bytes = desc->Width * desc->NumChannels;
     if (desc->Height != 0) {
@@ -73,12 +73,11 @@ uint64_t compute_3d_array_alloc_bytes(const CUDA_ARRAY3D_DESCRIPTOR* desc) {
     return bytes;
 }
 
-
-uint64_t compute_array_alloc_bytes(const CUDA_ARRAY_DESCRIPTOR* desc) {
-    if (desc==NULL) {
+uint64_t compute_array_alloc_bytes(const CUDA_ARRAY_DESCRIPTOR *desc) {
+    if (desc == NULL) {
         LOG_WARN("compute_array_alloc_bytes desc is null");
-    }else{
-        LOG_DEBUG("compute_array_alloc_bytes height=%ld width=%ld",desc->Height,desc->Width);
+    } else {
+        LOG_DEBUG("compute_array_alloc_bytes height=%ld width=%ld", desc->Height, desc->Width);
     }
 
     uint64_t bytes = desc->Width * desc->NumChannels;
@@ -92,97 +91,95 @@ uint64_t compute_array_alloc_bytes(const CUDA_ARRAY_DESCRIPTOR* desc) {
     return bytes;
 }
 
-CUresult cuArray3DCreate_v2(CUarray* arr, const CUDA_ARRAY3D_DESCRIPTOR* desc) {
+CUresult cuArray3DCreate_v2(CUarray *arr, const CUDA_ARRAY3D_DESCRIPTOR *desc) {
     LOG_DEBUG("cuArray3DCreate_v2");
     /*uint64_t bytes*/
     compute_3d_array_alloc_bytes(desc);
-    CUresult res = CUDA_OVERRIDE_CALL(cuda_library_entry,cuArray3DCreate_v2, arr, desc);
+    CUresult res = CUDA_OVERRIDE_CALL(cuda_library_entry, cuArray3DCreate_v2, arr, desc);
     if (res != CUDA_SUCCESS) {
         return res;
     }
     return res;
 }
 
-
-CUresult cuArrayCreate_v2(CUarray* arr, const CUDA_ARRAY_DESCRIPTOR* desc) {
+CUresult cuArrayCreate_v2(CUarray *arr, const CUDA_ARRAY_DESCRIPTOR *desc) {
     LOG_DEBUG("cuArrayCreate_v2");
     /*uint64_t bytes*/
     compute_array_alloc_bytes(desc);
-    CUresult res = CUDA_OVERRIDE_CALL(cuda_library_entry,cuArrayCreate_v2, arr, desc);
+    CUresult res = CUDA_OVERRIDE_CALL(cuda_library_entry, cuArrayCreate_v2, arr, desc);
     if (res != CUDA_SUCCESS) {
         return res;
     }
     return res;
 }
 
-
 CUresult cuArrayDestroy(CUarray arr) {
     CUDA_ARRAY3D_DESCRIPTOR desc;
     LOG_DEBUG("cuArrayDestroy");
     CHECK_DRV_API(cuArray3DGetDescriptor(&desc, arr));
     /*uint64_t bytes*/
     compute_3d_array_alloc_bytes(&desc);
-    CUresult res = CUDA_OVERRIDE_CALL(cuda_library_entry,cuArrayDestroy, arr);
+    CUresult res = CUDA_OVERRIDE_CALL(cuda_library_entry, cuArrayDestroy, arr);
     return res;
 }
 
-CUresult cuMemoryAllocate(CUdeviceptr* dptr, size_t bytesize, void* data) {
+CUresult cuMemoryAllocate(CUdeviceptr *dptr, size_t bytesize, void *data) {
     CUresult res;
-    res = CUDA_OVERRIDE_CALL(cuda_library_entry,cuMemAlloc_v2,dptr,bytesize);
+    res = CUDA_OVERRIDE_CALL(cuda_library_entry, cuMemAlloc_v2, dptr, bytesize);
     return res;
 }
 
-CUresult cuMemAlloc_v2(CUdeviceptr* dptr, size_t bytesize) {
-    LOG_INFO("into cuMemAllocing_v2 dptr=%p bytesize=%ld",dptr,bytesize);
+CUresult cuMemAlloc_v2(CUdeviceptr *dptr, size_t bytesize) {
+    LOG_INFO("into cuMemAllocing_v2 dptr=%p bytesize=%zu", dptr, bytesize);
     ENSURE_RUNNING();
-    CUresult res = allocate_raw(dptr,bytesize);
-    if (res!=CUDA_SUCCESS)
-        return res;
-    LOG_INFO("res=%d, cuMemAlloc_v2 success dptr=%p bytesize=%lu",0,(void *)*dptr,bytesize);
+    CUresult res = allocate_raw(dptr, bytesize);
+    if (res != CUDA_SUCCESS) return res;
+    LOG_INFO("res=%d, cuMemAlloc_v2 success dptr=%p bytesize=%lu", 0, (void *)*dptr, bytesize);
     return CUDA_SUCCESS;
 }
 
-CUresult cuMemAllocHost_v2(void** hptr, size_t bytesize) {
-    LOG_DEBUG("cuMemAllocHost_v2 hptr=%p bytesize=%ld",hptr,bytesize);
+CUresult cuMemAllocHost_v2(void **hptr, size_t bytesize) {
+    LOG_DEBUG("cuMemAllocHost_v2 hptr=%p bytesize=%zu", hptr, bytesize);
     ENSURE_RUNNING();
-    CUresult res = CUDA_OVERRIDE_CALL(cuda_library_entry,cuMemAllocHost_v2, hptr, bytesize);
+    CUresult res = CUDA_OVERRIDE_CALL(cuda_library_entry, cuMemAllocHost_v2, hptr, bytesize);
     if (res != CUDA_SUCCESS) {
         return res;
     }
     if (check_oom()) {
-        CUDA_OVERRIDE_CALL(cuda_library_entry,cuMemFreeHost, *hptr);
+        CUDA_OVERRIDE_CALL(cuda_library_entry, cuMemFreeHost, *hptr);
         return CUDA_ERROR_OUT_OF_MEMORY;
     }
     return res;
 }
 
-CUresult cuMemAllocManaged(CUdeviceptr* dptr, size_t bytesize, unsigned int flags) {
-    LOG_DEBUG("cuMemAllocManaged dptr=%p bytesize=%ld",dptr,bytesize);
+CUresult cuMemAllocManaged(CUdeviceptr *dptr, size_t bytesize, unsigned int flags) {
+    LOG_DEBUG("cuMemAllocManaged dptr=%p bytesize=%zu", dptr, bytesize);
     ENSURE_RUNNING();
     CUdevice dev;
     CHECK_DRV_API(cuCtxGetDevice(&dev));
-    if (oom_check(dev,bytesize)){
+    if (oom_check(dev, bytesize)) {
         return CUDA_ERROR_OUT_OF_MEMORY;
     }
-    CUresult res = CUDA_OVERRIDE_CALL(cuda_library_entry,cuMemAllocManaged, dptr, bytesize, flags);
+    CUresult res = CUDA_OVERRIDE_CALL(cuda_library_entry, cuMemAllocManaged, dptr, bytesize, flags);
     if (res == CUDA_SUCCESS) {
         add_chunk_only(*dptr, bytesize, dev);
     }
     return res;
 }
 
-CUresult cuMemAllocPitch_v2(CUdeviceptr* dptr, size_t* pPitch, size_t WidthInBytes, 
-                                      size_t Height, unsigned int ElementSizeBytes) {
-    LOG_DEBUG("cuMemAllocPitch_v2 dptr=%p (%ld,%ld)",dptr,WidthInBytes,Height);
+CUresult cuMemAllocPitch_v2(CUdeviceptr *dptr, size_t *pPitch, size_t WidthInBytes, size_t Height,
+                            unsigned int ElementSizeBytes) {
+    LOG_DEBUG("cuMemAllocPitch_v2 dptr=%p (%zu,%zu)", dptr, WidthInBytes, Height);
     size_t guess_pitch = (((WidthInBytes - 1) / ElementSizeBytes) + 1) * ElementSizeBytes;
     size_t bytesize = guess_pitch * Height;
     ENSURE_RUNNING();
     CUdevice dev;
     CHECK_DRV_API(cuCtxGetDevice(&dev));
-    if (oom_check(dev,bytesize)){
+    if (oom_check(dev, bytesize)) {
         return CUDA_ERROR_OUT_OF_MEMORY;
     }
-    CUresult res = CUDA_OVERRIDE_CALL(cuda_library_entry,cuMemAllocPitch_v2, dptr, pPitch, WidthInBytes, Height, ElementSizeBytes);
+    CUresult res = CUDA_OVERRIDE_CALL(cuda_library_entry, cuMemAllocPitch_v2, dptr, pPitch,
+                                      WidthInBytes, Height, ElementSizeBytes);
     if (res == CUDA_SUCCESS) {
         add_chunk_only(*dptr, bytesize, dev);
     }
@@ -190,69 +187,67 @@ CUresult cuMemAllocPitch_v2(CUdeviceptr* dptr, size_t* pPitch, size_t WidthInByt
 }
 
 CUresult cuMemFree_v2(CUdeviceptr dptr) {
-    LOG_DEBUG("cuMemFree_v2 dptr=%llx",dptr);
+    LOG_DEBUG("cuMemFree_v2 dptr=%llx", dptr);
     if (dptr == 0) {  // NULL
         return CUDA_SUCCESS;
     }
     CUresult res = free_raw(dptr);
-    LOG_INFO("after free_raw dptr=%p res=%d",(void *)dptr,res);
+    LOG_INFO("after free_raw dptr=%p res=%d", (void *)dptr, res);
     return res;
 }
 
-
-CUresult cuMemFreeHost(void* hptr) {
+CUresult cuMemFreeHost(void *hptr) {
     /*CUdeviceptr dptr;*/
     /*CHECK_DRV_API(cuMemHostGetDevicePointer(&dptr, hptr, 0));*/
     /*size_t bytesize;*/
     /*CHECK_DRV_API(cuMemGetAddressRange(NULL, &bytesize, dptr));*/
-    LOG_DEBUG("cuMemFreeHost_v2 hptr=%p",hptr);
-    CUresult res = CUDA_OVERRIDE_CALL(cuda_library_entry,cuMemFreeHost, hptr);
+    LOG_DEBUG("cuMemFreeHost_v2 hptr=%p", hptr);
+    CUresult res = CUDA_OVERRIDE_CALL(cuda_library_entry, cuMemFreeHost, hptr);
     return res;
 }
 
-CUresult cuMemHostAlloc(void** hptr, size_t bytesize, unsigned int flags) {
-    LOG_DEBUG("cuMemHostAlloc hptr=%p bytesize=%lu",hptr,bytesize);
+CUresult cuMemHostAlloc(void **hptr, size_t bytesize, unsigned int flags) {
+    LOG_DEBUG("cuMemHostAlloc hptr=%p bytesize=%lu", hptr, bytesize);
     ENSURE_RUNNING();
-    CUresult res = CUDA_OVERRIDE_CALL(cuda_library_entry,cuMemHostAlloc, hptr, bytesize, flags);
+    CUresult res = CUDA_OVERRIDE_CALL(cuda_library_entry, cuMemHostAlloc, hptr, bytesize, flags);
     if (res != CUDA_SUCCESS) {
         return res;
     }
     if (check_oom()) {
-        CUDA_OVERRIDE_CALL(cuda_library_entry,cuMemFreeHost, *hptr);
+        CUDA_OVERRIDE_CALL(cuda_library_entry, cuMemFreeHost, *hptr);
         *hptr = NULL;
         return CUDA_ERROR_OUT_OF_MEMORY;
     }
     return res;
 }
 
-
-CUresult cuMemHostRegister_v2(void* hptr, size_t bytesize, unsigned int flags) {
+CUresult cuMemHostRegister_v2(void *hptr, size_t bytesize, unsigned int flags) {
     /*int trackable = 1;*/
     /*if (flags != CU_MEMHOSTREGISTER_DEVICEMAP) {*/
     /*    fprintf(stderr, "only CU_MEMHOSTREGISTER_DEVICEMAP can be freed, current=%u\n", flags);*/
     /*    trackable = 0;*/
     /*}*/
     // TODO: process flags properly
-    LOG_DEBUG("cuMemHostRegister_v2 hptr=%p bytesize=%ld",hptr,bytesize);
+    LOG_DEBUG("cuMemHostRegister_v2 hptr=%p bytesize=%ld", hptr, bytesize);
     CUdevice dev;
     cuCtxGetDevice(&dev);
     ENSURE_RUNNING();
-    CUresult res = CUDA_OVERRIDE_CALL(cuda_library_entry,cuMemHostRegister_v2, hptr, bytesize, flags);
-    LOG_DEBUG("cuMemHostRegister_v2 returned :%d(%p:%ld)",res,hptr,bytesize);
+    CUresult res =
+        CUDA_OVERRIDE_CALL(cuda_library_entry, cuMemHostRegister_v2, hptr, bytesize, flags);
+    LOG_DEBUG("cuMemHostRegister_v2 returned :%d(%p:%ld)", res, hptr, bytesize);
     if (res != CUDA_SUCCESS) {
         return res;
     }
     if (check_oom()) {
-        CUDA_OVERRIDE_CALL(cuda_library_entry,cuMemHostUnregister, hptr);
+        CUDA_OVERRIDE_CALL(cuda_library_entry, cuMemHostUnregister, hptr);
         return CUDA_ERROR_OUT_OF_MEMORY;
     }
-    //LOG_WARN("222:%d(%p:%ld)",res,hptr,bytesize);
+    // LOG_WARN("222:%d(%p:%ld)",res,hptr,bytesize);
     return res;
-    //return CUDA_SUCCESS;
+    // return CUDA_SUCCESS;
 }
 
-
-CUresult cuMemHostUnregister(void* hptr) {
+CUresult cuMemHostUnregister(void *hptr) {
     /*CUdeviceptr dptr;*/
     /*CUresult flag = cuMemHostGetDevicePointer(&dptr, hptr, 0);*/
     /*size_t bytesize = 0;*/
@@ -260,246 +255,274 @@ CUresult cuMemHostUnregister(void* hptr) {
     /*    // only device map registry is trackable*/
     /*    CHECK_DRV_API(cuMemGetAddressRange(NULL, &bytesize, dptr));*/
     /*}*/
-    LOG_DEBUG("cuMemHostUnregister hptr=%p",hptr);
+    LOG_DEBUG("cuMemHostUnregister hptr=%p", hptr);
     ENSURE_RUNNING();
-    CUresult res = CUDA_OVERRIDE_CALL(cuda_library_entry,cuMemHostUnregister, hptr);
-    
+    CUresult res = CUDA_OVERRIDE_CALL(cuda_library_entry, cuMemHostUnregister, hptr);
+
     /*if (flag == CUDA_SUCCESS && bytesize > 0) {*/
     /*    // only device map registry is trackable*/
     /*    DECL_MEMORY_ON_SUCCESS(res, bytesize);*/
     /*}*/
-    //return CUDA_SUCCESS;    
+    // return CUDA_SUCCESS;
     return res;
 }
 
-
-CUresult cuMemcpy(CUdeviceptr dst, CUdeviceptr src, size_t ByteCount ){
-    LOG_DEBUG("cuMemcpy dst=%llx src=%llx count=%lu",dst,src,ByteCount);
+CUresult cuMemcpy(CUdeviceptr dst, CUdeviceptr src, size_t ByteCount) {
+    LOG_DEBUG("cuMemcpy dst=%llx src=%llx count=%lu", dst, src, ByteCount);
     ENSURE_RUNNING();
-    CUresult res = CUDA_OVERRIDE_CALL(cuda_library_entry,cuMemcpy,dst,src,ByteCount);
+    CUresult res = CUDA_OVERRIDE_CALL(cuda_library_entry, cuMemcpy, dst, src, ByteCount);
     return res;
 }
 
-CUresult cuPointerGetAttribute ( void* data, CUpointer_attribute attribute, CUdeviceptr ptr ){
-    LOG_DEBUG("cuPointGetAttribute data=%p attribute=%d ptr=%llx", data, (int)attribute,ptr);
+CUresult cuPointerGetAttribute(void *data, CUpointer_attribute attribute, CUdeviceptr ptr) {
+    LOG_DEBUG("cuPointGetAttribute data=%p attribute=%d ptr=%llx", data, (int)attribute, ptr);
     ENSURE_RUNNING();
-    CUresult res = CUDA_OVERRIDE_CALL(cuda_library_entry,cuPointerGetAttribute,data,attribute,ptr);
+    CUresult res =
+        CUDA_OVERRIDE_CALL(cuda_library_entry, cuPointerGetAttribute, data, attribute, ptr);
     return res;
 }
 
-CUresult cuPointerGetAttributes ( unsigned int  numAttributes, CUpointer_attribute* attributes, void** data, CUdeviceptr ptr ) {
+CUresult cuPointerGetAttributes(unsigned int numAttributes, CUpointer_attribute *attributes,
+                                void **data, CUdeviceptr ptr) {
     LOG_DEBUG("cuPointGetAttribute data=%p ptr=%llx", data, ptr);
     ENSURE_RUNNING();
-    CUresult res = CUDA_OVERRIDE_CALL(cuda_library_entry,cuPointerGetAttributes,numAttributes,attributes,data,ptr);
-    int cur=0;
-    for (cur=0;cur<numAttributes;cur++){
-        if (attributes[cur]==CU_POINTER_ATTRIBUTE_MEMORY_TYPE){
+    CUresult res = CUDA_OVERRIDE_CALL(cuda_library_entry, cuPointerGetAttributes, numAttributes,
+                                      attributes, data, ptr);
+    int cur = 0;
+    for (cur = 0; cur < numAttributes; cur++) {
+        if (attributes[cur] == CU_POINTER_ATTRIBUTE_MEMORY_TYPE) {
             int j = check_memory_type(ptr);
             //*(int *)(data[cur])=1;
-            LOG_DEBUG("check result = %d %d",j,*(int *)(data[cur]));
-        }else{
-            if (attributes[cur]==CU_POINTER_ATTRIBUTE_IS_MANAGED){
-                *(int *)(data[cur])=0;    
+            LOG_DEBUG("check result = %d %d", j, *(int *)(data[cur]));
+        } else {
+            if (attributes[cur] == CU_POINTER_ATTRIBUTE_IS_MANAGED) {
+                *(int *)(data[cur]) = 0;
             }
         }
     }
     return res;
 }
 
-CUresult cuPointerSetAttribute ( const void* value, CUpointer_attribute attribute, CUdeviceptr ptr ){
+CUresult cuPointerSetAttribute(const void *value, CUpointer_attribute attribute, CUdeviceptr ptr) {
     LOG_DEBUG("cuPointSetAttribute value=%p attribute=%d ptr=%llx", value, (int)attribute, ptr);
     ENSURE_RUNNING();
-    CUresult res = CUDA_OVERRIDE_CALL(cuda_library_entry,cuPointerSetAttribute,value,attribute,ptr);
+    CUresult res =
+        CUDA_OVERRIDE_CALL(cuda_library_entry, cuPointerSetAttribute, value, attribute, ptr);
     return res;
 }
 
-
-CUresult cuIpcCloseMemHandle(CUdeviceptr dptr){
-    LOG_DEBUG("cuIpcCloseMemHandle dptr=%llx",dptr);
+CUresult cuIpcCloseMemHandle(CUdeviceptr dptr) {
+    LOG_DEBUG("cuIpcCloseMemHandle dptr=%llx", dptr);
     ENSURE_RUNNING();
-    return CUDA_OVERRIDE_CALL(cuda_library_entry,cuIpcCloseMemHandle,dptr);
+    return CUDA_OVERRIDE_CALL(cuda_library_entry, cuIpcCloseMemHandle, dptr);
 }
 
-CUresult cuIpcGetMemHandle(CUipcMemHandle* pHandle, CUdeviceptr dptr) {
+CUresult cuIpcGetMemHandle(CUipcMemHandle *pHandle, CUdeviceptr dptr) {
     LOG_MSG("cuIpcGetMemHandle dptr=%llx", dptr);
     ENSURE_RUNNING();
-    return CUDA_OVERRIDE_CALL(cuda_library_entry,cuIpcGetMemHandle,pHandle,dptr);
+    return CUDA_OVERRIDE_CALL(cuda_library_entry, cuIpcGetMemHandle, pHandle, dptr);
 }
 
-CUresult cuIpcOpenMemHandle_v2 ( CUdeviceptr* pdptr, CUipcMemHandle handle, unsigned int  Flags ){
-    LOG_DEBUG("cuIpcGetMemHandle dptr=%p",pdptr);
+CUresult cuIpcOpenMemHandle_v2(CUdeviceptr *pdptr, CUipcMemHandle handle, unsigned int Flags) {
+    LOG_DEBUG("cuIpcGetMemHandle dptr=%p", pdptr);
     ENSURE_RUNNING();
-    return CUDA_OVERRIDE_CALL(cuda_library_entry,cuIpcOpenMemHandle_v2,pdptr,handle,Flags);
+    return CUDA_OVERRIDE_CALL(cuda_library_entry, cuIpcOpenMemHandle_v2, pdptr, handle, Flags);
 }
 
-
-CUresult cuMemGetAddressRange_v2( CUdeviceptr* pbase, size_t* psize, CUdeviceptr dptr ){
-    //TODO: Translate back
-    LOG_DEBUG("cuMemGetAddressRange_v2,dptr=%llx",dptr);
+CUresult cuMemGetAddressRange_v2(CUdeviceptr *pbase, size_t *psize, CUdeviceptr dptr) {
+    // TODO: Translate back
+    LOG_DEBUG("cuMemGetAddressRange_v2,dptr=%llx", dptr);
     ENSURE_RUNNING();
-    CUresult res = CUDA_OVERRIDE_CALL(cuda_library_entry,cuMemGetAddressRange_v2,pbase,psize,dptr);
+    CUresult res =
+        CUDA_OVERRIDE_CALL(cuda_library_entry, cuMemGetAddressRange_v2, pbase, psize, dptr);
     return res;
 }
 
-CUresult cuMemcpyAsync ( CUdeviceptr dst, CUdeviceptr src, size_t ByteCount, CUstream hStream ){
-    LOG_DEBUG("cuMemcpyAsync,dst=%llx src=%llx count=%lu",dst,src,ByteCount);
+CUresult cuMemcpyAsync(CUdeviceptr dst, CUdeviceptr src, size_t ByteCount, CUstream hStream) {
+    LOG_DEBUG("cuMemcpyAsync,dst=%llx src=%llx count=%lu", dst, src, ByteCount);
     ENSURE_RUNNING();
-    CUresult res = CUDA_OVERRIDE_CALL(cuda_library_entry,cuMemcpyAsync,dst,src,ByteCount,hStream);
-    return res; 
+    CUresult res =
+        CUDA_OVERRIDE_CALL(cuda_library_entry, cuMemcpyAsync, dst, src, ByteCount, hStream);
+    return res;
 }
 
-CUresult cuMemcpyAtoD_v2( CUdeviceptr dstDevice, CUarray srcArray, size_t srcOffset, size_t ByteCount ){
-    LOG_DEBUG("cuMemcpyAtoD_v2,dst=%llx count=%lu",dstDevice,ByteCount);
+CUresult cuMemcpyAtoD_v2(CUdeviceptr dstDevice, CUarray srcArray, size_t srcOffset,
+                         size_t ByteCount) {
+    LOG_DEBUG("cuMemcpyAtoD_v2,dst=%llx count=%lu", dstDevice, ByteCount);
     ENSURE_RUNNING();
-    return CUDA_OVERRIDE_CALL(cuda_library_entry,cuMemcpyAtoD_v2,dstDevice,srcArray,srcOffset,ByteCount);
+    return CUDA_OVERRIDE_CALL(cuda_library_entry, cuMemcpyAtoD_v2, dstDevice, srcArray, srcOffset,
+                              ByteCount);
 }
 
-CUresult cuMemcpyDtoA_v2 ( CUarray dstArray, size_t dstOffset, CUdeviceptr srcDevice, size_t ByteCount ){
-    LOG_DEBUG("cuMemcpyDtoA_v2,src=%llx count=%lu",srcDevice,ByteCount);
+CUresult cuMemcpyDtoA_v2(CUarray dstArray, size_t dstOffset, CUdeviceptr srcDevice,
+                         size_t ByteCount) {
+    LOG_DEBUG("cuMemcpyDtoA_v2,src=%llx count=%lu", srcDevice, ByteCount);
     ENSURE_RUNNING();
-    return CUDA_OVERRIDE_CALL(cuda_library_entry,cuMemcpyDtoA_v2,dstArray,dstOffset,srcDevice,ByteCount);
+    return CUDA_OVERRIDE_CALL(cuda_library_entry, cuMemcpyDtoA_v2, dstArray, dstOffset, srcDevice,
+                              ByteCount);
 }
 
-CUresult cuMemcpyDtoD_v2 ( CUdeviceptr dstDevice, CUdeviceptr srcDevice, size_t ByteCount ){
-    LOG_DEBUG("cuMemcpyDtoD_v2,dst=%llx src=%llx count=%lu",dstDevice,srcDevice,ByteCount);
+CUresult cuMemcpyDtoD_v2(CUdeviceptr dstDevice, CUdeviceptr srcDevice, size_t ByteCount) {
+    LOG_DEBUG("cuMemcpyDtoD_v2,dst=%llx src=%llx count=%lu", dstDevice, srcDevice, ByteCount);
     ENSURE_RUNNING();
-    return CUDA_OVERRIDE_CALL(cuda_library_entry,cuMemcpyDtoD_v2,dstDevice,srcDevice,ByteCount);
+    return CUDA_OVERRIDE_CALL(cuda_library_entry, cuMemcpyDtoD_v2, dstDevice, srcDevice, ByteCount);
 }
 
-CUresult cuMemcpyDtoDAsync_v2( CUdeviceptr dstDevice, CUdeviceptr srcDevice, size_t ByteCount, CUstream hStream ){
-    LOG_DEBUG("cuMemcpyDtoDAsync_v2,dst=%llx src=%llx count=%lu",dstDevice,srcDevice,ByteCount);
+CUresult cuMemcpyDtoDAsync_v2(CUdeviceptr dstDevice, CUdeviceptr srcDevice, size_t ByteCount,
+                              CUstream hStream) {
+    LOG_DEBUG("cuMemcpyDtoDAsync_v2,dst=%llx src=%llx count=%lu", dstDevice, srcDevice, ByteCount);
     ENSURE_RUNNING();
-    return CUDA_OVERRIDE_CALL(cuda_library_entry,cuMemcpyDtoDAsync_v2,dstDevice,srcDevice,ByteCount,hStream);
+    return CUDA_OVERRIDE_CALL(cuda_library_entry, cuMemcpyDtoDAsync_v2, dstDevice, srcDevice,
+                              ByteCount, hStream);
 }
 
-CUresult cuMemcpyDtoH_v2(void* dstHost, CUdeviceptr srcDevice, size_t ByteCount) {
+CUresult cuMemcpyDtoH_v2(void *dstHost, CUdeviceptr srcDevice, size_t ByteCount) {
     // TODO: compute bytesize
-    LOG_DEBUG("cuMemcpyDtoH_v2,dst=%p src=%llx count=%lu",dstHost,srcDevice,ByteCount);
+    LOG_DEBUG("cuMemcpyDtoH_v2,dst=%p src=%llx count=%lu", dstHost, srcDevice, ByteCount);
     ENSURE_RUNNING();
-    CUresult res = CUDA_OVERRIDE_CALL(cuda_library_entry,cuMemcpyDtoH_v2, dstHost, srcDevice, ByteCount);
+    CUresult res =
+        CUDA_OVERRIDE_CALL(cuda_library_entry, cuMemcpyDtoH_v2, dstHost, srcDevice, ByteCount);
     return res;
 }
 
-CUresult cuMemcpyDtoHAsync_v2 ( void* dstHost, CUdeviceptr srcDevice, size_t ByteCount, CUstream hStream ){
-    LOG_DEBUG("cuMemcpyDtoHAsync_v2,dst=%p src=%llx count=%lu",dstHost,srcDevice,ByteCount);
+CUresult cuMemcpyDtoHAsync_v2(void *dstHost, CUdeviceptr srcDevice, size_t ByteCount,
+                              CUstream hStream) {
+    LOG_DEBUG("cuMemcpyDtoHAsync_v2,dst=%p src=%llx count=%lu", dstHost, srcDevice, ByteCount);
     ENSURE_RUNNING();
-    return CUDA_OVERRIDE_CALL(cuda_library_entry,cuMemcpyDtoHAsync_v2,dstHost,srcDevice,ByteCount,hStream); 
+    return CUDA_OVERRIDE_CALL(cuda_library_entry, cuMemcpyDtoHAsync_v2, dstHost, srcDevice,
+                              ByteCount, hStream);
 }
 
-
-CUresult cuMemcpyHtoD_v2(CUdeviceptr srcDevice, const void* dstHost, size_t ByteCount) {
+CUresult cuMemcpyHtoD_v2(CUdeviceptr srcDevice, const void *dstHost, size_t ByteCount) {
     // TODO: compute bytesize
-    LOG_DEBUG("cuMemcpyHtoD_v2,srcDevice=%llx dstHost=%p count=%lu",srcDevice,dstHost,ByteCount);
+    LOG_DEBUG("cuMemcpyHtoD_v2,srcDevice=%llx dstHost=%p count=%lu", srcDevice, dstHost, ByteCount);
     ENSURE_RUNNING();
-    CUresult res = CUDA_OVERRIDE_CALL(cuda_library_entry,cuMemcpyHtoD_v2, srcDevice, dstHost, ByteCount);
+    CUresult res =
+        CUDA_OVERRIDE_CALL(cuda_library_entry, cuMemcpyHtoD_v2, srcDevice, dstHost, ByteCount);
     return res;
 }
 
-CUresult cuMemcpyHtoDAsync_v2( CUdeviceptr dstDevice, const void* srcHost, size_t ByteCount, CUstream hStream ){
-    LOG_DEBUG("cuMemcpyHtoDAsync_v2,dst=%llx src=%p count=%lu",dstDevice,srcHost,ByteCount);
+CUresult cuMemcpyHtoDAsync_v2(CUdeviceptr dstDevice, const void *srcHost, size_t ByteCount,
+                              CUstream hStream) {
+    LOG_DEBUG("cuMemcpyHtoDAsync_v2,dst=%llx src=%p count=%lu", dstDevice, srcHost, ByteCount);
     ENSURE_RUNNING();
-    CUresult res = CUDA_OVERRIDE_CALL(cuda_library_entry,cuMemcpyHtoDAsync_v2,dstDevice,srcHost,ByteCount,hStream);
+    CUresult res = CUDA_OVERRIDE_CALL(cuda_library_entry, cuMemcpyHtoDAsync_v2, dstDevice, srcHost,
+                                      ByteCount, hStream);
     return res;
 }
 
-
-CUresult cuMemcpyPeer(CUdeviceptr dstDevice, CUcontext dstContext, CUdeviceptr srcDevice, CUcontext srcContext, size_t ByteCount) {
-    LOG_DEBUG("cuMemcpyPeer dstDevice=%llx srcDevice=%llx",dstDevice,srcDevice);
+CUresult cuMemcpyPeer(CUdeviceptr dstDevice, CUcontext dstContext, CUdeviceptr srcDevice,
+                      CUcontext srcContext, size_t ByteCount) {
+    LOG_DEBUG("cuMemcpyPeer dstDevice=%llx srcDevice=%llx", dstDevice, srcDevice);
     ENSURE_RUNNING();
-    CUresult res=CUDA_OVERRIDE_CALL(cuda_library_entry,cuMemcpyPeer,dstDevice,dstContext,srcDevice,srcContext,ByteCount);
+    CUresult res = CUDA_OVERRIDE_CALL(cuda_library_entry, cuMemcpyPeer, dstDevice, dstContext,
+                                      srcDevice, srcContext, ByteCount);
     return res;
 }
 
-CUresult cuMemcpyPeerAsync ( CUdeviceptr dstDevice, CUcontext dstContext, CUdeviceptr srcDevice, CUcontext srcContext, size_t ByteCount, CUstream hStream){
-    LOG_DEBUG("into cuMemcpyPeerAsync dstDevice=%llx srcDevice=%llx",dstDevice,srcDevice);
+CUresult cuMemcpyPeerAsync(CUdeviceptr dstDevice, CUcontext dstContext, CUdeviceptr srcDevice,
+                           CUcontext srcContext, size_t ByteCount, CUstream hStream) {
+    LOG_DEBUG("into cuMemcpyPeerAsync dstDevice=%llx srcDevice=%llx", dstDevice, srcDevice);
     ENSURE_RUNNING();
-    CUresult res=CUDA_OVERRIDE_CALL(cuda_library_entry,cuMemcpyPeerAsync,dstDevice,dstContext,srcDevice,srcContext,ByteCount,hStream);
+    CUresult res = CUDA_OVERRIDE_CALL(cuda_library_entry, cuMemcpyPeerAsync, dstDevice, dstContext,
+                                      srcDevice, srcContext, ByteCount, hStream);
     return res;
 }
 
-CUresult cuMemsetD16_v2 ( CUdeviceptr dstDevice, unsigned short us, size_t N ){
+CUresult cuMemsetD16_v2(CUdeviceptr dstDevice, unsigned short us, size_t N) {
     ENSURE_RUNNING();
-    CUresult res=CUDA_OVERRIDE_CALL(cuda_library_entry,cuMemsetD16_v2,dstDevice,us,N);
+    CUresult res = CUDA_OVERRIDE_CALL(cuda_library_entry, cuMemsetD16_v2, dstDevice, us, N);
     return res;
 }
 
-CUresult cuMemsetD16Async ( CUdeviceptr dstDevice, unsigned short us, size_t N, CUstream hStream ){
+CUresult cuMemsetD16Async(CUdeviceptr dstDevice, unsigned short us, size_t N, CUstream hStream) {
     ENSURE_RUNNING();
-    return CUDA_OVERRIDE_CALL(cuda_library_entry,cuMemsetD16Async,dstDevice,us,N,hStream);
+    return CUDA_OVERRIDE_CALL(cuda_library_entry, cuMemsetD16Async, dstDevice, us, N, hStream);
 }
 
-CUresult cuMemsetD2D16_v2 ( CUdeviceptr dstDevice, size_t dstPitch, unsigned short us, size_t Width, size_t Height ){
+CUresult cuMemsetD2D16_v2(CUdeviceptr dstDevice, size_t dstPitch, unsigned short us, size_t Width,
+                          size_t Height) {
     ENSURE_RUNNING();
-    return CUDA_OVERRIDE_CALL(cuda_library_entry,cuMemsetD2D16_v2,dstDevice,dstPitch,us,Width,Height);
+    return CUDA_OVERRIDE_CALL(cuda_library_entry, cuMemsetD2D16_v2, dstDevice, dstPitch, us, Width,
+                              Height);
 }
 
-CUresult cuMemsetD2D16Async (CUdeviceptr dstDevice, size_t dstPitch, unsigned short us, size_t Width, size_t Height, CUstream hStream ){
+CUresult cuMemsetD2D16Async(CUdeviceptr dstDevice, size_t dstPitch, unsigned short us, size_t Width,
+                            size_t Height, CUstream hStream) {
     ENSURE_RUNNING();
-    return CUDA_OVERRIDE_CALL(cuda_library_entry,cuMemsetD2D16Async,dstDevice,dstPitch,us,Width,Height,hStream);
+    return CUDA_OVERRIDE_CALL(cuda_library_entry, cuMemsetD2D16Async, dstDevice, dstPitch, us,
+                              Width, Height, hStream);
 }
 
-CUresult cuMemsetD2D32_v2 ( CUdeviceptr dstDevice, size_t dstPitch, unsigned int  ui, size_t Width, size_t Height ){
+CUresult cuMemsetD2D32_v2(CUdeviceptr dstDevice, size_t dstPitch, unsigned int ui, size_t Width,
+                          size_t Height) {
     ENSURE_RUNNING();
-    return CUDA_OVERRIDE_CALL(cuda_library_entry,cuMemsetD2D32_v2,dstDevice,dstPitch,ui,Width,Height);
+    return CUDA_OVERRIDE_CALL(cuda_library_entry, cuMemsetD2D32_v2, dstDevice, dstPitch, ui, Width,
+                              Height);
 }
 
-
-CUresult cuMemsetD2D32Async ( CUdeviceptr dstDevice, size_t dstPitch, unsigned int  ui, size_t Width, size_t Height, CUstream hStream ){
+CUresult cuMemsetD2D32Async(CUdeviceptr dstDevice, size_t dstPitch, unsigned int ui, size_t Width,
+                            size_t Height, CUstream hStream) {
     ENSURE_RUNNING();
-    return CUDA_OVERRIDE_CALL(cuda_library_entry,cuMemsetD2D32Async,dstDevice,dstPitch,ui,Width,Height,hStream);
+    return CUDA_OVERRIDE_CALL(cuda_library_entry, cuMemsetD2D32Async, dstDevice, dstPitch, ui,
+                              Width, Height, hStream);
 }
 
-CUresult cuMemsetD2D8_v2 ( CUdeviceptr dstDevice, size_t dstPitch, unsigned char  uc, size_t Width, size_t Height ){
+CUresult cuMemsetD2D8_v2(CUdeviceptr dstDevice, size_t dstPitch, unsigned char uc, size_t Width,
+                         size_t Height) {
     ENSURE_RUNNING();
-    return CUDA_OVERRIDE_CALL(cuda_library_entry,cuMemsetD2D8_v2,dstDevice,dstPitch,uc,Width,Height);
+    return CUDA_OVERRIDE_CALL(cuda_library_entry, cuMemsetD2D8_v2, dstDevice, dstPitch, uc, Width,
+                              Height);
 }
 
-CUresult cuMemsetD2D8Async ( CUdeviceptr dstDevice, size_t dstPitch, unsigned char  uc, size_t Width, size_t Height, CUstream hStream ){
+CUresult cuMemsetD2D8Async(CUdeviceptr dstDevice, size_t dstPitch, unsigned char uc, size_t Width,
+                           size_t Height, CUstream hStream) {
     ENSURE_RUNNING();
-    return CUDA_OVERRIDE_CALL(cuda_library_entry,cuMemsetD2D8Async,dstDevice,dstPitch,uc,Width,Height,hStream);
+    return CUDA_OVERRIDE_CALL(cuda_library_entry, cuMemsetD2D8Async, dstDevice, dstPitch, uc, Width,
+                              Height, hStream);
 }
 
-CUresult cuMemsetD32_v2 ( CUdeviceptr dstDevice, unsigned int  ui, size_t N ){
+CUresult cuMemsetD32_v2(CUdeviceptr dstDevice, unsigned int ui, size_t N) {
     ENSURE_RUNNING();
-    CUresult res = CUDA_OVERRIDE_CALL(cuda_library_entry,cuMemsetD32_v2,dstDevice,ui,N);
+    CUresult res = CUDA_OVERRIDE_CALL(cuda_library_entry, cuMemsetD32_v2, dstDevice, ui, N);
     return res;
 }
 
-CUresult cuMemsetD32Async ( CUdeviceptr dstDevice, unsigned int  ui, size_t N, CUstream hStream ){
+CUresult cuMemsetD32Async(CUdeviceptr dstDevice, unsigned int ui, size_t N, CUstream hStream) {
     ENSURE_RUNNING();
-    return CUDA_OVERRIDE_CALL(cuda_library_entry,cuMemsetD32Async,dstDevice,ui,N,hStream);
-}   
-
+    return CUDA_OVERRIDE_CALL(cuda_library_entry, cuMemsetD32Async, dstDevice, ui, N, hStream);
+}
 
-CUresult cuMemsetD8_v2 ( CUdeviceptr dstDevice, unsigned char  uc, size_t N ){
+CUresult cuMemsetD8_v2(CUdeviceptr dstDevice, unsigned char uc, size_t N) {
     ENSURE_RUNNING();
-    return CUDA_OVERRIDE_CALL(cuda_library_entry,cuMemsetD8_v2,dstDevice,uc,N);
+    return CUDA_OVERRIDE_CALL(cuda_library_entry, cuMemsetD8_v2, dstDevice, uc, N);
 }
 
-CUresult cuMemsetD8Async ( CUdeviceptr dstDevice, unsigned char  uc, size_t N, CUstream hStream ){
+CUresult cuMemsetD8Async(CUdeviceptr dstDevice, unsigned char uc, size_t N, CUstream hStream) {
     ENSURE_RUNNING();
-    return CUDA_OVERRIDE_CALL(cuda_library_entry,cuMemsetD8Async,dstDevice,uc,N,hStream);
+    return CUDA_OVERRIDE_CALL(cuda_library_entry, cuMemsetD8Async, dstDevice, uc, N, hStream);
 }
 
 #if CUDA_VERSION < 13000
-CUresult cuMemAdvise( CUdeviceptr devPtr, size_t count, CUmem_advise advice, CUdevice device ){
-    LOG_DEBUG("cuMemAdvise devPtr=%llx count=%lx",devPtr,count);
+CUresult cuMemAdvise(CUdeviceptr devPtr, size_t count, CUmem_advise advice, CUdevice device) {
+    LOG_DEBUG("cuMemAdvise devPtr=%llx count=%lx", devPtr, count);
     ENSURE_RUNNING();
-    return CUDA_OVERRIDE_CALL(cuda_library_entry,cuMemAdvise,devPtr,count,advice,device);
+    return CUDA_OVERRIDE_CALL(cuda_library_entry, cuMemAdvise, devPtr, count, advice, device);
 }
 #endif
 
 /* On CUDA 13+, cuda.h #defines cuMemAdvise as cuMemAdvise_v2 with a new CUmemLocation
    parameter. The old CUdevice wrapper above would conflict, so it is excluded.
    On CUDA 12 and below, both symbols exist separately and this is the explicit v2 wrapper. */
-CUresult cuMemAdvise_v2(CUdeviceptr devPtr, size_t count, CUmem_advise advice, CUmemLocation location) {
+CUresult cuMemAdvise_v2(CUdeviceptr devPtr, size_t count, CUmem_advise advice,
+                        CUmemLocation location) {
     LOG_DEBUG("cuMemAdvise_v2 devPtr=%llx count=%lx", devPtr, count);
     ENSURE_RUNNING();
     return CUDA_OVERRIDE_CALL(cuda_library_entry, cuMemAdvise_v2, devPtr, count, advice, location);
 }
 
 #ifdef HOOK_MEMINFO_ENABLE
-CUresult cuMemGetInfo_v2(size_t* free, size_t* total) {
+CUresult cuMemGetInfo_v2(size_t *free, size_t *total) {
     CUdevice dev;
     LOG_DEBUG("cuMemGetInfo_v2");
     ENSURE_INITIALIZED();
@@ -507,7 +530,7 @@ CUresult cuMemGetInfo_v2(size_t* free, size_t* total) {
     size_t usage = get_current_device_memory_usage(cuda_to_nvml_map(dev));
     size_t limit = get_current_device_memory_limit(cuda_to_nvml_map(dev));
     if (limit == 0) {
-        CUDA_OVERRIDE_CALL(cuda_library_entry,cuMemGetInfo_v2, free, total);
+        CUDA_OVERRIDE_CALL(cuda_library_entry, cuMemGetInfo_v2, free, total);
         LOG_INFO("orig free=%ld total=%ld", *free, *total);
         *free = *total - usage;
         LOG_INFO("after free=%ld total=%ld", *free, *total);
@@ -516,31 +539,30 @@ CUresult cuMemGetInfo_v2(size_t* free, size_t* total) {
         LOG_WARN("limit < usage; usage=%ld, limit=%ld", usage, limit);
         return CUDA_ERROR_INVALID_VALUE;
     } else {
-        CUDA_OVERRIDE_CALL(cuda_library_entry,cuMemGetInfo_v2, free, total);
-        LOG_INFO("orig free=%ld total=%ld limit=%ld usage=%ld",
-            *free, *total, limit, usage);
+        CUDA_OVERRIDE_CALL(cuda_library_entry, cuMemGetInfo_v2, free, total);
+        LOG_INFO("orig free=%ld total=%ld limit=%ld usage=%ld", *free, *total, limit, usage);
         // Ensure total memory does not exceed the physical or imposed limit.
         size_t actual_limit = (limit > *total) ? *total : limit;
         *free = (actual_limit > usage) ? (actual_limit - usage) : 0;
         *total = actual_limit;
-        LOG_INFO("after free=%ld total=%ld limit=%ld usage=%ld",
-            *free, *total, limit, usage);
+        LOG_INFO("after free=%ld total=%ld limit=%ld usage=%ld", *free, *total, limit, usage);
         return CUDA_SUCCESS;
     }
 }
 #endif
 
-CUresult cuMipmappedArrayCreate(CUmipmappedArray* pHandle, 
-                                          const CUDA_ARRAY3D_DESCRIPTOR* pMipmappedArrayDesc, 
-                                          unsigned int numMipmapLevels) {
+CUresult cuMipmappedArrayCreate(CUmipmappedArray *pHandle,
+                                const CUDA_ARRAY3D_DESCRIPTOR *pMipmappedArrayDesc,
+                                unsigned int numMipmapLevels) {
     // TODO: compute bytesize
     LOG_DEBUG("cuMipmappedArrayCreate\n");
-    CUresult res = CUDA_OVERRIDE_CALL(cuda_library_entry,cuMipmappedArrayCreate, pHandle, pMipmappedArrayDesc, numMipmapLevels);
+    CUresult res = CUDA_OVERRIDE_CALL(cuda_library_entry, cuMipmappedArrayCreate, pHandle,
+                                      pMipmappedArrayDesc, numMipmapLevels);
     if (res != CUDA_SUCCESS) {
         return res;
     }
     if (check_oom()) {
-        CUDA_OVERRIDE_CALL(cuda_library_entry,cuMipmappedArrayDestroy, *pHandle);
+        CUDA_OVERRIDE_CALL(cuda_library_entry, cuMipmappedArrayDestroy, *pHandle);
         return CUDA_ERROR_OUT_OF_MEMORY;
     }
     return res;
@@ -549,53 +571,66 @@ CUresult cuMipmappedArrayCreate(CUmipmappedArray* pHandle,
 CUresult cuMipmappedArrayDestroy(CUmipmappedArray hMipmappedArray) {
     // TODO: compute bytesize
     LOG_DEBUG("cuMipmappedArrayDestroy\n");
-    CUresult res = CUDA_OVERRIDE_CALL(cuda_library_entry,cuMipmappedArrayDestroy, hMipmappedArray);
+    CUresult res = CUDA_OVERRIDE_CALL(cuda_library_entry, cuMipmappedArrayDestroy, hMipmappedArray);
     return res;
 }
 
-CUresult cuLaunchKernel ( CUfunction f, unsigned int  gridDimX, unsigned int  gridDimY, unsigned int  gridDimZ, unsigned int  blockDimX, unsigned int  blockDimY, unsigned int  blockDimZ, unsigned int  sharedMemBytes, CUstream hStream, void** kernelParams, void** extra ){
+CUresult cuLaunchKernel(CUfunction f, unsigned int gridDimX, unsigned int gridDimY,
+                        unsigned int gridDimZ, unsigned int blockDimX, unsigned int blockDimY,
+                        unsigned int blockDimZ, unsigned int sharedMemBytes, CUstream hStream,
+                        void **kernelParams, void **extra) {
     ENSURE_RUNNING();
     pre_launch_kernel();
-    if (pidfound==1){ 
-        rate_limiter(gridDimX * gridDimY * gridDimZ,
-                   blockDimX * blockDimY * blockDimZ);
+    if (pidfound == 1) {
+        rate_limiter(gridDimX * gridDimY * gridDimZ, blockDimX * blockDimY * blockDimZ);
     }
-    CUresult res = CUDA_OVERRIDE_CALL(cuda_library_entry,cuLaunchKernel,f,gridDimX,gridDimY,gridDimZ,blockDimX,blockDimY,blockDimZ,sharedMemBytes,hStream,kernelParams,extra);
+    CUresult res = CUDA_OVERRIDE_CALL(cuda_library_entry, cuLaunchKernel, f, gridDimX, gridDimY,
+                                      gridDimZ, blockDimX, blockDimY, blockDimZ, sharedMemBytes,
+                                      hStream, kernelParams, extra);
     return res;
 }
 
-CUresult cuLaunchKernelEx(const CUlaunchConfig *config, CUfunction f, void **kernelParams, void **extra) {
+CUresult cuLaunchKernelEx(const CUlaunchConfig *config, CUfunction f, void **kernelParams,
+                          void **extra) {
     ENSURE_RUNNING();
     pre_launch_kernel();
-    if (pidfound==1){
+    if (pidfound == 1) {
         rate_limiter(config->gridDimX * config->gridDimY * config->gridDimZ,
-                   config->blockDimX * config->blockDimY * config->blockDimZ);
+                     config->blockDimX * config->blockDimY * config->blockDimZ);
     }
-    CUresult res = CUDA_OVERRIDE_CALL(cuda_library_entry,cuLaunchKernelEx,config,f,kernelParams,extra);
+    CUresult res =
+        CUDA_OVERRIDE_CALL(cuda_library_entry, cuLaunchKernelEx, config, f, kernelParams, extra);
     return res;
 }
 
-CUresult cuLaunchCooperativeKernel ( CUfunction f, unsigned int  gridDimX, unsigned int  gridDimY, unsigned int  gridDimZ, unsigned int  blockDimX, unsigned int  blockDimY, unsigned int  blockDimZ, unsigned int  sharedMemBytes, CUstream hStream, void** kernelParams ){
+CUresult cuLaunchCooperativeKernel(CUfunction f, unsigned int gridDimX, unsigned int gridDimY,
+                                   unsigned int gridDimZ, unsigned int blockDimX,
+                                   unsigned int blockDimY, unsigned int blockDimZ,
+                                   unsigned int sharedMemBytes, CUstream hStream,
+                                   void **kernelParams) {
     ENSURE_RUNNING();
     pre_launch_kernel();
-    CUresult res = CUDA_OVERRIDE_CALL(cuda_library_entry,cuLaunchCooperativeKernel,f,gridDimX,gridDimY,gridDimZ,blockDimX,blockDimY,blockDimZ,sharedMemBytes,hStream,kernelParams);
+    CUresult res = CUDA_OVERRIDE_CALL(cuda_library_entry, cuLaunchCooperativeKernel, f, gridDimX,
+                                      gridDimY, gridDimZ, blockDimX, blockDimY, blockDimZ,
+                                      sharedMemBytes, hStream, kernelParams);
     return res;
 }
 
 CUresult cuMemoryFree(CUdeviceptr dptr) {
-    CUresult res = CUDA_OVERRIDE_CALL(cuda_library_entry,cuMemFree_v2,dptr);
+    CUresult res = CUDA_OVERRIDE_CALL(cuda_library_entry, cuMemFree_v2, dptr);
     return res;
 }
 
-CUresult cuMemAddressReserve(CUdeviceptr* ptr, size_t size,
-    size_t alignment, CUdeviceptr addr, unsigned long long flags ) {
-    CUresult res = CUDA_OVERRIDE_CALL(cuda_library_entry,
-        cuMemAddressReserve, ptr, size, alignment, addr, flags);
+CUresult cuMemAddressReserve(CUdeviceptr *ptr, size_t size, size_t alignment, CUdeviceptr addr,
+                             unsigned long long flags) {
+    CUresult res = CUDA_OVERRIDE_CALL(cuda_library_entry, cuMemAddressReserve, ptr, size, alignment,
+                                      addr, flags);
     LOG_INFO("cuMemAddressReserve:%lx %llx", size, *ptr);
     return res;
 }
 
-CUresult cuMemCreate ( CUmemGenericAllocationHandle* handle, size_t size, const CUmemAllocationProp* prop, unsigned long long flags ) {
+CUresult cuMemCreate(CUmemGenericAllocationHandle *handle, size_t size,
+                     const CUmemAllocationProp *prop, unsigned long long flags) {
     LOG_INFO("cuMemCreate:%lld:%d", size, prop->location.id);
     ENSURE_RUNNING();
     CUdevice dev;
@@ -606,8 +641,7 @@ CUresult cuMemCreate ( CUmemGenericAllocationHandle* handle, size_t size, const
     if (do_oom_check && oom_check(dev, size)) {
         return CUDA_ERROR_OUT_OF_MEMORY;
     }
-    CUresult res = CUDA_OVERRIDE_CALL(cuda_library_entry,
-        cuMemCreate, handle, size, prop, flags);
+    CUresult res = CUDA_OVERRIDE_CALL(cuda_library_entry, cuMemCreate, handle, size, prop, flags);
     if (do_oom_check && res == CUDA_SUCCESS) {
         add_chunk_only(*handle, size, dev);
     }
@@ -623,109 +657,117 @@ CUresult cuMemRelease(CUmemGenericAllocationHandle handle) {
     return res;
 }
 
-CUresult cuMemMap( CUdeviceptr ptr, size_t size, size_t offset, CUmemGenericAllocationHandle handle, unsigned long long flags ) {
+CUresult cuMemMap(CUdeviceptr ptr, size_t size, size_t offset, CUmemGenericAllocationHandle handle,
+                  unsigned long long flags) {
     LOG_INFO("cuMemMap:%lld(%llx,%llx)", size, ptr, offset);
-    CUresult res = CUDA_OVERRIDE_CALL(cuda_library_entry,cuMemMap,ptr,size,offset,handle,flags);
+    CUresult res =
+        CUDA_OVERRIDE_CALL(cuda_library_entry, cuMemMap, ptr, size, offset, handle, flags);
     return res;
 }
 
-CUresult cuMemImportFromShareableHandle(CUmemGenericAllocationHandle* handle,
-    void* osHandle, CUmemAllocationHandleType shHandleType) {
+CUresult cuMemImportFromShareableHandle(CUmemGenericAllocationHandle *handle, void *osHandle,
+                                        CUmemAllocationHandleType shHandleType) {
     LOG_INFO("cuMemImportFromSharableHandle");
-    CUresult res = CUDA_OVERRIDE_CALL(cuda_library_entry,
-        cuMemImportFromShareableHandle, handle, osHandle, shHandleType);
+    CUresult res = CUDA_OVERRIDE_CALL(cuda_library_entry, cuMemImportFromShareableHandle, handle,
+                                      osHandle, shHandleType);
     return res;
 }
 
 CUresult cuMemAllocAsync(CUdeviceptr *dptr, size_t bytesize, CUstream hStream) {
-    LOG_DEBUG("cuMemAllocAsync:%ld",bytesize);
-    return allocate_async_raw(dptr,bytesize,hStream);
+    LOG_DEBUG("cuMemAllocAsync:%zu", bytesize);
+    return allocate_async_raw(dptr, bytesize, hStream);
 }
 
 CUresult cuMemFreeAsync(CUdeviceptr dptr, CUstream hStream) {
-    LOG_DEBUG("cuMemFreeAsync dptr=%llx",dptr);
+    LOG_DEBUG("cuMemFreeAsync dptr=%llx", dptr);
     if (dptr == 0) {  // NULL
         return CUDA_SUCCESS;
     }
-    CUresult res = free_raw_async(dptr,hStream);
-    //CUresult res = CUDA_OVERRIDE_CALL(cuda_library_entry,cuMemFreeAsync,dptr,hStream); 
-    LOG_DEBUG("after free_raw_async dptr=%p res=%d",(void *)dptr,res);
+    CUresult res = free_raw_async(dptr, hStream);
+    // CUresult res = CUDA_OVERRIDE_CALL(cuda_library_entry,cuMemFreeAsync,dptr,hStream);
+    LOG_DEBUG("after free_raw_async dptr=%p res=%d", (void *)dptr, res);
     return res;
 }
 
-CUresult cuMemHostGetDevicePointer_v2(CUdeviceptr *pdptr, void *p, unsigned int Flags){
+CUresult cuMemHostGetDevicePointer_v2(CUdeviceptr *pdptr, void *p, unsigned int Flags) {
     LOG_INFO("cuMemHostGetDevicePointer");
-    return CUDA_OVERRIDE_CALL(cuda_library_entry,cuMemHostGetDevicePointer_v2,pdptr,p,Flags);
+    return CUDA_OVERRIDE_CALL(cuda_library_entry, cuMemHostGetDevicePointer_v2, pdptr, p, Flags);
 }
 
-CUresult cuMemHostGetFlags(unsigned int *pFlags, void *p){
+CUresult cuMemHostGetFlags(unsigned int *pFlags, void *p) {
     LOG_INFO("cuMemHostGetFlags");
-    return CUDA_OVERRIDE_CALL(cuda_library_entry,cuMemHostGetFlags,pFlags,p);
+    return CUDA_OVERRIDE_CALL(cuda_library_entry, cuMemHostGetFlags, pFlags, p);
 }
 
-CUresult cuMemPoolTrimTo(CUmemoryPool pool, size_t minBytesToKeep){
+CUresult cuMemPoolTrimTo(CUmemoryPool pool, size_t minBytesToKeep) {
     LOG_DEBUG("cuMemPoolTrimTo");
-    return CUDA_OVERRIDE_CALL(cuda_library_entry,cuMemPoolTrimTo,pool,minBytesToKeep);
+    return CUDA_OVERRIDE_CALL(cuda_library_entry, cuMemPoolTrimTo, pool, minBytesToKeep);
 }
 
 CUresult cuMemPoolSetAttribute(CUmemoryPool pool, CUmemPool_attribute attr, void *value) {
     LOG_DEBUG("cuMemPoolSetAttribute");
-    return CUDA_OVERRIDE_CALL(cuda_library_entry,cuMemPoolSetAttribute,pool,attr,value);
+    return CUDA_OVERRIDE_CALL(cuda_library_entry, cuMemPoolSetAttribute, pool, attr, value);
 }
 
 CUresult cuMemPoolGetAttribute(CUmemoryPool pool, CUmemPool_attribute attr, void *value) {
-    CUresult res = CUDA_OVERRIDE_CALL(cuda_library_entry,cuMemPoolGetAttribute,pool,attr,value);
-    LOG_INFO("cuMemPoolGetAttribute %d %ld",attr,*(long *)value);
+    CUresult res = CUDA_OVERRIDE_CALL(cuda_library_entry, cuMemPoolGetAttribute, pool, attr, value);
+    LOG_INFO("cuMemPoolGetAttribute %d %ld", attr, *(long *)value);
     return res;
 }
 
 CUresult cuMemPoolSetAccess(CUmemoryPool pool, const CUmemAccessDesc *map, size_t count) {
     LOG_DEBUG("cuMemPoolSetAccess");
-    return CUDA_OVERRIDE_CALL(cuda_library_entry,cuMemPoolSetAccess,pool,map,count);
+    return CUDA_OVERRIDE_CALL(cuda_library_entry, cuMemPoolSetAccess, pool, map, count);
 }
 
-CUresult cuMemPoolGetAccess(CUmemAccess_flags *flags, CUmemoryPool memPool, CUmemLocation *location) {
+CUresult cuMemPoolGetAccess(CUmemAccess_flags *flags, CUmemoryPool memPool,
+                            CUmemLocation *location) {
     LOG_DEBUG("cuMemPoolGetAccess");
-    return CUDA_OVERRIDE_CALL(cuda_library_entry,cuMemPoolGetAccess,flags,memPool,location);
+    return CUDA_OVERRIDE_CALL(cuda_library_entry, cuMemPoolGetAccess, flags, memPool, location);
 }
 
 CUresult cuMemPoolCreate(CUmemoryPool *pool, const CUmemPoolProps *poolProps) {
     LOG_DEBUG("cuMemPoolCreate");
-    return CUDA_OVERRIDE_CALL(cuda_library_entry,cuMemPoolCreate,pool,poolProps);
+    return CUDA_OVERRIDE_CALL(cuda_library_entry, cuMemPoolCreate, pool, poolProps);
 }
 
 CUresult cuMemPoolDestroy(CUmemoryPool pool) {
     LOG_DEBUG("cuMemPoolDestroy");
-    return CUDA_OVERRIDE_CALL(cuda_library_entry,cuMemPoolDestroy,pool);
+    return CUDA_OVERRIDE_CALL(cuda_library_entry, cuMemPoolDestroy, pool);
 }
 
-CUresult cuMemAllocFromPoolAsync(CUdeviceptr *dptr, size_t bytesize, CUmemoryPool pool, CUstream hStream) {
+CUresult cuMemAllocFromPoolAsync(CUdeviceptr *dptr, size_t bytesize, CUmemoryPool pool,
+                                 CUstream hStream) {
     LOG_DEBUG("cuMemAllocFromPoolAsync");
-    return CUDA_OVERRIDE_CALL(cuda_library_entry,cuMemAllocFromPoolAsync,dptr,bytesize,pool,hStream);
+    return CUDA_OVERRIDE_CALL(cuda_library_entry, cuMemAllocFromPoolAsync, dptr, bytesize, pool,
+                              hStream);
 }
 
-CUresult cuMemPoolExportToShareableHandle(void *handle_out, CUmemoryPool pool, CUmemAllocationHandleType handleType, unsigned long long flags) {
+CUresult cuMemPoolExportToShareableHandle(void *handle_out, CUmemoryPool pool,
+                                          CUmemAllocationHandleType handleType,
+                                          unsigned long long flags) {
     LOG_DEBUG("cuMemPoolExportToShareableHandle");
-    return CUDA_OVERRIDE_CALL(cuda_library_entry,cuMemPoolExportToShareableHandle,handle_out,pool,handleType,flags);
+    return CUDA_OVERRIDE_CALL(cuda_library_entry, cuMemPoolExportToShareableHandle, handle_out,
+                              pool, handleType, flags);
 }
 
-CUresult cuMemPoolImportFromShareableHandle(
-        CUmemoryPool *pool_out,
-        void *handle,
-        CUmemAllocationHandleType handleType,
-        unsigned long long flags) {
-            LOG_DEBUG("cuMemPoolImportFromShareableHandle");
-            return CUDA_OVERRIDE_CALL(cuda_library_entry,cuMemPoolImportFromShareableHandle,pool_out,handle,handleType,flags);
-        }
+CUresult cuMemPoolImportFromShareableHandle(CUmemoryPool *pool_out, void *handle,
+                                            CUmemAllocationHandleType handleType,
+                                            unsigned long long flags) {
+    LOG_DEBUG("cuMemPoolImportFromShareableHandle");
+    return CUDA_OVERRIDE_CALL(cuda_library_entry, cuMemPoolImportFromShareableHandle, pool_out,
+                              handle, handleType, flags);
+}
 
 CUresult cuMemPoolExportPointer(CUmemPoolPtrExportData *shareData_out, CUdeviceptr ptr) {
     LOG_DEBUG("cuMemPoolExportPointer");
-    return CUDA_OVERRIDE_CALL(cuda_library_entry,cuMemPoolExportPointer,shareData_out,ptr);
+    return CUDA_OVERRIDE_CALL(cuda_library_entry, cuMemPoolExportPointer, shareData_out, ptr);
 }
 
-CUresult cuMemPoolImportPointer(CUdeviceptr *ptr_out, CUmemoryPool pool, CUmemPoolPtrExportData *shareData) {
+CUresult cuMemPoolImportPointer(CUdeviceptr *ptr_out, CUmemoryPool pool,
+                                CUmemPoolPtrExportData *shareData) {
     LOG_DEBUG("cuMemPoolImportPointer");
-    return CUDA_OVERRIDE_CALL(cuda_library_entry,cuMemPoolImportPointer,ptr_out,pool,shareData);
+    return CUDA_OVERRIDE_CALL(cuda_library_entry, cuMemPoolImportPointer, ptr_out, pool, shareData);
 }
 /*
 CUresult cuMemcpy2D(const CUDA_MEMCPY2D *pCopy) {
@@ -745,7 +787,7 @@ CUresult cuMemcpy2DUnaligned(const CUDA_MEMCPY2D *pCopy) {
 
 CUresult cuMemcpy2DUnaligned_v2(const CUDA_MEMCPY2D *pCopy) {
     LOG_DEBUG("cuMemcpy2DUnaligned_v2");
-    return CUDA_OVERRIDE_CALL(cuda_library_entry,cuMemcpy2DUnaligned_v2,pCopy);
+    return CUDA_OVERRIDE_CALL(cuda_library_entry, cuMemcpy2DUnaligned_v2, pCopy);
 }
 /*
 CUresult cuMemcpy2DAsync(const CUDA_MEMCPY2D *pCopy, CUstream hStream) {
@@ -755,7 +797,7 @@ CUresult cuMemcpy2DAsync(const CUDA_MEMCPY2D *pCopy, CUstream hStream) {
 
 CUresult cuMemcpy2DAsync_v2(const CUDA_MEMCPY2D *pCopy, CUstream hStream) {
     LOG_DEBUG("cuMemcpy2DAsync_v2");
-    return CUDA_OVERRIDE_CALL(cuda_library_entry,cuMemcpy2DAsync,pCopy,hStream);
+    return CUDA_OVERRIDE_CALL(cuda_library_entry, cuMemcpy2DAsync_v2, pCopy, hStream);
 }
 
 /*
@@ -766,7 +808,7 @@ CUresult cuMemcpy3D(const CUDA_MEMCPY3D *pCopy) {
 
 CUresult cuMemcpy3D_v2(const CUDA_MEMCPY3D *pCopy) {
     LOG_DEBUG("cuMemcpy3D_v2");
-    return CUDA_OVERRIDE_CALL(cuda_library_entry,cuMemcpy3D_v2,pCopy);
+    return CUDA_OVERRIDE_CALL(cuda_library_entry, cuMemcpy3D_v2, pCopy);
 }
 /*
 CUresult cuMemcpy3DAsync(const CUDA_MEMCPY3D *pCopy, CUstream hStream) {
@@ -776,79 +818,101 @@ CUresult cuMemcpy3DAsync(const CUDA_MEMCPY3D *pCopy, CUstream hStream) {
 
 CUresult cuMemcpy3DAsync_v2(const CUDA_MEMCPY3D *pCopy, CUstream hStream) {
     LOG_DEBUG("cuMemcpy3DAsync_v2");
-    return CUDA_OVERRIDE_CALL(cuda_library_entry,cuMemcpy3DAsync_v2,pCopy,hStream);
+    return CUDA_OVERRIDE_CALL(cuda_library_entry, cuMemcpy3DAsync_v2, pCopy, hStream);
 }
 
 CUresult cuMemcpy3DPeer(const CUDA_MEMCPY3D_PEER *pCopy) {
     LOG_DEBUG("cuMemcpy3DPeer");
-    return CUDA_OVERRIDE_CALL(cuda_library_entry,cuMemcpy3DPeer,pCopy);
+    return CUDA_OVERRIDE_CALL(cuda_library_entry, cuMemcpy3DPeer, pCopy);
 }
 
 CUresult cuMemcpy3DPeerAsync(const CUDA_MEMCPY3D_PEER *pCopy, CUstream hStream) {
     LOG_DEBUG("cuMemcpy3DPeerAsync");
-    return CUDA_OVERRIDE_CALL(cuda_library_entry,cuMemcpy3DPeerAsync,pCopy,hStream);
+    return CUDA_OVERRIDE_CALL(cuda_library_entry, cuMemcpy3DPeerAsync, pCopy, hStream);
 }
 
 #if CUDA_VERSION < 13000
-CUresult cuMemPrefetchAsync(CUdeviceptr devPtr, size_t count, CUdevice dstDevice, CUstream hStream) {
+CUresult cuMemPrefetchAsync(CUdeviceptr devPtr, size_t count, CUdevice dstDevice,
+                            CUstream hStream) {
     LOG_DEBUG("cuMemPrefetchAsync");
-    return CUDA_OVERRIDE_CALL(cuda_library_entry,cuMemPrefetchAsync,devPtr,count,dstDevice,hStream);
+    return CUDA_OVERRIDE_CALL(cuda_library_entry, cuMemPrefetchAsync, devPtr, count, dstDevice,
+                              hStream);
 }
 #endif
 
-CUresult cuMemPrefetchAsync_v2(CUdeviceptr devPtr, size_t count, CUmemLocation location, unsigned int flags,
-                               CUstream hStream) {
+CUresult cuMemPrefetchAsync_v2(CUdeviceptr devPtr, size_t count, CUmemLocation location,
+                               unsigned int flags, CUstream hStream) {
     LOG_DEBUG("cuMemPrefetchAsync_v2");
-    return CUDA_OVERRIDE_CALL(cuda_library_entry, cuMemPrefetchAsync_v2, devPtr, count, location, flags, hStream);
+    return CUDA_OVERRIDE_CALL(cuda_library_entry, cuMemPrefetchAsync_v2, devPtr, count, location,
+                              flags, hStream);
 }
 
-CUresult cuMemRangeGetAttribute(void *data, size_t dataSize, CUmem_range_attribute attribute, CUdeviceptr devPtr, size_t count) {
+CUresult cuMemRangeGetAttribute(void *data, size_t dataSize, CUmem_range_attribute attribute,
+                                CUdeviceptr devPtr, size_t count) {
     LOG_DEBUG("cuMemRangeGetAttribute");
-    return CUDA_OVERRIDE_CALL(cuda_library_entry,cuMemRangeGetAttribute,data,dataSize,attribute,devPtr,count);
+    return CUDA_OVERRIDE_CALL(cuda_library_entry, cuMemRangeGetAttribute, data, dataSize, attribute,
+                              devPtr, count);
 }
 
-CUresult cuMemRangeGetAttributes(void **data, size_t *dataSizes, CUmem_range_attribute *attributes, size_t numAttributes, CUdeviceptr devPtr, size_t count) {
+CUresult cuMemRangeGetAttributes(void **data, size_t *dataSizes, CUmem_range_attribute *attributes,
+                                 size_t numAttributes, CUdeviceptr devPtr, size_t count) {
     LOG_DEBUG("cuMemRangeGetAttributes");
-    return CUDA_OVERRIDE_CALL(cuda_library_entry,cuMemRangeGetAttributes,data,dataSizes,attributes,numAttributes,devPtr,count);
+    return CUDA_OVERRIDE_CALL(cuda_library_entry, cuMemRangeGetAttributes, data, dataSizes,
+                              attributes, numAttributes, devPtr, count);
 }
 
 /* External Resource Management */
-CUresult cuImportExternalMemory(CUexternalMemory *extMem_out, const CUDA_EXTERNAL_MEMORY_HANDLE_DESC *memHandleDesc) {
+CUresult cuImportExternalMemory(CUexternalMemory *extMem_out,
+                                const CUDA_EXTERNAL_MEMORY_HANDLE_DESC *memHandleDesc) {
     LOG_DEBUG("cuImportExternalMemory");
-    return CUDA_OVERRIDE_CALL(cuda_library_entry,cuImportExternalMemory,extMem_out,memHandleDesc);
+    return CUDA_OVERRIDE_CALL(cuda_library_entry, cuImportExternalMemory, extMem_out,
+                              memHandleDesc);
 }
 
-CUresult cuExternalMemoryGetMappedBuffer(CUdeviceptr *devPtr, CUexternalMemory extMem, const CUDA_EXTERNAL_MEMORY_BUFFER_DESC *bufferDesc) {
+CUresult cuExternalMemoryGetMappedBuffer(CUdeviceptr *devPtr, CUexternalMemory extMem,
+                                         const CUDA_EXTERNAL_MEMORY_BUFFER_DESC *bufferDesc) {
     LOG_DEBUG("cuExternalMemoryGetMappedBuffer");
-    return CUDA_OVERRIDE_CALL(cuda_library_entry,cuExternalMemoryGetMappedBuffer,devPtr,extMem,bufferDesc);
+    return CUDA_OVERRIDE_CALL(cuda_library_entry, cuExternalMemoryGetMappedBuffer, devPtr, extMem,
+                              bufferDesc);
 }
 
-CUresult cuExternalMemoryGetMappedMipmappedArray(CUmipmappedArray *mipmap, CUexternalMemory extMem, const CUDA_EXTERNAL_MEMORY_MIPMAPPED_ARRAY_DESC *mipmapDesc) {
+CUresult cuExternalMemoryGetMappedMipmappedArray(
+    CUmipmappedArray *mipmap, CUexternalMemory extMem,
+    const CUDA_EXTERNAL_MEMORY_MIPMAPPED_ARRAY_DESC *mipmapDesc) {
     LOG_DEBUG("cuExternalMemoryGetMappedMipmappedArray");
-    return CUDA_OVERRIDE_CALL(cuda_library_entry,cuExternalMemoryGetMappedMipmappedArray,mipmap,extMem,mipmapDesc);
+    return CUDA_OVERRIDE_CALL(cuda_library_entry, cuExternalMemoryGetMappedMipmappedArray, mipmap,
+                              extMem, mipmapDesc);
 }
 
 CUresult cuDestroyExternalMemory(CUexternalMemory extMem) {
     LOG_DEBUG("cuDestroyExternalMemory");
-    return CUDA_OVERRIDE_CALL(cuda_library_entry,cuDestroyExternalMemory,extMem);
+    return CUDA_OVERRIDE_CALL(cuda_library_entry, cuDestroyExternalMemory, extMem);
 }
 
-CUresult cuImportExternalSemaphore(CUexternalSemaphore *extSem_out, const CUDA_EXTERNAL_SEMAPHORE_HANDLE_DESC *semHandleDesc) {
+CUresult cuImportExternalSemaphore(CUexternalSemaphore *extSem_out,
+                                   const CUDA_EXTERNAL_SEMAPHORE_HANDLE_DESC *semHandleDesc) {
     LOG_DEBUG("cuImportExternalSemaphore");
-    return CUDA_OVERRIDE_CALL(cuda_library_entry,cuImportExternalSemaphore,extSem_out,semHandleDesc);
+    return CUDA_OVERRIDE_CALL(cuda_library_entry, cuImportExternalSemaphore, extSem_out,
+                              semHandleDesc);
 }
 
-CUresult cuSignalExternalSemaphoresAsync(const CUexternalSemaphore *extSemArray, const CUDA_EXTERNAL_SEMAPHORE_SIGNAL_PARAMS *paramsArray, unsigned int numExtSems, CUstream stream) {
+CUresult cuSignalExternalSemaphoresAsync(const CUexternalSemaphore *extSemArray,
+                                         const CUDA_EXTERNAL_SEMAPHORE_SIGNAL_PARAMS *paramsArray,
+                                         unsigned int numExtSems, CUstream stream) {
     LOG_DEBUG("cuSignalExternalSemaphoresAsync");
-    return CUDA_OVERRIDE_CALL(cuda_library_entry,cuSignalExternalSemaphoresAsync,extSemArray,paramsArray,numExtSems,stream);
+    return CUDA_OVERRIDE_CALL(cuda_library_entry, cuSignalExternalSemaphoresAsync, extSemArray,
+                              paramsArray, numExtSems, stream);
 }
 
-CUresult cuWaitExternalSemaphoresAsync(const CUexternalSemaphore *extSemArray, const CUDA_EXTERNAL_SEMAPHORE_WAIT_PARAMS *paramsArray, unsigned int numExtSems, CUstream stream) {
+CUresult cuWaitExternalSemaphoresAsync(const CUexternalSemaphore *extSemArray,
+                                       const CUDA_EXTERNAL_SEMAPHORE_WAIT_PARAMS *paramsArray,
+                                       unsigned int numExtSems, CUstream stream) {
     LOG_DEBUG("cuWaitExternalSemaphoresAsync");
-    return CUDA_OVERRIDE_CALL(cuda_library_entry,cuWaitExternalSemaphoresAsync,extSemArray,paramsArray,numExtSems,stream);
+    return CUDA_OVERRIDE_CALL(cuda_library_entry, cuWaitExternalSemaphoresAsync, extSemArray,
+                              paramsArray, numExtSems, stream);
 }
 
 CUresult cuDestroyExternalSemaphore(CUexternalSemaphore extSem) {
     LOG_DEBUG("cuDestroyExternalSemaphore");
-    return CUDA_OVERRIDE_CALL(cuda_library_entry,cuDestroyExternalSemaphore,extSem);
+    return CUDA_OVERRIDE_CALL(cuda_library_entry, cuDestroyExternalSemaphore, extSem);
 }
diff --git a/src/cuda/stream.c b/src/cuda/stream.c
old mode 100755
new mode 100644
index a8643e14..c37289d7
--- a/src/cuda/stream.c
+++ b/src/cuda/stream.c
@@ -1,18 +1,18 @@
 #include "include/libcuda_hook.h"
 
-CUresult cuStreamCreate(CUstream *phstream, unsigned int flags){
-    LOG_INFO("cuStreamCreate %p",phstream);
-    CUresult res = CUDA_OVERRIDE_CALL(cuda_library_entry,cuStreamCreate,phstream,flags);
+CUresult cuStreamCreate(CUstream *phstream, unsigned int flags) {
+    LOG_INFO("cuStreamCreate %p", phstream);
+    CUresult res = CUDA_OVERRIDE_CALL(cuda_library_entry, cuStreamCreate, phstream, flags);
     return res;
 }
 
-CUresult cuStreamDestroy_v2 ( CUstream hStream ){
-    LOG_DEBUG("cuStreamDestroy_v2 %p",hStream);
-    return CUDA_OVERRIDE_CALL(cuda_library_entry,cuStreamDestroy_v2,hStream);
+CUresult cuStreamDestroy_v2(CUstream hStream) {
+    LOG_DEBUG("cuStreamDestroy_v2 %p", hStream);
+    return CUDA_OVERRIDE_CALL(cuda_library_entry, cuStreamDestroy_v2, hStream);
 }
 
-CUresult cuStreamSynchronize(CUstream hstream){
-    LOG_DEBUG("cuStreamSync %p",hstream);
-    CUresult res = CUDA_OVERRIDE_CALL(cuda_library_entry,cuStreamSynchronize,hstream);
+CUresult cuStreamSynchronize(CUstream hstream) {
+    LOG_DEBUG("cuStreamSync %p", hstream);
+    CUresult res = CUDA_OVERRIDE_CALL(cuda_library_entry, cuStreamSynchronize, hstream);
     return res;
 }
diff --git a/src/include/libcuda_hook.h b/src/include/libcuda_hook.h
index 1e5cb777..ed5eaf92 100644
--- a/src/include/libcuda_hook.h
+++ b/src/include/libcuda_hook.h
@@ -1,23 +1,24 @@
 #ifndef __LIBCUDA_HOOK_H__
 #define __LIBCUDA_HOOK_H__
 
+#include <dlfcn.h>
+#include <fcntl.h>
 #include <inttypes.h>
 #include <limits.h>
 #include <stdarg.h>
 #include <stdint.h>
 #include <stdio.h>
 #include <stdlib.h>
-#include <fcntl.h>
 #include <unistd.h>
-#include <dlfcn.h>
 #define NVML_NO_UNVERSIONED_FUNC_DEFS
 #include <cuda.h>
 #include <pthread.h>
+
 #include "include/log_utils.h"
 
 typedef struct {
-  void *fn_ptr;
-  char *name;
+    void *fn_ptr;
+    char *name;
 } cuda_entry_t;
 
 #define FILENAME_MAX 4096
@@ -30,15 +31,15 @@ typedef CUresult (*cuda_sym_t)();
 
 #define CUDA_FIND_ENTRY(table, sym) ({ (table)[CUDA_OVERRIDE_ENUM(sym)].fn_ptr; })
 
-#define CUDA_OVERRIDE_CALL(table, sym, ...)                                    \
-  ({    \
-    LOG_DEBUG("Hijacking %s", #sym);                                           \
-    cuda_sym_t _entry = (cuda_sym_t)CUDA_FIND_ENTRY(table, sym);               \
-    if (_entry == NULL) {                                                      \
-      LOG_ERROR("Hijack failed: %s is NULL", #sym);                            \
-    }                                                                          \
-    _entry(__VA_ARGS__);                                                       \
-  })
+#define CUDA_OVERRIDE_CALL(table, sym, ...)                          \
+    ({                                                               \
+        LOG_DEBUG("Hijacking %s", #sym);                             \
+        cuda_sym_t _entry = (cuda_sym_t)CUDA_FIND_ENTRY(table, sym); \
+        if (_entry == NULL) {                                        \
+            LOG_ERROR("Hijack failed: %s is NULL", #sym);            \
+        }                                                            \
+        _entry ? _entry(__VA_ARGS__) : CUDA_ERROR_NOT_FOUND;         \
+    })
 
 typedef enum {
     /* cuInit Part */
@@ -87,7 +88,7 @@ typedef enum {
     CUDA_OVERRIDE_ENUM(cuCtxSetLimit),
     CUDA_OVERRIDE_ENUM(cuCtxSetSharedMemConfig),
     CUDA_OVERRIDE_ENUM(cuCtxSynchronize),
-    //CUDA_OVERRIDE_ENUM(cuCtxEnablePeerAccess),
+    // CUDA_OVERRIDE_ENUM(cuCtxEnablePeerAccess),
     CUDA_OVERRIDE_ENUM(cuGetExportTable),
 
     /* cuStream Part */
@@ -268,14 +269,14 @@ typedef enum {
     CUDA_OVERRIDE_ENUM(cuGetProcAddress),
     CUDA_OVERRIDE_ENUM(cuGetProcAddress_v2),
     CUDA_ENTRY_END
-}cuda_override_enum_t;
+} cuda_override_enum_t;
 
 extern cuda_entry_t cuda_library_entry[];
 
 #endif
 
 #undef cuGetProcAddress
-CUresult cuGetProcAddress( const char* symbol, void** pfn, int  cudaVersion, cuuint64_t flags );
+CUresult cuGetProcAddress(const char *symbol, void **pfn, int cudaVersion, cuuint64_t flags);
 #undef cuGraphInstantiate
-CUresult cuGraphInstantiate(CUgraphExec *phGraphExec, CUgraph hGraph, CUgraphNode *phErrorNode, char *logBuffer, size_t bufferSize);
-
+CUresult cuGraphInstantiate(CUgraphExec *phGraphExec, CUgraph hGraph, CUgraphNode *phErrorNode,
+                            char *logBuffer, size_t bufferSize);
diff --git a/src/include/libnvml_hook.h b/src/include/libnvml_hook.h
index 0e0b564e..03dd1efe 100644
--- a/src/include/libnvml_hook.h
+++ b/src/include/libnvml_hook.h
@@ -1,19 +1,20 @@
 #ifndef __LIBNVML_HOOK_H__
 #define __LIBNVML_HOOK_H__
 
+#include <cuda.h>
+#include <dlfcn.h>
+#include <fcntl.h>
 #include <inttypes.h>
 #include <limits.h>
+#include <pthread.h>
 #include <stdarg.h>
 #include <stdint.h>
 #include <stdio.h>
 #include <stdlib.h>
-#include <fcntl.h>
 #include <unistd.h>
-#include <dlfcn.h>
-#include <cuda.h>
-#include <pthread.h>
-#include "include/nvml-subset.h"
+
 #include "include/log_utils.h"
+#include "include/nvml-subset.h"
 #include "include/nvml_prefix.h"
 
 #define FILENAME_MAX 4096
@@ -24,512 +25,512 @@ typedef nvmlReturn_t (*driver_sym_t)();
 
 #define NVML_FIND_ENTRY(table, sym) ({ (table)[NVML_OVERRIDE_ENUM(sym)].fn_ptr; })
 
-#define NVML_OVERRIDE_CALL(table, sym, ...)                                    \
-  ({                                                                           \
-    LOG_DEBUG("Hijacking %s", #sym);                                           \
-    driver_sym_t _entry = NVML_FIND_ENTRY(table, sym);                         \
-    _entry(__VA_ARGS__);                                                       \
-  })
+#define NVML_OVERRIDE_CALL(table, sym, ...)                \
+    ({                                                     \
+        LOG_DEBUG("Hijacking %s", #sym);                   \
+        driver_sym_t _entry = NVML_FIND_ENTRY(table, sym); \
+        _entry(__VA_ARGS__);                               \
+    })
 
-#define NVML_OVERRIDE_CALL_NO_LOG(table, sym, ...)                             \
-  ({                                                                           \
-    driver_sym_t _entry = NVML_FIND_ENTRY(table, sym);                         \
-    _entry(__VA_ARGS__);                                                       \
-  })
+#define NVML_OVERRIDE_CALL_NO_LOG(table, sym, ...)         \
+    ({                                                     \
+        driver_sym_t _entry = NVML_FIND_ENTRY(table, sym); \
+        _entry(__VA_ARGS__);                               \
+    })
 
 /**
  * NVML management library enumerator entry
  */
 typedef enum {
-  /** nvmlInit */
-  NVML_OVERRIDE_ENUM(nvmlInit),
-  /** nvmlShutdown */
-  NVML_OVERRIDE_ENUM(nvmlShutdown),
-  /** nvmlErrorString */
-  NVML_OVERRIDE_ENUM(nvmlErrorString),
-  /** nvmlDeviceGetHandleByIndex */
-  NVML_OVERRIDE_ENUM(nvmlDeviceGetHandleByIndex),
-  /** nvmlDeviceGetComputeRunningProcesses */
-  NVML_OVERRIDE_ENUM(nvmlDeviceGetComputeRunningProcesses),
-  /** nvmlDeviceGetPciInfo */
-  NVML_OVERRIDE_ENUM(nvmlDeviceGetPciInfo),
-  /** nvmlDeviceGetProcessUtilization */
-  NVML_OVERRIDE_ENUM(nvmlDeviceGetProcessUtilization),
-  /** nvmlDeviceGetCount */
-  NVML_OVERRIDE_ENUM(nvmlDeviceGetCount),
-  /** nvmlDeviceClearAccountingPids */
-  NVML_OVERRIDE_ENUM(nvmlDeviceClearAccountingPids),
-  /** nvmlDeviceClearCpuAffinity */
-  NVML_OVERRIDE_ENUM(nvmlDeviceClearCpuAffinity),
-  /** nvmlDeviceClearEccErrorCounts */
-  NVML_OVERRIDE_ENUM(nvmlDeviceClearEccErrorCounts),
-  /** nvmlDeviceDiscoverGpus */
-  NVML_OVERRIDE_ENUM(nvmlDeviceDiscoverGpus),
-  /** nvmlDeviceFreezeNvLinkUtilizationCounter */
-  NVML_OVERRIDE_ENUM(nvmlDeviceFreezeNvLinkUtilizationCounter),
-  /** nvmlDeviceGetAccountingBufferSize */
-  NVML_OVERRIDE_ENUM(nvmlDeviceGetAccountingBufferSize),
-  /** nvmlDeviceGetAccountingMode */
-  NVML_OVERRIDE_ENUM(nvmlDeviceGetAccountingMode),
-  /** nvmlDeviceGetAccountingPids */
-  NVML_OVERRIDE_ENUM(nvmlDeviceGetAccountingPids),
-  /** nvmlDeviceGetAccountingStats */
-  NVML_OVERRIDE_ENUM(nvmlDeviceGetAccountingStats),
-  /** nvmlDeviceGetActiveVgpus */
-  NVML_OVERRIDE_ENUM(nvmlDeviceGetActiveVgpus),
-  /** nvmlDeviceGetAPIRestriction */
-  NVML_OVERRIDE_ENUM(nvmlDeviceGetAPIRestriction),
-  /** nvmlDeviceGetApplicationsClock */
-  NVML_OVERRIDE_ENUM(nvmlDeviceGetApplicationsClock),
-  /** nvmlDeviceGetAutoBoostedClocksEnabled */
-  NVML_OVERRIDE_ENUM(nvmlDeviceGetAutoBoostedClocksEnabled),
-  /** nvmlDeviceGetBAR1MemoryInfo */
-  NVML_OVERRIDE_ENUM(nvmlDeviceGetBAR1MemoryInfo),
-  /** nvmlDeviceGetBoardId */
-  NVML_OVERRIDE_ENUM(nvmlDeviceGetBoardId),
-  /** nvmlDeviceGetBoardPartNumber */
-  NVML_OVERRIDE_ENUM(nvmlDeviceGetBoardPartNumber),
-  /** nvmlDeviceGetBrand */
-  NVML_OVERRIDE_ENUM(nvmlDeviceGetBrand),
-  /** nvmlDeviceGetBridgeChipInfo */
-  NVML_OVERRIDE_ENUM(nvmlDeviceGetBridgeChipInfo),
-  /** nvmlDeviceGetClock */
-  NVML_OVERRIDE_ENUM(nvmlDeviceGetClock),
-  /** nvmlDeviceGetClockInfo */
-  NVML_OVERRIDE_ENUM(nvmlDeviceGetClockInfo),
-  /** nvmlDeviceGetComputeMode */
-  NVML_OVERRIDE_ENUM(nvmlDeviceGetComputeMode),
-  /** nvmlDeviceGetCount_v2 */
-  NVML_OVERRIDE_ENUM(nvmlDeviceGetCount_v2),
-  /** nvmlDeviceGetCpuAffinity */
-  NVML_OVERRIDE_ENUM(nvmlDeviceGetCpuAffinity),
-  /** nvmlDeviceGetCreatableVgpus */
-  NVML_OVERRIDE_ENUM(nvmlDeviceGetCreatableVgpus),
-  /** nvmlDeviceGetCudaComputeCapability */
-  NVML_OVERRIDE_ENUM(nvmlDeviceGetCudaComputeCapability),
-  /** nvmlDeviceGetCurrentClocksThrottleReasons */
-  NVML_OVERRIDE_ENUM(nvmlDeviceGetCurrentClocksThrottleReasons),
-  /** nvmlDeviceGetCurrPcieLinkGeneration */
-  NVML_OVERRIDE_ENUM(nvmlDeviceGetCurrPcieLinkGeneration),
-  /** nvmlDeviceGetCurrPcieLinkWidth */
-  NVML_OVERRIDE_ENUM(nvmlDeviceGetCurrPcieLinkWidth),
-  /** nvmlDeviceGetDecoderUtilization */
-  NVML_OVERRIDE_ENUM(nvmlDeviceGetDecoderUtilization),
-  /** nvmlDeviceGetDefaultApplicationsClock */
-  NVML_OVERRIDE_ENUM(nvmlDeviceGetDefaultApplicationsClock),
-  /** nvmlDeviceGetDetailedEccErrors */
-  NVML_OVERRIDE_ENUM(nvmlDeviceGetDetailedEccErrors),
-  /** nvmlDeviceGetDisplayActive */
-  NVML_OVERRIDE_ENUM(nvmlDeviceGetDisplayActive),
-  /** nvmlDeviceGetDisplayMode */
-  NVML_OVERRIDE_ENUM(nvmlDeviceGetDisplayMode),
-  /** nvmlDeviceGetDriverModel */
-  NVML_OVERRIDE_ENUM(nvmlDeviceGetDriverModel),
-  /** nvmlDeviceGetEccMode */
-  NVML_OVERRIDE_ENUM(nvmlDeviceGetEccMode),
-  /** nvmlDeviceGetEncoderCapacity */
-  NVML_OVERRIDE_ENUM(nvmlDeviceGetEncoderCapacity),
-  /** nvmlDeviceGetEncoderSessions */
-  NVML_OVERRIDE_ENUM(nvmlDeviceGetEncoderSessions),
-  /** nvmlDeviceGetEncoderStats */
-  NVML_OVERRIDE_ENUM(nvmlDeviceGetEncoderStats),
-  /** nvmlDeviceGetEncoderUtilization */
-  NVML_OVERRIDE_ENUM(nvmlDeviceGetEncoderUtilization),
-  /** nvmlDeviceGetEnforcedPowerLimit */
-  NVML_OVERRIDE_ENUM(nvmlDeviceGetEnforcedPowerLimit),
-  /** nvmlDeviceGetFanSpeed */
-  NVML_OVERRIDE_ENUM(nvmlDeviceGetFanSpeed),
-  /** nvmlDeviceGetFanSpeed_v2 */
-  NVML_OVERRIDE_ENUM(nvmlDeviceGetFanSpeed_v2),
-  /** nvmlDeviceGetFieldValues */
-  NVML_OVERRIDE_ENUM(nvmlDeviceGetFieldValues),
-  /** nvmlDeviceGetGpuOperationMode */
-  NVML_OVERRIDE_ENUM(nvmlDeviceGetGpuOperationMode),
-  /** nvmlDeviceGetGraphicsRunningProcesses */
-  NVML_OVERRIDE_ENUM(nvmlDeviceGetGraphicsRunningProcesses),
-  /** nvmlDeviceGetGridLicensableFeatures */
-  NVML_OVERRIDE_ENUM(nvmlDeviceGetGridLicensableFeatures),
-  /** nvmlDeviceGetHandleByIndex_v2 */
-  NVML_OVERRIDE_ENUM(nvmlDeviceGetHandleByIndex_v2),
-  /** nvmlDeviceGetHandleByPciBusId */
-  NVML_OVERRIDE_ENUM(nvmlDeviceGetHandleByPciBusId),
-  /** nvmlDeviceGetHandleByPciBusId_v2 */
-  NVML_OVERRIDE_ENUM(nvmlDeviceGetHandleByPciBusId_v2),
-  /** nvmlDeviceGetHandleBySerial */
-  NVML_OVERRIDE_ENUM(nvmlDeviceGetHandleBySerial),
-  /** nvmlDeviceGetHandleByUUID */
-  NVML_OVERRIDE_ENUM(nvmlDeviceGetHandleByUUID),
-  /** nvmlDeviceGetIndex */
-  NVML_OVERRIDE_ENUM(nvmlDeviceGetIndex),
-  /** nvmlDeviceGetInforomConfigurationChecksum */
-  NVML_OVERRIDE_ENUM(nvmlDeviceGetInforomConfigurationChecksum),
-  /** nvmlDeviceGetInforomImageVersion */
-  NVML_OVERRIDE_ENUM(nvmlDeviceGetInforomImageVersion),
-  /** nvmlDeviceGetInforomVersion */
-  NVML_OVERRIDE_ENUM(nvmlDeviceGetInforomVersion),
-  /** nvmlDeviceGetMaxClockInfo */
-  NVML_OVERRIDE_ENUM(nvmlDeviceGetMaxClockInfo),
-  /** nvmlDeviceGetMaxCustomerBoostClock */
-  NVML_OVERRIDE_ENUM(nvmlDeviceGetMaxCustomerBoostClock),
-  /** nvmlDeviceGetMaxPcieLinkGeneration */
-  NVML_OVERRIDE_ENUM(nvmlDeviceGetMaxPcieLinkGeneration),
-  /** nvmlDeviceGetMaxPcieLinkWidth */
-  NVML_OVERRIDE_ENUM(nvmlDeviceGetMaxPcieLinkWidth),
-  /** nvmlDeviceGetMemoryErrorCounter */
-  NVML_OVERRIDE_ENUM(nvmlDeviceGetMemoryErrorCounter),
-  /** nvmlDeviceGetMemoryInfo */
-  NVML_OVERRIDE_ENUM(nvmlDeviceGetMemoryInfo),
-  /** nvmlDeviceGetMemoryInfo_v2 */
-  NVML_OVERRIDE_ENUM(nvmlDeviceGetMemoryInfo_v2),
-  /** nvmlDeviceGetMinorNumber */
-  NVML_OVERRIDE_ENUM(nvmlDeviceGetMinorNumber),
-  /** nvmlDeviceGetMPSComputeRunningProcesses */
-  NVML_OVERRIDE_ENUM(nvmlDeviceGetMPSComputeRunningProcesses),
-  /** nvmlDeviceGetMultiGpuBoard */
-  NVML_OVERRIDE_ENUM(nvmlDeviceGetMultiGpuBoard),
-  /** nvmlDeviceGetName */
-  NVML_OVERRIDE_ENUM(nvmlDeviceGetName),
-  /** nvmlDeviceGetNvLinkCapability */
-  NVML_OVERRIDE_ENUM(nvmlDeviceGetNvLinkCapability),
-  /** nvmlDeviceGetNvLinkErrorCounter */
-  NVML_OVERRIDE_ENUM(nvmlDeviceGetNvLinkErrorCounter),
-  /** nvmlDeviceGetNvLinkRemotePciInfo */
-  NVML_OVERRIDE_ENUM(nvmlDeviceGetNvLinkRemotePciInfo),
-  /** nvmlDeviceGetNvLinkRemotePciInfo_v2 */
-  NVML_OVERRIDE_ENUM(nvmlDeviceGetNvLinkRemotePciInfo_v2),
-  /** nvmlDeviceGetNvLinkState */
-  NVML_OVERRIDE_ENUM(nvmlDeviceGetNvLinkState),
-  /** nvmlDeviceGetNvLinkUtilizationControl */
-  NVML_OVERRIDE_ENUM(nvmlDeviceGetNvLinkUtilizationControl),
-  /** nvmlDeviceGetNvLinkUtilizationCounter */
-  NVML_OVERRIDE_ENUM(nvmlDeviceGetNvLinkUtilizationCounter),
-  /** nvmlDeviceGetNvLinkVersion */
-  NVML_OVERRIDE_ENUM(nvmlDeviceGetNvLinkVersion),
-  /** nvmlDeviceGetP2PStatus */
-  NVML_OVERRIDE_ENUM(nvmlDeviceGetP2PStatus),
-  /** nvmlDeviceGetPcieReplayCounter */
-  NVML_OVERRIDE_ENUM(nvmlDeviceGetPcieReplayCounter),
-  /** nvmlDeviceGetPcieThroughput */
-  NVML_OVERRIDE_ENUM(nvmlDeviceGetPcieThroughput),
-  /** nvmlDeviceGetPciInfo_v2 */
-  NVML_OVERRIDE_ENUM(nvmlDeviceGetPciInfo_v2),
-  /** nvmlDeviceGetPciInfo_v3 */
-  NVML_OVERRIDE_ENUM(nvmlDeviceGetPciInfo_v3),
-  /** nvmlDeviceGetPerformanceState */
-  NVML_OVERRIDE_ENUM(nvmlDeviceGetPerformanceState),
-  /** nvmlDeviceGetPersistenceMode */
-  NVML_OVERRIDE_ENUM(nvmlDeviceGetPersistenceMode),
-  /** nvmlDeviceGetPowerManagementDefaultLimit */
-  NVML_OVERRIDE_ENUM(nvmlDeviceGetPowerManagementDefaultLimit),
-  /** nvmlDeviceGetPowerManagementLimit */
-  NVML_OVERRIDE_ENUM(nvmlDeviceGetPowerManagementLimit),
-  /** nvmlDeviceGetPowerManagementLimitConstraints */
-  NVML_OVERRIDE_ENUM(nvmlDeviceGetPowerManagementLimitConstraints),
-  /** nvmlDeviceGetPowerManagementMode */
-  NVML_OVERRIDE_ENUM(nvmlDeviceGetPowerManagementMode),
-  /** nvmlDeviceGetPowerState */
-  NVML_OVERRIDE_ENUM(nvmlDeviceGetPowerState),
-  /** nvmlDeviceGetPowerUsage */
-  NVML_OVERRIDE_ENUM(nvmlDeviceGetPowerUsage),
-  /** nvmlDeviceGetRetiredPages */
-  NVML_OVERRIDE_ENUM(nvmlDeviceGetRetiredPages),
-  /** nvmlDeviceGetRetiredPagesPendingStatus */
-  NVML_OVERRIDE_ENUM(nvmlDeviceGetRetiredPagesPendingStatus),
-  /** nvmlDeviceGetSamples */
-  NVML_OVERRIDE_ENUM(nvmlDeviceGetSamples),
-  /** nvmlDeviceGetSerial */
-  NVML_OVERRIDE_ENUM(nvmlDeviceGetSerial),
-  /** nvmlDeviceGetSupportedClocksThrottleReasons */
-  NVML_OVERRIDE_ENUM(nvmlDeviceGetSupportedClocksThrottleReasons),
-  /** nvmlDeviceGetSupportedEventTypes */
-  NVML_OVERRIDE_ENUM(nvmlDeviceGetSupportedEventTypes),
-  /** nvmlDeviceGetSupportedGraphicsClocks */
-  NVML_OVERRIDE_ENUM(nvmlDeviceGetSupportedGraphicsClocks),
-  /** nvmlDeviceGetSupportedMemoryClocks */
-  NVML_OVERRIDE_ENUM(nvmlDeviceGetSupportedMemoryClocks),
-  /** nvmlDeviceGetSupportedVgpus */
-  NVML_OVERRIDE_ENUM(nvmlDeviceGetSupportedVgpus),
-  /** nvmlDeviceGetTemperature */
-  NVML_OVERRIDE_ENUM(nvmlDeviceGetTemperature),
-  /** nvmlDeviceGetTemperatureThreshold */
-  NVML_OVERRIDE_ENUM(nvmlDeviceGetTemperatureThreshold),
-  /** nvmlDeviceGetTopologyCommonAncestor */
-  NVML_OVERRIDE_ENUM(nvmlDeviceGetTopologyCommonAncestor),
-  /** nvmlDeviceGetTopologyNearestGpus */
-  NVML_OVERRIDE_ENUM(nvmlDeviceGetTopologyNearestGpus),
-  /** nvmlDeviceGetTotalEccErrors */
-  NVML_OVERRIDE_ENUM(nvmlDeviceGetTotalEccErrors),
-  /** nvmlDeviceGetTotalEnergyConsumption */
-  NVML_OVERRIDE_ENUM(nvmlDeviceGetTotalEnergyConsumption),
-  /** nvmlDeviceGetUtilizationRates */
-  NVML_OVERRIDE_ENUM(nvmlDeviceGetUtilizationRates),
-  /** nvmlDeviceGetUUID */
-  NVML_OVERRIDE_ENUM(nvmlDeviceGetUUID),
-  /** nvmlDeviceGetVbiosVersion */
-  NVML_OVERRIDE_ENUM(nvmlDeviceGetVbiosVersion),
-  /** nvmlDeviceGetVgpuMetadata */
-  NVML_OVERRIDE_ENUM(nvmlDeviceGetVgpuMetadata),
-  /** nvmlDeviceGetVgpuProcessUtilization */
-  NVML_OVERRIDE_ENUM(nvmlDeviceGetVgpuProcessUtilization),
-  /** nvmlDeviceGetVgpuUtilization */
-  NVML_OVERRIDE_ENUM(nvmlDeviceGetVgpuUtilization),
-  /** nvmlDeviceGetViolationStatus */
-  NVML_OVERRIDE_ENUM(nvmlDeviceGetViolationStatus),
-  /** nvmlDeviceGetVirtualizationMode */
-  NVML_OVERRIDE_ENUM(nvmlDeviceGetVirtualizationMode),
-  /** nvmlDeviceModifyDrainState */
-  NVML_OVERRIDE_ENUM(nvmlDeviceModifyDrainState),
-  /** nvmlDeviceOnSameBoard */
-  NVML_OVERRIDE_ENUM(nvmlDeviceOnSameBoard),
-  /** nvmlDeviceQueryDrainState */
-  NVML_OVERRIDE_ENUM(nvmlDeviceQueryDrainState),
-  /** nvmlDeviceRegisterEvents */
-  NVML_OVERRIDE_ENUM(nvmlDeviceRegisterEvents),
-  /** nvmlDeviceRemoveGpu */
-  NVML_OVERRIDE_ENUM(nvmlDeviceRemoveGpu),
-  /** nvmlDeviceRemoveGpu_v2 */
-  NVML_OVERRIDE_ENUM(nvmlDeviceRemoveGpu_v2),
-  /** nvmlDeviceResetApplicationsClocks */
-  NVML_OVERRIDE_ENUM(nvmlDeviceResetApplicationsClocks),
-  /** nvmlDeviceResetNvLinkErrorCounters */
-  NVML_OVERRIDE_ENUM(nvmlDeviceResetNvLinkErrorCounters),
-  /** nvmlDeviceResetNvLinkUtilizationCounter */
-  NVML_OVERRIDE_ENUM(nvmlDeviceResetNvLinkUtilizationCounter),
-  /** nvmlDeviceSetAccountingMode */
-  NVML_OVERRIDE_ENUM(nvmlDeviceSetAccountingMode),
-  /** nvmlDeviceSetAPIRestriction */
-  NVML_OVERRIDE_ENUM(nvmlDeviceSetAPIRestriction),
-  /** nvmlDeviceSetApplicationsClocks */
-  NVML_OVERRIDE_ENUM(nvmlDeviceSetApplicationsClocks),
-  /** nvmlDeviceSetAutoBoostedClocksEnabled */
-  NVML_OVERRIDE_ENUM(nvmlDeviceSetAutoBoostedClocksEnabled),
-  /** nvmlDeviceSetComputeMode */
-  NVML_OVERRIDE_ENUM(nvmlDeviceSetComputeMode),
-  /** nvmlDeviceSetCpuAffinity */
-  NVML_OVERRIDE_ENUM(nvmlDeviceSetCpuAffinity),
-  /** nvmlDeviceSetDefaultAutoBoostedClocksEnabled */
-  NVML_OVERRIDE_ENUM(nvmlDeviceSetDefaultAutoBoostedClocksEnabled),
-  /** nvmlDeviceSetDriverModel */
-  NVML_OVERRIDE_ENUM(nvmlDeviceSetDriverModel),
-  /** nvmlDeviceSetEccMode */
-  NVML_OVERRIDE_ENUM(nvmlDeviceSetEccMode),
-  /** nvmlDeviceSetGpuOperationMode */
-  NVML_OVERRIDE_ENUM(nvmlDeviceSetGpuOperationMode),
-  /** nvmlDeviceSetNvLinkUtilizationControl */
-  NVML_OVERRIDE_ENUM(nvmlDeviceSetNvLinkUtilizationControl),
-  /** nvmlDeviceSetPersistenceMode */
-  NVML_OVERRIDE_ENUM(nvmlDeviceSetPersistenceMode),
-  /** nvmlDeviceSetPowerManagementLimit */
-  NVML_OVERRIDE_ENUM(nvmlDeviceSetPowerManagementLimit),
-  /** nvmlDeviceSetVirtualizationMode */
-  NVML_OVERRIDE_ENUM(nvmlDeviceSetVirtualizationMode),
-  /** nvmlDeviceValidateInforom */
-  NVML_OVERRIDE_ENUM(nvmlDeviceValidateInforom),
-  /** nvmlEventSetCreate */
-  NVML_OVERRIDE_ENUM(nvmlEventSetCreate),
-  /** nvmlEventSetFree */
-  NVML_OVERRIDE_ENUM(nvmlEventSetFree),
-  /** nvmlEventSetWait */
-  NVML_OVERRIDE_ENUM(nvmlEventSetWait),
-  /** nvmlGetVgpuCompatibility */
-  NVML_OVERRIDE_ENUM(nvmlGetVgpuCompatibility),
-  /** nvmlInit_v2 */
-  NVML_OVERRIDE_ENUM(nvmlInit_v2),
-  /** nvmlInitWithFlags */
-  NVML_OVERRIDE_ENUM(nvmlInitWithFlags),
-  /** nvmlInternalGetExportTable */
-  NVML_OVERRIDE_ENUM(nvmlInternalGetExportTable),
-  /** nvmlSystemGetCudaDriverVersion */
-  NVML_OVERRIDE_ENUM(nvmlSystemGetCudaDriverVersion),
-  /** nvmlSystemGetCudaDriverVersion_v2 */
-  NVML_OVERRIDE_ENUM(nvmlSystemGetCudaDriverVersion_v2),
-  /** nvmlSystemGetDriverVersion */
-  NVML_OVERRIDE_ENUM(nvmlSystemGetDriverVersion),
-  /** nvmlSystemGetHicVersion */
-  NVML_OVERRIDE_ENUM(nvmlSystemGetHicVersion),
-  /** nvmlSystemGetNVMLVersion */
-  NVML_OVERRIDE_ENUM(nvmlSystemGetNVMLVersion),
-  /** nvmlSystemGetProcessName */
-  NVML_OVERRIDE_ENUM(nvmlSystemGetProcessName),
-  /** nvmlSystemGetTopologyGpuSet */
-  NVML_OVERRIDE_ENUM(nvmlSystemGetTopologyGpuSet),
-  /** nvmlUnitGetCount */
-  NVML_OVERRIDE_ENUM(nvmlUnitGetCount),
-  /** nvmlUnitGetDevices */
-  NVML_OVERRIDE_ENUM(nvmlUnitGetDevices),
-  /** nvmlUnitGetFanSpeedInfo */
-  NVML_OVERRIDE_ENUM(nvmlUnitGetFanSpeedInfo),
-  /** nvmlUnitGetHandleByIndex */
-  NVML_OVERRIDE_ENUM(nvmlUnitGetHandleByIndex),
-  /** nvmlUnitGetLedState */
-  NVML_OVERRIDE_ENUM(nvmlUnitGetLedState),
-  /** nvmlUnitGetPsuInfo */
-  NVML_OVERRIDE_ENUM(nvmlUnitGetPsuInfo),
-  /** nvmlUnitGetTemperature */
-  NVML_OVERRIDE_ENUM(nvmlUnitGetTemperature),
-  /** nvmlUnitGetUnitInfo */
-  NVML_OVERRIDE_ENUM(nvmlUnitGetUnitInfo),
-  /** nvmlUnitSetLedState */
-  NVML_OVERRIDE_ENUM(nvmlUnitSetLedState),
-  /** nvmlVgpuInstanceGetEncoderCapacity */
-  NVML_OVERRIDE_ENUM(nvmlVgpuInstanceGetEncoderCapacity),
-  /** nvmlVgpuInstanceGetEncoderSessions */
-  NVML_OVERRIDE_ENUM(nvmlVgpuInstanceGetEncoderSessions),
-  /** nvmlVgpuInstanceGetEncoderStats */
-  NVML_OVERRIDE_ENUM(nvmlVgpuInstanceGetEncoderStats),
-  /** nvmlVgpuInstanceGetFbUsage */
-  NVML_OVERRIDE_ENUM(nvmlVgpuInstanceGetFbUsage),
-  /** nvmlVgpuInstanceGetFrameRateLimit */
-  NVML_OVERRIDE_ENUM(nvmlVgpuInstanceGetFrameRateLimit),
-  /** nvmlVgpuInstanceGetLicenseStatus */
-  NVML_OVERRIDE_ENUM(nvmlVgpuInstanceGetLicenseStatus),
-  /** nvmlVgpuInstanceGetMetadata */
-  NVML_OVERRIDE_ENUM(nvmlVgpuInstanceGetMetadata),
-  /** nvmlVgpuInstanceGetType */
-  NVML_OVERRIDE_ENUM(nvmlVgpuInstanceGetType),
-  /** nvmlVgpuInstanceGetUUID */
-  NVML_OVERRIDE_ENUM(nvmlVgpuInstanceGetUUID),
-  /** nvmlVgpuInstanceGetVmDriverVersion */
-  NVML_OVERRIDE_ENUM(nvmlVgpuInstanceGetVmDriverVersion),
-  /** nvmlVgpuInstanceGetVmID */
-  NVML_OVERRIDE_ENUM(nvmlVgpuInstanceGetVmID),
-  /** nvmlVgpuInstanceSetEncoderCapacity */
-  NVML_OVERRIDE_ENUM(nvmlVgpuInstanceSetEncoderCapacity),
-  /** nvmlVgpuTypeGetClass */
-  NVML_OVERRIDE_ENUM(nvmlVgpuTypeGetClass),
-  /** nvmlVgpuTypeGetDeviceID */
-  NVML_OVERRIDE_ENUM(nvmlVgpuTypeGetDeviceID),
-  /** nvmlVgpuTypeGetFramebufferSize */
-  NVML_OVERRIDE_ENUM(nvmlVgpuTypeGetFramebufferSize),
-  /** nvmlVgpuTypeGetFrameRateLimit */
-  NVML_OVERRIDE_ENUM(nvmlVgpuTypeGetFrameRateLimit),
-  /** nvmlVgpuTypeGetLicense */
-  NVML_OVERRIDE_ENUM(nvmlVgpuTypeGetLicense),
-  /** nvmlVgpuTypeGetMaxInstances */
-  NVML_OVERRIDE_ENUM(nvmlVgpuTypeGetMaxInstances),
-  /** nvmlVgpuTypeGetName */
-  NVML_OVERRIDE_ENUM(nvmlVgpuTypeGetName),
-  /** nvmlVgpuTypeGetNumDisplayHeads */
-  NVML_OVERRIDE_ENUM(nvmlVgpuTypeGetNumDisplayHeads),
-  /** nvmlVgpuTypeGetResolution */
-  NVML_OVERRIDE_ENUM(nvmlVgpuTypeGetResolution),
-  /** nvmlDeviceGetFBCSessions */
-  NVML_OVERRIDE_ENUM(nvmlDeviceGetFBCSessions),
-  /** nvmlDeviceGetFBCStats */
-  NVML_OVERRIDE_ENUM(nvmlDeviceGetFBCStats),
-  /** nvmlDeviceGetGridLicensableFeatures_v2 */
-  NVML_OVERRIDE_ENUM(nvmlDeviceGetGridLicensableFeatures_v2),
-  /** nvmlDeviceGetRetiredPages_v2 */
-  NVML_OVERRIDE_ENUM(nvmlDeviceGetRetiredPages_v2),
-  /** nvmlDeviceResetGpuLockedClocks */
-  NVML_OVERRIDE_ENUM(nvmlDeviceResetGpuLockedClocks),
-  /** nvmlDeviceSetGpuLockedClocks */
-  NVML_OVERRIDE_ENUM(nvmlDeviceSetGpuLockedClocks),
-  /** nvmlGetBlacklistDeviceCount */
-  NVML_OVERRIDE_ENUM(nvmlVgpuInstanceGetAccountingMode),
-  /** nvmlVgpuInstanceGetAccountingPids */
-  NVML_OVERRIDE_ENUM(nvmlVgpuInstanceGetAccountingPids),
-  /** nvmlVgpuInstanceGetAccountingStats */
-  NVML_OVERRIDE_ENUM(nvmlVgpuInstanceGetAccountingStats),
-  /** nvmlVgpuInstanceGetFBCSessions */
-  NVML_OVERRIDE_ENUM(nvmlVgpuInstanceGetFBCSessions),
-  /** nvmlVgpuInstanceGetFBCStats */
-  NVML_OVERRIDE_ENUM(nvmlVgpuInstanceGetFBCStats),
-  /** nvmlVgpuTypeGetMaxInstancesPerVm */
-  NVML_OVERRIDE_ENUM(nvmlVgpuTypeGetMaxInstancesPerVm),
-  /** nvmlGetVgpuVersion */
-  NVML_OVERRIDE_ENUM(nvmlGetVgpuVersion),
-  /** nvmlSetVgpuVersion */
-  NVML_OVERRIDE_ENUM(nvmlSetVgpuVersion),
-  /** nvmlDeviceGetGridLicensableFeatures_v3 */
-  NVML_OVERRIDE_ENUM(nvmlDeviceGetGridLicensableFeatures_v3),
-  /** nvmlDeviceGetHostVgpuMode */
-  NVML_OVERRIDE_ENUM(nvmlDeviceGetHostVgpuMode),
-  /** nvmlDeviceGetPgpuMetadataString */
-  NVML_OVERRIDE_ENUM(nvmlDeviceGetPgpuMetadataString),
-  /** nvmlVgpuInstanceGetEccMode */
-  NVML_OVERRIDE_ENUM(nvmlVgpuInstanceGetEccMode),
-  /** nvmlComputeInstanceDestroy */
-  NVML_OVERRIDE_ENUM(nvmlComputeInstanceDestroy),
-  /** nvmlComputeInstanceGetInfo */
-  NVML_OVERRIDE_ENUM(nvmlComputeInstanceGetInfo),
-  /** nvmlDeviceCreateGpuInstance */
-  NVML_OVERRIDE_ENUM(nvmlDeviceCreateGpuInstance),
-  /** nvmlDeviceGetArchitecture */
-  NVML_OVERRIDE_ENUM(nvmlDeviceGetArchitecture),
-  /** nvmlDeviceGetAttributes */
-  NVML_OVERRIDE_ENUM(nvmlDeviceGetAttributes),
-  /** nvmlDeviceGetAttributes_v2 */
-  NVML_OVERRIDE_ENUM(nvmlDeviceGetAttributes_v2),
-  /** nvmlDeviceGetComputeInstanceId */
-  NVML_OVERRIDE_ENUM(nvmlDeviceGetComputeInstanceId),
-  /** nvmlDeviceGetCpuAffinityWithinScope */
-  NVML_OVERRIDE_ENUM(nvmlDeviceGetCpuAffinityWithinScope),
-  /** nvmlDeviceGetDeviceHandleFromMigDeviceHandle */
-  NVML_OVERRIDE_ENUM(nvmlDeviceGetDeviceHandleFromMigDeviceHandle),
-  /** nvmlDeviceGetGpuInstanceById */
-  NVML_OVERRIDE_ENUM(nvmlDeviceGetGpuInstanceById),
-  /** nvmlDeviceGetGpuInstanceId */
-  NVML_OVERRIDE_ENUM(nvmlDeviceGetGpuInstanceId),
-  /** nvmlDeviceGetGpuInstancePossiblePlacements */
-  NVML_OVERRIDE_ENUM(nvmlDeviceGetGpuInstancePossiblePlacements),
-  /** nvmlDeviceGetGpuInstanceProfileInfo */
-  NVML_OVERRIDE_ENUM(nvmlDeviceGetGpuInstanceProfileInfo),
-  /** nvmlDeviceGetGpuInstanceRemainingCapacity */
-  NVML_OVERRIDE_ENUM(nvmlDeviceGetGpuInstanceRemainingCapacity),
-  /** nvmlDeviceGetGpuInstances */
-  NVML_OVERRIDE_ENUM(nvmlDeviceGetGpuInstances),
-  /** nvmlDeviceGetMaxMigDeviceCount */
-  NVML_OVERRIDE_ENUM(nvmlDeviceGetMaxMigDeviceCount),
-  /** nvmlDeviceGetMemoryAffinity */
-  NVML_OVERRIDE_ENUM(nvmlDeviceGetMemoryAffinity),
-  /** nvmlDeviceGetMigDeviceHandleByIndex */
-  NVML_OVERRIDE_ENUM(nvmlDeviceGetMigDeviceHandleByIndex),
-  /** nvmlDeviceGetMigMode */
-  NVML_OVERRIDE_ENUM(nvmlDeviceGetMigMode),
-  /** nvmlDeviceGetRemappedRows */
-  NVML_OVERRIDE_ENUM(nvmlDeviceGetRemappedRows),
-  /** nvmlDeviceGetRowRemapperHistogram */
-  NVML_OVERRIDE_ENUM(nvmlDeviceGetRowRemapperHistogram),
-  /** nvmlDeviceIsMigDeviceHandle */
-  NVML_OVERRIDE_ENUM(nvmlDeviceIsMigDeviceHandle),
-  /** nvmlDeviceSetMigMode */
-  NVML_OVERRIDE_ENUM(nvmlDeviceSetMigMode),
-  /** nvmlEventSetWait_v2 */
-  NVML_OVERRIDE_ENUM(nvmlEventSetWait_v2),
-  /** nvmlGpuInstanceCreateComputeInstance */
-  NVML_OVERRIDE_ENUM(nvmlGpuInstanceCreateComputeInstance),
-  /** nvmlGpuInstanceDestroy */
-  NVML_OVERRIDE_ENUM(nvmlGpuInstanceDestroy),
-  /** nvmlGpuInstanceGetComputeInstanceById */
-  NVML_OVERRIDE_ENUM(nvmlGpuInstanceGetComputeInstanceById),
-  /** nvmlGpuInstanceGetComputeInstanceProfileInfo */
-  NVML_OVERRIDE_ENUM(nvmlGpuInstanceGetComputeInstanceProfileInfo),
-  /** nvmlGpuInstanceGetComputeInstanceRemainingCapacity */
-  NVML_OVERRIDE_ENUM(nvmlGpuInstanceGetComputeInstanceRemainingCapacity),
-  /** nvmlGpuInstanceGetComputeInstances */
-  NVML_OVERRIDE_ENUM(nvmlGpuInstanceGetComputeInstances),
-  /** nvmlGpuInstanceGetInfo */
-  NVML_OVERRIDE_ENUM(nvmlGpuInstanceGetInfo),
-  /** nvmlVgpuInstanceClearAccountingPids */
-  NVML_OVERRIDE_ENUM(nvmlVgpuInstanceClearAccountingPids),
-  /** nvmlVgpuInstanceGetMdevUUID */
-  NVML_OVERRIDE_ENUM(nvmlVgpuInstanceGetMdevUUID),
-  /** nvmlComputeInstanceGetInfo_v2 */
-  NVML_OVERRIDE_ENUM(nvmlComputeInstanceGetInfo_v2),
-  /** nvmlDeviceGetComputeRunningProcesses_v2 */
-  NVML_OVERRIDE_ENUM(nvmlDeviceGetComputeRunningProcesses_v2),
-  /** nvmlDeviceGetGraphicsRunningProcesses_v2 */
-  NVML_OVERRIDE_ENUM(nvmlDeviceGetGraphicsRunningProcesses_v2),
-  /** nvmlDeviceSetTemperatureThreshold */
-  NVML_OVERRIDE_ENUM(nvmlDeviceSetTemperatureThreshold),
-  /** nvmlRetry_NvRmControl */
-  //NVML_OVERRIDE_ENUM(nvmlRetry_NvRmControl),
-  /** nvmlVgpuInstanceGetGpuInstanceId */
-  NVML_OVERRIDE_ENUM(nvmlVgpuInstanceGetGpuInstanceId),
-  /** nvmlVgpuTypeGetGpuInstanceProfileId */
-  NVML_OVERRIDE_ENUM(nvmlVgpuTypeGetGpuInstanceProfileId),
-  NVML_ENTRY_END
+    /** nvmlInit */
+    NVML_OVERRIDE_ENUM(nvmlInit),
+    /** nvmlShutdown */
+    NVML_OVERRIDE_ENUM(nvmlShutdown),
+    /** nvmlErrorString */
+    NVML_OVERRIDE_ENUM(nvmlErrorString),
+    /** nvmlDeviceGetHandleByIndex */
+    NVML_OVERRIDE_ENUM(nvmlDeviceGetHandleByIndex),
+    /** nvmlDeviceGetComputeRunningProcesses */
+    NVML_OVERRIDE_ENUM(nvmlDeviceGetComputeRunningProcesses),
+    /** nvmlDeviceGetPciInfo */
+    NVML_OVERRIDE_ENUM(nvmlDeviceGetPciInfo),
+    /** nvmlDeviceGetProcessUtilization */
+    NVML_OVERRIDE_ENUM(nvmlDeviceGetProcessUtilization),
+    /** nvmlDeviceGetCount */
+    NVML_OVERRIDE_ENUM(nvmlDeviceGetCount),
+    /** nvmlDeviceClearAccountingPids */
+    NVML_OVERRIDE_ENUM(nvmlDeviceClearAccountingPids),
+    /** nvmlDeviceClearCpuAffinity */
+    NVML_OVERRIDE_ENUM(nvmlDeviceClearCpuAffinity),
+    /** nvmlDeviceClearEccErrorCounts */
+    NVML_OVERRIDE_ENUM(nvmlDeviceClearEccErrorCounts),
+    /** nvmlDeviceDiscoverGpus */
+    NVML_OVERRIDE_ENUM(nvmlDeviceDiscoverGpus),
+    /** nvmlDeviceFreezeNvLinkUtilizationCounter */
+    NVML_OVERRIDE_ENUM(nvmlDeviceFreezeNvLinkUtilizationCounter),
+    /** nvmlDeviceGetAccountingBufferSize */
+    NVML_OVERRIDE_ENUM(nvmlDeviceGetAccountingBufferSize),
+    /** nvmlDeviceGetAccountingMode */
+    NVML_OVERRIDE_ENUM(nvmlDeviceGetAccountingMode),
+    /** nvmlDeviceGetAccountingPids */
+    NVML_OVERRIDE_ENUM(nvmlDeviceGetAccountingPids),
+    /** nvmlDeviceGetAccountingStats */
+    NVML_OVERRIDE_ENUM(nvmlDeviceGetAccountingStats),
+    /** nvmlDeviceGetActiveVgpus */
+    NVML_OVERRIDE_ENUM(nvmlDeviceGetActiveVgpus),
+    /** nvmlDeviceGetAPIRestriction */
+    NVML_OVERRIDE_ENUM(nvmlDeviceGetAPIRestriction),
+    /** nvmlDeviceGetApplicationsClock */
+    NVML_OVERRIDE_ENUM(nvmlDeviceGetApplicationsClock),
+    /** nvmlDeviceGetAutoBoostedClocksEnabled */
+    NVML_OVERRIDE_ENUM(nvmlDeviceGetAutoBoostedClocksEnabled),
+    /** nvmlDeviceGetBAR1MemoryInfo */
+    NVML_OVERRIDE_ENUM(nvmlDeviceGetBAR1MemoryInfo),
+    /** nvmlDeviceGetBoardId */
+    NVML_OVERRIDE_ENUM(nvmlDeviceGetBoardId),
+    /** nvmlDeviceGetBoardPartNumber */
+    NVML_OVERRIDE_ENUM(nvmlDeviceGetBoardPartNumber),
+    /** nvmlDeviceGetBrand */
+    NVML_OVERRIDE_ENUM(nvmlDeviceGetBrand),
+    /** nvmlDeviceGetBridgeChipInfo */
+    NVML_OVERRIDE_ENUM(nvmlDeviceGetBridgeChipInfo),
+    /** nvmlDeviceGetClock */
+    NVML_OVERRIDE_ENUM(nvmlDeviceGetClock),
+    /** nvmlDeviceGetClockInfo */
+    NVML_OVERRIDE_ENUM(nvmlDeviceGetClockInfo),
+    /** nvmlDeviceGetComputeMode */
+    NVML_OVERRIDE_ENUM(nvmlDeviceGetComputeMode),
+    /** nvmlDeviceGetCount_v2 */
+    NVML_OVERRIDE_ENUM(nvmlDeviceGetCount_v2),
+    /** nvmlDeviceGetCpuAffinity */
+    NVML_OVERRIDE_ENUM(nvmlDeviceGetCpuAffinity),
+    /** nvmlDeviceGetCreatableVgpus */
+    NVML_OVERRIDE_ENUM(nvmlDeviceGetCreatableVgpus),
+    /** nvmlDeviceGetCudaComputeCapability */
+    NVML_OVERRIDE_ENUM(nvmlDeviceGetCudaComputeCapability),
+    /** nvmlDeviceGetCurrentClocksThrottleReasons */
+    NVML_OVERRIDE_ENUM(nvmlDeviceGetCurrentClocksThrottleReasons),
+    /** nvmlDeviceGetCurrPcieLinkGeneration */
+    NVML_OVERRIDE_ENUM(nvmlDeviceGetCurrPcieLinkGeneration),
+    /** nvmlDeviceGetCurrPcieLinkWidth */
+    NVML_OVERRIDE_ENUM(nvmlDeviceGetCurrPcieLinkWidth),
+    /** nvmlDeviceGetDecoderUtilization */
+    NVML_OVERRIDE_ENUM(nvmlDeviceGetDecoderUtilization),
+    /** nvmlDeviceGetDefaultApplicationsClock */
+    NVML_OVERRIDE_ENUM(nvmlDeviceGetDefaultApplicationsClock),
+    /** nvmlDeviceGetDetailedEccErrors */
+    NVML_OVERRIDE_ENUM(nvmlDeviceGetDetailedEccErrors),
+    /** nvmlDeviceGetDisplayActive */
+    NVML_OVERRIDE_ENUM(nvmlDeviceGetDisplayActive),
+    /** nvmlDeviceGetDisplayMode */
+    NVML_OVERRIDE_ENUM(nvmlDeviceGetDisplayMode),
+    /** nvmlDeviceGetDriverModel */
+    NVML_OVERRIDE_ENUM(nvmlDeviceGetDriverModel),
+    /** nvmlDeviceGetEccMode */
+    NVML_OVERRIDE_ENUM(nvmlDeviceGetEccMode),
+    /** nvmlDeviceGetEncoderCapacity */
+    NVML_OVERRIDE_ENUM(nvmlDeviceGetEncoderCapacity),
+    /** nvmlDeviceGetEncoderSessions */
+    NVML_OVERRIDE_ENUM(nvmlDeviceGetEncoderSessions),
+    /** nvmlDeviceGetEncoderStats */
+    NVML_OVERRIDE_ENUM(nvmlDeviceGetEncoderStats),
+    /** nvmlDeviceGetEncoderUtilization */
+    NVML_OVERRIDE_ENUM(nvmlDeviceGetEncoderUtilization),
+    /** nvmlDeviceGetEnforcedPowerLimit */
+    NVML_OVERRIDE_ENUM(nvmlDeviceGetEnforcedPowerLimit),
+    /** nvmlDeviceGetFanSpeed */
+    NVML_OVERRIDE_ENUM(nvmlDeviceGetFanSpeed),
+    /** nvmlDeviceGetFanSpeed_v2 */
+    NVML_OVERRIDE_ENUM(nvmlDeviceGetFanSpeed_v2),
+    /** nvmlDeviceGetFieldValues */
+    NVML_OVERRIDE_ENUM(nvmlDeviceGetFieldValues),
+    /** nvmlDeviceGetGpuOperationMode */
+    NVML_OVERRIDE_ENUM(nvmlDeviceGetGpuOperationMode),
+    /** nvmlDeviceGetGraphicsRunningProcesses */
+    NVML_OVERRIDE_ENUM(nvmlDeviceGetGraphicsRunningProcesses),
+    /** nvmlDeviceGetGridLicensableFeatures */
+    NVML_OVERRIDE_ENUM(nvmlDeviceGetGridLicensableFeatures),
+    /** nvmlDeviceGetHandleByIndex_v2 */
+    NVML_OVERRIDE_ENUM(nvmlDeviceGetHandleByIndex_v2),
+    /** nvmlDeviceGetHandleByPciBusId */
+    NVML_OVERRIDE_ENUM(nvmlDeviceGetHandleByPciBusId),
+    /** nvmlDeviceGetHandleByPciBusId_v2 */
+    NVML_OVERRIDE_ENUM(nvmlDeviceGetHandleByPciBusId_v2),
+    /** nvmlDeviceGetHandleBySerial */
+    NVML_OVERRIDE_ENUM(nvmlDeviceGetHandleBySerial),
+    /** nvmlDeviceGetHandleByUUID */
+    NVML_OVERRIDE_ENUM(nvmlDeviceGetHandleByUUID),
+    /** nvmlDeviceGetIndex */
+    NVML_OVERRIDE_ENUM(nvmlDeviceGetIndex),
+    /** nvmlDeviceGetInforomConfigurationChecksum */
+    NVML_OVERRIDE_ENUM(nvmlDeviceGetInforomConfigurationChecksum),
+    /** nvmlDeviceGetInforomImageVersion */
+    NVML_OVERRIDE_ENUM(nvmlDeviceGetInforomImageVersion),
+    /** nvmlDeviceGetInforomVersion */
+    NVML_OVERRIDE_ENUM(nvmlDeviceGetInforomVersion),
+    /** nvmlDeviceGetMaxClockInfo */
+    NVML_OVERRIDE_ENUM(nvmlDeviceGetMaxClockInfo),
+    /** nvmlDeviceGetMaxCustomerBoostClock */
+    NVML_OVERRIDE_ENUM(nvmlDeviceGetMaxCustomerBoostClock),
+    /** nvmlDeviceGetMaxPcieLinkGeneration */
+    NVML_OVERRIDE_ENUM(nvmlDeviceGetMaxPcieLinkGeneration),
+    /** nvmlDeviceGetMaxPcieLinkWidth */
+    NVML_OVERRIDE_ENUM(nvmlDeviceGetMaxPcieLinkWidth),
+    /** nvmlDeviceGetMemoryErrorCounter */
+    NVML_OVERRIDE_ENUM(nvmlDeviceGetMemoryErrorCounter),
+    /** nvmlDeviceGetMemoryInfo */
+    NVML_OVERRIDE_ENUM(nvmlDeviceGetMemoryInfo),
+    /** nvmlDeviceGetMemoryInfo_v2 */
+    NVML_OVERRIDE_ENUM(nvmlDeviceGetMemoryInfo_v2),
+    /** nvmlDeviceGetMinorNumber */
+    NVML_OVERRIDE_ENUM(nvmlDeviceGetMinorNumber),
+    /** nvmlDeviceGetMPSComputeRunningProcesses */
+    NVML_OVERRIDE_ENUM(nvmlDeviceGetMPSComputeRunningProcesses),
+    /** nvmlDeviceGetMultiGpuBoard */
+    NVML_OVERRIDE_ENUM(nvmlDeviceGetMultiGpuBoard),
+    /** nvmlDeviceGetName */
+    NVML_OVERRIDE_ENUM(nvmlDeviceGetName),
+    /** nvmlDeviceGetNvLinkCapability */
+    NVML_OVERRIDE_ENUM(nvmlDeviceGetNvLinkCapability),
+    /** nvmlDeviceGetNvLinkErrorCounter */
+    NVML_OVERRIDE_ENUM(nvmlDeviceGetNvLinkErrorCounter),
+    /** nvmlDeviceGetNvLinkRemotePciInfo */
+    NVML_OVERRIDE_ENUM(nvmlDeviceGetNvLinkRemotePciInfo),
+    /** nvmlDeviceGetNvLinkRemotePciInfo_v2 */
+    NVML_OVERRIDE_ENUM(nvmlDeviceGetNvLinkRemotePciInfo_v2),
+    /** nvmlDeviceGetNvLinkState */
+    NVML_OVERRIDE_ENUM(nvmlDeviceGetNvLinkState),
+    /** nvmlDeviceGetNvLinkUtilizationControl */
+    NVML_OVERRIDE_ENUM(nvmlDeviceGetNvLinkUtilizationControl),
+    /** nvmlDeviceGetNvLinkUtilizationCounter */
+    NVML_OVERRIDE_ENUM(nvmlDeviceGetNvLinkUtilizationCounter),
+    /** nvmlDeviceGetNvLinkVersion */
+    NVML_OVERRIDE_ENUM(nvmlDeviceGetNvLinkVersion),
+    /** nvmlDeviceGetP2PStatus */
+    NVML_OVERRIDE_ENUM(nvmlDeviceGetP2PStatus),
+    /** nvmlDeviceGetPcieReplayCounter */
+    NVML_OVERRIDE_ENUM(nvmlDeviceGetPcieReplayCounter),
+    /** nvmlDeviceGetPcieThroughput */
+    NVML_OVERRIDE_ENUM(nvmlDeviceGetPcieThroughput),
+    /** nvmlDeviceGetPciInfo_v2 */
+    NVML_OVERRIDE_ENUM(nvmlDeviceGetPciInfo_v2),
+    /** nvmlDeviceGetPciInfo_v3 */
+    NVML_OVERRIDE_ENUM(nvmlDeviceGetPciInfo_v3),
+    /** nvmlDeviceGetPerformanceState */
+    NVML_OVERRIDE_ENUM(nvmlDeviceGetPerformanceState),
+    /** nvmlDeviceGetPersistenceMode */
+    NVML_OVERRIDE_ENUM(nvmlDeviceGetPersistenceMode),
+    /** nvmlDeviceGetPowerManagementDefaultLimit */
+    NVML_OVERRIDE_ENUM(nvmlDeviceGetPowerManagementDefaultLimit),
+    /** nvmlDeviceGetPowerManagementLimit */
+    NVML_OVERRIDE_ENUM(nvmlDeviceGetPowerManagementLimit),
+    /** nvmlDeviceGetPowerManagementLimitConstraints */
+    NVML_OVERRIDE_ENUM(nvmlDeviceGetPowerManagementLimitConstraints),
+    /** nvmlDeviceGetPowerManagementMode */
+    NVML_OVERRIDE_ENUM(nvmlDeviceGetPowerManagementMode),
+    /** nvmlDeviceGetPowerState */
+    NVML_OVERRIDE_ENUM(nvmlDeviceGetPowerState),
+    /** nvmlDeviceGetPowerUsage */
+    NVML_OVERRIDE_ENUM(nvmlDeviceGetPowerUsage),
+    /** nvmlDeviceGetRetiredPages */
+    NVML_OVERRIDE_ENUM(nvmlDeviceGetRetiredPages),
+    /** nvmlDeviceGetRetiredPagesPendingStatus */
+    NVML_OVERRIDE_ENUM(nvmlDeviceGetRetiredPagesPendingStatus),
+    /** nvmlDeviceGetSamples */
+    NVML_OVERRIDE_ENUM(nvmlDeviceGetSamples),
+    /** nvmlDeviceGetSerial */
+    NVML_OVERRIDE_ENUM(nvmlDeviceGetSerial),
+    /** nvmlDeviceGetSupportedClocksThrottleReasons */
+    NVML_OVERRIDE_ENUM(nvmlDeviceGetSupportedClocksThrottleReasons),
+    /** nvmlDeviceGetSupportedEventTypes */
+    NVML_OVERRIDE_ENUM(nvmlDeviceGetSupportedEventTypes),
+    /** nvmlDeviceGetSupportedGraphicsClocks */
+    NVML_OVERRIDE_ENUM(nvmlDeviceGetSupportedGraphicsClocks),
+    /** nvmlDeviceGetSupportedMemoryClocks */
+    NVML_OVERRIDE_ENUM(nvmlDeviceGetSupportedMemoryClocks),
+    /** nvmlDeviceGetSupportedVgpus */
+    NVML_OVERRIDE_ENUM(nvmlDeviceGetSupportedVgpus),
+    /** nvmlDeviceGetTemperature */
+    NVML_OVERRIDE_ENUM(nvmlDeviceGetTemperature),
+    /** nvmlDeviceGetTemperatureThreshold */
+    NVML_OVERRIDE_ENUM(nvmlDeviceGetTemperatureThreshold),
+    /** nvmlDeviceGetTopologyCommonAncestor */
+    NVML_OVERRIDE_ENUM(nvmlDeviceGetTopologyCommonAncestor),
+    /** nvmlDeviceGetTopologyNearestGpus */
+    NVML_OVERRIDE_ENUM(nvmlDeviceGetTopologyNearestGpus),
+    /** nvmlDeviceGetTotalEccErrors */
+    NVML_OVERRIDE_ENUM(nvmlDeviceGetTotalEccErrors),
+    /** nvmlDeviceGetTotalEnergyConsumption */
+    NVML_OVERRIDE_ENUM(nvmlDeviceGetTotalEnergyConsumption),
+    /** nvmlDeviceGetUtilizationRates */
+    NVML_OVERRIDE_ENUM(nvmlDeviceGetUtilizationRates),
+    /** nvmlDeviceGetUUID */
+    NVML_OVERRIDE_ENUM(nvmlDeviceGetUUID),
+    /** nvmlDeviceGetVbiosVersion */
+    NVML_OVERRIDE_ENUM(nvmlDeviceGetVbiosVersion),
+    /** nvmlDeviceGetVgpuMetadata */
+    NVML_OVERRIDE_ENUM(nvmlDeviceGetVgpuMetadata),
+    /** nvmlDeviceGetVgpuProcessUtilization */
+    NVML_OVERRIDE_ENUM(nvmlDeviceGetVgpuProcessUtilization),
+    /** nvmlDeviceGetVgpuUtilization */
+    NVML_OVERRIDE_ENUM(nvmlDeviceGetVgpuUtilization),
+    /** nvmlDeviceGetViolationStatus */
+    NVML_OVERRIDE_ENUM(nvmlDeviceGetViolationStatus),
+    /** nvmlDeviceGetVirtualizationMode */
+    NVML_OVERRIDE_ENUM(nvmlDeviceGetVirtualizationMode),
+    /** nvmlDeviceModifyDrainState */
+    NVML_OVERRIDE_ENUM(nvmlDeviceModifyDrainState),
+    /** nvmlDeviceOnSameBoard */
+    NVML_OVERRIDE_ENUM(nvmlDeviceOnSameBoard),
+    /** nvmlDeviceQueryDrainState */
+    NVML_OVERRIDE_ENUM(nvmlDeviceQueryDrainState),
+    /** nvmlDeviceRegisterEvents */
+    NVML_OVERRIDE_ENUM(nvmlDeviceRegisterEvents),
+    /** nvmlDeviceRemoveGpu */
+    NVML_OVERRIDE_ENUM(nvmlDeviceRemoveGpu),
+    /** nvmlDeviceRemoveGpu_v2 */
+    NVML_OVERRIDE_ENUM(nvmlDeviceRemoveGpu_v2),
+    /** nvmlDeviceResetApplicationsClocks */
+    NVML_OVERRIDE_ENUM(nvmlDeviceResetApplicationsClocks),
+    /** nvmlDeviceResetNvLinkErrorCounters */
+    NVML_OVERRIDE_ENUM(nvmlDeviceResetNvLinkErrorCounters),
+    /** nvmlDeviceResetNvLinkUtilizationCounter */
+    NVML_OVERRIDE_ENUM(nvmlDeviceResetNvLinkUtilizationCounter),
+    /** nvmlDeviceSetAccountingMode */
+    NVML_OVERRIDE_ENUM(nvmlDeviceSetAccountingMode),
+    /** nvmlDeviceSetAPIRestriction */
+    NVML_OVERRIDE_ENUM(nvmlDeviceSetAPIRestriction),
+    /** nvmlDeviceSetApplicationsClocks */
+    NVML_OVERRIDE_ENUM(nvmlDeviceSetApplicationsClocks),
+    /** nvmlDeviceSetAutoBoostedClocksEnabled */
+    NVML_OVERRIDE_ENUM(nvmlDeviceSetAutoBoostedClocksEnabled),
+    /** nvmlDeviceSetComputeMode */
+    NVML_OVERRIDE_ENUM(nvmlDeviceSetComputeMode),
+    /** nvmlDeviceSetCpuAffinity */
+    NVML_OVERRIDE_ENUM(nvmlDeviceSetCpuAffinity),
+    /** nvmlDeviceSetDefaultAutoBoostedClocksEnabled */
+    NVML_OVERRIDE_ENUM(nvmlDeviceSetDefaultAutoBoostedClocksEnabled),
+    /** nvmlDeviceSetDriverModel */
+    NVML_OVERRIDE_ENUM(nvmlDeviceSetDriverModel),
+    /** nvmlDeviceSetEccMode */
+    NVML_OVERRIDE_ENUM(nvmlDeviceSetEccMode),
+    /** nvmlDeviceSetGpuOperationMode */
+    NVML_OVERRIDE_ENUM(nvmlDeviceSetGpuOperationMode),
+    /** nvmlDeviceSetNvLinkUtilizationControl */
+    NVML_OVERRIDE_ENUM(nvmlDeviceSetNvLinkUtilizationControl),
+    /** nvmlDeviceSetPersistenceMode */
+    NVML_OVERRIDE_ENUM(nvmlDeviceSetPersistenceMode),
+    /** nvmlDeviceSetPowerManagementLimit */
+    NVML_OVERRIDE_ENUM(nvmlDeviceSetPowerManagementLimit),
+    /** nvmlDeviceSetVirtualizationMode */
+    NVML_OVERRIDE_ENUM(nvmlDeviceSetVirtualizationMode),
+    /** nvmlDeviceValidateInforom */
+    NVML_OVERRIDE_ENUM(nvmlDeviceValidateInforom),
+    /** nvmlEventSetCreate */
+    NVML_OVERRIDE_ENUM(nvmlEventSetCreate),
+    /** nvmlEventSetFree */
+    NVML_OVERRIDE_ENUM(nvmlEventSetFree),
+    /** nvmlEventSetWait */
+    NVML_OVERRIDE_ENUM(nvmlEventSetWait),
+    /** nvmlGetVgpuCompatibility */
+    NVML_OVERRIDE_ENUM(nvmlGetVgpuCompatibility),
+    /** nvmlInit_v2 */
+    NVML_OVERRIDE_ENUM(nvmlInit_v2),
+    /** nvmlInitWithFlags */
+    NVML_OVERRIDE_ENUM(nvmlInitWithFlags),
+    /** nvmlInternalGetExportTable */
+    NVML_OVERRIDE_ENUM(nvmlInternalGetExportTable),
+    /** nvmlSystemGetCudaDriverVersion */
+    NVML_OVERRIDE_ENUM(nvmlSystemGetCudaDriverVersion),
+    /** nvmlSystemGetCudaDriverVersion_v2 */
+    NVML_OVERRIDE_ENUM(nvmlSystemGetCudaDriverVersion_v2),
+    /** nvmlSystemGetDriverVersion */
+    NVML_OVERRIDE_ENUM(nvmlSystemGetDriverVersion),
+    /** nvmlSystemGetHicVersion */
+    NVML_OVERRIDE_ENUM(nvmlSystemGetHicVersion),
+    /** nvmlSystemGetNVMLVersion */
+    NVML_OVERRIDE_ENUM(nvmlSystemGetNVMLVersion),
+    /** nvmlSystemGetProcessName */
+    NVML_OVERRIDE_ENUM(nvmlSystemGetProcessName),
+    /** nvmlSystemGetTopologyGpuSet */
+    NVML_OVERRIDE_ENUM(nvmlSystemGetTopologyGpuSet),
+    /** nvmlUnitGetCount */
+    NVML_OVERRIDE_ENUM(nvmlUnitGetCount),
+    /** nvmlUnitGetDevices */
+    NVML_OVERRIDE_ENUM(nvmlUnitGetDevices),
+    /** nvmlUnitGetFanSpeedInfo */
+    NVML_OVERRIDE_ENUM(nvmlUnitGetFanSpeedInfo),
+    /** nvmlUnitGetHandleByIndex */
+    NVML_OVERRIDE_ENUM(nvmlUnitGetHandleByIndex),
+    /** nvmlUnitGetLedState */
+    NVML_OVERRIDE_ENUM(nvmlUnitGetLedState),
+    /** nvmlUnitGetPsuInfo */
+    NVML_OVERRIDE_ENUM(nvmlUnitGetPsuInfo),
+    /** nvmlUnitGetTemperature */
+    NVML_OVERRIDE_ENUM(nvmlUnitGetTemperature),
+    /** nvmlUnitGetUnitInfo */
+    NVML_OVERRIDE_ENUM(nvmlUnitGetUnitInfo),
+    /** nvmlUnitSetLedState */
+    NVML_OVERRIDE_ENUM(nvmlUnitSetLedState),
+    /** nvmlVgpuInstanceGetEncoderCapacity */
+    NVML_OVERRIDE_ENUM(nvmlVgpuInstanceGetEncoderCapacity),
+    /** nvmlVgpuInstanceGetEncoderSessions */
+    NVML_OVERRIDE_ENUM(nvmlVgpuInstanceGetEncoderSessions),
+    /** nvmlVgpuInstanceGetEncoderStats */
+    NVML_OVERRIDE_ENUM(nvmlVgpuInstanceGetEncoderStats),
+    /** nvmlVgpuInstanceGetFbUsage */
+    NVML_OVERRIDE_ENUM(nvmlVgpuInstanceGetFbUsage),
+    /** nvmlVgpuInstanceGetFrameRateLimit */
+    NVML_OVERRIDE_ENUM(nvmlVgpuInstanceGetFrameRateLimit),
+    /** nvmlVgpuInstanceGetLicenseStatus */
+    NVML_OVERRIDE_ENUM(nvmlVgpuInstanceGetLicenseStatus),
+    /** nvmlVgpuInstanceGetMetadata */
+    NVML_OVERRIDE_ENUM(nvmlVgpuInstanceGetMetadata),
+    /** nvmlVgpuInstanceGetType */
+    NVML_OVERRIDE_ENUM(nvmlVgpuInstanceGetType),
+    /** nvmlVgpuInstanceGetUUID */
+    NVML_OVERRIDE_ENUM(nvmlVgpuInstanceGetUUID),
+    /** nvmlVgpuInstanceGetVmDriverVersion */
+    NVML_OVERRIDE_ENUM(nvmlVgpuInstanceGetVmDriverVersion),
+    /** nvmlVgpuInstanceGetVmID */
+    NVML_OVERRIDE_ENUM(nvmlVgpuInstanceGetVmID),
+    /** nvmlVgpuInstanceSetEncoderCapacity */
+    NVML_OVERRIDE_ENUM(nvmlVgpuInstanceSetEncoderCapacity),
+    /** nvmlVgpuTypeGetClass */
+    NVML_OVERRIDE_ENUM(nvmlVgpuTypeGetClass),
+    /** nvmlVgpuTypeGetDeviceID */
+    NVML_OVERRIDE_ENUM(nvmlVgpuTypeGetDeviceID),
+    /** nvmlVgpuTypeGetFramebufferSize */
+    NVML_OVERRIDE_ENUM(nvmlVgpuTypeGetFramebufferSize),
+    /** nvmlVgpuTypeGetFrameRateLimit */
+    NVML_OVERRIDE_ENUM(nvmlVgpuTypeGetFrameRateLimit),
+    /** nvmlVgpuTypeGetLicense */
+    NVML_OVERRIDE_ENUM(nvmlVgpuTypeGetLicense),
+    /** nvmlVgpuTypeGetMaxInstances */
+    NVML_OVERRIDE_ENUM(nvmlVgpuTypeGetMaxInstances),
+    /** nvmlVgpuTypeGetName */
+    NVML_OVERRIDE_ENUM(nvmlVgpuTypeGetName),
+    /** nvmlVgpuTypeGetNumDisplayHeads */
+    NVML_OVERRIDE_ENUM(nvmlVgpuTypeGetNumDisplayHeads),
+    /** nvmlVgpuTypeGetResolution */
+    NVML_OVERRIDE_ENUM(nvmlVgpuTypeGetResolution),
+    /** nvmlDeviceGetFBCSessions */
+    NVML_OVERRIDE_ENUM(nvmlDeviceGetFBCSessions),
+    /** nvmlDeviceGetFBCStats */
+    NVML_OVERRIDE_ENUM(nvmlDeviceGetFBCStats),
+    /** nvmlDeviceGetGridLicensableFeatures_v2 */
+    NVML_OVERRIDE_ENUM(nvmlDeviceGetGridLicensableFeatures_v2),
+    /** nvmlDeviceGetRetiredPages_v2 */
+    NVML_OVERRIDE_ENUM(nvmlDeviceGetRetiredPages_v2),
+    /** nvmlDeviceResetGpuLockedClocks */
+    NVML_OVERRIDE_ENUM(nvmlDeviceResetGpuLockedClocks),
+    /** nvmlDeviceSetGpuLockedClocks */
+    NVML_OVERRIDE_ENUM(nvmlDeviceSetGpuLockedClocks),
+    /** nvmlVgpuInstanceGetAccountingMode */
+    NVML_OVERRIDE_ENUM(nvmlVgpuInstanceGetAccountingMode),
+    /** nvmlVgpuInstanceGetAccountingPids */
+    NVML_OVERRIDE_ENUM(nvmlVgpuInstanceGetAccountingPids),
+    /** nvmlVgpuInstanceGetAccountingStats */
+    NVML_OVERRIDE_ENUM(nvmlVgpuInstanceGetAccountingStats),
+    /** nvmlVgpuInstanceGetFBCSessions */
+    NVML_OVERRIDE_ENUM(nvmlVgpuInstanceGetFBCSessions),
+    /** nvmlVgpuInstanceGetFBCStats */
+    NVML_OVERRIDE_ENUM(nvmlVgpuInstanceGetFBCStats),
+    /** nvmlVgpuTypeGetMaxInstancesPerVm */
+    NVML_OVERRIDE_ENUM(nvmlVgpuTypeGetMaxInstancesPerVm),
+    /** nvmlGetVgpuVersion */
+    NVML_OVERRIDE_ENUM(nvmlGetVgpuVersion),
+    /** nvmlSetVgpuVersion */
+    NVML_OVERRIDE_ENUM(nvmlSetVgpuVersion),
+    /** nvmlDeviceGetGridLicensableFeatures_v3 */
+    NVML_OVERRIDE_ENUM(nvmlDeviceGetGridLicensableFeatures_v3),
+    /** nvmlDeviceGetHostVgpuMode */
+    NVML_OVERRIDE_ENUM(nvmlDeviceGetHostVgpuMode),
+    /** nvmlDeviceGetPgpuMetadataString */
+    NVML_OVERRIDE_ENUM(nvmlDeviceGetPgpuMetadataString),
+    /** nvmlVgpuInstanceGetEccMode */
+    NVML_OVERRIDE_ENUM(nvmlVgpuInstanceGetEccMode),
+    /** nvmlComputeInstanceDestroy */
+    NVML_OVERRIDE_ENUM(nvmlComputeInstanceDestroy),
+    /** nvmlComputeInstanceGetInfo */
+    NVML_OVERRIDE_ENUM(nvmlComputeInstanceGetInfo),
+    /** nvmlDeviceCreateGpuInstance */
+    NVML_OVERRIDE_ENUM(nvmlDeviceCreateGpuInstance),
+    /** nvmlDeviceGetArchitecture */
+    NVML_OVERRIDE_ENUM(nvmlDeviceGetArchitecture),
+    /** nvmlDeviceGetAttributes */
+    NVML_OVERRIDE_ENUM(nvmlDeviceGetAttributes),
+    /** nvmlDeviceGetAttributes_v2 */
+    NVML_OVERRIDE_ENUM(nvmlDeviceGetAttributes_v2),
+    /** nvmlDeviceGetComputeInstanceId */
+    NVML_OVERRIDE_ENUM(nvmlDeviceGetComputeInstanceId),
+    /** nvmlDeviceGetCpuAffinityWithinScope */
+    NVML_OVERRIDE_ENUM(nvmlDeviceGetCpuAffinityWithinScope),
+    /** nvmlDeviceGetDeviceHandleFromMigDeviceHandle */
+    NVML_OVERRIDE_ENUM(nvmlDeviceGetDeviceHandleFromMigDeviceHandle),
+    /** nvmlDeviceGetGpuInstanceById */
+    NVML_OVERRIDE_ENUM(nvmlDeviceGetGpuInstanceById),
+    /** nvmlDeviceGetGpuInstanceId */
+    NVML_OVERRIDE_ENUM(nvmlDeviceGetGpuInstanceId),
+    /** nvmlDeviceGetGpuInstancePossiblePlacements */
+    NVML_OVERRIDE_ENUM(nvmlDeviceGetGpuInstancePossiblePlacements),
+    /** nvmlDeviceGetGpuInstanceProfileInfo */
+    NVML_OVERRIDE_ENUM(nvmlDeviceGetGpuInstanceProfileInfo),
+    /** nvmlDeviceGetGpuInstanceRemainingCapacity */
+    NVML_OVERRIDE_ENUM(nvmlDeviceGetGpuInstanceRemainingCapacity),
+    /** nvmlDeviceGetGpuInstances */
+    NVML_OVERRIDE_ENUM(nvmlDeviceGetGpuInstances),
+    /** nvmlDeviceGetMaxMigDeviceCount */
+    NVML_OVERRIDE_ENUM(nvmlDeviceGetMaxMigDeviceCount),
+    /** nvmlDeviceGetMemoryAffinity */
+    NVML_OVERRIDE_ENUM(nvmlDeviceGetMemoryAffinity),
+    /** nvmlDeviceGetMigDeviceHandleByIndex */
+    NVML_OVERRIDE_ENUM(nvmlDeviceGetMigDeviceHandleByIndex),
+    /** nvmlDeviceGetMigMode */
+    NVML_OVERRIDE_ENUM(nvmlDeviceGetMigMode),
+    /** nvmlDeviceGetRemappedRows */
+    NVML_OVERRIDE_ENUM(nvmlDeviceGetRemappedRows),
+    /** nvmlDeviceGetRowRemapperHistogram */
+    NVML_OVERRIDE_ENUM(nvmlDeviceGetRowRemapperHistogram),
+    /** nvmlDeviceIsMigDeviceHandle */
+    NVML_OVERRIDE_ENUM(nvmlDeviceIsMigDeviceHandle),
+    /** nvmlDeviceSetMigMode */
+    NVML_OVERRIDE_ENUM(nvmlDeviceSetMigMode),
+    /** nvmlEventSetWait_v2 */
+    NVML_OVERRIDE_ENUM(nvmlEventSetWait_v2),
+    /** nvmlGpuInstanceCreateComputeInstance */
+    NVML_OVERRIDE_ENUM(nvmlGpuInstanceCreateComputeInstance),
+    /** nvmlGpuInstanceDestroy */
+    NVML_OVERRIDE_ENUM(nvmlGpuInstanceDestroy),
+    /** nvmlGpuInstanceGetComputeInstanceById */
+    NVML_OVERRIDE_ENUM(nvmlGpuInstanceGetComputeInstanceById),
+    /** nvmlGpuInstanceGetComputeInstanceProfileInfo */
+    NVML_OVERRIDE_ENUM(nvmlGpuInstanceGetComputeInstanceProfileInfo),
+    /** nvmlGpuInstanceGetComputeInstanceRemainingCapacity */
+    NVML_OVERRIDE_ENUM(nvmlGpuInstanceGetComputeInstanceRemainingCapacity),
+    /** nvmlGpuInstanceGetComputeInstances */
+    NVML_OVERRIDE_ENUM(nvmlGpuInstanceGetComputeInstances),
+    /** nvmlGpuInstanceGetInfo */
+    NVML_OVERRIDE_ENUM(nvmlGpuInstanceGetInfo),
+    /** nvmlVgpuInstanceClearAccountingPids */
+    NVML_OVERRIDE_ENUM(nvmlVgpuInstanceClearAccountingPids),
+    /** nvmlVgpuInstanceGetMdevUUID */
+    NVML_OVERRIDE_ENUM(nvmlVgpuInstanceGetMdevUUID),
+    /** nvmlComputeInstanceGetInfo_v2 */
+    NVML_OVERRIDE_ENUM(nvmlComputeInstanceGetInfo_v2),
+    /** nvmlDeviceGetComputeRunningProcesses_v2 */
+    NVML_OVERRIDE_ENUM(nvmlDeviceGetComputeRunningProcesses_v2),
+    /** nvmlDeviceGetGraphicsRunningProcesses_v2 */
+    NVML_OVERRIDE_ENUM(nvmlDeviceGetGraphicsRunningProcesses_v2),
+    /** nvmlDeviceSetTemperatureThreshold */
+    NVML_OVERRIDE_ENUM(nvmlDeviceSetTemperatureThreshold),
+    /** nvmlRetry_NvRmControl */
+    // NVML_OVERRIDE_ENUM(nvmlRetry_NvRmControl),
+    /** nvmlVgpuInstanceGetGpuInstanceId */
+    NVML_OVERRIDE_ENUM(nvmlVgpuInstanceGetGpuInstanceId),
+    /** nvmlVgpuTypeGetGpuInstanceProfileId */
+    NVML_OVERRIDE_ENUM(nvmlVgpuTypeGetGpuInstanceProfileId),
+    NVML_ENTRY_END
 } NVML_OVERRIDE_ENUM_t;
 
 #endif
\ No newline at end of file
diff --git a/src/include/libvgpu.h b/src/include/libvgpu.h
old mode 100755
new mode 100644
index e596497e..2ab8dbf1
--- a/src/include/libvgpu.h
+++ b/src/include/libvgpu.h
@@ -1,55 +1,57 @@
 #ifndef __LIBVGPU_H__
 #define __LIBVGPU_H__
 
-#include <dlfcn.h>
 #include <cuda.h>
-#include "include/nvml_prefix.h"
+#include <dlfcn.h>
 #include <nvml.h>
 #include <pthread.h>
-#include <unistd.h>
-#include <stdio.h>
 #include <signal.h>
+#include <stdio.h>
+#include <unistd.h>
 
 #include "include/log_utils.h"
+#include "include/nvml_prefix.h"
 #include "static_config.h"
-//#include "memory_limit.h"
+// #include "memory_limit.h"
 
-#define ENSURE_INITIALIZED() ensure_initialized();        \
+#define ENSURE_INITIALIZED() ensure_initialized();
 
 extern void load_cuda_libraries();
 
 #if defined(__GNUC__) && defined(__GLIBC__)
 
-#define FUNC_ATTR_VISIBLE  __attribute__((visibility("default"))) 
+#define FUNC_ATTR_VISIBLE __attribute__((visibility("default")))
 #define FUNC_PTR_TYPE(fname) __func_ptr_type_##fname
 #define FUNC_PTR_NAME(fname) __func_ptr_origin_##fname
-#define FUNC_PTR_ALIAS_ATTR(overrided)                           \
-        __attribute__((alias(#overrided), used))                 \
-        FUNC_ATTR_VISIBLE;                                       \
+#define FUNC_PTR_ALIAS_ATTR(overrided) __attribute__((alias(#overrided), used)) FUNC_ATTR_VISIBLE;
 
 #define FUNC_OVERRIDE_NAME(fname) overrided_##fname
 
 #if defined(DLSYM_HOOK_DEBUG)
-#define DLSYM_HOOK_FUNC(f)                                       \
-    if (0 == strcmp(symbol, #f)) {                               \
-        LOG_DEBUG("Detect dlsym for %s\n", #f);                  \
-        return (void*) f; }                                      \
+#define DLSYM_HOOK_FUNC(f)                      \
+    if (0 == strcmp(symbol, #f)) {              \
+        LOG_DEBUG("Detect dlsym for %s\n", #f); \
+        return (void*)f;                        \
+    }
 
-#define DLSYM_HOOK_FUNC_REPLACE(f)                               \
-    if (0 == strcmp(symbol, hacked_#f)) {                        \
-        return (void*) f; }                                      \
+#define DLSYM_HOOK_FUNC_REPLACE(f) \
+    if (0 == strcmp(symbol, #f)) { \
+        return (void*)hacked_##f;  \
+    }  // identical to the non-debug variant: a lookup of "f" yields hacked_f
 
-#else 
+#else
 
-#define DLSYM_HOOK_FUNC(f)                                       \
-    if (0 == strcmp(symbol, #f)) {                               \
-        return (void*) f; }                                      \
+#define DLSYM_HOOK_FUNC(f)         \
+    if (0 == strcmp(symbol, #f)) { \
+        return (void*)f;           \
+    }
 
-#define DLSYM_HOOK_FUNC_REPLACE(f)                               \
-    if (0 == strcmp(symbol, #f)) {                        \
-        return (void*) hacked_##f; }                                      \
+#define DLSYM_HOOK_FUNC_REPLACE(f) \
+    if (0 == strcmp(symbol, #f)) { \
+        return (void*)hacked_##f;  \
+    }
 
-#endif     
+#endif
 
 void* __dlsym_hook_section(void* handle, const char* symbol);
 void* __dlsym_hook_section_nvml(void* handle, const char* symbol);
@@ -62,8 +64,7 @@ typedef void* (*fp_dlsym)(void*, const char*);
 #endif
 
 /* Determine the return address.  */
-#define RETURN_ADDRESS(nr) \
-  __builtin_extract_return_addr (__builtin_return_address (nr))
+#define RETURN_ADDRESS(nr) __builtin_extract_return_addr(__builtin_return_address(nr))
 
 nvmlReturn_t set_task_pid();
 int map_cuda_visible_devices();
diff --git a/src/include/log_utils.h b/src/include/log_utils.h
old mode 100755
new mode 100644
index 630ee5a9..7dfa512f
--- a/src/include/log_utils.h
+++ b/src/include/log_utils.h
@@ -1,12 +1,12 @@
 #ifndef __LOG_UTILS_H__
 #define __LOG_UTILS_H__
 
-#include <libgen.h>
 #include <assert.h>
-#include <unistd.h>
+#include <libgen.h>
 #include <pthread.h>
-#include <stdlib.h>
 #include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
 
 extern FILE *fp1;
 
@@ -23,92 +23,114 @@ extern int g_log_level;
 void log_utils_init(void);
 
 #ifdef FILEDEBUG
-#define LOG_DEBUG(msg, ...) { \
-    if (g_log_level >= 4) {\
-        if (fp1==NULL) fp1 = fopen ("/tmp/vgpulog", "a"); \
-        fprintf(fp1, "[HAMI-core Debug(%d:%ld:%s:%d)]: "msg"\n",getpid(),pthread_self(),basename(__FILE__),__LINE__,##__VA_ARGS__); \
-        }\
+#define LOG_DEBUG(msg, ...)                                                                     \
+    { /* stderr is used when /tmp/vgpulog cannot be opened */                                   \
+        if (g_log_level >= 4) {                                                                 \
+            if (fp1 == NULL) fp1 = fopen("/tmp/vgpulog", "a");                                  \
+            fprintf(fp1 ? fp1 : stderr, "[HAMI-core Debug(%d:%ld:%s:%d)]: " msg "\n", getpid(), \
+                    pthread_self(), basename(__FILE__), __LINE__, ##__VA_ARGS__);               \
+        }                                                                                       \
+    }
+#define LOG_INFO(msg, ...)                                                                     \
+    { /* stderr is used when /tmp/vgpulog cannot be opened */                                  \
+        if (g_log_level >= 3) {                                                                \
+            if (fp1 == NULL) fp1 = fopen("/tmp/vgpulog", "a");                                 \
+            fprintf(fp1 ? fp1 : stderr, "[HAMI-core Info(%d:%ld:%s:%d)]: " msg "\n", getpid(), \
+                    pthread_self(), basename(__FILE__), __LINE__, ##__VA_ARGS__);              \
+        }                                                                                      \
     }
-#define LOG_INFO(msg, ...) { \
-    if (g_log_level >= 3) {\
-        if (fp1==NULL) fp1 = fopen ("/tmp/vgpulog", "a"); \
-        fprintf(fp1, "[HAMI-core Info(%d:%ld:%s:%d)]: "msg"\n", getpid(),pthread_self(),basename(__FILE__),__LINE__,##__VA_ARGS__); \
-         }\
+#define LOG_WARN(msg, ...)                                                                     \
+    { /* stderr is used when /tmp/vgpulog cannot be opened */                                  \
+        if (g_log_level >= 2) {                                                                \
+            if (fp1 == NULL) fp1 = fopen("/tmp/vgpulog", "a");                                 \
+            fprintf(fp1 ? fp1 : stderr, "[HAMI-core Warn(%d:%ld:%s:%d)]: " msg "\n", getpid(), \
+                    pthread_self(), basename(__FILE__), __LINE__, ##__VA_ARGS__);              \
+        }                                                                                      \
     }
-#define LOG_WARN(msg, ...) { \
-    if (g_log_level >= 2) {\
-        if (fp1==NULL) fp1 = fopen ("/tmp/vgpulog", "a"); \
-        fprintf(fp1, "[HAMI-core Warn(%d:%ld:%s:%d)]: "msg"\n", getpid(),pthread_self(),basename(__FILE__),__LINE__,##__VA_ARGS__); \
-        }\
+#define LOG_MSG(msg, ...)                                                                     \
+    { /* stderr is used when /tmp/vgpulog cannot be opened */                                 \
+        if (g_log_level >= 2) {                                                               \
+            if (fp1 == NULL) fp1 = fopen("/tmp/vgpulog", "a");                                \
+            fprintf(fp1 ? fp1 : stderr, "[HAMI-core Msg(%d:%ld:%s:%d)]: " msg "\n", getpid(), \
+                    pthread_self(), basename(__FILE__), __LINE__, ##__VA_ARGS__);             \
+        }                                                                                     \
     }
-#define LOG_MSG(msg, ...) { \
-    if (g_log_level >= 2) {\
-        if (fp1==NULL) fp1 = fopen ("/tmp/vgpulog", "a"); \
-        fprintf(fp1, "[HAMI-core Msg(%d:%ld:%s:%d)]: "msg"\n", getpid(),pthread_self(),basename(__FILE__),__LINE__,##__VA_ARGS__); \
-         }\
+#define LOG_ERROR(msg, ...)                                                                   \
+    { /* stderr is used when /tmp/vgpulog cannot be opened */                                 \
+        if (fp1 == NULL) fp1 = fopen("/tmp/vgpulog", "a");                                    \
+        fprintf(fp1 ? fp1 : stderr, "[HAMI-core ERROR (pid:%d thread=%ld %s:%d)]: " msg "\n", \
+                getpid(), pthread_self(), basename(__FILE__), __LINE__, ##__VA_ARGS__);       \
     }
-#define LOG_ERROR(msg, ...) { \
-    if (fp1==NULL) fp1 = fopen ("/tmp/vgpulog", "a"); \
-    fprintf(fp1, "[HAMI-core ERROR (pid:%d thread=%ld %s:%d)]: "msg"\n", getpid(), pthread_self(), basename(__FILE__),__LINE__, ##__VA_ARGS__); \
-}
 #else
-#define LOG_DEBUG(msg, ...) { \
-    if (g_log_level >= 4) {\
-        fprintf(stderr, "[HAMI-core Debug(%d:%ld:%s:%d)]: "msg"\n",getpid(),pthread_self(),basename(__FILE__),__LINE__,##__VA_ARGS__); \
-         }\
+#define LOG_DEBUG(msg, ...)                                                         \
+    {                                                                               \
+        if (g_log_level >= 4) {                                                     \
+            fprintf(stderr, "[HAMI-core Debug(%d:%ld:%s:%d)]: " msg "\n", getpid(), \
+                    pthread_self(), basename(__FILE__), __LINE__, ##__VA_ARGS__);   \
+        }                                                                           \
     }
-#define LOG_INFO(msg, ...) { \
-    if (g_log_level >= 3) {\
-        fprintf(stderr, "[HAMI-core Info(%d:%ld:%s:%d)]: "msg"\n", getpid(),pthread_self(),basename(__FILE__),__LINE__,##__VA_ARGS__); \
-        }\
+#define LOG_INFO(msg, ...)                                                                         \
+    {                                                                                              \
+        if (g_log_level >= 3) {                                                                    \
+            fprintf(stderr, "[HAMI-core Info(%d:%ld:%s:%d)]: " msg "\n", getpid(), pthread_self(), \
+                    basename(__FILE__), __LINE__, ##__VA_ARGS__);                                  \
+        }                                                                                          \
     }
-#define LOG_WARN(msg, ...) { \
-    if (g_log_level >= 2) {\
-        fprintf(stderr, "[HAMI-core Warn(%d:%ld:%s:%d)]: "msg"\n", getpid(),pthread_self(),basename(__FILE__),__LINE__,##__VA_ARGS__); \
-        }\
+#define LOG_WARN(msg, ...)                                                                         \
+    {                                                                                              \
+        if (g_log_level >= 2) {                                                                    \
+            fprintf(stderr, "[HAMI-core Warn(%d:%ld:%s:%d)]: " msg "\n", getpid(), pthread_self(), \
+                    basename(__FILE__), __LINE__, ##__VA_ARGS__);                                  \
+        }                                                                                          \
     }
-#define LOG_MSG(msg, ...) { \
-    if (g_log_level >= 2) {\
-        fprintf(stderr, "[HAMI-core Msg(%d:%ld:%s:%d)]: "msg"\n", getpid(),pthread_self(),basename(__FILE__),__LINE__,##__VA_ARGS__); \
-         }\
+#define LOG_MSG(msg, ...)                                                                         \
+    {                                                                                             \
+        if (g_log_level >= 2) {                                                                   \
+            fprintf(stderr, "[HAMI-core Msg(%d:%ld:%s:%d)]: " msg "\n", getpid(), pthread_self(), \
+                    basename(__FILE__), __LINE__, ##__VA_ARGS__);                                 \
+        }                                                                                         \
+    }
+#define LOG_ERROR(msg, ...)                                                                 \
+    {                                                                                       \
+        fprintf(stderr, "[HAMI-core ERROR (pid:%d thread=%ld %s:%d)]: " msg "\n", getpid(), \
+                pthread_self(), basename(__FILE__), __LINE__, ##__VA_ARGS__);               \
     }
-#define LOG_ERROR(msg, ...) { \
-    fprintf(stderr, "[HAMI-core ERROR (pid:%d thread=%ld %s:%d)]: "msg"\n", getpid(), pthread_self(), basename(__FILE__),__LINE__, ##__VA_ARGS__); \
-}
 #endif
 
-#define CHECK_DRV_API(f)  {                   \
-    CUresult status = (f);                    \
-    if (status != CUDA_SUCCESS) {             \
-        LOG_WARN("Driver error at %d: %d",   \
-            __LINE__, status);                \
-        return status;                        \
-    } }                                       \
-
-#define CHECK_NVML_API(f)  {                  \
-    nvmlReturn_t status = (f);                \
-    if (status != NVML_SUCCESS) {             \
-        LOG_WARN("NVML error at line %d: %d",    \
-            __LINE__, status);                \
-        return status;                        \
-    } }                                       \
+#define CHECK_DRV_API(f)                                          \
+    {                                                             \
+        CUresult status = (f);                                    \
+        if (status != CUDA_SUCCESS) {                             \
+            LOG_WARN("Driver error at %d: %d", __LINE__, status); \
+            return status;                                        \
+        }                                                         \
+    }
 
-#define CHECK_CU_RESULT(res)  {               \
-    if (res != CUDA_SUCCESS) {                \
-        LOG_WARN("Driver error at %d: %d",   \
-            __LINE__, res);                   \
-        return res;                           \
-    } }                                       \
+#define CHECK_NVML_API(f)                                            \
+    {                                                                \
+        nvmlReturn_t status = (f);                                   \
+        if (status != NVML_SUCCESS) {                                \
+            LOG_WARN("NVML error at line %d: %d", __LINE__, status); \
+            return status;                                           \
+        }                                                            \
+    }
 
-#define CHECK_SUCCESS(res) {                  \
-    if (res != CUDA_SUCCESS)                  \
-        return res;                           \
-}
+#define CHECK_CU_RESULT(res)                                   \
+    {                                                          \
+        if (res != CUDA_SUCCESS) {                             \
+            LOG_WARN("Driver error at %d: %d", __LINE__, res); \
+            return res;                                        \
+        }                                                      \
+    }
 
-#define IF_CHECK_OOM(res) {                   \
-    if (res < 0)                              \
-        return CUDA_ERROR_OUT_OF_MEMORY;      \
-}
+#define CHECK_SUCCESS(res)                   \
+    {                                        \
+        if (res != CUDA_SUCCESS) return res; \
+    }
 
+#define IF_CHECK_OOM(res)                             \
+    {                                                 \
+        if (res < 0) return CUDA_ERROR_OUT_OF_MEMORY; \
+    }
 
 #endif
diff --git a/src/include/memory_limit.h b/src/include/memory_limit.h
old mode 100755
new mode 100644
index f9510d28..cafe3ae1
--- a/src/include/memory_limit.h
+++ b/src/include/memory_limit.h
@@ -1,72 +1,80 @@
 #ifndef __MEMORY_LIMIT_H__
 #define __MEMORY_LIMIT_H__
 
+#include <pthread.h>
 #include <stdio.h>
-#include <string.h>
 #include <stdlib.h>
-#include <pthread.h>
+#include <string.h>
 
 #include "static_config.h"
 
-
 #define CUDA_DEVICE_MEMORY_LIMIT "CUDA_DEVICE_MEMORY_LIMIT"
 #define CUDA_DEVICE_MEMORY_LIMIT_KEY_LENGTH 32
 #define CUDA_DEVICE_SM_LIMIT "CUDA_DEVICE_SM_LIMIT"
 #define CUDA_DEVICE_SM_LIMIT_KEY_LENGTH 32
 
-#define ENSURE_INITIALIZED() ensure_initialized();        \
+#define ENSURE_INITIALIZED() ensure_initialized();
 
 extern int wait_status_self(int status);
 
-#define ENSURE_RUNNING() {                                \
-   /* LOG_DEBUG("Memory op at %d",__LINE__); */              \
-    ensure_initialized();                                 \
-    while(!wait_status_self(1)) { LOG_DEBUG("E1"); sleep(1); }             \
-}                                                         \
+#define ENSURE_RUNNING()                             \
+    {                                                \
+        /* LOG_DEBUG("Memory op at %d",__LINE__); */ \
+        ensure_initialized();                        \
+        while (!wait_status_self(1)) {               \
+            LOG_DEBUG("E1");                         \
+            sleep(1);                                \
+        }                                            \
+    }
 
-#define INC_MEMORY_OR_RETURN_ERROR(bytes) {               \
-    CUdevice dev;                                         \
-    CHECK_DRV_API(cuCtxGetDevice(&dev));                  \
-    if (inc_current_device_memory_usage(dev, bytes) !=    \
-        CUDA_DEVICE_MEMORY_UPDATE_SUCCESS) {              \
-        return CUDA_ERROR_OUT_OF_MEMORY;                  \
-    } }                                                   \
+#define INC_MEMORY_OR_RETURN_ERROR(bytes)                                                       \
+    {                                                                                           \
+        CUdevice dev;                                                                           \
+        CHECK_DRV_API(cuCtxGetDevice(&dev));                                                    \
+        if (inc_current_device_memory_usage(dev, bytes) != CUDA_DEVICE_MEMORY_UPDATE_SUCCESS) { \
+            return CUDA_ERROR_OUT_OF_MEMORY;                                                    \
+        }                                                                                       \
+    }
 
-#define DECL_MEMORY_ON_ERROR(res, bytes) {                \
-    CUdevice dev;                                         \
-    CHECK_DRV_API(cuCtxGetDevice(&dev));                  \
-    if (res != CUDA_SUCCESS) {                            \
-        decl_current_device_memory_usage(dev, bytes);     \
-    } }                                                   \
+#define DECL_MEMORY_ON_ERROR(res, bytes)                  \
+    {                                                     \
+        CUdevice dev;                                     \
+        CHECK_DRV_API(cuCtxGetDevice(&dev));              \
+        if (res != CUDA_SUCCESS) {                        \
+            decl_current_device_memory_usage(dev, bytes); \
+        }                                                 \
+    }
 
-#define DECL_MEMORY_ON_SUCCESS(res, bytes) {              \
-    CUdevice dev;                                         \
-    CHECK_DRV_API(cuCtxGetDevice(&dev));                  \
-    if (res == CUDA_SUCCESS) {                            \
-        decl_current_device_memory_usage(dev, bytes);     \
-    } }                                                   \
+#define DECL_MEMORY_ON_SUCCESS(res, bytes)                \
+    {                                                     \
+        CUdevice dev;                                     \
+        CHECK_DRV_API(cuCtxGetDevice(&dev));              \
+        if (res == CUDA_SUCCESS) {                        \
+            decl_current_device_memory_usage(dev, bytes); \
+        }                                                 \
+    }
 
-#define INC_MEMORY_OR_RETURN_ERROR_WITH_DEV(d, bytes) {   \
-    if (inc_current_device_memory_usage(d, bytes) !=      \
-        CUDA_DEVICE_MEMORY_UPDATE_SUCCESS) {              \
-        return CUDA_ERROR_OUT_OF_MEMORY;                  \
-    }                                                     \
+/* Return CUDA_ERROR_OUT_OF_MEMORY from the caller unless the usage update succeeds. */
+#define INC_MEMORY_OR_RETURN_ERROR_WITH_DEV(d, bytes)                                     \
+    if (inc_current_device_memory_usage(d, bytes) != CUDA_DEVICE_MEMORY_UPDATE_SUCCESS) { \
+        return CUDA_ERROR_OUT_OF_MEMORY;                                                  \
+    }
 
-#define DECL_MEMORY_ON_ERROR_WITH_DEV(dev, res, bytes)    \
-    if (res != CUDA_SUCCESS) {                            \
-        decl_current_device_memory_usage(dev, bytes);     \
-    }                                                     \
+#define DECL_MEMORY_ON_ERROR_WITH_DEV(dev, res, bytes) \
+    if (res != CUDA_SUCCESS) {                         \
+        decl_current_device_memory_usage(dev, bytes);  \
+    }
 
-#define DECL_MEMORY_ON_SUCCESS_WITH_DEV(dev, res, bytes)  \
-    if (res == CUDA_SUCCESS) {                            \
-        decl_current_device_memory_usage(dev, bytes);     \
-    }                                                     \
-/*
-#define OOM_CHECK()                                       \
-    CUdevice dev;                                         \
-    CHECK_DRV_API(cuCtxGetDevice(&dev));                  \
-    oom_check(dev);
-*/
+/* Calls decl_current_device_memory_usage(dev, bytes) only when `res` equals CUDA_SUCCESS. */
+#define DECL_MEMORY_ON_SUCCESS_WITH_DEV(dev, res, bytes) \
+    if (res == CUDA_SUCCESS) {                           \
+        decl_current_device_memory_usage(dev, bytes);    \
+    }
+/* Disabled OOM_CHECK() helper, kept for reference:
+#define OOM_CHECK() \
+    CUdevice dev; CHECK_DRV_API(cuCtxGetDevice(&dev)); \
+    oom_check(dev);
+*/
 
 #include "multiprocess/multiprocess_memory_limit.h"
 
diff --git a/src/include/multi_func_hook.h b/src/include/multi_func_hook.h
index af32c189..d4a30929 100644
--- a/src/include/multi_func_hook.h
+++ b/src/include/multi_func_hook.h
@@ -6,12 +6,11 @@
 
 #include <stdlib.h>
 
-
 typedef struct {
-  const char *func_name;      // base func name（like "cuGraphAddDependencies"）
-  int min_ver;    // adjust to low version
-  int max_ver;    // adjust to high version
-  const char *real_name;      // the real name（ "cuGraphAddDependencies_v2"）
+    const char *func_name;  // base func name（like "cuGraphAddDependencies"）
+    int min_ver;            // adjust to low version
+    int max_ver;            // adjust to high version
+    const char *real_name;  // the real name（ "cuGraphAddDependencies_v2"）
 } CudaFuncMapEntry;
 
 // if multi func, we can add here
@@ -25,8 +24,6 @@ static CudaFuncMapEntry g_func_map[] = {
     {"cuGraphKernelNodeGetParams", 12000, 99999, "cuGraphKernelNodeGetParams_v2"},
 
     {"cuGraphKernelNodeSetParams", 10000, 11999, "cuGraphKernelNodeSetParams"},
-    {"cuGraphKernelNodeSetParams", 12000, 99999, "cuGraphKernelNodeSetParams_v2"}
-};
-
+    {"cuGraphKernelNodeSetParams", 12000, 99999, "cuGraphKernelNodeSetParams_v2"}};
 
 #endif
diff --git a/src/include/nvml-subset.h b/src/include/nvml-subset.h
index cb0908e5..a259e9e5 100644
--- a/src/include/nvml-subset.h
+++ b/src/include/nvml-subset.h
@@ -74,88 +74,79 @@ extern "C" {
 typedef struct nvmlDevice_st *nvmlDevice_t;
 
 typedef struct nvmlProcessInfo_st {
-  unsigned int pid;                  //!< Process ID
-  unsigned long long usedGpuMemory;  //!< Amount of used GPU memory in bytes.
-  //! Under WDDM, \ref NVML_VALUE_NOT_AVAILABLE is always reported
-  //! because Windows KMD manages all the memory and not the NVIDIA driver
+    unsigned int pid;                  //!< Process ID
+    unsigned long long usedGpuMemory;  //!< Amount of used GPU memory in bytes.
+    //! Under WDDM, \ref NVML_VALUE_NOT_AVAILABLE is always reported
+    //! because Windows KMD manages all the memory and not the NVIDIA driver
 } nvmlProcessInfo_t;
 
 /**
  * Return values for NVML API calls.
  */
 typedef enum nvmlReturn_enum {
-  NVML_SUCCESS = 0,  //!< The operation was successful
-  NVML_ERROR_UNINITIALIZED =
-  1,  //!< NVML was not first initialized with nvmlInit()
-  NVML_ERROR_INVALID_ARGUMENT = 2,  //!< A supplied argument is invalid
-  NVML_ERROR_NOT_SUPPORTED =
-  3,  //!< The requested operation is not available on target device
-  NVML_ERROR_NO_PERMISSION =
-  4,  //!< The current user does not have permission for operation
-  NVML_ERROR_ALREADY_INITIALIZED = 5,  //!< Deprecated: Multiple initializations
-  //! are now allowed through ref counting
-  NVML_ERROR_NOT_FOUND = 6,  //!< A query to find an object was unsuccessful
-  NVML_ERROR_INSUFFICIENT_SIZE = 7,  //!< An input argument is not large enough
-  NVML_ERROR_INSUFFICIENT_POWER =
-  8,  //!< A device's external power cables are not properly attached
-  NVML_ERROR_DRIVER_NOT_LOADED = 9,  //!< NVIDIA driver is not loaded
-  NVML_ERROR_TIMEOUT = 10,           //!< User provided timeout passed
-  NVML_ERROR_IRQ_ISSUE =
-  11,  //!< NVIDIA Kernel detected an interrupt issue with a GPU
-  NVML_ERROR_LIBRARY_NOT_FOUND =
-  12,  //!< NVML Shared Library couldn't be found or loaded
-  NVML_ERROR_FUNCTION_NOT_FOUND =
-  13,  //!< Local version of NVML doesn't implement this function
-  NVML_ERROR_CORRUPTED_INFOROM = 14,  //!< infoROM is corrupted
-  NVML_ERROR_GPU_IS_LOST = 15,        //!< The GPU has fallen off the bus or has
-  //! otherwise become inaccessible
-  NVML_ERROR_RESET_REQUIRED =
-  16,  //!< The GPU requires a reset before it can be used again
-  NVML_ERROR_OPERATING_SYSTEM = 17,  //!< The GPU control device has been
-  //! blocked by the operating system/cgroups
-  NVML_ERROR_LIB_RM_VERSION_MISMATCH =
-  18,                  //!< RM detects a driver/library version mismatch
-  NVML_ERROR_IN_USE = 19,  //!< An operation cannot be performed because the GPU
-  //! is currently in use
-  NVML_ERROR_NO_DATA = 20,  //!< No data
-  NVML_ERROR_UNKNOWN = 999  //!< An internal driver error occurred
+    NVML_SUCCESS = 0,                 //!< The operation was successful
+    NVML_ERROR_UNINITIALIZED = 1,     //!< NVML was not first initialized with nvmlInit()
+    NVML_ERROR_INVALID_ARGUMENT = 2,  //!< A supplied argument is invalid
+    NVML_ERROR_NOT_SUPPORTED = 3,     //!< The requested operation is not available on target device
+    NVML_ERROR_NO_PERMISSION = 4,     //!< The current user does not have permission for operation
+    NVML_ERROR_ALREADY_INITIALIZED = 5,  //!< Deprecated: Multiple initializations
+    //! are now allowed through ref counting
+    NVML_ERROR_NOT_FOUND = 6,          //!< A query to find an object was unsuccessful
+    NVML_ERROR_INSUFFICIENT_SIZE = 7,  //!< An input argument is not large enough
+    NVML_ERROR_INSUFFICIENT_POWER =
+        8,  //!< A device's external power cables are not properly attached
+    NVML_ERROR_DRIVER_NOT_LOADED = 9,    //!< NVIDIA driver is not loaded
+    NVML_ERROR_TIMEOUT = 10,             //!< User provided timeout passed
+    NVML_ERROR_IRQ_ISSUE = 11,           //!< NVIDIA Kernel detected an interrupt issue with a GPU
+    NVML_ERROR_LIBRARY_NOT_FOUND = 12,   //!< NVML Shared Library couldn't be found or loaded
+    NVML_ERROR_FUNCTION_NOT_FOUND = 13,  //!< Local version of NVML doesn't implement this function
+    NVML_ERROR_CORRUPTED_INFOROM = 14,   //!< infoROM is corrupted
+    NVML_ERROR_GPU_IS_LOST = 15,         //!< The GPU has fallen off the bus or has
+    //! otherwise become inaccessible
+    NVML_ERROR_RESET_REQUIRED = 16,    //!< The GPU requires a reset before it can be used again
+    NVML_ERROR_OPERATING_SYSTEM = 17,  //!< The GPU control device has been
+    //! blocked by the operating system/cgroups
+    NVML_ERROR_LIB_RM_VERSION_MISMATCH = 18,  //!< RM detects a driver/library version mismatch
+    NVML_ERROR_IN_USE = 19,                   //!< An operation cannot be performed because the GPU
+    //! is currently in use
+    NVML_ERROR_NO_DATA = 20,  //!< No data
+    NVML_ERROR_UNKNOWN = 999  //!< An internal driver error occurred
 } nvmlReturn_t;
 
 /**
  * PCI information about a GPU device.
  */
 typedef struct nvmlPciInfo_st {
-  char busId[NVML_DEVICE_PCI_BUS_ID_BUFFER_SIZE];  //!< The tuple
-  //!< domain:bus:device.function
-  //!< PCI identifier (&amp;
-  //!< NULL terminator)
-  unsigned int domain;  //!< The PCI domain on which the device's bus resides, 0
-  //!< to 0xffff
-  unsigned int bus;     //!< The bus on which the device resides, 0 to 0xff
-  unsigned int device;  //!< The device's id on the bus, 0 to 31
-  unsigned int
-      pciDeviceId;  //!< The combined 16-bit device id and 16-bit vendor id
-
-  // Added in NVML 2.285 API
-  unsigned int pciSubSystemId;  //!< The 32-bit Sub System Device ID
-
-  // NVIDIA reserved for internal use only
-  unsigned int reserved0;
-  unsigned int reserved1;
-  unsigned int reserved2;
-  unsigned int reserved3;
+    char busId[NVML_DEVICE_PCI_BUS_ID_BUFFER_SIZE];  //!< The tuple
+    //!< domain:bus:device.function
+    //!< PCI identifier (&amp;
+    //!< NULL terminator)
+    unsigned int domain;  //!< The PCI domain on which the device's bus resides, 0
+    //!< to 0xffff
+    unsigned int bus;          //!< The bus on which the device resides, 0 to 0xff
+    unsigned int device;       //!< The device's id on the bus, 0 to 31
+    unsigned int pciDeviceId;  //!< The combined 16-bit device id and 16-bit vendor id
+
+    // Added in NVML 2.285 API
+    unsigned int pciSubSystemId;  //!< The 32-bit Sub System Device ID
+
+    // NVIDIA reserved for internal use only
+    unsigned int reserved0;
+    unsigned int reserved1;
+    unsigned int reserved2;
+    unsigned int reserved3;
 } nvmlPciInfo_t;
 
 /**
  * Structure to store utilization value and process Id
  */
 typedef struct nvmlProcessUtilizationSample_st {
-  unsigned int pid;              //!< PID of process
-  unsigned long long timeStamp;  //!< CPU Timestamp in microseconds
-  unsigned int smUtil;           //!< SM (3D/Compute) Util Value
-  unsigned int memUtil;          //!< Frame Buffer Memory Util Value
-  unsigned int encUtil;          //!< Encoder Util Value
-  unsigned int decUtil;          //!< Decoder Util Value
+    unsigned int pid;              //!< PID of process
+    unsigned long long timeStamp;  //!< CPU Timestamp in microseconds
+    unsigned int smUtil;           //!< SM (3D/Compute) Util Value
+    unsigned int memUtil;          //!< Frame Buffer Memory Util Value
+    unsigned int encUtil;          //!< Encoder Util Value
+    unsigned int decUtil;          //!< Decoder Util Value
 } nvmlProcessUtilizationSample_t;
 
 /**
@@ -168,57 +159,55 @@ typedef struct nvmlProcessUtilizationSample_st {
  * behavior. If not, volatile counts are reset each time a compute app is run.
  */
 typedef enum nvmlEccCounterType_enum {
-  NVML_VOLATILE_ECC =
-  0,  //!< Volatile counts are reset each time the driver loads.
-  NVML_AGGREGATE_ECC = 1,  //!< Aggregate counts persist across reboots (i.e.
-  //!< for the lifetime of the device)
+    NVML_VOLATILE_ECC = 0,   //!< Volatile counts are reset each time the driver loads.
+    NVML_AGGREGATE_ECC = 1,  //!< Aggregate counts persist across reboots (i.e.
+    //!< for the lifetime of the device)
 
-  // Keep this last
-  NVML_ECC_COUNTER_TYPE_COUNT  //!< Count of memory counter types
+    // Keep this last
+    NVML_ECC_COUNTER_TYPE_COUNT  //!< Count of memory counter types
 } nvmlEccCounterType_t;
 
 /**
  * Generic enable/disable enum.
  */
 typedef enum nvmlEnableState_enum {
-  NVML_FEATURE_DISABLED = 0,  //!< Feature disabled
-  NVML_FEATURE_ENABLED = 1    //!< Feature enabled
+    NVML_FEATURE_DISABLED = 0,  //!< Feature disabled
+    NVML_FEATURE_ENABLED = 1    //!< Feature enabled
 } nvmlEnableState_t;
 
 /**
  * Describes accounting statistics of a process.
  */
 typedef struct nvmlAccountingStats_st {
-  unsigned int
-      gpuUtilization;  //!< Percent of time over the process's lifetime during
-  //!< which one or more kernels was executing on the GPU.
-  //! Utilization stats just like returned by \ref nvmlDeviceGetUtilizationRates
-  //! but for the life time of a process (not just the last sample period). Set
-  //! to NVML_VALUE_NOT_AVAILABLE if nvmlDeviceGetUtilizationRates is not
-  //! supported
-
-  unsigned int memoryUtilization;  //!< Percent of time over the process's
-  //!< lifetime during which global (device)
-  //!< memory was being read or written.
-  //! Set to NVML_VALUE_NOT_AVAILABLE if nvmlDeviceGetUtilizationRates is not
-  //! supported
-
-  unsigned long long maxMemoryUsage;  //!< Maximum total memory in bytes that
-  //!< was ever allocated by the process.
-  //! Set to NVML_VALUE_NOT_AVAILABLE if nvmlProcessInfo_t->usedGpuMemory is not
-  //! supported
-
-  unsigned long long time;  //!< Amount of time in ms during which the compute
-  //!< context was active. The time is reported as 0 if
-  //!< the process is not terminated
-
-  unsigned long long startTime;  //!< CPU Timestamp in usec representing start
-  //!< time for the process
-
-  unsigned int isRunning;  //!< Flag to represent if the process is running (1
-  //!< for running, 0 for terminated)
-
-  unsigned int reserved[5];  //!< Reserved for future use
+    unsigned int gpuUtilization;  //!< Percent of time over the process's lifetime during
+    //!< which one or more kernels was executing on the GPU.
+    //! Utilization stats just like returned by \ref nvmlDeviceGetUtilizationRates
+    //! but for the life time of a process (not just the last sample period). Set
+    //! to NVML_VALUE_NOT_AVAILABLE if nvmlDeviceGetUtilizationRates is not
+    //! supported
+
+    unsigned int memoryUtilization;  //!< Percent of time over the process's
+    //!< lifetime during which global (device)
+    //!< memory was being read or written.
+    //! Set to NVML_VALUE_NOT_AVAILABLE if nvmlDeviceGetUtilizationRates is not
+    //! supported
+
+    unsigned long long maxMemoryUsage;  //!< Maximum total memory in bytes that
+    //!< was ever allocated by the process.
+    //! Set to NVML_VALUE_NOT_AVAILABLE if nvmlProcessInfo_t->usedGpuMemory is not
+    //! supported
+
+    unsigned long long time;  //!< Amount of time in ms during which the compute
+    //!< context was active. The time is reported as 0 if
+    //!< the process is not terminated
+
+    unsigned long long startTime;  //!< CPU Timestamp in usec representing start
+    //!< time for the process
+
+    unsigned int isRunning;  //!< Flag to represent if the process is running (1
+    //!< for running, 0 for terminated)
+
+    unsigned int reserved[5];  //!< Reserved for future use
 } nvmlAccountingStats_t;
 
 typedef unsigned int nvmlVgpuInstance_t;
@@ -227,15 +216,14 @@ typedef unsigned int nvmlVgpuInstance_t;
  * API types that allow changes to default permission restrictions
  */
 typedef enum nvmlRestrictedAPI_enum {
-  NVML_RESTRICTED_API_SET_APPLICATION_CLOCKS =
-  0,  //!< APIs that change application clocks, see
-  //!< nvmlDeviceSetApplicationsClocks
-  //!< and see nvmlDeviceResetApplicationsClocks
-  NVML_RESTRICTED_API_SET_AUTO_BOOSTED_CLOCKS =
-  1,  //!< APIs that enable/disable Auto Boosted clocks
-  //!< see nvmlDeviceSetAutoBoostedClocksEnabled
-  // Keep this last
-  NVML_RESTRICTED_API_COUNT
+    NVML_RESTRICTED_API_SET_APPLICATION_CLOCKS = 0,  //!< APIs that change application clocks, see
+    //!< nvmlDeviceSetApplicationsClocks
+    //!< and see nvmlDeviceResetApplicationsClocks
+    NVML_RESTRICTED_API_SET_AUTO_BOOSTED_CLOCKS =
+        1,  //!< APIs that enable/disable Auto Boosted clocks
+    //!< see nvmlDeviceSetAutoBoostedClocksEnabled
+    // Keep this last
+    NVML_RESTRICTED_API_COUNT
 } nvmlRestrictedAPI_t;
 
 /**
@@ -244,54 +232,54 @@ typedef enum nvmlRestrictedAPI_enum {
  * All speeds are in Mhz.
  */
 typedef enum nvmlClockType_enum {
-  NVML_CLOCK_GRAPHICS = 0,  //!< Graphics clock domain
-  NVML_CLOCK_SM = 1,        //!< SM clock domain
-  NVML_CLOCK_MEM = 2,       //!< Memory clock domain
-  NVML_CLOCK_VIDEO = 3,     //!< Video encoder/decoder clock domain
+    NVML_CLOCK_GRAPHICS = 0,  //!< Graphics clock domain
+    NVML_CLOCK_SM = 1,        //!< SM clock domain
+    NVML_CLOCK_MEM = 2,       //!< Memory clock domain
+    NVML_CLOCK_VIDEO = 3,     //!< Video encoder/decoder clock domain
 
-  // Keep this last
-  NVML_CLOCK_COUNT  //<! Count of clock types
+    // Keep this last
+    NVML_CLOCK_COUNT  //!< Count of clock types
 } nvmlClockType_t;
 
 /**
  * BAR1 Memory allocation Information for a device
  */
 typedef struct nvmlBAR1Memory_st {
-  unsigned long long bar1Total;  //!< Total BAR1 Memory (in bytes)
-  unsigned long long bar1Free;   //!< Unallocated BAR1 Memory (in bytes)
-  unsigned long long bar1Used;   //!< Allocated Used Memory (in bytes)
+    unsigned long long bar1Total;  //!< Total BAR1 Memory (in bytes)
+    unsigned long long bar1Free;   //!< Unallocated BAR1 Memory (in bytes)
+    unsigned long long bar1Used;   //!< Allocated Used Memory (in bytes)
 } nvmlBAR1Memory_t;
 
 /**
  *  * The Brand of the GPU
  *   */
 typedef enum nvmlBrandType_enum {
-  NVML_BRAND_UNKNOWN = 0,
-  NVML_BRAND_QUADRO = 1,
-  NVML_BRAND_TESLA = 2,
-  NVML_BRAND_NVS = 3,
-  NVML_BRAND_GRID = 4,
-  NVML_BRAND_GEFORCE = 5,
-  NVML_BRAND_TITAN = 6,
-
-  // Keep this last
-  NVML_BRAND_COUNT
+    NVML_BRAND_UNKNOWN = 0,
+    NVML_BRAND_QUADRO = 1,
+    NVML_BRAND_TESLA = 2,
+    NVML_BRAND_NVS = 3,
+    NVML_BRAND_GRID = 4,
+    NVML_BRAND_GEFORCE = 5,
+    NVML_BRAND_TITAN = 6,
+
+    // Keep this last
+    NVML_BRAND_COUNT
 } nvmlBrandType_t;
 
 /**
  * Enum to represent type of bridge chip
  */
 typedef enum nvmlBridgeChipType_enum {
-  NVML_BRIDGE_CHIP_PLX = 0,
-  NVML_BRIDGE_CHIP_BRO4 = 1
+    NVML_BRIDGE_CHIP_PLX = 0,
+    NVML_BRIDGE_CHIP_BRO4 = 1
 } nvmlBridgeChipType_t;
 
 /**
  * Information about the Bridge Chip Firmware
  */
 typedef struct nvmlBridgeChipInfo_st {
-  nvmlBridgeChipType_t type;  //!< Type of Bridge Chip
-  unsigned int fwVersion;     //!< Firmware Version. 0=Version is unavailable
+    nvmlBridgeChipType_t type;  //!< Type of Bridge Chip
+    unsigned int fwVersion;     //!< Firmware Version. 0=Version is unavailable
 } nvmlBridgeChipInfo_t;
 
 /**
@@ -300,10 +288,9 @@ typedef struct nvmlBridgeChipInfo_st {
  * immediate bridge is at index 1 and so forth.
  */
 typedef struct nvmlBridgeChipHierarchy_st {
-  unsigned char bridgeCount;  //!< Number of Bridge Chips on the Board
-  nvmlBridgeChipInfo_t
-      bridgeChipInfo[NVML_MAX_PHYSICAL_BRIDGE];  //!< Hierarchy of Bridge Chips
-  //!< on the board
+    unsigned char bridgeCount;  //!< Number of Bridge Chips on the Board
+    nvmlBridgeChipInfo_t bridgeChipInfo[NVML_MAX_PHYSICAL_BRIDGE];  //!< Hierarchy of Bridge Chips
+                                                                    //!< on the board
 } nvmlBridgeChipHierarchy_t;
 
 /**
@@ -311,13 +298,13 @@ typedef struct nvmlBridgeChipHierarchy_st {
  * to specify a single clock value.
  */
 typedef enum nvmlClockId_enum {
-  NVML_CLOCK_ID_CURRENT = 0,             //!< Current actual clock value
-  NVML_CLOCK_ID_APP_CLOCK_TARGET = 1,    //!< Target application clock
-  NVML_CLOCK_ID_APP_CLOCK_DEFAULT = 2,   //!< Default application clock target
-  NVML_CLOCK_ID_CUSTOMER_BOOST_MAX = 3,  //!< OEM-defined maximum clock rate
+    NVML_CLOCK_ID_CURRENT = 0,             //!< Current actual clock value
+    NVML_CLOCK_ID_APP_CLOCK_TARGET = 1,    //!< Target application clock
+    NVML_CLOCK_ID_APP_CLOCK_DEFAULT = 2,   //!< Default application clock target
+    NVML_CLOCK_ID_CUSTOMER_BOOST_MAX = 3,  //!< OEM-defined maximum clock rate
 
-  // Keep this last
-  NVML_CLOCK_ID_COUNT  //<! Count of Clock Ids.
+    // Keep this last
+    NVML_CLOCK_ID_COUNT  //!< Count of Clock Ids.
 } nvmlClockId_t;
 
 /**
@@ -329,17 +316,15 @@ typedef enum nvmlClockId_enum {
  * beyond.
  */
 typedef enum nvmlComputeMode_enum {
-  NVML_COMPUTEMODE_DEFAULT =
-  0,  //!< Default compute mode -- multiple contexts per device
-  NVML_COMPUTEMODE_EXCLUSIVE_THREAD = 1,  //!< Support Removed
-  NVML_COMPUTEMODE_PROHIBITED =
-  2,  //!< Compute-prohibited mode -- no contexts per device
-  NVML_COMPUTEMODE_EXCLUSIVE_PROCESS =
-  3,  //!< Compute-exclusive-process mode -- only one context per device,
-  //!< usable from multiple threads at a time
-
-  // Keep this last
-  NVML_COMPUTEMODE_COUNT
+    NVML_COMPUTEMODE_DEFAULT = 0,  //!< Default compute mode -- multiple contexts per device
+    NVML_COMPUTEMODE_EXCLUSIVE_THREAD = 1,  //!< Support Removed
+    NVML_COMPUTEMODE_PROHIBITED = 2,        //!< Compute-prohibited mode -- no contexts per device
+    NVML_COMPUTEMODE_EXCLUSIVE_PROCESS =
+        3,  //!< Compute-exclusive-process mode -- only one context per device,
+    //!< usable from multiple threads at a time
+
+    // Keep this last
+    NVML_COMPUTEMODE_COUNT
 } nvmlComputeMode_t;
 
 typedef unsigned int nvmlVgpuTypeId_t;
@@ -353,10 +338,10 @@ typedef unsigned int nvmlVgpuTypeId_t;
  *              See \ref nvmlDeviceGetMemoryErrorCounter
  */
 typedef struct nvmlEccErrorCounts_st {
-  unsigned long long l1Cache;       //!< L1 cache errors
-  unsigned long long l2Cache;       //!< L2 cache errors
-  unsigned long long deviceMemory;  //!< Device memory errors
-  unsigned long long registerFile;  //!< Register file errors
+    unsigned long long l1Cache;       //!< L1 cache errors
+    unsigned long long l2Cache;       //!< L2 cache errors
+    unsigned long long deviceMemory;  //!< Device memory errors
+    unsigned long long registerFile;  //!< Register file errors
 } nvmlEccErrorCounts_t;
 
 /**
@@ -365,84 +350,77 @@ typedef struct nvmlEccErrorCounts_st {
  * Windows only.
  */
 typedef enum nvmlDriverModel_enum {
-  NVML_DRIVER_WDDM =
-  0,  //!< WDDM driver model -- GPU treated as a display device
-  NVML_DRIVER_WDM =
-  1  //!< WDM (TCC) model (recommended) -- GPU treated as a generic device
+    NVML_DRIVER_WDDM = 0,  //!< WDDM driver model -- GPU treated as a display device
+    NVML_DRIVER_WDM = 1    //!< WDM (TCC) model (recommended) -- GPU treated as a generic device
 } nvmlDriverModel_t;
 
 /*
  * Represents type of encoder for capacity can be queried
  */
 typedef enum nvmlEncoderQueryType_enum {
-  NVML_ENCODER_QUERY_H264 = 0,
-  NVML_ENCODER_QUERY_HEVC = 1,
+    NVML_ENCODER_QUERY_H264 = 0,
+    NVML_ENCODER_QUERY_HEVC = 1,
 } nvmlEncoderType_t;
 
 /*
  * Struct to hold encoder session data
  */
 typedef struct nvmlEncoderSessionInfo_st {
-  unsigned int sessionId;           //!< Unique session ID
-  unsigned int pid;                 //!< Owning process ID
-  nvmlVgpuInstance_t vgpuInstance;  //!< Owning vGPU instance ID (only valid on
-  //!< vGPU hosts, otherwise zero)
-  nvmlEncoderType_t codecType;  //!< Video encoder type
-  unsigned int hResolution;     //!< Current encode horizontal resolution
-  unsigned int vResolution;     //!< Current encode vertical resolution
-  unsigned int averageFps;      //!< Moving average encode frames per second
-  unsigned int
-      averageLatency;  //!< Moving average encode latency in microseconds
+    unsigned int sessionId;           //!< Unique session ID
+    unsigned int pid;                 //!< Owning process ID
+    nvmlVgpuInstance_t vgpuInstance;  //!< Owning vGPU instance ID (only valid on
+    //!< vGPU hosts, otherwise zero)
+    nvmlEncoderType_t codecType;  //!< Video encoder type
+    unsigned int hResolution;     //!< Current encode horizontal resolution
+    unsigned int vResolution;     //!< Current encode vertical resolution
+    unsigned int averageFps;      //!< Moving average encode frames per second
+    unsigned int averageLatency;  //!< Moving average encode latency in microseconds
 } nvmlEncoderSessionInfo_t;
 
 /**
  * Represents the type for sample value returned
  */
 typedef enum nvmlValueType_enum {
-  NVML_VALUE_TYPE_DOUBLE = 0,
-  NVML_VALUE_TYPE_UNSIGNED_INT = 1,
-  NVML_VALUE_TYPE_UNSIGNED_LONG = 2,
-  NVML_VALUE_TYPE_UNSIGNED_LONG_LONG = 3,
-  NVML_VALUE_TYPE_SIGNED_LONG_LONG = 4,
-
-  // Keep this last
-  NVML_VALUE_TYPE_COUNT
+    NVML_VALUE_TYPE_DOUBLE = 0,
+    NVML_VALUE_TYPE_UNSIGNED_INT = 1,
+    NVML_VALUE_TYPE_UNSIGNED_LONG = 2,
+    NVML_VALUE_TYPE_UNSIGNED_LONG_LONG = 3,
+    NVML_VALUE_TYPE_SIGNED_LONG_LONG = 4,
+
+    // Keep this last
+    NVML_VALUE_TYPE_COUNT
 } nvmlValueType_t;
 
 /**
  * Union to represent different types of Value
  */
 typedef union nvmlValue_st {
-  double dVal;                //!< If the value is double
-  unsigned int uiVal;         //!< If the value is unsigned int
-  unsigned long ulVal;        //!< If the value is unsigned long
-  unsigned long long ullVal;  //!< If the value is unsigned long long
-  signed long long sllVal;    //!< If the value is signed long long
+    double dVal;                //!< If the value is double
+    unsigned int uiVal;         //!< If the value is unsigned int
+    unsigned long ulVal;        //!< If the value is unsigned long
+    unsigned long long ullVal;  //!< If the value is unsigned long long
+    signed long long sllVal;    //!< If the value is signed long long
 } nvmlValue_t;
 
 /**
  * Information for a Field Value Sample
  */
 typedef struct nvmlFieldValue_st {
-  unsigned int fieldId;  //!< ID of the NVML field to retrieve. This must be set
-  //!< before any call that uses this struct. See the
-  //!< constants starting with NVML_FI_ above.
-  unsigned int unused;  //!< Currently unused. This should be initialized to 0
-  //!< by the caller before any API call
-  long long
-      timestamp;  //!< CPU Timestamp of this value in microseconds since 1970
-  long long
-      latencyUsec;  //!< How long this field value took to update (in usec)
-  //!< within NVML. This may be averaged across several
-  //!< fields that are serviced by the same driver call.
-  nvmlValueType_t valueType;  //!< Type of the value stored in value
-  nvmlReturn_t
-      nvmlReturn;  //!< Return code for retrieving this value. This must
-  //!< be checked before looking at value, as value is
-  //!< undefined if nvmlReturn != NVML_SUCCESS
-  nvmlValue_t
-      value;  //!< Value for this field. This is only valid if nvmlReturn
-  //!< == NVML_SUCCESS
+    unsigned int fieldId;  //!< ID of the NVML field to retrieve. This must be set
+    //!< before any call that uses this struct. See the
+    //!< constants starting with NVML_FI_ above.
+    unsigned int unused;  //!< Currently unused. This should be initialized to 0
+    //!< by the caller before any API call
+    long long timestamp;    //!< CPU Timestamp of this value in microseconds since 1970
+    long long latencyUsec;  //!< How long this field value took to update (in usec)
+    //!< within NVML. This may be averaged across several
+    //!< fields that are serviced by the same driver call.
+    nvmlValueType_t valueType;  //!< Type of the value stored in value
+    nvmlReturn_t nvmlReturn;    //!< Return code for retrieving this value. This must
+    //!< be checked before looking at value, as value is
+    //!< undefined if nvmlReturn != NVML_SUCCESS
+    nvmlValue_t value;  //!< Value for this field. This is only valid if nvmlReturn
+                        //!< == NVML_SUCCESS
 } nvmlFieldValue_t;
 
 /**
@@ -454,76 +432,73 @@ typedef struct nvmlFieldValue_st {
  * Each GOM is designed to meet specific user needs.
  */
 typedef enum nvmlGom_enum {
-  NVML_GOM_ALL_ON = 0,  //!< Everything is enabled and running at full speed
+    NVML_GOM_ALL_ON = 0,  //!< Everything is enabled and running at full speed
 
-  NVML_GOM_COMPUTE =
-  1,  //!< Designed for running only compute tasks. Graphics operations
-  //!< are not allowed
+    NVML_GOM_COMPUTE = 1,  //!< Designed for running only compute tasks. Graphics operations
+    //!< are not allowed
 
-  NVML_GOM_LOW_DP = 2  //!< Designed for running graphics applications that
-  //!< don't require high bandwidth double precision
+    NVML_GOM_LOW_DP = 2  //!< Designed for running graphics applications that
+                         //!< don't require high bandwidth double precision
 } nvmlGpuOperationMode_t;
 
 // GRID license feature code
 typedef enum {
-  NVML_GRID_LICENSE_FEATURE_CODE_VGPU = 1,         // Virtual GPU
-  NVML_GRID_LICENSE_FEATURE_CODE_VWORKSTATION = 2  // Virtual Workstation
+    NVML_GRID_LICENSE_FEATURE_CODE_VGPU = 1,         // Virtual GPU
+    NVML_GRID_LICENSE_FEATURE_CODE_VWORKSTATION = 2  // Virtual Workstation
 } nvmlGridLicenseFeatureCode_t;
 
 /**
  * Structure to store GRID licensable features
  */
 typedef struct nvmlGridLicensableFeature_st {
-  nvmlGridLicenseFeatureCode_t featureCode;  //<! Licensed feature code
-  unsigned int featureState;  //<! Non-zero if feature is currently licensed,
-  // otherwise zero
-  char licenseInfo[NVML_GRID_LICENSE_BUFFER_SIZE];
+    nvmlGridLicenseFeatureCode_t featureCode;  //!< Licensed feature code
+    unsigned int featureState;                 //!< Non-zero if feature is currently licensed,
+    //!< otherwise zero
+    char licenseInfo[NVML_GRID_LICENSE_BUFFER_SIZE];
 } nvmlGridLicensableFeature_t;
 
 typedef struct nvmlGridLicensableFeatures_st {
-  int isGridLicenseSupported;  //<! Non-zero if GRID Software Licensing is
-  // supported on the system, otherwise zero
-  unsigned int licensableFeaturesCount;  //<! Entries returned in \a
-  // gridLicensableFeatures array
-  nvmlGridLicensableFeature_t
-      gridLicensableFeatures[NVML_GRID_LICENSE_FEATURE_MAX_COUNT];
+    int isGridLicenseSupported;  //!< Non-zero if GRID Software Licensing is
+    //!< supported on the system, otherwise zero
+    unsigned int licensableFeaturesCount;  //!< Entries returned in \a
+    //!< gridLicensableFeatures array
+    nvmlGridLicensableFeature_t gridLicensableFeatures[NVML_GRID_LICENSE_FEATURE_MAX_COUNT];
 } nvmlGridLicensableFeatures_t;
 
 /**
  * Available infoROM objects.
  */
 typedef enum nvmlInforomObject_enum {
-  NVML_INFOROM_OEM = 0,  //!< An object defined by OEM
-  NVML_INFOROM_ECC =
-  1,  //!< The ECC object determining the level of ECC support
-  NVML_INFOROM_POWER = 2,  //!< The power management object
-
-  // Keep this last
-  NVML_INFOROM_COUNT  //!< This counts the number of infoROM objects the driver
-  //!< knows about
+    NVML_INFOROM_OEM = 0,    //!< An object defined by OEM
+    NVML_INFOROM_ECC = 1,    //!< The ECC object determining the level of ECC support
+    NVML_INFOROM_POWER = 2,  //!< The power management object
+
+    // Keep this last
+    NVML_INFOROM_COUNT  //!< This counts the number of infoROM objects the driver
+                        //!< knows about
 } nvmlInforomObject_t;
 
 /**
  * Memory error types
  */
 typedef enum nvmlMemoryErrorType_enum {
-  /**
-   * A memory error that was corrected
-   *
-   * For ECC errors, these are single bit errors
-   * For Texture memory, these are errors fixed by resend
-   */
-  NVML_MEMORY_ERROR_TYPE_CORRECTED = 0,
-  /**
-   * A memory error that was not corrected
-   *
-   * For ECC errors, these are double bit errors
-   * For Texture memory, these are errors where the resend fails
-   */
-  NVML_MEMORY_ERROR_TYPE_UNCORRECTED = 1,
-
-  // Keep this last
-  NVML_MEMORY_ERROR_TYPE_COUNT  //!< Count of memory error types
+    /**
+     * A memory error that was corrected
+     *
+     * For ECC errors, these are single bit errors
+     * For Texture memory, these are errors fixed by resend
+     */
+    NVML_MEMORY_ERROR_TYPE_CORRECTED = 0,
+    /**
+     * A memory error that was not corrected
+     *
+     * For ECC errors, these are double bit errors
+     * For Texture memory, these are errors where the resend fails
+     */
+    NVML_MEMORY_ERROR_TYPE_UNCORRECTED = 1,
+
+    // Keep this last
+    NVML_MEMORY_ERROR_TYPE_COUNT  //!< Count of memory error types
 
 } nvmlMemoryErrorType_t;
 
@@ -533,86 +508,83 @@ typedef enum nvmlMemoryErrorType_enum {
  * See \ref nvmlDeviceGetMemoryErrorCounter
  */
 typedef enum nvmlMemoryLocation_enum {
-  NVML_MEMORY_LOCATION_L1_CACHE = 0,        //!< GPU L1 Cache
-  NVML_MEMORY_LOCATION_L2_CACHE = 1,        //!< GPU L2 Cache
-  NVML_MEMORY_LOCATION_DEVICE_MEMORY = 2,   //!< GPU Device Memory
-  NVML_MEMORY_LOCATION_REGISTER_FILE = 3,   //!< GPU Register File
-  NVML_MEMORY_LOCATION_TEXTURE_MEMORY = 4,  //!< GPU Texture Memory
-  NVML_MEMORY_LOCATION_TEXTURE_SHM = 5,     //!< Shared memory
-  NVML_MEMORY_LOCATION_CBU = 6,             //!< CBU
-
-  // Keep this last
-  NVML_MEMORY_LOCATION_COUNT  //!< This counts the number of memory locations
-  //!< the driver knows about
+    NVML_MEMORY_LOCATION_L1_CACHE = 0,        //!< GPU L1 Cache
+    NVML_MEMORY_LOCATION_L2_CACHE = 1,        //!< GPU L2 Cache
+    NVML_MEMORY_LOCATION_DEVICE_MEMORY = 2,   //!< GPU Device Memory
+    NVML_MEMORY_LOCATION_REGISTER_FILE = 3,   //!< GPU Register File
+    NVML_MEMORY_LOCATION_TEXTURE_MEMORY = 4,  //!< GPU Texture Memory
+    NVML_MEMORY_LOCATION_TEXTURE_SHM = 5,     //!< Shared memory
+    NVML_MEMORY_LOCATION_CBU = 6,             //!< CBU
+
+    // Keep this last
+    NVML_MEMORY_LOCATION_COUNT  //!< This counts the number of memory locations
+                                //!< the driver knows about
 } nvmlMemoryLocation_t;
 
 /**
  * Memory allocation information for a device.
  */
 typedef struct nvmlMemory_st {
-  unsigned long long total;  //!< Total installed FB memory (in bytes)
-  unsigned long long free;   //!< Unallocated FB memory (in bytes)
-  unsigned long long
-      used;  //!< Allocated FB memory (in bytes). Note that the driver/GPU
-  //!< always sets aside a small amount of memory for bookkeeping
+    unsigned long long total;  //!< Total installed FB memory (in bytes)
+    unsigned long long free;   //!< Unallocated FB memory (in bytes)
+    unsigned long long used;   //!< Allocated FB memory (in bytes). Note that the driver/GPU
+                               //!< always sets aside a small amount of memory for bookkeeping
 } nvmlMemory_t;
 
 /**
  * Memory allocation information for a device (v2).
- * 
+ *
  * Version 2 adds versioning for the struct and the amount of system-reserved memory as an output.
- * @note The \ref nvmlMemory_v2_t.used amount also includes the \ref nvmlMemory_v2_t.reserved amount.
- */
-typedef struct nvmlMemory_v2_st
-{
-    unsigned int version;            //!< Structure format version (must be 2)
-    unsigned long long total;        //!< Total physical device memory (in bytes)
-    unsigned long long reserved;     //!< Device memory (in bytes) reserved for system use (driver or firmware)
-    unsigned long long free;         //!< Unallocated device memory (in bytes)
-    unsigned long long used;         //!< Allocated device memory (in bytes). Note that the driver/GPU always sets aside a small amount of memory for bookkeeping
+ * @note The \ref nvmlMemory_v2_t.used amount also includes the \ref nvmlMemory_v2_t.reserved
+ * amount.
+ */
+typedef struct nvmlMemory_v2_st {
+    unsigned int version;      //!< Structure format version (must be 2)
+    unsigned long long total;  //!< Total physical device memory (in bytes)
+    unsigned long long
+        reserved;  //!< Device memory (in bytes) reserved for system use (driver or firmware)
+    unsigned long long free;  //!< Unallocated device memory (in bytes)
+    unsigned long long used;  //!< Allocated device memory (in bytes). Note that the driver/GPU
+                              //!< always sets aside a small amount of memory for bookkeeping
 } nvmlMemory_v2_t;
 
-
 /**
  * Enum to represent NvLink queryable capabilities
  */
 typedef enum nvmlNvLinkCapability_enum {
-  NVML_NVLINK_CAP_P2P_SUPPORTED = 0,   // P2P over NVLink is supported
-  NVML_NVLINK_CAP_SYSMEM_ACCESS = 1,   // Access to system memory is supported
-  NVML_NVLINK_CAP_P2P_ATOMICS = 2,     // P2P atomics are supported
-  NVML_NVLINK_CAP_SYSMEM_ATOMICS = 3,  // System memory atomics are supported
-  NVML_NVLINK_CAP_SLI_BRIDGE = 4,      // SLI is supported over this link
-  NVML_NVLINK_CAP_VALID = 5,           // Link is supported on this device
-  // should be last
-  NVML_NVLINK_CAP_COUNT
+    NVML_NVLINK_CAP_P2P_SUPPORTED = 0,   // P2P over NVLink is supported
+    NVML_NVLINK_CAP_SYSMEM_ACCESS = 1,   // Access to system memory is supported
+    NVML_NVLINK_CAP_P2P_ATOMICS = 2,     // P2P atomics are supported
+    NVML_NVLINK_CAP_SYSMEM_ATOMICS = 3,  // System memory atomics are supported
+    NVML_NVLINK_CAP_SLI_BRIDGE = 4,      // SLI is supported over this link
+    NVML_NVLINK_CAP_VALID = 5,           // Link is supported on this device
+    // should be last
+    NVML_NVLINK_CAP_COUNT
 } nvmlNvLinkCapability_t;
 
 /**
  * Enum to represent NvLink queryable error counters
  */
 typedef enum nvmlNvLinkErrorCounter_enum {
-  NVML_NVLINK_ERROR_DL_REPLAY = 0,  // Data link transmit replay error counter
-  NVML_NVLINK_ERROR_DL_RECOVERY =
-  1,  // Data link transmit recovery error counter
-  NVML_NVLINK_ERROR_DL_CRC_FLIT =
-  2,  // Data link receive flow control digit CRC error counter
-  NVML_NVLINK_ERROR_DL_CRC_DATA =
-  3,  // Data link receive data CRC error counter
-
-  // this must be last
-  NVML_NVLINK_ERROR_COUNT
+    NVML_NVLINK_ERROR_DL_REPLAY = 0,    // Data link transmit replay error counter
+    NVML_NVLINK_ERROR_DL_RECOVERY = 1,  // Data link transmit recovery error counter
+    NVML_NVLINK_ERROR_DL_CRC_FLIT = 2,  // Data link receive flow control digit CRC error counter
+    NVML_NVLINK_ERROR_DL_CRC_DATA = 3,  // Data link receive data CRC error counter
+
+    // this must be last
+    NVML_NVLINK_ERROR_COUNT
 } nvmlNvLinkErrorCounter_t;
 
 /**
  * Enum to represent the NvLink utilization counter packet units
  */
 typedef enum nvmlNvLinkUtilizationCountUnits_enum {
-  NVML_NVLINK_COUNTER_UNIT_CYCLES = 0,   // count by cycles
-  NVML_NVLINK_COUNTER_UNIT_PACKETS = 1,  // count by packets
-  NVML_NVLINK_COUNTER_UNIT_BYTES = 2,    // count by bytes
+    NVML_NVLINK_COUNTER_UNIT_CYCLES = 0,   // count by cycles
+    NVML_NVLINK_COUNTER_UNIT_PACKETS = 1,  // count by packets
+    NVML_NVLINK_COUNTER_UNIT_BYTES = 2,    // count by bytes
 
-  // this must be last
-  NVML_NVLINK_COUNTER_UNIT_COUNT
+    // this must be last
+    NVML_NVLINK_COUNTER_UNIT_COUNT
 } nvmlNvLinkUtilizationCountUnits_t;
 
 /**
@@ -623,44 +595,44 @@ typedef enum nvmlNvLinkUtilizationCountUnits_enum {
  *  ** these can be "OR'd" together
  */
 typedef enum nvmlNvLinkUtilizationCountPktTypes_enum {
-  NVML_NVLINK_COUNTER_PKTFILTER_NOP = 0x1,      // no operation packets
-  NVML_NVLINK_COUNTER_PKTFILTER_READ = 0x2,     // read packets
-  NVML_NVLINK_COUNTER_PKTFILTER_WRITE = 0x4,    // write packets
-  NVML_NVLINK_COUNTER_PKTFILTER_RATOM = 0x8,    // reduction atomic requests
-  NVML_NVLINK_COUNTER_PKTFILTER_NRATOM = 0x10,  // non-reduction atomic requests
-  NVML_NVLINK_COUNTER_PKTFILTER_FLUSH = 0x20,   // flush requests
-  NVML_NVLINK_COUNTER_PKTFILTER_RESPDATA = 0x40,    // responses with data
-  NVML_NVLINK_COUNTER_PKTFILTER_RESPNODATA = 0x80,  // responses without data
-  NVML_NVLINK_COUNTER_PKTFILTER_ALL = 0xFF          // all packets
+    NVML_NVLINK_COUNTER_PKTFILTER_NOP = 0x1,          // no operation packets
+    NVML_NVLINK_COUNTER_PKTFILTER_READ = 0x2,         // read packets
+    NVML_NVLINK_COUNTER_PKTFILTER_WRITE = 0x4,        // write packets
+    NVML_NVLINK_COUNTER_PKTFILTER_RATOM = 0x8,        // reduction atomic requests
+    NVML_NVLINK_COUNTER_PKTFILTER_NRATOM = 0x10,      // non-reduction atomic requests
+    NVML_NVLINK_COUNTER_PKTFILTER_FLUSH = 0x20,       // flush requests
+    NVML_NVLINK_COUNTER_PKTFILTER_RESPDATA = 0x40,    // responses with data
+    NVML_NVLINK_COUNTER_PKTFILTER_RESPNODATA = 0x80,  // responses without data
+    NVML_NVLINK_COUNTER_PKTFILTER_ALL = 0xFF          // all packets
 } nvmlNvLinkUtilizationCountPktTypes_t;
 
 /**
  * Struct to define the NVLINK counter controls
  */
 typedef struct nvmlNvLinkUtilizationControl_st {
-  nvmlNvLinkUtilizationCountUnits_t units;
-  nvmlNvLinkUtilizationCountPktTypes_t pktfilter;
+    nvmlNvLinkUtilizationCountUnits_t units;
+    nvmlNvLinkUtilizationCountPktTypes_t pktfilter;
 } nvmlNvLinkUtilizationControl_t;
 
 /* P2P Capability Index*/
 typedef enum nvmlGpuP2PCapsIndex_enum {
-  NVML_P2P_CAPS_INDEX_READ = 0,
-  NVML_P2P_CAPS_INDEX_WRITE,
-  NVML_P2P_CAPS_INDEX_NVLINK,
-  NVML_P2P_CAPS_INDEX_ATOMICS,
-  NVML_P2P_CAPS_INDEX_PROP,
-  NVML_P2P_CAPS_INDEX_UNKNOWN
+    NVML_P2P_CAPS_INDEX_READ = 0,
+    NVML_P2P_CAPS_INDEX_WRITE,
+    NVML_P2P_CAPS_INDEX_NVLINK,
+    NVML_P2P_CAPS_INDEX_ATOMICS,
+    NVML_P2P_CAPS_INDEX_PROP,
+    NVML_P2P_CAPS_INDEX_UNKNOWN
 } nvmlGpuP2PCapsIndex_t;
 
 /* P2P Capability Index Status*/
 typedef enum nvmlGpuP2PStatus_enum {
-  NVML_P2P_STATUS_OK = 0,
-  NVML_P2P_STATUS_CHIPSET_NOT_SUPPORTED,
-  NVML_P2P_STATUS_GPU_NOT_SUPPORTED,
-  NVML_P2P_STATUS_IOH_TOPOLOGY_NOT_SUPPORTED,
-  NVML_P2P_STATUS_DISABLED_BY_REGKEY,
-  NVML_P2P_STATUS_NOT_SUPPORTED,
-  NVML_P2P_STATUS_UNKNOWN
+    NVML_P2P_STATUS_OK = 0,
+    NVML_P2P_STATUS_CHIPSET_NOT_SUPPORTED,
+    NVML_P2P_STATUS_GPU_NOT_SUPPORTED,
+    NVML_P2P_STATUS_IOH_TOPOLOGY_NOT_SUPPORTED,
+    NVML_P2P_STATUS_DISABLED_BY_REGKEY,
+    NVML_P2P_STATUS_NOT_SUPPORTED,
+    NVML_P2P_STATUS_UNKNOWN
 
 } nvmlGpuP2PStatus_t;
 
@@ -668,103 +640,102 @@ typedef enum nvmlGpuP2PStatus_enum {
  * Represents the queryable PCIe utilization counters
  */
 typedef enum nvmlPcieUtilCounter_enum {
-  NVML_PCIE_UTIL_TX_BYTES = 0,  // 1KB granularity
-  NVML_PCIE_UTIL_RX_BYTES = 1,  // 1KB granularity
+    NVML_PCIE_UTIL_TX_BYTES = 0,  // 1KB granularity
+    NVML_PCIE_UTIL_RX_BYTES = 1,  // 1KB granularity
 
-  // Keep this last
-  NVML_PCIE_UTIL_COUNT
+    // Keep this last
+    NVML_PCIE_UTIL_COUNT
 } nvmlPcieUtilCounter_t;
 
 /**
  * Allowed PStates.
  */
 typedef enum nvmlPStates_enum {
-  NVML_PSTATE_0 = 0,        //!< Performance state 0 -- Maximum Performance
-  NVML_PSTATE_1 = 1,        //!< Performance state 1
-  NVML_PSTATE_2 = 2,        //!< Performance state 2
-  NVML_PSTATE_3 = 3,        //!< Performance state 3
-  NVML_PSTATE_4 = 4,        //!< Performance state 4
-  NVML_PSTATE_5 = 5,        //!< Performance state 5
-  NVML_PSTATE_6 = 6,        //!< Performance state 6
-  NVML_PSTATE_7 = 7,        //!< Performance state 7
-  NVML_PSTATE_8 = 8,        //!< Performance state 8
-  NVML_PSTATE_9 = 9,        //!< Performance state 9
-  NVML_PSTATE_10 = 10,      //!< Performance state 10
-  NVML_PSTATE_11 = 11,      //!< Performance state 11
-  NVML_PSTATE_12 = 12,      //!< Performance state 12
-  NVML_PSTATE_13 = 13,      //!< Performance state 13
-  NVML_PSTATE_14 = 14,      //!< Performance state 14
-  NVML_PSTATE_15 = 15,      //!< Performance state 15 -- Minimum Performance
-  NVML_PSTATE_UNKNOWN = 32  //!< Unknown performance state
+    NVML_PSTATE_0 = 0,        //!< Performance state 0 -- Maximum Performance
+    NVML_PSTATE_1 = 1,        //!< Performance state 1
+    NVML_PSTATE_2 = 2,        //!< Performance state 2
+    NVML_PSTATE_3 = 3,        //!< Performance state 3
+    NVML_PSTATE_4 = 4,        //!< Performance state 4
+    NVML_PSTATE_5 = 5,        //!< Performance state 5
+    NVML_PSTATE_6 = 6,        //!< Performance state 6
+    NVML_PSTATE_7 = 7,        //!< Performance state 7
+    NVML_PSTATE_8 = 8,        //!< Performance state 8
+    NVML_PSTATE_9 = 9,        //!< Performance state 9
+    NVML_PSTATE_10 = 10,      //!< Performance state 10
+    NVML_PSTATE_11 = 11,      //!< Performance state 11
+    NVML_PSTATE_12 = 12,      //!< Performance state 12
+    NVML_PSTATE_13 = 13,      //!< Performance state 13
+    NVML_PSTATE_14 = 14,      //!< Performance state 14
+    NVML_PSTATE_15 = 15,      //!< Performance state 15 -- Minimum Performance
+    NVML_PSTATE_UNKNOWN = 32  //!< Unknown performance state
 } nvmlPstates_t;
 
 /**
  * Causes for page retirement
  */
 typedef enum nvmlPageRetirementCause_enum {
-  NVML_PAGE_RETIREMENT_CAUSE_MULTIPLE_SINGLE_BIT_ECC_ERRORS =
-  0,  //!< Page was retired due to multiple single bit ECC error
-  NVML_PAGE_RETIREMENT_CAUSE_DOUBLE_BIT_ECC_ERROR =
-  1,  //!< Page was retired due to double bit ECC error
+    NVML_PAGE_RETIREMENT_CAUSE_MULTIPLE_SINGLE_BIT_ECC_ERRORS =
+        0,  //!< Page was retired due to multiple single bit ECC error
+    NVML_PAGE_RETIREMENT_CAUSE_DOUBLE_BIT_ECC_ERROR =
+        1,  //!< Page was retired due to double bit ECC error
 
-  // Keep this last
-  NVML_PAGE_RETIREMENT_CAUSE_COUNT
+    // Keep this last
+    NVML_PAGE_RETIREMENT_CAUSE_COUNT
 } nvmlPageRetirementCause_t;
 
 /**
  *  Represents Type of Sampling Event
  */
 typedef enum nvmlSamplingType_enum {
-  NVML_TOTAL_POWER_SAMPLES = 0,  //!< To represent total power drawn by GPU
-  NVML_GPU_UTILIZATION_SAMPLES =
-  1,  //!< To represent percent of time during which one or more kernels was
-  //!< executing on the GPU
-  NVML_MEMORY_UTILIZATION_SAMPLES =
-  2,  //!< To represent percent of time during which global (device) memory
-  //!< was being read or written
-  NVML_ENC_UTILIZATION_SAMPLES =
-  3,  //!< To represent percent of time during which NVENC remains busy
-  NVML_DEC_UTILIZATION_SAMPLES =
-  4,  //!< To represent percent of time during which NVDEC remains busy
-  NVML_PROCESSOR_CLK_SAMPLES = 5,  //!< To represent processor clock samples
-  NVML_MEMORY_CLK_SAMPLES = 6,     //!< To represent memory clock samples
-
-  // Keep this last
-  NVML_SAMPLINGTYPE_COUNT
+    NVML_TOTAL_POWER_SAMPLES = 0,  //!< To represent total power drawn by GPU
+    NVML_GPU_UTILIZATION_SAMPLES =
+        1,  //!< To represent percent of time during which one or more kernels was
+    //!< executing on the GPU
+    NVML_MEMORY_UTILIZATION_SAMPLES =
+        2,  //!< To represent percent of time during which global (device) memory
+    //!< was being read or written
+    NVML_ENC_UTILIZATION_SAMPLES =
+        3,  //!< To represent percent of time during which NVENC remains busy
+    NVML_DEC_UTILIZATION_SAMPLES =
+        4,  //!< To represent percent of time during which NVDEC remains busy
+    NVML_PROCESSOR_CLK_SAMPLES = 5,  //!< To represent processor clock samples
+    NVML_MEMORY_CLK_SAMPLES = 6,     //!< To represent memory clock samples
+
+    // Keep this last
+    NVML_SAMPLINGTYPE_COUNT
 } nvmlSamplingType_t;
 
 /**
  * Information for Sample
  */
 typedef struct nvmlSample_st {
-  unsigned long long timeStamp;  //!< CPU Timestamp in microseconds
-  nvmlValue_t sampleValue;       //!< Sample Value
+    unsigned long long timeStamp;  //!< CPU Timestamp in microseconds
+    nvmlValue_t sampleValue;       //!< Sample Value
 } nvmlSample_t;
 
 /**
  * Temperature sensors.
  */
 typedef enum nvmlTemperatureSensors_enum {
-  NVML_TEMPERATURE_GPU = 0,  //!< Temperature sensor for the GPU die
+    NVML_TEMPERATURE_GPU = 0,  //!< Temperature sensor for the GPU die
 
-  // Keep this last
-  NVML_TEMPERATURE_COUNT
+    // Keep this last
+    NVML_TEMPERATURE_COUNT
 } nvmlTemperatureSensors_t;
 
 /**
  * Temperature thresholds.
  */
 typedef enum nvmlTemperatureThresholds_enum {
-  NVML_TEMPERATURE_THRESHOLD_SHUTDOWN = 0,  // Temperature at which the GPU will
-  // shut down for HW protection
-  NVML_TEMPERATURE_THRESHOLD_SLOWDOWN =
-  1,  // Temperature at which the GPU will begin HW slowdown
-  NVML_TEMPERATURE_THRESHOLD_MEM_MAX =
-  2,  // Memory Temperature at which the GPU will begin SW slowdown
-  NVML_TEMPERATURE_THRESHOLD_GPU_MAX =
-  3,  // GPU Temperature at which the GPU can be throttled below base clock
-  // Keep this last
-  NVML_TEMPERATURE_THRESHOLD_COUNT
+    NVML_TEMPERATURE_THRESHOLD_SHUTDOWN = 0,  // Temperature at which the GPU will
+    // shut down for HW protection
+    NVML_TEMPERATURE_THRESHOLD_SLOWDOWN = 1,  // Temperature at which the GPU will begin HW slowdown
+    NVML_TEMPERATURE_THRESHOLD_MEM_MAX =
+        2,  // Memory Temperature at which the GPU will begin SW slowdown
+    NVML_TEMPERATURE_THRESHOLD_GPU_MAX =
+        3,  // GPU Temperature at which the GPU can be throttled below base clock
+    // Keep this last
+    NVML_TEMPERATURE_THRESHOLD_COUNT
 } nvmlTemperatureThresholds_t;
 
 /**
@@ -772,18 +743,15 @@ typedef enum nvmlTemperatureThresholds_enum {
  * The enums are spaced to allow for future relationships
  */
 typedef enum nvmlGpuLevel_enum {
-  NVML_TOPOLOGY_INTERNAL = 0,  // e.g. Tesla K80
-  NVML_TOPOLOGY_SINGLE =
-  10,  // all devices that only need traverse a single PCIe switch
-  NVML_TOPOLOGY_MULTIPLE =
-  20,  // all devices that need not traverse a host bridge
-  NVML_TOPOLOGY_HOSTBRIDGE =
-  30,  // all devices that are connected to the same host bridge
-  NVML_TOPOLOGY_NODE = 40,    // all devices that are connected to the same NUMA
-  // node but possibly multiple host bridges
-  NVML_TOPOLOGY_SYSTEM = 50,  // all devices in the system
-
-  // there is purposefully no COUNT here because of the need for spacing above
+    NVML_TOPOLOGY_INTERNAL = 0,     // e.g. Tesla K80
+    NVML_TOPOLOGY_SINGLE = 10,      // all devices that only need traverse a single PCIe switch
+    NVML_TOPOLOGY_MULTIPLE = 20,    // all devices that need not traverse a host bridge
+    NVML_TOPOLOGY_HOSTBRIDGE = 30,  // all devices that are connected to the same host bridge
+    NVML_TOPOLOGY_NODE = 40,        // all devices that are connected to the same NUMA
+    // node but possibly multiple host bridges
+    NVML_TOPOLOGY_SYSTEM = 50,  // all devices in the system
+
+    // there is purposefully no COUNT here because of the need for spacing above
 } nvmlGpuTopologyLevel_t;
 
 /**
@@ -792,106 +760,101 @@ typedef enum nvmlGpuLevel_enum {
  * product being queried.
  */
 typedef struct nvmlUtilization_st {
-  unsigned int gpu;  //!< Percent of time over the past sample period during
-  //!< which one or more kernels was executing on the GPU
-  unsigned int
-      memory;  //!< Percent of time over the past sample period during which
-  //!< global (device) memory was being read or written
+    unsigned int gpu;  //!< Percent of time over the past sample period during
+    //!< which one or more kernels was executing on the GPU
+    unsigned int memory;  //!< Percent of time over the past sample period during which
+                          //!< global (device) memory was being read or written
 } nvmlUtilization_t;
 
 /**
  * Physical GPU metadata structure
  */
 typedef struct nvmlVgpuPgpuMetadata_st {
-  unsigned int version;   //!< Current version of the structure
-  unsigned int revision;  //!< Current revision of the structure
-  char hostDriverVersion
-  [NVML_SYSTEM_DRIVER_VERSION_BUFFER_SIZE];  //!< Host driver version
-  unsigned int
-      pgpuVirtualizationCaps;   //!< Pgpu virtualization capabilities bitfield
-  unsigned int reserved[7];     //!< Reserved for internal use
-  unsigned int opaqueDataSize;  //!< Size of opaque data field in bytes
-  char opaqueData[4];           //!< Opaque data
+    unsigned int version;   //!< Current version of the structure
+    unsigned int revision;  //!< Current revision of the structure
+    char hostDriverVersion[NVML_SYSTEM_DRIVER_VERSION_BUFFER_SIZE];  //!< Host driver version
+    unsigned int pgpuVirtualizationCaps;  //!< Pgpu virtualization capabilities bitfield
+    unsigned int reserved[7];             //!< Reserved for internal use
+    unsigned int opaqueDataSize;          //!< Size of opaque data field in bytes
+    char opaqueData[4];                   //!< Opaque data
 } nvmlVgpuPgpuMetadata_t;
 
 /**
  * Structure to store Utilization Value, vgpuInstance and subprocess information
  */
 typedef struct nvmlVgpuProcessUtilizationSample_st {
-  nvmlVgpuInstance_t vgpuInstance;  //!< vGPU Instance
-  unsigned int pid;  //!< PID of process running within the vGPU VM
-  char processName[NVML_VGPU_NAME_BUFFER_SIZE];  //!< Name of process running
-  //!< within the vGPU VM
-  unsigned long long timeStamp;  //!< CPU Timestamp in microseconds
-  unsigned int smUtil;           //!< SM (3D/Compute) Util Value
-  unsigned int memUtil;          //!< Frame Buffer Memory Util Value
-  unsigned int encUtil;          //!< Encoder Util Value
-  unsigned int decUtil;          //!< Decoder Util Value
+    nvmlVgpuInstance_t vgpuInstance;               //!< vGPU Instance
+    unsigned int pid;                              //!< PID of process running within the vGPU VM
+    char processName[NVML_VGPU_NAME_BUFFER_SIZE];  //!< Name of process running
+    //!< within the vGPU VM
+    unsigned long long timeStamp;  //!< CPU Timestamp in microseconds
+    unsigned int smUtil;           //!< SM (3D/Compute) Util Value
+    unsigned int memUtil;          //!< Frame Buffer Memory Util Value
+    unsigned int encUtil;          //!< Encoder Util Value
+    unsigned int decUtil;          //!< Decoder Util Value
 } nvmlVgpuProcessUtilizationSample_t;
 
 /**
  * Structure to store Utilization Value and vgpuInstance
  */
 typedef struct nvmlVgpuInstanceUtilizationSample_st {
-  nvmlVgpuInstance_t vgpuInstance;  //!< vGPU Instance
-  unsigned long long timeStamp;     //!< CPU Timestamp in microseconds
-  nvmlValue_t smUtil;               //!< SM (3D/Compute) Util Value
-  nvmlValue_t memUtil;              //!< Frame Buffer Memory Util Value
-  nvmlValue_t encUtil;              //!< Encoder Util Value
-  nvmlValue_t decUtil;              //!< Decoder Util Value
+    nvmlVgpuInstance_t vgpuInstance;  //!< vGPU Instance
+    unsigned long long timeStamp;     //!< CPU Timestamp in microseconds
+    nvmlValue_t smUtil;               //!< SM (3D/Compute) Util Value
+    nvmlValue_t memUtil;              //!< Frame Buffer Memory Util Value
+    nvmlValue_t encUtil;              //!< Encoder Util Value
+    nvmlValue_t decUtil;              //!< Decoder Util Value
 } nvmlVgpuInstanceUtilizationSample_t;
 
 /**
  * Represents type of perf policy for which violation times can be queried
  */
 typedef enum nvmlPerfPolicyType_enum {
-  NVML_PERF_POLICY_POWER = 0,  //!< How long did power violations cause the GPU
-  //!< to be below application clocks
-  NVML_PERF_POLICY_THERMAL = 1,  //!< How long did thermal violations cause the
-  //!< GPU to be below application clocks
-  NVML_PERF_POLICY_SYNC_BOOST = 2,   //!< How long did sync boost cause the GPU
-  //!< to be below application clocks
-  NVML_PERF_POLICY_BOARD_LIMIT = 3,  //!< How long did the board limit cause the
-  //!< GPU to be below application clocks
-  NVML_PERF_POLICY_LOW_UTILIZATION =
-  4,  //!< How long did low utilization cause the GPU to be below
-  //!< application clocks
-  NVML_PERF_POLICY_RELIABILITY =
-  5,  //!< How long did the board reliability limit cause the GPU to be
-  //!< below application clocks
-
-  NVML_PERF_POLICY_TOTAL_APP_CLOCKS =
-  10,  //!< Total time the GPU was held below application clocks by any
-  //!< limiter (0 - 5 above)
-  NVML_PERF_POLICY_TOTAL_BASE_CLOCKS =
-  11,  //!< Total time the GPU was held below base clocks
-
-  // Keep this last
-  NVML_PERF_POLICY_COUNT
+    NVML_PERF_POLICY_POWER = 0,  //!< How long did power violations cause the GPU
+    //!< to be below application clocks
+    NVML_PERF_POLICY_THERMAL = 1,  //!< How long did thermal violations cause the
+    //!< GPU to be below application clocks
+    NVML_PERF_POLICY_SYNC_BOOST = 2,  //!< How long did sync boost cause the GPU
+    //!< to be below application clocks
+    NVML_PERF_POLICY_BOARD_LIMIT = 3,  //!< How long did the board limit cause the
+    //!< GPU to be below application clocks
+    NVML_PERF_POLICY_LOW_UTILIZATION =
+        4,  //!< How long did low utilization cause the GPU to be below
+    //!< application clocks
+    NVML_PERF_POLICY_RELIABILITY =
+        5,  //!< How long did the board reliability limit cause the GPU to be
+    //!< below application clocks
+
+    NVML_PERF_POLICY_TOTAL_APP_CLOCKS =
+        10,  //!< Total time the GPU was held below application clocks by any
+    //!< limiter (0 - 5 above)
+    NVML_PERF_POLICY_TOTAL_BASE_CLOCKS = 11,  //!< Total time the GPU was held below base clocks
+
+    // Keep this last
+    NVML_PERF_POLICY_COUNT
 } nvmlPerfPolicyType_t;
 
 /**
  * Struct to hold perf policy violation status data
  */
 typedef struct nvmlViolationTime_st {
-  unsigned long long referenceTime;  //!< referenceTime represents CPU timestamp
-  //!< in microseconds
-  unsigned long long violationTime;  //!< violationTime in Nanoseconds
+    unsigned long long referenceTime;  //!< referenceTime represents CPU timestamp
+    //!< in microseconds
+    unsigned long long violationTime;  //!< violationTime in Nanoseconds
 } nvmlViolationTime_t;
 
 /*!
  * GPU virtualization mode types.
  */
 typedef enum nvmlGpuVirtualizationMode {
-  NVML_GPU_VIRTUALIZATION_MODE_NONE = 0,  //!< Represents Bare Metal GPU
-  NVML_GPU_VIRTUALIZATION_MODE_PASSTHROUGH =
-  1,  //!< Device is associated with GPU-Passthorugh
-  NVML_GPU_VIRTUALIZATION_MODE_VGPU =
-  2,  //!< Device is associated with vGPU inside virtual machine.
-  NVML_GPU_VIRTUALIZATION_MODE_HOST_VGPU =
-  3,  //!< Device is associated with VGX hypervisor in vGPU mode
-  NVML_GPU_VIRTUALIZATION_MODE_HOST_VSGA =
-  4,  //!< Device is associated with VGX hypervisor in vSGA mode
+    NVML_GPU_VIRTUALIZATION_MODE_NONE = 0,         //!< Represents Bare Metal GPU
+    NVML_GPU_VIRTUALIZATION_MODE_PASSTHROUGH = 1,  //!< Device is associated with GPU-Passthrough
+    NVML_GPU_VIRTUALIZATION_MODE_VGPU =
+        2,  //!< Device is associated with vGPU inside virtual machine.
+    NVML_GPU_VIRTUALIZATION_MODE_HOST_VGPU =
+        3,  //!< Device is associated with VGX hypervisor in vGPU mode
+    NVML_GPU_VIRTUALIZATION_MODE_HOST_VSGA =
+        4,  //!< Device is associated with VGX hypervisor in vSGA mode
 } nvmlGpuVirtualizationMode_t;
 
 /**
@@ -903,108 +866,99 @@ typedef struct nvmlEventSet_st *nvmlEventSet_t;
  *  Is the GPU device to be removed from the kernel by nvmlDeviceRemoveGpu()
  */
 typedef enum nvmlDetachGpuState_enum {
-  NVML_DETACH_GPU_KEEP = 0,
-  NVML_DETACH_GPU_REMOVE,
+    NVML_DETACH_GPU_KEEP = 0,
+    NVML_DETACH_GPU_REMOVE,
 } nvmlDetachGpuState_t;
 
 /**
  *  Parent bridge PCIe link state requested by nvmlDeviceRemoveGpu()
  */
 typedef enum nvmlPcieLinkState_enum {
-  NVML_PCIE_LINK_KEEP = 0,
-  NVML_PCIE_LINK_SHUT_DOWN,
+    NVML_PCIE_LINK_KEEP = 0,
+    NVML_PCIE_LINK_SHUT_DOWN,
 } nvmlPcieLinkState_t;
 
 /**
  * Information about occurred event
  */
 typedef struct nvmlEventData_st {
-  nvmlDevice_t device;  //!< Specific device where the event occurred
-  unsigned long long
-      eventType;  //!< Information about what specific event occurred
-  unsigned long long
-      eventData;  //!< Stores last XID error for the device in the
-  //!< event of nvmlEventTypeXidCriticalError,
-  //  eventData is 0 for any other event. eventData is set as 999 for unknown
-  //  xid error.
+    nvmlDevice_t device;           //!< Specific device where the event occurred
+    unsigned long long eventType;  //!< Information about what specific event occurred
+    unsigned long long eventData;  //!< Stores last XID error for the device in the
+                                   //!< event of nvmlEventTypeXidCriticalError,
+    //  eventData is 0 for any other event. eventData is set as 999 for unknown
+    //  xid error.
 } nvmlEventData_t;
 
 // vGPU GUEST info state.
 typedef enum nvmlVgpuGuestInfoState_enum {
-  NVML_VGPU_INSTANCE_GUEST_INFO_STATE_UNINITIALIZED =
-  0,  //<! Guest-dependent fields uninitialized
-  NVML_VGPU_INSTANCE_GUEST_INFO_STATE_INITIALIZED =
-  1,  //<! Guest-dependent fields initialized
+    NVML_VGPU_INSTANCE_GUEST_INFO_STATE_UNINITIALIZED =
+        0,                                                //!< Guest-dependent fields uninitialized
+    NVML_VGPU_INSTANCE_GUEST_INFO_STATE_INITIALIZED = 1,  //!< Guest-dependent fields initialized
 } nvmlVgpuGuestInfoState_t;
 
 /**
  * vGPU metadata structure.
  */
 typedef struct nvmlVgpuMetadata_st {
-  unsigned int version;   //!< Current version of the structure
-  unsigned int revision;  //!< Current revision of the structure
-  nvmlVgpuGuestInfoState_t
-      guestInfoState;  //!< Current state of Guest-dependent fields
-  char guestDriverVersion
-  [NVML_SYSTEM_DRIVER_VERSION_BUFFER_SIZE];  //!< Version of driver
-  //!< installed in guest
-  char hostDriverVersion[NVML_SYSTEM_DRIVER_VERSION_BUFFER_SIZE];  //!< Version
-  //!< of driver
-  //!< installed in host
-  unsigned int reserved[8];     //!< Reserved for internal use
-  unsigned int opaqueDataSize;  //!< Size of opaque data field in bytes
-  char opaqueData[4];           //!< Opaque data
+    unsigned int version;                     //!< Current version of the structure
+    unsigned int revision;                    //!< Current revision of the structure
+    nvmlVgpuGuestInfoState_t guestInfoState;  //!< Current state of Guest-dependent fields
+    char guestDriverVersion[NVML_SYSTEM_DRIVER_VERSION_BUFFER_SIZE];  //!< Version of driver
+    //!< installed in guest
+    char hostDriverVersion[NVML_SYSTEM_DRIVER_VERSION_BUFFER_SIZE];  //!< Version
+    //!< of driver
+    //!< installed in host
+    unsigned int reserved[8];     //!< Reserved for internal use
+    unsigned int opaqueDataSize;  //!< Size of opaque data field in bytes
+    char opaqueData[4];           //!< Opaque data
 } nvmlVgpuMetadata_t;
 
 /**
  * vGPU VM compatibility codes
  */
 typedef enum nvmlVgpuVmCompatibility_enum {
-  NVML_VGPU_VM_COMPATIBILITY_NONE = 0x0,  //!< vGPU is not runnable
-  NVML_VGPU_VM_COMPATIBILITY_COLD =
-  0x1,  //!< vGPU is runnable from a cold / powered-off state (ACPI S5)
-  NVML_VGPU_VM_COMPATIBILITY_HIBERNATE =
-  0x2,  //!< vGPU is runnable from a hibernated state (ACPI S4)
-  NVML_VGPU_VM_COMPATIBILITY_SLEEP =
-  0x4,  //!< vGPU is runnable from a slept state (ACPI S3)
-  NVML_VGPU_VM_COMPATIBILITY_LIVE =
-  0x8,  //!< vGPU is runnable from a live/paused (ACPI S0)
+    NVML_VGPU_VM_COMPATIBILITY_NONE = 0x0,  //!< vGPU is not runnable
+    NVML_VGPU_VM_COMPATIBILITY_COLD =
+        0x1,  //!< vGPU is runnable from a cold / powered-off state (ACPI S5)
+    NVML_VGPU_VM_COMPATIBILITY_HIBERNATE =
+        0x2,                                 //!< vGPU is runnable from a hibernated state (ACPI S4)
+    NVML_VGPU_VM_COMPATIBILITY_SLEEP = 0x4,  //!< vGPU is runnable from a slept state (ACPI S3)
+    NVML_VGPU_VM_COMPATIBILITY_LIVE = 0x8,   //!< vGPU is runnable from a live/paused (ACPI S0)
 } nvmlVgpuVmCompatibility_t;
 
 /**
  *  vGPU-pGPU compatibility limit codes
  */
 typedef enum nvmlVgpuPgpuCompatibilityLimitCode_enum {
-  NVML_VGPU_COMPATIBILITY_LIMIT_NONE = 0x0,  //!< Compatibility is not limited.
-  NVML_VGPU_COMPATIBILITY_LIMIT_HOST_DRIVER =
-  0x1,  //!< Compatibility is limited by host driver version.
-  NVML_VGPU_COMPATIBILITY_LIMIT_GUEST_DRIVER =
-  0x2,  //!< Compatibility is limited by guest driver version.
-  NVML_VGPU_COMPATIBILITY_LIMIT_GPU =
-  0x4,  //!< Compatibility is limited by GPU hardware.
-  NVML_VGPU_COMPATIBILITY_LIMIT_OTHER =
-  0x80000000,  //!< Compatibility is limited by an undefined factor.
+    NVML_VGPU_COMPATIBILITY_LIMIT_NONE = 0x0,  //!< Compatibility is not limited.
+    NVML_VGPU_COMPATIBILITY_LIMIT_HOST_DRIVER =
+        0x1,  //!< Compatibility is limited by host driver version.
+    NVML_VGPU_COMPATIBILITY_LIMIT_GUEST_DRIVER =
+        0x2,                                  //!< Compatibility is limited by guest driver version.
+    NVML_VGPU_COMPATIBILITY_LIMIT_GPU = 0x4,  //!< Compatibility is limited by GPU hardware.
+    NVML_VGPU_COMPATIBILITY_LIMIT_OTHER =
+        0x80000000,  //!< Compatibility is limited by an undefined factor.
 } nvmlVgpuPgpuCompatibilityLimitCode_t;
 
 /**
  * vGPU-pGPU compatibility structure
  */
 typedef struct nvmlVgpuPgpuCompatibility_st {
-  nvmlVgpuVmCompatibility_t
-      vgpuVmCompatibility;  //!< Compatibility of vGPU VM. See \ref
-  //!< nvmlVgpuVmCompatibility_t
-  nvmlVgpuPgpuCompatibilityLimitCode_t
-      compatibilityLimitCode;  //!< Limiting factor for vGPU-pGPU compatibility.
-  //!< See \ref
-  //!< nvmlVgpuPgpuCompatibilityLimitCode_t
+    nvmlVgpuVmCompatibility_t vgpuVmCompatibility;  //!< Compatibility of vGPU VM. See \ref
+    //!< nvmlVgpuVmCompatibility_t
+    nvmlVgpuPgpuCompatibilityLimitCode_t
+        compatibilityLimitCode;  //!< Limiting factor for vGPU-pGPU compatibility.
+                                 //!< See \ref
+                                 //!< nvmlVgpuPgpuCompatibilityLimitCode_t
 } nvmlVgpuPgpuCompatibility_t;
 
 /**
  * Description of HWBC entry
  */
 typedef struct nvmlHwbcEntry_st {
-  unsigned int hwbcId;
-  char firmwareVersion[32];
+    unsigned int hwbcId;
+    char firmwareVersion[32];
 } nvmlHwbcEntry_t;
 
 typedef struct nvmlUnit_st *nvmlUnit_t;
@@ -1013,40 +967,39 @@ typedef struct nvmlUnit_st *nvmlUnit_t;
  * Fan state enum.
  */
 typedef enum nvmlFanState_enum {
-  NVML_FAN_NORMAL = 0,  //!< Fan is working properly
-  NVML_FAN_FAILED = 1   //!< Fan has failed
+    NVML_FAN_NORMAL = 0,  //!< Fan is working properly
+    NVML_FAN_FAILED = 1   //!< Fan has failed
 } nvmlFanState_t;
 
 /**
  * Fan speed reading for a single fan in an S-class unit.
  */
 typedef struct nvmlUnitFanInfo_st {
-  unsigned int speed;  //!< Fan speed (RPM)
-  nvmlFanState_t
-      state;  //!< Flag that indicates whether fan is working properly
+    unsigned int speed;    //!< Fan speed (RPM)
+    nvmlFanState_t state;  //!< Flag that indicates whether fan is working properly
 } nvmlUnitFanInfo_t;
 /**
  * Fan speed readings for an entire S-class unit.
  */
 typedef struct nvmlUnitFanSpeeds_st {
-  nvmlUnitFanInfo_t fans[24];  //!< Fan speed data for each fan
-  unsigned int count;          //!< Number of fans in unit
+    nvmlUnitFanInfo_t fans[24];  //!< Fan speed data for each fan
+    unsigned int count;          //!< Number of fans in unit
 } nvmlUnitFanSpeeds_t;
 
 /**
  * Led color enum.
  */
 typedef enum nvmlLedColor_enum {
-  NVML_LED_COLOR_GREEN = 0,  //!< GREEN, indicates good health
-  NVML_LED_COLOR_AMBER = 1   //!< AMBER, indicates problem
+    NVML_LED_COLOR_GREEN = 0,  //!< GREEN, indicates good health
+    NVML_LED_COLOR_AMBER = 1   //!< AMBER, indicates problem
 } nvmlLedColor_t;
 
 /**
  * LED states for an S-class unit.
  */
 typedef struct nvmlLedState_st {
-  char cause[256];       //!< If amber, a text description of the cause
-  nvmlLedColor_t color;  //!< GREEN or AMBER
+    char cause[256];       //!< If amber, a text description of the cause
+    nvmlLedColor_t color;  //!< GREEN or AMBER
 } nvmlLedState_t;
 
 /**
@@ -1065,74 +1018,73 @@ typedef struct nvmlLedState_st {
  *    - Short pin transition
  */
 typedef struct nvmlPSUInfo_st {
-  char state[256];       //!< The power supply state
-  unsigned int current;  //!< PSU current (A)
-  unsigned int voltage;  //!< PSU voltage (V)
-  unsigned int power;    //!< PSU power draw (W)
+    char state[256];       //!< The power supply state
+    unsigned int current;  //!< PSU current (A)
+    unsigned int voltage;  //!< PSU voltage (V)
+    unsigned int power;    //!< PSU power draw (W)
 } nvmlPSUInfo_t;
 
 /**
  * Static S-class unit info.
  */
 typedef struct nvmlUnitInfo_st {
-  char name[96];             //!< Product name
-  char id[96];               //!< Product identifier
-  char serial[96];           //!< Product serial number
-  char firmwareVersion[96];  //!< Firmware version
+    char name[96];             //!< Product name
+    char id[96];               //!< Product identifier
+    char serial[96];           //!< Product serial number
+    char firmwareVersion[96];  //!< Firmware version
 } nvmlUnitInfo_t;
 
 /*!
  * Types of VM identifiers
  */
 typedef enum nvmlVgpuVmIdType {
-  NVML_VGPU_VM_ID_DOMAIN_ID = 0,  //!< VM ID represents DOMAIN ID
-  NVML_VGPU_VM_ID_UUID = 1,       //!< VM ID represents UUID
+    NVML_VGPU_VM_ID_DOMAIN_ID = 0,  //!< VM ID represents DOMAIN ID
+    NVML_VGPU_VM_ID_UUID = 1,       //!< VM ID represents UUID
 } nvmlVgpuVmIdType_t;
 
 /**
  * Represents frame buffer capture session type
  */
 typedef enum nvmlFBCSessionType_enum {
-  NVML_FBC_SESSION_TYPE_UNKNOWN = 0,  //!< Unknown
-  NVML_FBC_SESSION_TYPE_TOSYS,        //!< ToSys
-  NVML_FBC_SESSION_TYPE_CUDA,         //!< Cuda
-  NVML_FBC_SESSION_TYPE_VID,          //!< Vid
-  NVML_FBC_SESSION_TYPE_HWENC,        //!< HEnc
+    NVML_FBC_SESSION_TYPE_UNKNOWN = 0,  //!< Unknown
+    NVML_FBC_SESSION_TYPE_TOSYS,        //!< ToSys
+    NVML_FBC_SESSION_TYPE_CUDA,         //!< Cuda
+    NVML_FBC_SESSION_TYPE_VID,          //!< Vid
+    NVML_FBC_SESSION_TYPE_HWENC,        //!< HEnc
 } nvmlFBCSessionType_t;
 
 /**
  * Structure to hold FBC session data
  */
 typedef struct nvmlFBCSessionInfo_st {
-  unsigned int sessionId;           //!< Unique session ID
-  unsigned int pid;                 //!< Owning process ID
-  nvmlVgpuInstance_t vgpuInstance;  //!< Owning vGPU instance ID (only valid on
-  //!< vGPU hosts, otherwise zero)
-  unsigned int displayOrdinal;       //!< Display identifier
-  nvmlFBCSessionType_t sessionType;  //!< Type of frame buffer capture session
-  unsigned int sessionFlags;         //!< Session flags (one or more of
-  //!< NVML_NVFBC_SESSION_FLAG_XXX).
-  unsigned int hMaxResolution;  //!< Max horizontal resolution supported by the
-  //!< capture session
-  unsigned int vMaxResolution;  //!< Max vertical resolution supported by the
-  //!< capture session
-  unsigned int hResolution;  //!< Horizontal resolution requested by caller in
-  //!< capture call
-  unsigned int
-      vResolution;  //!< Vertical resolution requested by caller in capture call
-  unsigned int averageFPS;  //!< Moving average new frames captured per second
-  unsigned int averageLatency;  //!< Moving average new frame capture latency in
-  //!< microseconds
+    unsigned int sessionId;           //!< Unique session ID
+    unsigned int pid;                 //!< Owning process ID
+    nvmlVgpuInstance_t vgpuInstance;  //!< Owning vGPU instance ID (only valid on
+    //!< vGPU hosts, otherwise zero)
+    unsigned int displayOrdinal;       //!< Display identifier
+    nvmlFBCSessionType_t sessionType;  //!< Type of frame buffer capture session
+    unsigned int sessionFlags;         //!< Session flags (one or more of
+    //!< NVML_NVFBC_SESSION_FLAG_XXX).
+    unsigned int hMaxResolution;  //!< Max horizontal resolution supported by the
+    //!< capture session
+    unsigned int vMaxResolution;  //!< Max vertical resolution supported by the
+    //!< capture session
+    unsigned int hResolution;  //!< Horizontal resolution requested by caller in
+    //!< capture call
+    unsigned int vResolution;     //!< Vertical resolution requested by caller in capture call
+    unsigned int averageFPS;      //!< Moving average new frames captured per second
+    unsigned int averageLatency;  //!< Moving average new frame capture latency in
+                                  //!< microseconds
 } nvmlFBCSessionInfo_t;
 
 /**
  * Structure to hold frame buffer capture sessions stats
  */
 typedef struct nvmlFBCStats_st {
-  unsigned int sessionsCount;  //!< Total no of sessions
-  unsigned int averageFPS;  //!< Moving average new frames captured per second
-  unsigned int averageLatency;  //!< Moving average new frame capture latency in
-  //!< microseconds
+    unsigned int sessionsCount;   //!< Total no of sessions
+    unsigned int averageFPS;      //!< Moving average new frames captured per second
+    unsigned int averageLatency;  //!< Moving average new frame capture latency in
+                                  //!< microseconds
 } nvmlFBCStats_t;
 
 /**
@@ -1149,34 +1101,34 @@ typedef struct nvmlFBCStats_st {
  * Blacklist GPU device information
  **/
 typedef struct nvmlBlacklistDeviceInfo_st {
-  nvmlPciInfo_t pciInfo;  //!< The PCI information for the blacklisted GPU
-  char uuid[NVML_DEVICE_UUID_BUFFER_SIZE];  //!< The ASCII string UUID for the
-  //!< blacklisted GPU
+    nvmlPciInfo_t pciInfo;                    //!< The PCI information for the blacklisted GPU
+    char uuid[NVML_DEVICE_UUID_BUFFER_SIZE];  //!< The ASCII string UUID for the
+                                              //!< blacklisted GPU
 } nvmlBlacklistDeviceInfo_t;
 
 /**
  * Structure representing a range of vGPU version
  */
 typedef struct nvmlVgpuVersion_st {
-  unsigned int minVersion;  //!< Minimum vGPU version.
-  unsigned int maxVersion;  //!< Maximum vGPU version.
+    unsigned int minVersion;  //!< Minimum vGPU version.
+    unsigned int maxVersion;  //!< Maximum vGPU version.
 } nvmlVgpuVersion_t;
 
 /**
  * Host vGPU modes
  */
 typedef enum nvmlHostVgpuMode_enum {
-  NVML_HOST_VGPU_MODE_NON_SRIOV = 0,  //!< Non SR-IOV mode
-  NVML_HOST_VGPU_MODE_SRIOV = 1       //!< SR-IOV mode
+    NVML_HOST_VGPU_MODE_NON_SRIOV = 0,  //!< Non SR-IOV mode
+    NVML_HOST_VGPU_MODE_SRIOV = 1       //!< SR-IOV mode
 } nvmlHostVgpuMode_t;
 
 typedef struct nvmlGpuInstance_st *nvmlGpuInstance_t;
 
 typedef struct nvmlComputeInstanceInfo_st {
-  nvmlDevice_t device;           //!< Parent device
-  nvmlGpuInstance_t gpuInstance; //!< Parent GPU instance
-  unsigned int id;               //!< Unique instance ID within the GPU instance
-  unsigned int profileId;        //!< Unique profile ID within the GPU instance
+    nvmlDevice_t device;            //!< Parent device
+    nvmlGpuInstance_t gpuInstance;  //!< Parent GPU instance
+    unsigned int id;                //!< Unique instance ID within the GPU instance
+    unsigned int profileId;         //!< Unique profile ID within the GPU instance
 } nvmlComputeInstanceInfo_t;
 
 typedef struct nvmlComputeInstance_st *nvmlComputeInstance_t;
@@ -1184,52 +1136,52 @@ typedef struct nvmlComputeInstance_st *nvmlComputeInstance_t;
 typedef unsigned int nvmlDeviceArchitecture_t;
 
 typedef struct nvmlDeviceAttributes_st {
-  unsigned int multiprocessorCount;   //!< Streaming Multiprocessor count
-  unsigned int sharedCopyEngineCount; //!< Shared Copy Engine count
-  unsigned int sharedDecoderCount;    //!< Shared Decoder Engine count
-  unsigned int sharedEncoderCount;    //!< Shared Encoder Engine count
-  unsigned int sharedJpegCount;       //!< Shared JPEG Engine count
-  unsigned int sharedOfaCount;        //!< Shared OFA Engine count
+    unsigned int multiprocessorCount;    //!< Streaming Multiprocessor count
+    unsigned int sharedCopyEngineCount;  //!< Shared Copy Engine count
+    unsigned int sharedDecoderCount;     //!< Shared Decoder Engine count
+    unsigned int sharedEncoderCount;     //!< Shared Encoder Engine count
+    unsigned int sharedJpegCount;        //!< Shared JPEG Engine count
+    unsigned int sharedOfaCount;         //!< Shared OFA Engine count
 } nvmlDeviceAttributes_t;
 
 typedef unsigned int nvmlAffinityScope_t;
 
 typedef struct nvmlGpuInstancePlacement_st {
-  unsigned int start;
-  unsigned int size;
+    unsigned int start;
+    unsigned int size;
 } nvmlGpuInstancePlacement_t;
 
 typedef struct nvmlGpuInstanceProfileInfo_st {
-  unsigned int id;                  //!< Unique profile ID within the device
-  unsigned int isP2pSupported;      //!< Peer-to-Peer support
-  unsigned int sliceCount;          //!< GPU Slice count
-  unsigned int instanceCount;       //!< GPU instance count
-  unsigned int multiprocessorCount; //!< Streaming Multiprocessor count
-  unsigned int copyEngineCount;     //!< Copy Engine count
-  unsigned int decoderCount;        //!< Decoder Engine count
-  unsigned int encoderCount;        //!< Encoder Engine count
-  unsigned int jpegCount;           //!< JPEG Engine count
-  unsigned int ofaCount;            //!< OFA Engine count
-  unsigned long long memorySizeMB;  //!< Memory size in MBytes
+    unsigned int id;                   //!< Unique profile ID within the device
+    unsigned int isP2pSupported;       //!< Peer-to-Peer support
+    unsigned int sliceCount;           //!< GPU Slice count
+    unsigned int instanceCount;        //!< GPU instance count
+    unsigned int multiprocessorCount;  //!< Streaming Multiprocessor count
+    unsigned int copyEngineCount;      //!< Copy Engine count
+    unsigned int decoderCount;         //!< Decoder Engine count
+    unsigned int encoderCount;         //!< Encoder Engine count
+    unsigned int jpegCount;            //!< JPEG Engine count
+    unsigned int ofaCount;             //!< OFA Engine count
+    unsigned long long memorySizeMB;   //!< Memory size in MBytes
 } nvmlGpuInstanceProfileInfo_t;
 
 typedef struct nvmlComputeInstanceProfileInfo_st {
-  unsigned int id;                    //!< Unique profile ID within the GPU instance
-  unsigned int sliceCount;            //!< GPU Slice count
-  unsigned int instanceCount;         //!< Compute instance count
-  unsigned int multiprocessorCount;   //!< Streaming Multiprocessor count
-  unsigned int sharedCopyEngineCount; //!< Shared Copy Engine count
-  unsigned int sharedDecoderCount;    //!< Shared Decoder Engine count
-  unsigned int sharedEncoderCount;    //!< Shared Encoder Engine count
-  unsigned int sharedJpegCount;       //!< Shared JPEG Engine count
-  unsigned int sharedOfaCount;        //!< Shared OFA Engine count
+    unsigned int id;                     //!< Unique profile ID within the GPU instance
+    unsigned int sliceCount;             //!< GPU Slice count
+    unsigned int instanceCount;          //!< Compute instance count
+    unsigned int multiprocessorCount;    //!< Streaming Multiprocessor count
+    unsigned int sharedCopyEngineCount;  //!< Shared Copy Engine count
+    unsigned int sharedDecoderCount;     //!< Shared Decoder Engine count
+    unsigned int sharedEncoderCount;     //!< Shared Encoder Engine count
+    unsigned int sharedJpegCount;        //!< Shared JPEG Engine count
+    unsigned int sharedOfaCount;         //!< Shared OFA Engine count
 } nvmlComputeInstanceProfileInfo_t;
 
 typedef struct nvmlGpuInstanceInfo_st {
-  nvmlDevice_t device;                  //!< Parent device
-  unsigned int id;                      //!< Unique instance ID within the device
-  unsigned int profileId;               //!< Unique profile ID within the device
-  nvmlGpuInstancePlacement_t placement; //!< Placement for this instance
+    nvmlDevice_t device;                   //!< Parent device
+    unsigned int id;                       //!< Unique instance ID within the device
+    unsigned int profileId;                //!< Unique profile ID within the device
+    nvmlGpuInstancePlacement_t placement;  //!< Placement for this instance
 } nvmlGpuInstanceInfo_t;
 
 /**
@@ -1239,11 +1191,11 @@ typedef struct nvmlGpuInstanceInfo_st {
  * rows available.
  */
 typedef struct nvmlRowRemapperHistogramValues_st {
-  unsigned int max;
-  unsigned int high;
-  unsigned int partial;
-  unsigned int low;
-  unsigned int none;
+    unsigned int max;
+    unsigned int high;
+    unsigned int partial;
+    unsigned int low;
+    unsigned int none;
 } nvmlRowRemapperHistogramValues_t;
 
 #ifdef __cplusplus
diff --git a/src/include/nvml_override.h b/src/include/nvml_override.h
index 4d0f5b00..cc663400 100644
--- a/src/include/nvml_override.h
+++ b/src/include/nvml_override.h
@@ -1,43 +1,36 @@
 #ifndef __NVML_OVERRIDE_H__
 #define __NVML_OVERRIDE_H__
 
-typedef struct nvmlProcessInfo_st1
-{
-    unsigned int        pid;                //!< Process ID
-    unsigned long long  usedGpuMemory;      //!< Amount of used GPU memory in bytes.
-                                            //! Under WDDM, \ref NVML_VALUE_NOT_AVAILABLE is always reported
-                                            //! because Windows KMD manages all the memory and not the NVIDIA driver
+typedef struct nvmlProcessInfo_st1 {
+    unsigned int pid;  //!< Process ID
+    unsigned long long
+        usedGpuMemory;  //!< Amount of used GPU memory in bytes.
+                        //! Under WDDM, \ref NVML_VALUE_NOT_AVAILABLE is always reported
+                        //! because Windows KMD manages all the memory and not the NVIDIA driver
 } nvmlProcessInfo_t1;
 
+nvmlReturn_t nvmlInternalGetExportTable(const void **ppExportTable, void *pExportTableId);
 
-nvmlReturn_t nvmlInternalGetExportTable(const void **ppExportTable,
-                                        void *pExportTableId);
+// nvmlReturn_t nvmlDeviceSetTemperatureThreshold(
+//     nvmlDevice_t device, nvmlTemperatureThresholds_t thresholdType, int *temp);
 
+// nvmlReturn_t nvmlVgpuInstanceGetGpuInstanceId(nvmlVgpuInstance_t vgpuInstance,
+//                                               unsigned int *gpuInstanceId);
 
-//nvmlReturn_t nvmlDeviceSetTemperatureThreshold(
-//    nvmlDevice_t device, nvmlTemperatureThresholds_t thresholdType, int *temp);
-
-
-//nvmlReturn_t nvmlVgpuInstanceGetGpuInstanceId(nvmlVgpuInstance_t vgpuInstance,
-//                                              unsigned int *gpuInstanceId);
-
-//nvmlReturn_t nvmlDeviceGetMemoryInfo_v2(nvmlDevice_t device, nvmlMemory_v2_t* memory);
+// nvmlReturn_t nvmlDeviceGetMemoryInfo_v2(nvmlDevice_t device, nvmlMemory_v2_t* memory);
 
 nvmlReturn_t nvmlDeviceGetPciInfo_v2(nvmlDevice_t device, nvmlPciInfo_t *pci);
 
 nvmlReturn_t nvmlDeviceGetGridLicensableFeatures_v2(
-    nvmlDevice_t device,
-    nvmlGridLicensableFeatures_t *pGridLicensableFeatures);
+    nvmlDevice_t device, nvmlGridLicensableFeatures_t *pGridLicensableFeatures);
 
 nvmlReturn_t nvmlDeviceGetGridLicensableFeatures_v3(
-    nvmlDevice_t device,
-    nvmlGridLicensableFeatures_t *pGridLicensableFeatures);
+    nvmlDevice_t device, nvmlGridLicensableFeatures_t *pGridLicensableFeatures);
 
-nvmlReturn_t nvmlDeviceGetComputeRunningProcesses_v2(nvmlDevice_t device,
-                                                     unsigned int *infoCount,
+nvmlReturn_t nvmlDeviceGetComputeRunningProcesses_v2(nvmlDevice_t device, unsigned int *infoCount,
                                                      nvmlProcessInfo_v2_t *infos);
 
-nvmlReturn_t nvmlDeviceGetGraphicsRunningProcesses_v2(
-    nvmlDevice_t device, unsigned int *infoCount, nvmlProcessInfo_v2_t *infos);
+nvmlReturn_t nvmlDeviceGetGraphicsRunningProcesses_v2(nvmlDevice_t device, unsigned int *infoCount,
+                                                      nvmlProcessInfo_v2_t *infos);
 
 #endif
\ No newline at end of file
diff --git a/src/include/nvml_prefix.h b/src/include/nvml_prefix.h
index 361f6b62..437900ba 100644
--- a/src/include/nvml_prefix.h
+++ b/src/include/nvml_prefix.h
@@ -4,23 +4,23 @@
 #define NVML_NO_UNVERSIONED_FUNC_DEFS
 
 #ifndef NVML_NO_UNVERSIONED_FUNC_DEFS
-#define nvmlInit                                nvmlInit_v2
-#define nvmlDeviceGetPciInfo                    nvmlDeviceGetPciInfo_v3
-#define nvmlDeviceGetCount                      nvmlDeviceGetCount_v2
-#define nvmlDeviceGetHandleByIndex              nvmlDeviceGetHandleByIndex_v2
-#define nvmlDeviceGetHandleByPciBusId           nvmlDeviceGetHandleByPciBusId_v2
-#define nvmlDeviceGetNvLinkRemotePciInfo        nvmlDeviceGetNvLinkRemotePciInfo_v2
-#define nvmlDeviceRemoveGpu                     nvmlDeviceRemoveGpu_v2
-#define nvmlDeviceGetGridLicensableFeatures     nvmlDeviceGetGridLicensableFeatures_v3
-#define nvmlEventSetWait                        nvmlEventSetWait_v2
-#define nvmlDeviceGetAttributes                 nvmlDeviceGetAttributes_v2
-#define nvmlDeviceGetComputeRunningProcesses    nvmlDeviceGetComputeRunningProcesses_v2
-#define nvmlDeviceGetGraphicsRunningProcesses   nvmlDeviceGetGraphicsRunningProcesses_v2
+#define nvmlInit nvmlInit_v2
+#define nvmlDeviceGetPciInfo nvmlDeviceGetPciInfo_v3
+#define nvmlDeviceGetCount nvmlDeviceGetCount_v2
+#define nvmlDeviceGetHandleByIndex nvmlDeviceGetHandleByIndex_v2
+#define nvmlDeviceGetHandleByPciBusId nvmlDeviceGetHandleByPciBusId_v2
+#define nvmlDeviceGetNvLinkRemotePciInfo nvmlDeviceGetNvLinkRemotePciInfo_v2
+#define nvmlDeviceRemoveGpu nvmlDeviceRemoveGpu_v2
+#define nvmlDeviceGetGridLicensableFeatures nvmlDeviceGetGridLicensableFeatures_v3
+#define nvmlEventSetWait nvmlEventSetWait_v2
+#define nvmlDeviceGetAttributes nvmlDeviceGetAttributes_v2
+#define nvmlDeviceGetComputeRunningProcesses nvmlDeviceGetComputeRunningProcesses_v2
+#define nvmlDeviceGetGraphicsRunningProcesses nvmlDeviceGetGraphicsRunningProcesses_v2
 #endif
 
 typedef struct {
-  void *fn_ptr;
-  char *name;
+    void *fn_ptr;
+    char *name;
 } entry_t;
 
 extern entry_t nvml_library_entry[];
diff --git a/src/include/process_utils.h b/src/include/process_utils.h
old mode 100755
new mode 100644
index 4feabca0..17688eec
--- a/src/include/process_utils.h
+++ b/src/include/process_utils.h
@@ -1,10 +1,10 @@
 #ifndef __UTILS_PROCESS_UTILS_H__
-#define __UTILS_PROCESS_UTILS_H__ 
+#define __UTILS_PROCESS_UTILS_H__
 
-#include <stdio.h>
 #include <dirent.h>
-#include <unistd.h>
+#include <stdio.h>
 #include <stdlib.h>
+#include <unistd.h>
 
 #define BUFFER_LENGTH 8192  // ensure larger than linux max filename length
 #define FILENAME_LENGTH 8192
@@ -13,13 +13,12 @@
 #define PROC_STATE_NONALIVE 1
 #define PROC_STATE_UNKNOWN 2
 
-
 int proc_alive(int32_t pid) {
     char filename[FILENAME_LENGTH] = {0};
     sprintf(filename, "/proc/%d/stat", pid);
 
     FILE* fp;
-    if ((fp = fopen(filename, "r")) == NULL) {   
+    if ((fp = fopen(filename, "r")) == NULL) {
         return PROC_STATE_NONALIVE;
     }
 
@@ -39,5 +38,4 @@ int proc_alive(int32_t pid) {
     return res;
 }
 
-
 #endif  // __UTILS_PROCESS_UTILS_H__
diff --git a/src/include/utils.h b/src/include/utils.h
index 8a40012e..f636cbf8 100644
--- a/src/include/utils.h
+++ b/src/include/utils.h
@@ -1,14 +1,13 @@
-#include <sys/stat.h>
-#include <sys/types.h>
 #include <fcntl.h>
 #include <stdlib.h>
+#include <sys/stat.h>
+#include <sys/types.h>
 #include <unistd.h>
 
-
 int try_lock_unified_lock();
 int try_unlock_unified_lock();
 
-//Nvml part utils
+// Nvml part utils
 void sort(int vmap[16]);
 int initial_virtual_devices();
 int parser(char *str);
diff --git a/src/libvgpu.c b/src/libvgpu.c
index befc16c3..934ef19e 100644
--- a/src/libvgpu.c
+++ b/src/libvgpu.c
@@ -1,21 +1,22 @@
-//#include "memory_limit.h"
-#include <fcntl.h>
+// #include "memory_limit.h"
+#include "include/libvgpu.h"
+
 #include <dlfcn.h>
-#include <pthread.h>
-#include "include/nvml_prefix.h"
+#include <fcntl.h>
 #include <nvml.h>
-#include "include/nvml_prefix.h"
-#include "include/log_utils.h"
+#include <pthread.h>
+
+#include "allocator/allocator.h"
 #include "include/libcuda_hook.h"
-#include "include/libvgpu.h"
-#include "include/utils.h"
+#include "include/log_utils.h"
 #include "include/nvml_override.h"
-#include "allocator/allocator.h"
+#include "include/nvml_prefix.h"
+#include "include/utils.h"
 #include "multiprocess/multiprocess_memory_limit.h"
 
 extern void init_utilization_watcher(void);
 extern void utilization_watcher(void);
-extern void initial_virtual_map(void); 
+extern void initial_virtual_map(void);
 extern int set_host_pid(int hostpid);
 extern void allocator_init(void);
 void preInit();
@@ -26,8 +27,8 @@ pthread_once_t pre_cuinit_flag = PTHREAD_ONCE_INIT;
 pthread_once_t post_cuinit_flag = PTHREAD_ONCE_INIT;
 pthread_once_t dlsym_init_flag = PTHREAD_ONCE_INIT;
 
-/* pidfound is to enable core utilization, if we don't find hostpid in container, then we have no
- where to find its core utilization */
+/* pidfound is to enable core utilization; if we don't find hostpid in the
+ container, then we have nowhere to find its core utilization */
 extern int pidfound;
 
 /* used to switch on/off the core utilization limitation*/
@@ -43,25 +44,24 @@ pthread_mutex_t dlsym_lock;
 typedef struct {
     pthread_t tid;
     void *pointer;
-}tid_dl_map;
+} tid_dl_map;
 
 #define DLMAP_SIZE 100
 tid_dl_map dlmap[DLMAP_SIZE];
-int dlmap_count=0;
+int dlmap_count = 0;
 
-void init_dlsym(){
+void init_dlsym() {
     LOG_DEBUG("init_dlsym\n");
-    pthread_mutex_init(&dlsym_lock,NULL);
-    dlmap_count=0;
-    memset(dlmap, 0, sizeof(tid_dl_map)*DLMAP_SIZE);
+    pthread_mutex_init(&dlsym_lock, NULL);
+    dlmap_count = 0;
+    memset(dlmap, 0, sizeof(tid_dl_map) * DLMAP_SIZE);
 }
 
-int check_dlmap(pthread_t tid, void *pointer){
+int check_dlmap(pthread_t tid, void *pointer) {
     int i;
     int cursor = (dlmap_count < DLMAP_SIZE) ? dlmap_count : DLMAP_SIZE;
-    for (i=cursor-1; i>=0; i--) {
-        if ((dlmap[i].pointer == pointer) && pthread_equal(dlmap[i].tid, tid))
-            return 1;
+    for (i = cursor - 1; i >= 0; i--) {
+        if ((dlmap[i].pointer == pointer) && pthread_equal(dlmap[i].tid, tid)) return 1;
     }
     cursor = dlmap_count % DLMAP_SIZE;
     dlmap[cursor].tid = tid;
@@ -70,20 +70,14 @@ int check_dlmap(pthread_t tid, void *pointer){
     return 0;
 }
 
-FUNC_ATTR_VISIBLE void* dlsym(void* handle, const char* symbol) {
-    LOG_DEBUG("into dlsym %s",symbol);
-    pthread_once(&dlsym_init_flag,init_dlsym);
+FUNC_ATTR_VISIBLE void *dlsym(void *handle, const char *symbol) {
+    LOG_DEBUG("into dlsym %s", symbol);
+    pthread_once(&dlsym_init_flag, init_dlsym);
     if (real_dlsym == NULL) {
-        const char* glibc_versions[] = {
-                "GLIBC_2.2.5",  // for amd64
-                "GLIBC_2.17",   // for arm64
-                "GLIBC_2.3",
-                "GLIBC_2.4",
-                "GLIBC_2.10",
-                "GLIBC_2.18",
-                "GLIBC_2.22",
-                NULL
-        };
+        const char *glibc_versions[] = {"GLIBC_2.2.5",  // for amd64
+                                        "GLIBC_2.17",   // for arm64
+                                        "GLIBC_2.3",   "GLIBC_2.4",  "GLIBC_2.10",
+                                        "GLIBC_2.18",  "GLIBC_2.22", NULL};
         for (int i = 0; glibc_versions[i] != NULL; i++) {
             real_dlsym = dlvsym(RTLD_NEXT, "dlsym", glibc_versions[i]);
             if (real_dlsym != NULL) {
@@ -91,11 +85,11 @@ FUNC_ATTR_VISIBLE void* dlsym(void* handle, const char* symbol) {
                 break;
             }
         }
-        char *path_search=getenv("CUDA_REDIRECT");
-        if ((path_search!=NULL) && (strlen(path_search)>0)){
-            vgpulib = dlopen(path_search,RTLD_LAZY);
-        }else{
-            vgpulib = dlopen("/usr/local/vgpu/libvgpu.so",RTLD_LAZY);
+        char *path_search = getenv("CUDA_REDIRECT");
+        if ((path_search != NULL) && (strlen(path_search) > 0)) {
+            vgpulib = dlopen(path_search, RTLD_LAZY);
+        } else {
+            vgpulib = dlopen("/usr/local/vgpu/libvgpu.so", RTLD_LAZY);
         }
         if (real_dlsym == NULL) {
             LOG_ERROR("real dlsym not found");
@@ -103,33 +97,30 @@ FUNC_ATTR_VISIBLE void* dlsym(void* handle, const char* symbol) {
             if (libc_handle) {
                 real_dlsym = dlsym(libc_handle, "dlsym");
             }
-            if (real_dlsym == NULL)
-                LOG_ERROR("real dlsym not found after trying libc.so.6");
+            if (real_dlsym == NULL) LOG_ERROR("real dlsym not found after trying libc.so.6");
         }
     }
     if (handle == RTLD_NEXT) {
-        void *h = real_dlsym(RTLD_NEXT,symbol);
+        void *h = real_dlsym(RTLD_NEXT, symbol);
         pthread_mutex_lock(&dlsym_lock);
         pthread_t tid = pthread_self();
-        if (check_dlmap(tid,h)){
-            LOG_WARN("recursive dlsym : %s\n",symbol);
+        if (check_dlmap(tid, h)) {
+            LOG_WARN("recursive dlsym : %s\n", symbol);
             h = NULL;
         }
         pthread_mutex_unlock(&dlsym_lock);
         return h;
     }
     if (symbol[0] == 'c' && symbol[1] == 'u') {
-        //Compatible with cuda 12.8+ fix
-        if (strcmp(symbol,"cuGetExportTable")!=0)
-            pthread_once(&pre_cuinit_flag,(void(*)(void))preInit);
-        void *f = real_dlsym(vgpulib,symbol);
-        if (f!=NULL)
-            return f;
+        // Compatible with cuda 12.8+ fix
+        if (strcmp(symbol, "cuGetExportTable") != 0)
+            pthread_once(&pre_cuinit_flag, (void (*)(void))preInit);
+        void *f = real_dlsym(vgpulib, symbol);
+        if (f != NULL) return f;
     }
-    #ifdef HOOK_NVML_ENABLE
-    if (symbol[0] == 'n' && symbol[1] == 'v' &&
-          symbol[2] == 'm' && symbol[3] == 'l' ) {
-        void* f = __dlsym_hook_section_nvml(handle, symbol);
+#ifdef HOOK_NVML_ENABLE
+    if (symbol[0] == 'n' && symbol[1] == 'v' && symbol[2] == 'm' && symbol[3] == 'l') {
+        void *f = __dlsym_hook_section_nvml(handle, symbol);
         if (f != NULL) {
             return f;
         }
@@ -138,14 +129,14 @@ FUNC_ATTR_VISIBLE void* dlsym(void* handle, const char* symbol) {
     return real_dlsym(handle, symbol);
 }
 
-void* __dlsym_hook_section(void* handle, const char* symbol) {
+void *__dlsym_hook_section(void *handle, const char *symbol) {
     int it;
-    for (it=0;it<CUDA_ENTRY_END;it++){
-        if (strcmp(cuda_library_entry[it].name,symbol) == 0){
+    for (it = 0; it < CUDA_ENTRY_END; it++) {
+        if (strcmp(cuda_library_entry[it].name, symbol) == 0) {
             if (cuda_library_entry[it].fn_ptr == NULL) {
                 LOG_WARN("NEED TO RETURN NULL");
                 return NULL;
-            }else{
+            } else {
                 break;
             }
         }
@@ -153,8 +144,8 @@ void* __dlsym_hook_section(void* handle, const char* symbol) {
     DLSYM_HOOK_FUNC(cuInit);
     DLSYM_HOOK_FUNC(cuGetProcAddress);
     DLSYM_HOOK_FUNC(cuGetProcAddress_v2);
-    //Context
-    //DLSYM_HOOK_FUNC(cuCtxGetDevice);
+    // Context
+    // DLSYM_HOOK_FUNC(cuCtxGetDevice);
 #if CUDA_VERSION < 13000
     DLSYM_HOOK_FUNC(cuCtxCreate_v2);
     DLSYM_HOOK_FUNC(cuCtxCreate_v3);
@@ -183,8 +174,8 @@ void* __dlsym_hook_section(void* handle, const char* symbol) {
     DLSYM_HOOK_FUNC(cuCtxSetLimit);
     DLSYM_HOOK_FUNC(cuCtxSetSharedMemConfig);
     DLSYM_HOOK_FUNC(cuCtxSynchronize);
-    //DLSYM_HOOK_FUNC(cuCtxEnablePeerAccess);
-    //DLSYM_HOOK_FUNC(cuGetExportTable);
+    // DLSYM_HOOK_FUNC(cuCtxEnablePeerAccess);
+    // DLSYM_HOOK_FUNC(cuGetExportTable);
     DLSYM_HOOK_FUNC(cuArray3DCreate_v2);
     DLSYM_HOOK_FUNC(cuArrayCreate_v2);
     DLSYM_HOOK_FUNC(cuArrayDestroy);
@@ -315,7 +306,7 @@ void* __dlsym_hook_section(void* handle, const char* symbol) {
     DLSYM_HOOK_FUNC(cuSignalExternalSemaphoresAsync);
     DLSYM_HOOK_FUNC(cuWaitExternalSemaphoresAsync);
     DLSYM_HOOK_FUNC(cuDestroyExternalSemaphore);
-    // cuda Graph 
+    // cuda Graph
     DLSYM_HOOK_FUNC(cuGraphCreate);
     DLSYM_HOOK_FUNC(cuGraphAddKernelNode_v2);
     DLSYM_HOOK_FUNC(cuGraphKernelNodeGetParams_v2);
@@ -374,7 +365,7 @@ void* __dlsym_hook_section(void* handle, const char* symbol) {
     return NULL;
 }
 
-void* __dlsym_hook_section_nvml(void* handle, const char* symbol) {
+void *__dlsym_hook_section_nvml(void *handle, const char *symbol) {
     DLSYM_HOOK_FUNC(nvmlInit);
     /** nvmlShutdown */
     DLSYM_HOOK_FUNC(nvmlShutdown);
@@ -857,7 +848,7 @@ void* __dlsym_hook_section_nvml(void* handle, const char* symbol) {
     /** nvmlDeviceSetTemperatureThreshold */
     DLSYM_HOOK_FUNC(nvmlDeviceSetTemperatureThreshold);
     /** nvmlRetry_NvRmControl */
-    //DLSYM_HOOK_FUNC(nvmlRetry_NvRmControl);
+    // DLSYM_HOOK_FUNC(nvmlRetry_NvRmControl);
     /** nvmlVgpuInstanceGetGpuInstanceId */
     DLSYM_HOOK_FUNC(nvmlVgpuInstanceGetGpuInstanceId);
     /** nvmlVgpuTypeGetGpuInstanceProfileId */
@@ -865,11 +856,11 @@ void* __dlsym_hook_section_nvml(void* handle, const char* symbol) {
     return NULL;
 }
 
-void preInit(){
+void preInit() {
     log_utils_init();
     LOG_MSG("Initializing.....");
     if (real_dlsym == NULL) {
-        real_dlsym = dlvsym(RTLD_NEXT,"dlsym","GLIBC_2.2.5");
+        real_dlsym = dlvsym(RTLD_NEXT, "dlsym", "GLIBC_2.2.5");
         if (real_dlsym == NULL) {
             LOG_ERROR("real dlsym not found");
             void *libc_handle = dlopen("libc.so.6", RTLD_LAZY);
@@ -882,11 +873,11 @@ void preInit(){
     }
     real_realpath = NULL;
     load_cuda_libraries();
-    //nvmlInit();
+    // nvmlInit();
     ENSURE_INITIALIZED();
 }
 
-void postInit(){
+void postInit() {
     allocator_init();
     map_cuda_visible_devices();
 
@@ -914,20 +905,20 @@ void postInit(){
         pidfound = 1;
     }
 
-    //add_gpu_device_memory_usage(getpid(),0,context_size,0);
+    // add_gpu_device_memory_usage(getpid(),0,context_size,0);
     env_utilization_switch = set_env_utilization_switch();
     init_utilization_watcher();
 }
 
-CUresult cuInit(unsigned int Flags){
+CUresult cuInit(unsigned int Flags) {
     LOG_INFO("Into cuInit");
-    pthread_once(&pre_cuinit_flag,(void(*)(void))preInit);
+    pthread_once(&pre_cuinit_flag, (void (*)(void))preInit);
     ENSURE_INITIALIZED();
-    CUresult res = CUDA_OVERRIDE_CALL(cuda_library_entry,cuInit,Flags);
-    if (res != CUDA_SUCCESS){
-        LOG_ERROR("cuInit failed:%d",res);
+    CUresult res = CUDA_OVERRIDE_CALL(cuda_library_entry, cuInit, Flags);
+    if (res != CUDA_SUCCESS) {
+        LOG_ERROR("cuInit failed:%d", res);
         return res;
     }
-    pthread_once(&post_cuinit_flag, (void(*) (void))postInit);
+    pthread_once(&post_cuinit_flag, (void (*)(void))postInit);
     return CUDA_SUCCESS;
 }
diff --git a/src/multiprocess/common_header.h b/src/multiprocess/common_header.h
old mode 100755
new mode 100644
index fbab37fc..bfc5ff7d
--- a/src/multiprocess/common_header.h
+++ b/src/multiprocess/common_header.h
@@ -9,4 +9,3 @@
 #include "../memory_limit.h"
 
 #endif
-
diff --git a/src/multiprocess/multiprocess_memory_limit.c b/src/multiprocess/multiprocess_memory_limit.c
old mode 100755
new mode 100644
index fce713ba..99d4779a
--- a/src/multiprocess/multiprocess_memory_limit.c
+++ b/src/multiprocess/multiprocess_memory_limit.c
@@ -1,26 +1,25 @@
-#include <sys/mman.h>
-#include <sys/types.h>
-#include <sys/time.h>
-#include <sys/stat.h>
+#include "multiprocess/multiprocess_memory_limit.h"
+
+#include <assert.h>
+#include <cuda.h>
+#include <errno.h>
 #include <fcntl.h>
+#include <nvml.h>
+#include <semaphore.h>
+#include <signal.h>
+#include <stddef.h>
 #include <stdio.h>
 #include <stdlib.h>
-#include <errno.h>
-#include <stddef.h>
-#include <semaphore.h>
-#include <unistd.h>
+#include <sys/mman.h>
+#include <sys/stat.h>
+#include <sys/time.h>
+#include <sys/types.h>
 #include <time.h>
-#include <signal.h>
+#include <unistd.h>
 
-#include <assert.h>
-#include <cuda.h>
+#include "include/memory_limit.h"
 #include "include/nvml_prefix.h"
-#include <nvml.h>
-
 #include "include/process_utils.h"
-#include "include/memory_limit.h"
-#include "multiprocess/multiprocess_memory_limit.h"
-
 
 #ifndef SEM_WAIT_TIME
 #define SEM_WAIT_TIME 10
@@ -52,8 +51,8 @@ static shared_region_info_t region_info = {0, -1, PTHREAD_ONCE_INIT, NULL, 0, NU
 int env_utilization_switch;
 int enable_active_oom_killer;
 size_t context_size;
-size_t initial_offset=0;
-//lock for record kernel time
+size_t initial_offset = 0;
+// lock for recording kernel time
 pthread_mutex_t _kernel_mutex;
 int _record_kernel_interval = 1;
 
@@ -62,7 +61,7 @@ int _record_kernel_interval = 1;
 void do_init_device_memory_limits(uint64_t*, int);
 void exit_withlock(int exitcode);
 
-void set_current_gpu_status(int status){
+void set_current_gpu_status(int status) {
     // Fast path: use cached slot if available
     if (region_info.my_slot != NULL) {
         atomic_store_explicit(&region_info.my_slot->status, status, memory_order_release);
@@ -74,22 +73,19 @@ void set_current_gpu_status(int status){
     int i;
     int32_t my_pid = getpid();
     for (i = 0; i < proc_num; i++) {
-        int32_t slot_pid = atomic_load_explicit(&region_info.shared_region->procs[i].pid, memory_order_acquire);
+        int32_t slot_pid =
+            atomic_load_explicit(&region_info.shared_region->procs[i].pid, memory_order_acquire);
         if (my_pid == slot_pid) {
-            atomic_store_explicit(&region_info.shared_region->procs[i].status, status, memory_order_release);
+            atomic_store_explicit(&region_info.shared_region->procs[i].status, status,
+                                  memory_order_release);
             return;
         }
     }
 }
 
-void sig_restore_stub(int signo){
-    set_current_gpu_status(1);
-}
-
-void sig_swap_stub(int signo){
-    set_current_gpu_status(2);
-}
+void sig_restore_stub(int signo) { set_current_gpu_status(1); }
 
+void sig_swap_stub(int signo) { set_current_gpu_status(2); }
 
 // get device memory from env
 size_t get_limit_from_env(const char* env_name) {
@@ -118,13 +114,13 @@ size_t get_limit_from_env(const char* env_name) {
     size_t res = strtoul(env_limit, &digit_end, 0);
     size_t scaled_res = res * scalar;
     if (scaled_res == 0) {
-        if (env_name[12]=='S'){
-            LOG_INFO("device core util limit set to 0, which means no limit: %s=%s",
-                env_name, env_limit);
-        }else if (env_name[12]=='M'){
-            LOG_WARN("invalid device memory limit %s=%s",env_name,env_limit);
-        }else{
-            LOG_WARN("invalid env name:%s",env_name);
+        if (env_name[12] == 'S') {
+            LOG_INFO("device core util limit set to 0, which means no limit: %s=%s", env_name,
+                     env_limit);
+        } else if (env_name[12] == 'M') {
+            LOG_WARN("invalid device memory limit %s=%s", env_name, env_limit);
+        } else {
+            LOG_WARN("invalid env name:%s", env_name);
         }
         return 0;
     }
@@ -136,36 +132,33 @@ size_t get_limit_from_env(const char* env_name) {
 }
 
 int init_device_info() {
-    unsigned int i,nvmlDevicesCount;
+    unsigned int i, nvmlDevicesCount;
     CHECK_NVML_API(nvmlDeviceGetCount_v2(&nvmlDevicesCount));
-    region_info.shared_region->device_num=nvmlDevicesCount;
+    region_info.shared_region->device_num = nvmlDevicesCount;
     nvmlDevice_t dev;
-    for(i=0;i<nvmlDevicesCount;i++){
+    for (i = 0; i < nvmlDevicesCount; i++) {
         CHECK_NVML_API(nvmlDeviceGetHandleByIndex(i, &dev));
-        CHECK_NVML_API(nvmlDeviceGetUUID(dev,region_info.shared_region->uuids[i],NVML_DEVICE_UUID_V2_BUFFER_SIZE));
+        CHECK_NVML_API(nvmlDeviceGetUUID(dev, region_info.shared_region->uuids[i],
+                                         NVML_DEVICE_UUID_V2_BUFFER_SIZE));
     }
-    LOG_INFO("put_device_info finished %d",nvmlDevicesCount);
+    LOG_INFO("put_device_info finished %u", nvmlDevicesCount);
     return 0;
 }
 
-
-int load_env_from_file(char *filename) {
-    FILE *f=fopen(filename,"r");
-    if (f==NULL)
-        return 0;
+int load_env_from_file(char* filename) {
+    FILE* f = fopen(filename, "r");
+    if (f == NULL) return 0;
     char tmp[10000];
-    int cursor=0;
-    while (!feof(f)){
-        fgets(tmp,10000,f);
-        if (strstr(tmp,"=")==NULL)
-            break;
-        if (tmp[strlen(tmp)-1]=='\n')
-            tmp[strlen(tmp)-1]='\0';
-        for (cursor=0;cursor<strlen(tmp);cursor++){
-            if (tmp[cursor]=='=') {
-                tmp[cursor]='\0';
-                setenv(tmp,tmp+cursor+1,1);
-                LOG_INFO("SET %s to %s",tmp,tmp+cursor+1);
+    int cursor = 0;
+    while (!feof(f)) {
+        if (fgets(tmp, 10000, f) == NULL) break;
+        if (strstr(tmp, "=") == NULL) break;
+        if (tmp[strlen(tmp) - 1] == '\n') tmp[strlen(tmp) - 1] = '\0';
+        for (cursor = 0; cursor < strlen(tmp); cursor++) {
+            if (tmp[cursor] == '=') {
+                tmp[cursor] = '\0';
+                setenv(tmp, tmp + cursor + 1, 1);
+                LOG_INFO("SET %s to %s", tmp, tmp + cursor + 1);
                 break;
             }
         }
@@ -192,7 +185,7 @@ void do_init_device_memory_limits(uint64_t* arr, int len) {
     }
 }
 
-void do_init_device_sm_limits(uint64_t *arr, int len) {
+void do_init_device_sm_limits(uint64_t* arr, int len) {
     size_t fallback_limit = get_limit_from_env(CUDA_DEVICE_SM_LIMIT);
     if (fallback_limit == 0) fallback_limit = 100;
     int i;
@@ -214,8 +207,8 @@ void do_init_device_sm_limits(uint64_t *arr, int len) {
 
 int active_oom_killer() {
     int i;
-    for (i=0;i<region_info.shared_region->proc_num;i++) {
-        kill(region_info.shared_region->procs[i].pid,9);
+    for (i = 0; i < region_info.shared_region->proc_num; i++) {
+        kill(region_info.shared_region->procs[i].pid, 9);
     }
     return 0;
 }
@@ -241,41 +234,36 @@ void pre_launch_kernel() {
 
     LOG_INFO("write last kernel time: %ld", now)
     // Lock-free update using atomic compare-exchange
-    uint64_t expected = atomic_load_explicit(&region_info.shared_region->last_kernel_time, memory_order_acquire);
+    uint64_t expected =
+        atomic_load_explicit(&region_info.shared_region->last_kernel_time, memory_order_acquire);
     while (expected < now) {
         if (atomic_compare_exchange_weak_explicit(&region_info.shared_region->last_kernel_time,
-                &expected, now, memory_order_release, memory_order_acquire)) {
+                                                  &expected, now, memory_order_release,
+                                                  memory_order_acquire)) {
             break;
         }
     }
 }
 
-int shrreg_major_version() {
-    return MAJOR_VERSION;
-}
-
-int shrreg_minor_version() {
-    return MINOR_VERSION;
-}
+int shrreg_major_version() { return MAJOR_VERSION; }
 
+int shrreg_minor_version() { return MINOR_VERSION; }
 
 // Lock-free memory monitor aggregation
 size_t get_gpu_memory_monitor(const int dev) {
     LOG_DEBUG("get_gpu_memory_monitor_lockfree dev=%d", dev);
     ensure_initialized();
-    int i=0;
-    size_t total=0;
+    int i = 0;
+    size_t total = 0;
 
     int proc_num = atomic_load_explicit(&region_info.shared_region->proc_num, memory_order_acquire);
-    for (i=0; i < proc_num; i++) {
+    for (i = 0; i < proc_num; i++) {
         uint64_t monitor = atomic_load_explicit(
-            &region_info.shared_region->procs[i].monitorused[dev],
-            memory_order_relaxed);
+            &region_info.shared_region->procs[i].monitorused[dev], memory_order_relaxed);
         uint64_t used_total = atomic_load_explicit(
-            &region_info.shared_region->procs[i].used[dev].total,
-            memory_order_relaxed);
+            &region_info.shared_region->procs[i].used[dev].total, memory_order_relaxed);
         LOG_DEBUG("dev=%d i=%lu,%lu\n", dev, monitor, used_total);
-        total+=monitor;
+        total += monitor;
     }
     return total;
 }
@@ -284,13 +272,13 @@ size_t get_gpu_memory_monitor(const int dev) {
 size_t get_gpu_memory_usage(const int dev) {
     LOG_INFO("get_gpu_memory_usage_lockfree dev=%d", dev);
     ensure_initialized();
-    int i=0;
-    size_t total=0;
+    int i = 0;
+    size_t total = 0;
 
     // Lock-free read with acquire semantics for proc_num
     int proc_num = atomic_load_explicit(&region_info.shared_region->proc_num, memory_order_acquire);
 
-    for (i=0; i < proc_num; i++) {
+    for (i = 0; i < proc_num; i++) {
         shrreg_proc_slot_t* slot = &region_info.shared_region->procs[i];
         uint64_t proc_usage;
         uint64_t seq1, seq2;
@@ -306,12 +294,12 @@ size_t get_gpu_memory_usage(const int dev) {
             while (seq1 & 1) {
                 // Exponential backoff to reduce contention
                 if (retry_count < 5) {
-                    // First 5 retries: just CPU pause (fast path)
-                    #if defined(__x86_64__) || defined(__i386__)
+// First 5 retries: just CPU pause (fast path)
+#if defined(__x86_64__) || defined(__i386__)
                     __asm__ __volatile__("pause" ::: "memory");
-                    #elif defined(__aarch64__)
+#elif defined(__aarch64__)
                     __asm__ __volatile__("yield" ::: "memory");
-                    #endif
+#endif
                 } else if (retry_count < 20) {
                     // Next 15 retries: 1μs delay
                     usleep(1);
@@ -323,7 +311,8 @@ size_t get_gpu_memory_usage(const int dev) {
                     usleep(100);
                     // Log if we're spinning for a very long time
                     if (retry_count % 100 == 0) {
-                        LOG_DEBUG("Seqlock spinning for slot %d, retry %d (writer active)", i, retry_count);
+                        LOG_DEBUG("Seqlock spinning for slot %d, retry %d (writer active)", i,
+                                  retry_count);
                     }
                 }
 
@@ -348,28 +337,30 @@ size_t get_gpu_memory_usage(const int dev) {
         int32_t hostpid = atomic_load_explicit(&slot->hostpid, memory_order_relaxed);
 
         LOG_INFO("dev=%d pid=%d host pid=%d i=%lu", dev, pid, hostpid, proc_usage);
-        total+=proc_usage;
+        total += proc_usage;
     }
 
-    total+=initial_offset;
+    total += initial_offset;
     return total;
 }
 
 // Lock-free memory monitor update
-int set_gpu_device_memory_monitor(int32_t pid,int dev,size_t monitor){
+int set_gpu_device_memory_monitor(int32_t pid, int dev, size_t monitor) {
     // LOG_WARN("set_gpu_device_memory_monitor_lockfree:%d %d %lu",pid,dev,monitor);
     int i;
     ensure_initialized();
 
     int proc_num = atomic_load_explicit(&region_info.shared_region->proc_num, memory_order_acquire);
-    for (i=0; i < proc_num; i++) {
-        int32_t hostpid = atomic_load_explicit(&region_info.shared_region->procs[i].hostpid, memory_order_acquire);
+    for (i = 0; i < proc_num; i++) {
+        int32_t hostpid = atomic_load_explicit(&region_info.shared_region->procs[i].hostpid,
+                                               memory_order_acquire);
         if (hostpid == pid) {
             uint64_t used_total = atomic_load_explicit(
-                &region_info.shared_region->procs[i].used[dev].total,
-                memory_order_relaxed);
-            LOG_INFO("set_gpu_device_memory_monitor_lockfree:%d %d %lu->%lu", pid, dev, used_total, monitor);
-            atomic_store_explicit(&region_info.shared_region->procs[i].monitorused[dev], monitor, memory_order_relaxed);
+                &region_info.shared_region->procs[i].used[dev].total, memory_order_relaxed);
+            LOG_INFO("set_gpu_device_memory_monitor_lockfree:%d %d %lu->%lu", pid, dev, used_total,
+                     monitor);
+            atomic_store_explicit(&region_info.shared_region->procs[i].monitorused[dev], monitor,
+                                  memory_order_relaxed);
             return 1;
         }
     }
@@ -383,15 +374,16 @@ int set_gpu_device_sm_utilization(int32_t pid, int dev, unsigned int smUtil) {
 
     int proc_num = atomic_load_explicit(&region_info.shared_region->proc_num, memory_order_acquire);
     for (i = 0; i < proc_num; i++) {
-        int32_t hostpid = atomic_load_explicit(&region_info.shared_region->procs[i].hostpid, memory_order_acquire);
+        int32_t hostpid = atomic_load_explicit(&region_info.shared_region->procs[i].hostpid,
+                                               memory_order_acquire);
         if (hostpid == pid) {
-            uint64_t old_util = atomic_load_explicit(
-                &region_info.shared_region->procs[i].device_util[dev].sm_util,
-                memory_order_relaxed);
-            LOG_INFO("set_gpu_device_sm_utilization_lockfree:%d %d %lu->%u", pid, dev, old_util, smUtil);
-            atomic_store_explicit(
-                &region_info.shared_region->procs[i].device_util[dev].sm_util, smUtil,
-                memory_order_relaxed);
+            uint64_t old_util =
+                atomic_load_explicit(&region_info.shared_region->procs[i].device_util[dev].sm_util,
+                                     memory_order_relaxed);
+            LOG_INFO("set_gpu_device_sm_utilization_lockfree:%d %d %lu->%u", pid, dev, old_util,
+                     smUtil);
+            atomic_store_explicit(&region_info.shared_region->procs[i].device_util[dev].sm_util,
+                                  smUtil, memory_order_relaxed);
             return 1;
         }
     }
@@ -399,18 +391,17 @@ int set_gpu_device_sm_utilization(int32_t pid, int dev, unsigned int smUtil) {
 }
 
 // Lock-free utilization initialization
-int init_gpu_device_utilization(){
-    int i,dev;
+int init_gpu_device_utilization() {
+    int i, dev;
     ensure_initialized();
 
     int proc_num = atomic_load_explicit(&region_info.shared_region->proc_num, memory_order_acquire);
-    for (i=0; i < proc_num; i++) {
-        for (dev=0;dev<CUDA_DEVICE_MAX_COUNT;dev++){
-            atomic_store_explicit(
-                &region_info.shared_region->procs[i].device_util[dev].sm_util,
-                0,
-                memory_order_relaxed);
-            atomic_store_explicit(&region_info.shared_region->procs[i].monitorused[dev], 0, memory_order_relaxed);
+    for (i = 0; i < proc_num; i++) {
+        for (dev = 0; dev < CUDA_DEVICE_MAX_COUNT; dev++) {
+            atomic_store_explicit(&region_info.shared_region->procs[i].device_util[dev].sm_util, 0,
+                                  memory_order_relaxed);
+            atomic_store_explicit(&region_info.shared_region->procs[i].monitorused[dev], 0,
+                                  memory_order_relaxed);
             break;
         }
     }
@@ -441,19 +432,18 @@ uint64_t nvml_get_device_memory_usage(const int dev) {
         int slot = 0;
         for (; slot < proc_num; slot++) {
             int32_t slot_pid = atomic_load_explicit(&region->procs[slot].pid, memory_order_relaxed);
-            if (infos[i].pid != slot_pid)
-                continue;
+            if (infos[i].pid != slot_pid) continue;
             usage += infos[i].usedGpuMemory;
         }
     }
-    LOG_DEBUG("Device %d current memory %lu / %lu", 
-            dev, usage, region->limit[dev]);
+    LOG_DEBUG("Device %d current memory %lu / %lu", dev, usage, region->limit[dev]);
     return usage;
 }
 
 // Lock-free memory add using atomics with seqlock for consistent reads
 int add_gpu_device_memory_usage(int32_t pid, int cudadev, size_t usage, int type) {
-    LOG_INFO("add_gpu_device_memory_lockfree:%d %d->%d %lu", pid, cudadev, cuda_to_nvml_map(cudadev), usage);
+    LOG_INFO("add_gpu_device_memory_lockfree:%d %d->%d %lu", pid, cudadev,
+             cuda_to_nvml_map(cudadev), usage);
 
     int dev = cuda_to_nvml_map(cudadev);
     ensure_initialized();
@@ -469,10 +459,12 @@ int add_gpu_device_memory_usage(int32_t pid, int cudadev, size_t usage, int type
         atomic_fetch_add_explicit(&slot->used[dev].total, usage, memory_order_release);
         switch (type) {
             case 0:
-                atomic_fetch_add_explicit(&slot->used[dev].context_size, usage, memory_order_release);
+                atomic_fetch_add_explicit(&slot->used[dev].context_size, usage,
+                                          memory_order_release);
                 break;
             case 1:
-                atomic_fetch_add_explicit(&slot->used[dev].module_size, usage, memory_order_release);
+                atomic_fetch_add_explicit(&slot->used[dev].module_size, usage,
+                                          memory_order_release);
                 break;
             case 2:
                 atomic_fetch_add_explicit(&slot->used[dev].data_size, usage, memory_order_release);
@@ -489,8 +481,9 @@ int add_gpu_device_memory_usage(int32_t pid, int cudadev, size_t usage, int type
     // Slow path: find slot for other process (still lock-free)
     int proc_num = atomic_load_explicit(&region_info.shared_region->proc_num, memory_order_acquire);
     int i;
-    for (i=0; i < proc_num; i++) {
-        int32_t slot_pid = atomic_load_explicit(&region_info.shared_region->procs[i].pid, memory_order_acquire);
+    for (i = 0; i < proc_num; i++) {
+        int32_t slot_pid =
+            atomic_load_explicit(&region_info.shared_region->procs[i].pid, memory_order_acquire);
         if (slot_pid == pid) {
             shrreg_proc_slot_t* slot = &region_info.shared_region->procs[i];
 
@@ -501,13 +494,16 @@ int add_gpu_device_memory_usage(int32_t pid, int cudadev, size_t usage, int type
             atomic_fetch_add_explicit(&slot->used[dev].total, usage, memory_order_release);
             switch (type) {
                 case 0:
-                    atomic_fetch_add_explicit(&slot->used[dev].context_size, usage, memory_order_release);
+                    atomic_fetch_add_explicit(&slot->used[dev].context_size, usage,
+                                              memory_order_release);
                     break;
                 case 1:
-                    atomic_fetch_add_explicit(&slot->used[dev].module_size, usage, memory_order_release);
+                    atomic_fetch_add_explicit(&slot->used[dev].module_size, usage,
+                                              memory_order_release);
                     break;
                 case 2:
-                    atomic_fetch_add_explicit(&slot->used[dev].data_size, usage, memory_order_release);
+                    atomic_fetch_add_explicit(&slot->used[dev].data_size, usage,
+                                              memory_order_release);
                     break;
             }
 
@@ -525,7 +521,8 @@ int add_gpu_device_memory_usage(int32_t pid, int cudadev, size_t usage, int type
 
 // Lock-free memory remove using atomics with seqlock for consistent reads
 int rm_gpu_device_memory_usage(int32_t pid, int cudadev, size_t usage, int type) {
-    LOG_INFO("rm_gpu_device_memory_lockfree:%d %d->%d %d:%lu", pid, cudadev, cuda_to_nvml_map(cudadev), type, usage);
+    LOG_INFO("rm_gpu_device_memory_lockfree:%d %d->%d %d:%lu", pid, cudadev,
+             cuda_to_nvml_map(cudadev), type, usage);
     int dev = cuda_to_nvml_map(cudadev);
     ensure_initialized();
 
@@ -540,10 +537,12 @@ int rm_gpu_device_memory_usage(int32_t pid, int cudadev, size_t usage, int type)
         atomic_fetch_sub_explicit(&slot->used[dev].total, usage, memory_order_release);
         switch (type) {
             case 0:
-                atomic_fetch_sub_explicit(&slot->used[dev].context_size, usage, memory_order_release);
+                atomic_fetch_sub_explicit(&slot->used[dev].context_size, usage,
+                                          memory_order_release);
                 break;
             case 1:
-                atomic_fetch_sub_explicit(&slot->used[dev].module_size, usage, memory_order_release);
+                atomic_fetch_sub_explicit(&slot->used[dev].module_size, usage,
+                                          memory_order_release);
                 break;
             case 2:
                 atomic_fetch_sub_explicit(&slot->used[dev].data_size, usage, memory_order_release);
@@ -562,7 +561,8 @@ int rm_gpu_device_memory_usage(int32_t pid, int cudadev, size_t usage, int type)
     int proc_num = atomic_load_explicit(&region_info.shared_region->proc_num, memory_order_acquire);
     int i;
     for (i = 0; i < proc_num; i++) {
-        int32_t slot_pid = atomic_load_explicit(&region_info.shared_region->procs[i].pid, memory_order_acquire);
+        int32_t slot_pid =
+            atomic_load_explicit(&region_info.shared_region->procs[i].pid, memory_order_acquire);
         if (slot_pid == pid) {
             shrreg_proc_slot_t* slot = &region_info.shared_region->procs[i];
 
@@ -573,13 +573,16 @@ int rm_gpu_device_memory_usage(int32_t pid, int cudadev, size_t usage, int type)
             atomic_fetch_sub_explicit(&slot->used[dev].total, usage, memory_order_release);
             switch (type) {
                 case 0:
-                    atomic_fetch_sub_explicit(&slot->used[dev].context_size, usage, memory_order_release);
+                    atomic_fetch_sub_explicit(&slot->used[dev].context_size, usage,
+                                              memory_order_release);
                     break;
                 case 1:
-                    atomic_fetch_sub_explicit(&slot->used[dev].module_size, usage, memory_order_release);
+                    atomic_fetch_sub_explicit(&slot->used[dev].module_size, usage,
+                                              memory_order_release);
                     break;
                 case 2:
-                    atomic_fetch_sub_explicit(&slot->used[dev].data_size, usage, memory_order_release);
+                    atomic_fetch_sub_explicit(&slot->used[dev].data_size, usage,
+                                              memory_order_release);
                     break;
             }
 
@@ -620,8 +623,10 @@ int fix_lock_shrreg() {
     if (current_owner != 0) {
         int flag = 0;
         if (current_owner == region_info.pid) {
-            LOG_INFO("Detect owner pid = self pid (%d), "
-                "indicates pid loopback or race condition", current_owner);
+            LOG_INFO(
+                "Detect owner pid = self pid (%d), "
+                "indicates pid loopback or race condition",
+                current_owner);
             flag = 1;
         } else {
             int proc_status = proc_alive(current_owner);
@@ -634,7 +639,7 @@ int fix_lock_shrreg() {
             LOG_INFO("Take upgraded lock (%d)", region_info.pid);
             region->owner_pid = region_info.pid;
             SEQ_POINT_MARK(SEQ_FIX_SHRREG_UPDATE_OWNER_OK);
-            res = 0;     
+            res = 0;
         }
     }
 
@@ -657,36 +662,50 @@ void exit_withlock(int exitcode) {
  * member individually using atomic loads and stores.
  */
 static inline void copy_proc_slot_atomic(shrreg_proc_slot_t* dst, shrreg_proc_slot_t* src) {
-    atomic_store_explicit(&dst->pid,
-        atomic_load_explicit(&src->pid, memory_order_relaxed), memory_order_relaxed);
-    atomic_store_explicit(&dst->hostpid,
-        atomic_load_explicit(&src->hostpid, memory_order_relaxed), memory_order_relaxed);
-    atomic_store_explicit(&dst->seqlock,
-        atomic_load_explicit(&src->seqlock, memory_order_relaxed), memory_order_relaxed);
-    atomic_store_explicit(&dst->status,
-        atomic_load_explicit(&src->status, memory_order_relaxed), memory_order_relaxed);
+    atomic_store_explicit(&dst->pid, atomic_load_explicit(&src->pid, memory_order_relaxed),
+                          memory_order_relaxed);
+    atomic_store_explicit(&dst->hostpid, atomic_load_explicit(&src->hostpid, memory_order_relaxed),
+                          memory_order_relaxed);
+    atomic_store_explicit(&dst->seqlock, atomic_load_explicit(&src->seqlock, memory_order_relaxed),
+                          memory_order_relaxed);
+    atomic_store_explicit(&dst->status, atomic_load_explicit(&src->status, memory_order_relaxed),
+                          memory_order_relaxed);
 
     for (int dev = 0; dev < CUDA_DEVICE_MAX_COUNT; dev++) {
         atomic_store_explicit(&dst->used[dev].total,
-            atomic_load_explicit(&src->used[dev].total, memory_order_relaxed), memory_order_relaxed);
-        atomic_store_explicit(&dst->used[dev].context_size,
-            atomic_load_explicit(&src->used[dev].context_size, memory_order_relaxed), memory_order_relaxed);
-        atomic_store_explicit(&dst->used[dev].module_size,
-            atomic_load_explicit(&src->used[dev].module_size, memory_order_relaxed), memory_order_relaxed);
+                              atomic_load_explicit(&src->used[dev].total, memory_order_relaxed),
+                              memory_order_relaxed);
+        atomic_store_explicit(
+            &dst->used[dev].context_size,
+            atomic_load_explicit(&src->used[dev].context_size, memory_order_relaxed),
+            memory_order_relaxed);
+        atomic_store_explicit(
+            &dst->used[dev].module_size,
+            atomic_load_explicit(&src->used[dev].module_size, memory_order_relaxed),
+            memory_order_relaxed);
         atomic_store_explicit(&dst->used[dev].data_size,
-            atomic_load_explicit(&src->used[dev].data_size, memory_order_relaxed), memory_order_relaxed);
+                              atomic_load_explicit(&src->used[dev].data_size, memory_order_relaxed),
+                              memory_order_relaxed);
         atomic_store_explicit(&dst->used[dev].offset,
-            atomic_load_explicit(&src->used[dev].offset, memory_order_relaxed), memory_order_relaxed);
+                              atomic_load_explicit(&src->used[dev].offset, memory_order_relaxed),
+                              memory_order_relaxed);
 
         atomic_store_explicit(&dst->monitorused[dev],
-            atomic_load_explicit(&src->monitorused[dev], memory_order_relaxed), memory_order_relaxed);
+                              atomic_load_explicit(&src->monitorused[dev], memory_order_relaxed),
+                              memory_order_relaxed);
 
-        atomic_store_explicit(&dst->device_util[dev].dec_util,
-            atomic_load_explicit(&src->device_util[dev].dec_util, memory_order_relaxed), memory_order_relaxed);
-        atomic_store_explicit(&dst->device_util[dev].enc_util,
-            atomic_load_explicit(&src->device_util[dev].enc_util, memory_order_relaxed), memory_order_relaxed);
-        atomic_store_explicit(&dst->device_util[dev].sm_util,
-            atomic_load_explicit(&src->device_util[dev].sm_util, memory_order_relaxed), memory_order_relaxed);
+        atomic_store_explicit(
+            &dst->device_util[dev].dec_util,
+            atomic_load_explicit(&src->device_util[dev].dec_util, memory_order_relaxed),
+            memory_order_relaxed);
+        atomic_store_explicit(
+            &dst->device_util[dev].enc_util,
+            atomic_load_explicit(&src->device_util[dev].enc_util, memory_order_relaxed),
+            memory_order_relaxed);
+        atomic_store_explicit(
+            &dst->device_util[dev].sm_util,
+            atomic_load_explicit(&src->device_util[dev].sm_util, memory_order_relaxed),
+            memory_order_relaxed);
     }
 }
 
@@ -739,7 +758,6 @@ void exit_handler() {
     LOG_MSG("Exit cleanup complete for PID %d", my_pid);
 }
 
-
 void lock_shrreg() {
     shared_region_t* region = region_info.shared_region;
     int trials = 0;
@@ -764,23 +782,25 @@ void lock_shrreg() {
             size_t current_owner = atomic_load_explicit(&region->owner_pid, memory_order_acquire);
 
             if (trials <= 3 || trials % 5 == 0) {  // Log first 3, then every 5th
-                LOG_WARN("Lock shrreg timeout (trial %d/%d), owner=%ld",
-                         trials, SEM_WAIT_RETRY_TIMES, current_owner);
+                LOG_WARN("Lock shrreg timeout (trial %d/%d), owner=%zu", trials,
+                         SEM_WAIT_RETRY_TIMES, current_owner);
             }
 
             // SIGKILL RECOVERY: Check if owner is dead (the ONLY case where exit cleanup fails)
             if (current_owner != 0) {
                 int owner_status = proc_alive((int32_t)current_owner);
                 if (owner_status == PROC_STATE_NONALIVE) {
-                    LOG_WARN("Owner %ld is dead (was SIGKILL'd), cleaning up stale lock", current_owner);
+                    LOG_WARN("Owner %zu is dead (was SIGKILL'd), cleaning up stale lock",
+                             current_owner);
                     // Use CAS so only one process does this
                     size_t expected = current_owner;
                     if (atomic_compare_exchange_strong_explicit(&region->owner_pid, &expected, 0,
-                                                               memory_order_release, memory_order_acquire)) {
+                                                                memory_order_release,
+                                                                memory_order_acquire)) {
                         LOG_WARN("Cleared dead owner_pid and posting semaphore");
                         sem_post(&region->sem);  // Unlock
-                        usleep(10000);  // 10ms for semaphore to propagate
-                        continue;  // Retry immediately
+                        usleep(10000);           // 10ms for semaphore to propagate
+                        continue;                // Retry immediately
                     }
                     // Another process is handling it, wait a bit
                     usleep(100000);  // 100ms
@@ -837,13 +857,14 @@ int lock_postinit() {
             return 1;  // Success
         } else if (errno == ETIMEDOUT) {
             trials++;
-            LOG_MSG("Waiting for postinit lock (trial %d/%d, waited %ds, PID %d)",
-                    trials, SEM_WAIT_RETRY_TIMES_POSTINIT, trials * SEM_WAIT_TIME_POSTINIT, getpid());
+            LOG_MSG("Waiting for postinit lock (trial %d/%d, waited %ds, PID %d)", trials,
+                    SEM_WAIT_RETRY_TIMES_POSTINIT, trials * SEM_WAIT_TIME_POSTINIT, getpid());
 
             // After many retries, give up
             if (trials > SEM_WAIT_RETRY_TIMES_POSTINIT) {
-                LOG_ERROR("Postinit lock timeout after %d seconds - another process may have crashed",
-                          SEM_WAIT_RETRY_TIMES_POSTINIT * SEM_WAIT_TIME_POSTINIT);
+                LOG_ERROR(
+                    "Postinit lock timeout after %d seconds - another process may have crashed",
+                    SEM_WAIT_RETRY_TIMES_POSTINIT * SEM_WAIT_TIME_POSTINIT);
                 LOG_ERROR("Skipping host PID detection for this process (will use container PID)");
                 return 0;  // Timeout - didn't acquire lock
             }
@@ -862,10 +883,9 @@ void unlock_postinit() {
     sem_post(&region->sem_postinit);
 }
 
-
 int clear_proc_slot_nolock(int do_clear) {
     int slot = 0;
-    int res=0;
+    int res = 0;
     int cleaned_pid_zero = 0;
     int cleaned_dead = 0;
     shared_region_t* region = region_info.shared_region;
@@ -876,27 +896,34 @@ int clear_proc_slot_nolock(int do_clear) {
         if (pid == 0) {
             LOG_DEBUG("Removing slot %d with PID=0 (marked dead by exit cleanup)", slot);
             cleaned_pid_zero++;
-            res=1;
+            res = 1;
             region->proc_num--;
             copy_proc_slot_atomic(&region->procs[slot], &region->procs[region->proc_num]);
-            if (region_info.my_slot != NULL && region_info.my_slot == &region->procs[region->proc_num]) {
+            if (region_info.my_slot != NULL &&
+                region_info.my_slot == &region->procs[region->proc_num]) {
                 region_info.my_slot = &region->procs[slot];
-                atomic_store_explicit(&region->procs[region->proc_num].seqlock, 0, memory_order_relaxed);
-                atomic_store_explicit(&region->procs[region->proc_num].pid, 0, memory_order_release);
-                atomic_store_explicit(&region->procs[region->proc_num].hostpid, 0, memory_order_relaxed);
-                atomic_store_explicit(&region->procs[region->proc_num].status, 0, memory_order_release);
+                atomic_store_explicit(&region->procs[region->proc_num].seqlock, 0,
+                                      memory_order_relaxed);
+                atomic_store_explicit(&region->procs[region->proc_num].pid, 0,
+                                      memory_order_release);
+                atomic_store_explicit(&region->procs[region->proc_num].hostpid, 0,
+                                      memory_order_relaxed);
+                atomic_store_explicit(&region->procs[region->proc_num].status, 0,
+                                      memory_order_release);
 
                 for (int dev = 0; dev < CUDA_DEVICE_MAX_COUNT; dev++) {
-                    atomic_store_explicit(&region->procs[region->proc_num].used[dev].total, 0, memory_order_relaxed);
-                    atomic_store_explicit(
-                        &region->procs[region->proc_num].used[dev].context_size, 0, memory_order_relaxed);
-                    atomic_store_explicit(
-                        &region->procs[region->proc_num].used[dev].module_size, 0, memory_order_relaxed);
-                    atomic_store_explicit(
-                        &region->procs[region->proc_num].used[dev].data_size, 0, memory_order_relaxed);
-                    atomic_store_explicit(
-                        &region->procs[region->proc_num].device_util[dev].sm_util, 0, memory_order_relaxed);
-                    atomic_store_explicit(&region->procs[region->proc_num].monitorused[dev], 0, memory_order_relaxed);
+                    atomic_store_explicit(&region->procs[region->proc_num].used[dev].total, 0,
+                                          memory_order_relaxed);
+                    atomic_store_explicit(&region->procs[region->proc_num].used[dev].context_size,
+                                          0, memory_order_relaxed);
+                    atomic_store_explicit(&region->procs[region->proc_num].used[dev].module_size, 0,
+                                          memory_order_relaxed);
+                    atomic_store_explicit(&region->procs[region->proc_num].used[dev].data_size, 0,
+                                          memory_order_relaxed);
+                    atomic_store_explicit(&region->procs[region->proc_num].device_util[dev].sm_util,
+                                          0, memory_order_relaxed);
+                    atomic_store_explicit(&region->procs[region->proc_num].monitorused[dev], 0,
+                                          memory_order_relaxed);
                 }
             }
             __sync_synchronize();
@@ -913,24 +940,31 @@ int clear_proc_slot_nolock(int do_clear) {
             res = 1;
             region->proc_num--;
             copy_proc_slot_atomic(&region->procs[slot], &region->procs[region->proc_num]);
-            if (region_info.my_slot != NULL && region_info.my_slot == &region->procs[region->proc_num]) {
+            if (region_info.my_slot != NULL &&
+                region_info.my_slot == &region->procs[region->proc_num]) {
                 region_info.my_slot = &region->procs[slot];
-                atomic_store_explicit(&region->procs[region->proc_num].seqlock, 0, memory_order_relaxed);
-                atomic_store_explicit(&region->procs[region->proc_num].pid, 0, memory_order_release);
-                atomic_store_explicit(&region->procs[region->proc_num].hostpid, 0, memory_order_relaxed);
-                atomic_store_explicit(&region->procs[region->proc_num].status, 0, memory_order_release);
+                atomic_store_explicit(&region->procs[region->proc_num].seqlock, 0,
+                                      memory_order_relaxed);
+                atomic_store_explicit(&region->procs[region->proc_num].pid, 0,
+                                      memory_order_release);
+                atomic_store_explicit(&region->procs[region->proc_num].hostpid, 0,
+                                      memory_order_relaxed);
+                atomic_store_explicit(&region->procs[region->proc_num].status, 0,
+                                      memory_order_release);
 
                 for (int dev = 0; dev < CUDA_DEVICE_MAX_COUNT; dev++) {
-                    atomic_store_explicit(&region->procs[region->proc_num].used[dev].total, 0, memory_order_relaxed);
-                    atomic_store_explicit(
-                        &region->procs[region->proc_num].used[dev].context_size, 0, memory_order_relaxed);
-                    atomic_store_explicit(
-                        &region->procs[region->proc_num].used[dev].module_size, 0, memory_order_relaxed);
-                    atomic_store_explicit(
-                        &region->procs[region->proc_num].used[dev].data_size, 0, memory_order_relaxed);
-                    atomic_store_explicit(
-                        &region->procs[region->proc_num].device_util[dev].sm_util, 0, memory_order_relaxed);
-                    atomic_store_explicit(&region->procs[region->proc_num].monitorused[dev], 0, memory_order_relaxed);
+                    atomic_store_explicit(&region->procs[region->proc_num].used[dev].total, 0,
+                                          memory_order_relaxed);
+                    atomic_store_explicit(&region->procs[region->proc_num].used[dev].context_size,
+                                          0, memory_order_relaxed);
+                    atomic_store_explicit(&region->procs[region->proc_num].used[dev].module_size, 0,
+                                          memory_order_relaxed);
+                    atomic_store_explicit(&region->procs[region->proc_num].used[dev].data_size, 0,
+                                          memory_order_relaxed);
+                    atomic_store_explicit(&region->procs[region->proc_num].device_util[dev].sm_util,
+                                          0, memory_order_relaxed);
+                    atomic_store_explicit(&region->procs[region->proc_num].monitorused[dev], 0,
+                                          memory_order_relaxed);
                 }
             }
             __sync_synchronize();
@@ -942,8 +976,8 @@ int clear_proc_slot_nolock(int do_clear) {
         slot++;
     }
     if (cleaned_pid_zero > 0 || cleaned_dead > 0) {
-        LOG_INFO("Cleaned %d PID=0 slots, %d dead proc slots (proc_num now %d)",
-                 cleaned_pid_zero, cleaned_dead, region->proc_num);
+        LOG_INFO("Cleaned %d PID=0 slots, %d dead proc slots (proc_num now %d)", cleaned_pid_zero,
+                 cleaned_dead, region->proc_num);
     }
     return res;
 }
@@ -957,25 +991,30 @@ void init_proc_slot_withlock() {
     if (proc_num >= SHARED_REGION_MAX_PROCESS_NUM) {
         exit_withlock(-1);
     }
-    signal(SIGUSR2,sig_swap_stub);
-    signal(SIGUSR1,sig_restore_stub);
+    signal(SIGUSR2, sig_swap_stub);
+    signal(SIGUSR1, sig_restore_stub);
 
-    // If, by any means a pid of itself is found in region->process, then it is probably caused by crashloop
-    // we need to reset it.
-    int i,found=0;
-    for (i=0; i < proc_num; i++) {
+    // If, by any means a pid of itself is found in region->process, then it is probably caused by
+    // crashloop we need to reset it.
+    int i, found = 0;
+    for (i = 0; i < proc_num; i++) {
         int32_t slot_pid = atomic_load_explicit(&region->procs[i].pid, memory_order_acquire);
         if (slot_pid == current_pid) {
-            atomic_store_explicit(&region->procs[i].seqlock, 0, memory_order_relaxed);  // Reset seqlock
+            atomic_store_explicit(&region->procs[i].seqlock, 0,
+                                  memory_order_relaxed);  // Reset seqlock
             atomic_store_explicit(&region->procs[i].status, 1, memory_order_release);
 
             // Zero out atomics
             for (int dev = 0; dev < CUDA_DEVICE_MAX_COUNT; dev++) {
                 atomic_store_explicit(&region->procs[i].used[dev].total, 0, memory_order_relaxed);
-                atomic_store_explicit(&region->procs[i].used[dev].context_size, 0, memory_order_relaxed);
-                atomic_store_explicit(&region->procs[i].used[dev].module_size, 0, memory_order_relaxed);
-                atomic_store_explicit(&region->procs[i].used[dev].data_size, 0, memory_order_relaxed);
-                atomic_store_explicit(&region->procs[i].device_util[dev].sm_util, 0, memory_order_relaxed);
+                atomic_store_explicit(&region->procs[i].used[dev].context_size, 0,
+                                      memory_order_relaxed);
+                atomic_store_explicit(&region->procs[i].used[dev].module_size, 0,
+                                      memory_order_relaxed);
+                atomic_store_explicit(&region->procs[i].used[dev].data_size, 0,
+                                      memory_order_relaxed);
+                atomic_store_explicit(&region->procs[i].device_util[dev].sm_util, 0,
+                                      memory_order_relaxed);
                 atomic_store_explicit(&region->procs[i].monitorused[dev], 0, memory_order_relaxed);
             }
 
@@ -987,18 +1026,25 @@ void init_proc_slot_withlock() {
 
     if (!found) {
         // Initialize new slot with atomics
-        atomic_store_explicit(&region->procs[proc_num].seqlock, 0, memory_order_relaxed);  // Start with even (no write)
+        atomic_store_explicit(&region->procs[proc_num].seqlock, 0,
+                              memory_order_relaxed);  // Start with even (no write)
         atomic_store_explicit(&region->procs[proc_num].pid, current_pid, memory_order_release);
         atomic_store_explicit(&region->procs[proc_num].hostpid, 0, memory_order_relaxed);
         atomic_store_explicit(&region->procs[proc_num].status, 1, memory_order_release);
 
         for (int dev = 0; dev < CUDA_DEVICE_MAX_COUNT; dev++) {
-            atomic_store_explicit(&region->procs[proc_num].used[dev].total, 0, memory_order_relaxed);
-            atomic_store_explicit(&region->procs[proc_num].used[dev].context_size, 0, memory_order_relaxed);
-            atomic_store_explicit(&region->procs[proc_num].used[dev].module_size, 0, memory_order_relaxed);
-            atomic_store_explicit(&region->procs[proc_num].used[dev].data_size, 0, memory_order_relaxed);
-            atomic_store_explicit(&region->procs[proc_num].device_util[dev].sm_util, 0, memory_order_relaxed);
-            atomic_store_explicit(&region->procs[proc_num].monitorused[dev], 0, memory_order_relaxed);
+            atomic_store_explicit(&region->procs[proc_num].used[dev].total, 0,
+                                  memory_order_relaxed);
+            atomic_store_explicit(&region->procs[proc_num].used[dev].context_size, 0,
+                                  memory_order_relaxed);
+            atomic_store_explicit(&region->procs[proc_num].used[dev].module_size, 0,
+                                  memory_order_relaxed);
+            atomic_store_explicit(&region->procs[proc_num].used[dev].data_size, 0,
+                                  memory_order_relaxed);
+            atomic_store_explicit(&region->procs[proc_num].device_util[dev].sm_util, 0,
+                                  memory_order_relaxed);
+            atomic_store_explicit(&region->procs[proc_num].monitorused[dev], 0,
+                                  memory_order_relaxed);
         }
 
         region_info.my_slot = &region->procs[proc_num];  // Cache our slot pointer
@@ -1011,47 +1057,43 @@ void init_proc_slot_withlock() {
 
 void print_all() {
     int i;
-    LOG_INFO("Total process: %d",region_info.shared_region->proc_num);
-    for (i=0;i<region_info.shared_region->proc_num;i++) {
-        for (int dev=0;dev<CUDA_DEVICE_MAX_COUNT;dev++){
+    LOG_INFO("Total process: %d", region_info.shared_region->proc_num);
+    for (i = 0; i < region_info.shared_region->proc_num; i++) {
+        for (int dev = 0; dev < CUDA_DEVICE_MAX_COUNT; dev++) {
             LOG_INFO("Process %d hostPid: %d, sm: %lu, memory: %lu, record: %lu",
-                region_info.shared_region->procs[i].pid,
-                region_info.shared_region->procs[i].hostpid, 
-                region_info.shared_region->procs[i].device_util[dev].sm_util, 
-                region_info.shared_region->procs[i].monitorused[dev], 
-                region_info.shared_region->procs[i].used[dev].total);
+                     region_info.shared_region->procs[i].pid,
+                     region_info.shared_region->procs[i].hostpid,
+                     region_info.shared_region->procs[i].device_util[dev].sm_util,
+                     region_info.shared_region->procs[i].monitorused[dev],
+                     region_info.shared_region->procs[i].used[dev].total);
         }
     }
 }
 
 void child_reinit_flag() {
-    LOG_DEBUG("Detect child pid: %d -> %d", region_info.pid, getpid());   
+    LOG_DEBUG("Detect child pid: %d -> %d", region_info.pid, getpid());
     region_info.init_status = PTHREAD_ONCE_INIT;
 }
 
 int set_active_oom_killer() {
-    char *oom_killer_env;
+    char* oom_killer_env;
     oom_killer_env = getenv("ACTIVE_OOM_KILLER");
-    if (oom_killer_env!=NULL){
-        if (strcmp(oom_killer_env,"false") == 0)
-            return 0;
-        if (strcmp(oom_killer_env,"true") == 0)
-            return 1;
-        if (strcmp(oom_killer_env,"0")==0)
-            return 0;
-        if (strcmp(oom_killer_env,"1")==0)
-            return 1;
+    if (oom_killer_env != NULL) {
+        if (strcmp(oom_killer_env, "false") == 0) return 0;
+        if (strcmp(oom_killer_env, "true") == 0) return 1;
+        if (strcmp(oom_killer_env, "0") == 0) return 0;
+        if (strcmp(oom_killer_env, "1") == 0) return 1;
     }
     return 1;
 }
 
 int set_env_utilization_switch() {
-    char *utilization_env;
+    char* utilization_env;
     utilization_env = getenv("GPU_CORE_UTILIZATION_POLICY");
-    if (utilization_env!=NULL){
-        if ((strcmp(utilization_env,"FORCE") ==0 ) || (strcmp(utilization_env,"force") ==0))
+    if (utilization_env != NULL) {
+        if ((strcmp(utilization_env, "FORCE") == 0) || (strcmp(utilization_env, "force") == 0))
             return 1;
-        if ((strcmp(utilization_env,"DISABLE") ==0 ) || (strcmp(utilization_env,"disable") ==0 ))
+        if ((strcmp(utilization_env, "DISABLE") == 0) || (strcmp(utilization_env, "disable") == 0))
             return 2;
     }
     return 0;
@@ -1082,7 +1124,7 @@ void try_create_shrreg() {
         shr_reg_file = MULTIPROCESS_SHARED_REGION_CACHE_DEFAULT;
     }
     // Initialize NVML BEFORE!! open it
-    //nvmlInit();
+    // nvmlInit();
 
     /* If you need sm modification, do it here */
     /* ... set_sm_scale */
@@ -1103,9 +1145,8 @@ void try_create_shrreg() {
     if (lseek(fd, 0, SEEK_SET) != 0) {
         LOG_ERROR("Fail to reseek shrreg %s: errno=%d", shr_reg_file, errno);
     }
-    region_info.shared_region = (shared_region_t*) mmap(
-        NULL, SHARED_REGION_SIZE_MAGIC, 
-        PROT_WRITE | PROT_READ, MAP_SHARED, fd, 0);
+    region_info.shared_region = (shared_region_t*)mmap(NULL, SHARED_REGION_SIZE_MAGIC,
+                                                       PROT_WRITE | PROT_READ, MAP_SHARED, fd, 0);
     shared_region_t* region = region_info.shared_region;
     if (region == NULL) {
         LOG_ERROR("Fail to map shrreg %s: errno=%d", shr_reg_file, errno);
@@ -1113,15 +1154,13 @@ void try_create_shrreg() {
     if (lockf(fd, F_LOCK, SHARED_REGION_SIZE_MAGIC) != 0) {
         LOG_ERROR("Fail to lock shrreg %s: errno=%d", shr_reg_file, errno);
     }
-    //put_device_info();
+    // put_device_info();
     int32_t init_flag = atomic_load_explicit(&region->initialized_flag, memory_order_acquire);
     if (init_flag != MULTIPROCESS_SHARED_REGION_MAGIC_FLAG) {
         region->major_version = MAJOR_VERSION;
         region->minor_version = MINOR_VERSION;
-        do_init_device_memory_limits(
-            region->limit, CUDA_DEVICE_MAX_COUNT);
-        do_init_device_sm_limits(
-            region->sm_limit,CUDA_DEVICE_MAX_COUNT);
+        do_init_device_memory_limits(region->limit, CUDA_DEVICE_MAX_COUNT);
+        do_init_device_sm_limits(region->sm_limit, CUDA_DEVICE_MAX_COUNT);
         if (sem_init(&region->sem, 1, 1) != 0) {
             LOG_ERROR("Fail to init sem %s: errno=%d", shr_reg_file, errno);
         }
@@ -1134,37 +1173,39 @@ void try_create_shrreg() {
         atomic_store_explicit(&region->recent_kernel, 2, memory_order_relaxed);
         atomic_store_explicit(&region->proc_num, 0, memory_order_relaxed);
         region->priority = 1;
-        if (getenv(CUDA_TASK_PRIORITY_ENV)!=NULL)
+        if (getenv(CUDA_TASK_PRIORITY_ENV) != NULL)
             region->priority = atoi(getenv(CUDA_TASK_PRIORITY_ENV));
 
         // Release barrier ensures all initialization is visible before flag is set
         atomic_thread_fence(memory_order_release);
-        atomic_store_explicit(&region->initialized_flag, MULTIPROCESS_SHARED_REGION_MAGIC_FLAG, memory_order_release);
+        atomic_store_explicit(&region->initialized_flag, MULTIPROCESS_SHARED_REGION_MAGIC_FLAG,
+                              memory_order_release);
     } else {
-        if (region->major_version != MAJOR_VERSION || 
-                region->minor_version != MINOR_VERSION) {
-            LOG_ERROR("The current version number %d.%d"
-                    " is different from the file's version number %d.%d",
-                    MAJOR_VERSION, MINOR_VERSION,
-                    region->major_version, region->minor_version);
+        if (region->major_version != MAJOR_VERSION || region->minor_version != MINOR_VERSION) {
+            LOG_ERROR(
+                "The current version number %d.%d"
+                " is different from the file's version number %d.%d",
+                MAJOR_VERSION, MINOR_VERSION, region->major_version, region->minor_version);
         }
         uint64_t local_limits[CUDA_DEVICE_MAX_COUNT];
         do_init_device_memory_limits(local_limits, CUDA_DEVICE_MAX_COUNT);
         int i;
         for (i = 0; i < CUDA_DEVICE_MAX_COUNT; ++i) {
             if (local_limits[i] != region->limit[i]) {
-                LOG_ERROR("Limit inconsistency detected for %dth device"
-                    ", %lu expected, get %lu", 
+                LOG_ERROR(
+                    "Limit inconsistency detected for %dth device"
+                    ", %lu expected, get %lu",
                     i, local_limits[i], region->limit[i]);
             }
         }
-        do_init_device_sm_limits(local_limits,CUDA_DEVICE_MAX_COUNT);
+        do_init_device_sm_limits(local_limits, CUDA_DEVICE_MAX_COUNT);
         for (i = 0; i < CUDA_DEVICE_MAX_COUNT; ++i) {
             if (local_limits[i] != region->sm_limit[i]) {
-                LOG_INFO("SM limit inconsistency detected for %dth device"
-                    ", %lu expected, get %lu", 
+                LOG_INFO(
+                    "SM limit inconsistency detected for %dth device"
+                    ", %lu expected, get %lu",
                     i, local_limits[i], region->sm_limit[i]);
-            //    exit(1); 
+                //    exit(1);
             }
         }
     }
@@ -1185,34 +1226,31 @@ void initialized() {
     init_proc_slot_withlock();
 }
 
-void ensure_initialized() {
-    (void) pthread_once(&region_info.init_status, initialized);
-}
+void ensure_initialized() { (void)pthread_once(&region_info.init_status, initialized); }
 
 int update_host_pid() {
     int i;
-    for (i=0;i<region_info.shared_region->proc_num;i++){
-        if (region_info.shared_region->procs[i].pid == getpid()){
-            if (region_info.shared_region->procs[i].hostpid!=0)
-                pidfound=1; 
+    for (i = 0; i < region_info.shared_region->proc_num; i++) {
+        if (region_info.shared_region->procs[i].pid == getpid()) {
+            if (region_info.shared_region->procs[i].hostpid != 0) pidfound = 1;
         }
     }
     return 0;
 }
 
 int set_host_pid(int hostpid) {
-    int i,j,found=0;
-    for (i=0;i<region_info.shared_region->proc_num;i++){
-        if (region_info.shared_region->procs[i].pid == getpid()){
-            LOG_INFO("SET PID= %d",hostpid);
-            found=1;
+    int i, j, found = 0;
+    for (i = 0; i < region_info.shared_region->proc_num; i++) {
+        if (region_info.shared_region->procs[i].pid == getpid()) {
+            LOG_INFO("SET PID= %d", hostpid);
+            found = 1;
             region_info.shared_region->procs[i].hostpid = hostpid;
-            for (j=0;j<CUDA_DEVICE_MAX_COUNT;j++)
-                region_info.shared_region->procs[i].monitorused[j]=0;
+            for (j = 0; j < CUDA_DEVICE_MAX_COUNT; j++)
+                region_info.shared_region->procs[i].monitorused[j] = 0;
         }
     }
     if (!found) {
-        LOG_ERROR("HOST PID NOT FOUND. %d",hostpid);
+        LOG_ERROR("HOST PID NOT FOUND. %d", hostpid);
         return -1;
     }
     setspec();
@@ -1221,12 +1259,12 @@ int set_host_pid(int hostpid) {
 
 int set_current_device_sm_limit_scale(int dev, int scale) {
     ensure_initialized();
-    if (region_info.shared_region->sm_init_flag==1) return 0;
+    if (region_info.shared_region->sm_init_flag == 1) return 0;
     if (dev < 0 || dev >= CUDA_DEVICE_MAX_COUNT) {
         LOG_ERROR("Illegal device id: %d", dev);
     }
-    LOG_INFO("dev %d new sm limit set mul by %d",dev,scale);
-    region_info.shared_region->sm_limit[dev]=region_info.shared_region->sm_limit[dev]*scale;
+    LOG_INFO("dev %d new sm limit set mul by %d", dev, scale);
+    region_info.shared_region->sm_limit[dev] = region_info.shared_region->sm_limit[dev] * scale;
     region_info.shared_region->sm_init_flag = 1;
     return 0;
 }
@@ -1239,14 +1277,14 @@ int get_current_device_sm_limit(int dev) {
     return region_info.shared_region->sm_limit[dev];
 }
 
-int set_current_device_memory_limit(const int dev,size_t newlimit) {
+int set_current_device_memory_limit(const int dev, size_t newlimit) {
     ensure_initialized();
     if (dev < 0 || dev >= CUDA_DEVICE_MAX_COUNT) {
         LOG_ERROR("Illegal device id: %d", dev);
     }
-    LOG_INFO("dev %d new limit set to %ld",dev,newlimit);
-    region_info.shared_region->limit[dev]=newlimit;
-    return 0; 
+    LOG_INFO("dev %d new limit set to %ld", dev, newlimit);
+    region_info.shared_region->limit[dev] = newlimit;
+    return 0;
 }
 
 uint64_t get_current_device_memory_limit(const int dev) {
@@ -1254,7 +1292,7 @@ uint64_t get_current_device_memory_limit(const int dev) {
     if (dev < 0 || dev >= CUDA_DEVICE_MAX_COUNT) {
         LOG_ERROR("Illegal device id: %d", dev);
     }
-    return region_info.shared_region->limit[dev];       
+    return region_info.shared_region->limit[dev];
 }
 
 uint64_t get_current_device_memory_monitor(const int dev) {
@@ -1263,12 +1301,12 @@ uint64_t get_current_device_memory_monitor(const int dev) {
         LOG_ERROR("Illegal device id: %d", dev);
     }
     uint64_t result = get_gpu_memory_monitor(dev);
-//    result= nvml_get_device_memory_usage(dev);
+    //    result= nvml_get_device_memory_usage(dev);
     return result;
 }
 
 uint64_t get_current_device_memory_usage(const int dev) {
-    clock_t start,finish;
+    clock_t start, finish;
     uint64_t result;
     start = clock();
     ensure_initialized();
@@ -1276,50 +1314,44 @@ uint64_t get_current_device_memory_usage(const int dev) {
         LOG_ERROR("Illegal device id: %d", dev);
     }
     result = get_gpu_memory_usage(dev);
-//    result= nvml_get_device_memory_usage(dev);
-    finish=clock();
-    LOG_DEBUG("get_current_device_memory_usage:tick=%lu result=%lu\n",finish-start,result);
+    //    result= nvml_get_device_memory_usage(dev);
+    finish = clock();
+    LOG_DEBUG("get_current_device_memory_usage:tick=%lu result=%lu\n", finish - start, result);
     return result;
 }
 
-int get_current_priority() {
-    return region_info.shared_region->priority;
-}
+int get_current_priority() { return region_info.shared_region->priority; }
 
-int get_recent_kernel(){
-    return region_info.shared_region->recent_kernel;
-}
+int get_recent_kernel() { return region_info.shared_region->recent_kernel; }
 
-int set_recent_kernel(int value){
-    region_info.shared_region->recent_kernel=value;
+int set_recent_kernel(int value) {
+    region_info.shared_region->recent_kernel = value;
     return 0;
 }
 
 int get_utilization_switch() {
-    if (env_utilization_switch==1)
-        return 1;
-    if (env_utilization_switch==2)
-        return 0;
-    return region_info.shared_region->utilization_switch; 
+    if (env_utilization_switch == 1) return 1;
+    if (env_utilization_switch == 2) return 0;
+    return region_info.shared_region->utilization_switch;
 }
 
-void suspend_all(){
+void suspend_all() {
     int i;
-    for (i=0;i<region_info.shared_region->proc_num;i++){
-        LOG_INFO("Sending USR2 to %d",region_info.shared_region->procs[i].pid);
-        kill(region_info.shared_region->procs[i].pid,SIGUSR2);
+    for (i = 0; i < region_info.shared_region->proc_num; i++) {
+        LOG_INFO("Sending USR2 to %d", region_info.shared_region->procs[i].pid);
+        kill(region_info.shared_region->procs[i].pid, SIGUSR2);
     }
 }
 
-void resume_all(){
+void resume_all() {
     int i;
-    for (i=0;i<region_info.shared_region->proc_num;i++){
-        LOG_INFO("Sending USR1 to %d",region_info.shared_region->procs[i].pid);
-        kill(region_info.shared_region->procs[i].pid,SIGUSR1);
+    for (i = 0; i < region_info.shared_region->proc_num; i++) {
+        LOG_INFO("Sending USR1 to %d", region_info.shared_region->procs[i].pid);
+        kill(region_info.shared_region->procs[i].pid, SIGUSR1);
     }
 }
 
-int wait_status_self(int status){
+int wait_status_self(int status) {
     // Fast path: use cached slot pointer (set during init_proc_slot_withlock)
     if (region_info.my_slot != NULL) {
         int32_t cur = atomic_load_explicit(&region_info.my_slot->status, memory_order_acquire);
@@ -1330,10 +1362,12 @@ int wait_status_self(int status){
     int i;
     int proc_num = atomic_load_explicit(&region_info.shared_region->proc_num, memory_order_acquire);
     int32_t my_pid = getpid();
-    for (i=0; i < proc_num; i++) {
-        int32_t slot_pid = atomic_load_explicit(&region_info.shared_region->procs[i].pid, memory_order_acquire);
+    for (i = 0; i < proc_num; i++) {
+        int32_t slot_pid =
+            atomic_load_explicit(&region_info.shared_region->procs[i].pid, memory_order_acquire);
         if (slot_pid == my_pid) {
-            if (atomic_load_explicit(&region_info.shared_region->procs[i].status, memory_order_acquire) == status)
+            if (atomic_load_explicit(&region_info.shared_region->procs[i].status,
+                                     memory_order_acquire) == status)
                 return 1;
             else
                 return 0;
@@ -1342,37 +1376,37 @@ int wait_status_self(int status){
     return -1;
 }
 
-int wait_status_all(int status){
+int wait_status_all(int status) {
     int i;
     int released = 1;
-    for (i=0;i<region_info.shared_region->proc_num;i++) {
-        LOG_INFO("i=%d pid=%d status=%d",i,region_info.shared_region->procs[i].pid,region_info.shared_region->procs[i].status);
-        if ((region_info.shared_region->procs[i].status!=status) && (region_info.shared_region->procs[i].pid!=getpid()))
-            released = 0; 
-    }
-    LOG_INFO("Return released=%d",released);
+    for (i = 0; i < region_info.shared_region->proc_num; i++) {
+        LOG_INFO("i=%d pid=%d status=%d", i, region_info.shared_region->procs[i].pid,
+                 region_info.shared_region->procs[i].status);
+        if ((region_info.shared_region->procs[i].status != status) &&
+            (region_info.shared_region->procs[i].pid != getpid()))
+            released = 0;
+    }
+    LOG_INFO("Return released=%d", released);
     return released;
 }
 
-shrreg_proc_slot_t *find_proc_by_hostpid(int hostpid) {
+shrreg_proc_slot_t* find_proc_by_hostpid(int hostpid) {
     int i;
-    for (i=0;i<region_info.shared_region->proc_num;i++) {
-        if (region_info.shared_region->procs[i].hostpid == hostpid) 
+    for (i = 0; i < region_info.shared_region->proc_num; i++) {
+        if (region_info.shared_region->procs[i].hostpid == hostpid)
             return &region_info.shared_region->procs[i];
     }
     return NULL;
 }
 
-
-int comparelwr(const char *s1,char *s2){
-    if ((s1==NULL) || (s2==NULL))
-        return 1;
-    if (strlen(s1)!=strlen(s2)) {
+int comparelwr(const char* s1, char* s2) {
+    if ((s1 == NULL) || (s2 == NULL)) return 1;
+    if (strlen(s1) != strlen(s2)) {
         return 1;
     }
     int i;
-    for (i=0;i<strlen(s1);i++)
-        if (tolower(s1[i])!=tolower(s2[i])){
+    for (i = 0; i < strlen(s1); i++)
+        if (tolower(s1[i]) != tolower(s2[i])) {
             return 1;
         }
     return 0;
diff --git a/src/multiprocess/multiprocess_memory_limit.h b/src/multiprocess/multiprocess_memory_limit.h
old mode 100755
new mode 100644
index 39bcbbf2..6a356518
--- a/src/multiprocess/multiprocess_memory_limit.h
+++ b/src/multiprocess/multiprocess_memory_limit.h
@@ -1,31 +1,30 @@
 #ifndef __MULTIPROCESS_MEMORY_LIMIT_H__
 #define __MULTIPROCESS_MEMORY_LIMIT_H__
 
-#include <sys/mman.h>
-#include <sys/types.h>
-#include <sys/stat.h>
-#include <fcntl.h>
-#include <stdlib.h>
+#include <ctype.h>
+#include <cuda.h>
 #include <errno.h>
+#include <fcntl.h>
+#include <pthread.h>
+#include <semaphore.h>
+#include <stdatomic.h>
 #include <stddef.h>
 #include <stdint.h>
-#include <semaphore.h>
-#include <unistd.h>
-#include <time.h>
-#include <ctype.h>
 #include <stdio.h>
+#include <stdlib.h>
 #include <string.h>
-#include <cuda.h>
-#include <pthread.h>
-#include <stdatomic.h>
+#include <sys/mman.h>
+#include <sys/stat.h>
+#include <sys/types.h>
+#include <time.h>
+#include <unistd.h>
 
-#include "static_config.h"
 #include "include/log_utils.h"
+#include "static_config.h"
 
-
-#define MULTIPROCESS_SHARED_REGION_MAGIC_FLAG  19920718
-#define MULTIPROCESS_SHARED_REGION_CACHE_ENV   "CUDA_DEVICE_MEMORY_SHARED_CACHE"
-#define MULTIPROCESS_SHARED_REGION_CACHE_DEFAULT  "/tmp/cudevshr.cache"
+#define MULTIPROCESS_SHARED_REGION_MAGIC_FLAG 19920718
+#define MULTIPROCESS_SHARED_REGION_CACHE_ENV "CUDA_DEVICE_MEMORY_SHARED_CACHE"
+#define MULTIPROCESS_SHARED_REGION_CACHE_DEFAULT "/tmp/cudevshr.cache"
 #define ENV_OVERRIDE_FILE "/overrideEnv"
 #define CUDA_TASK_PRIORITY_ENV "CUDA_TASK_PRIORITY"
 
@@ -34,7 +33,7 @@
 #define CUDA_DEVICE_MEMORY_UPDATE_FAILURE 1
 #define MEMORY_LIMIT_TOLERATION_RATE 1.1
 
-#define SHARED_REGION_SIZE_MAGIC  sizeof(shared_region_t)
+#define SHARED_REGION_SIZE_MAGIC sizeof(shared_region_t)
 #define SHARED_REGION_MAX_PROCESS_NUM 1024
 
 // macros for debugging
@@ -51,7 +50,7 @@
 #define SEQ_AFTER_DEC 9
 
 #ifndef SEQ_POINT_MARK
-    #define SEQ_POINT_MARK(s)
+#define SEQ_POINT_MARK(s)
 #endif
 
 #define FACTOR 32
@@ -76,13 +75,13 @@ typedef struct {
 } device_util_t;
 
 typedef struct {
-    _Atomic int32_t pid;           // Atomic to detect slot allocation
+    _Atomic int32_t pid;  // Atomic to detect slot allocation
     _Atomic int32_t hostpid;
     device_memory_t used[CUDA_DEVICE_MAX_COUNT];
     _Atomic uint64_t monitorused[CUDA_DEVICE_MAX_COUNT];
     device_util_t device_util[CUDA_DEVICE_MAX_COUNT];
     _Atomic int32_t status;
-    _Atomic uint64_t seqlock;      // Sequence lock for consistent snapshots
+    _Atomic uint64_t seqlock;  // Sequence lock for consistent snapshots
     uint64_t unused[2];
 } shrreg_proc_slot_t;
 
@@ -112,24 +111,23 @@ typedef struct {
     int32_t pid;
     int fd;
     pthread_once_t init_status;
-    shared_region_t* shared_region;
-    uint64_t last_kernel_time; // cache for current process
-    shrreg_proc_slot_t* my_slot;  // Cached pointer to this process's slot (lock-free access)
+    shared_region_t *shared_region;
+    uint64_t last_kernel_time;    // cache for current process
+    shrreg_proc_slot_t *my_slot;  // Cached pointer to this process's slot (lock-free access)
 } shared_region_info_t;
 
-
 typedef struct {
-  size_t tid;
-  CUcontext ctx;
+    size_t tid;
+    CUcontext ctx;
 } thread_context_map;
 
 void ensure_initialized();
 
 int get_current_device_sm_limit(int dev);
 uint64_t get_current_device_memory_limit(const int dev);
-int set_current_device_memory_limit(const int dev,size_t newlimit);
-int set_current_device_sm_limit(int dev,int scale);
-int set_current_device_sm_limit_scale(int dev,int scale);
+int set_current_device_memory_limit(const int dev, size_t newlimit);
+int set_current_device_sm_limit(int dev, int scale);
+int set_current_device_sm_limit_scale(int dev, int scale);
 int update_host_pid();
 int set_host_pid(int hostpid);
 
@@ -144,11 +142,11 @@ int get_recent_kernel();
 int get_utilization_switch();
 int set_env_utilization_switch();
 
-int set_gpu_device_memory_monitor(int32_t pid,int dev,size_t monitor);
-int set_gpu_device_sm_utilization(int32_t pid,int dev, unsigned int smUtil);
+int set_gpu_device_memory_monitor(int32_t pid, int dev, size_t monitor);
+int set_gpu_device_sm_utilization(int32_t pid, int dev, unsigned int smUtil);
 int init_gpu_device_utilization();
-int add_gpu_device_memory_usage(int32_t pid,int dev,size_t usage,int type);
-int rm_gpu_device_memory_usage(int32_t pid,int dev,size_t usage,int type);
+int add_gpu_device_memory_usage(int32_t pid, int dev, size_t usage, int type);
+int rm_gpu_device_memory_usage(int32_t pid, int dev, size_t usage, int type);
 
 shrreg_proc_slot_t *find_proc_by_hostpid(int hostpid);
 int active_oom_killer();
@@ -158,10 +156,10 @@ int shrreg_major_version();
 int shrreg_minor_version();
 int init_device_info();
 
-//void inc_current_device_memory_usage(const int dev, const uint64_t usage);
-//void decl_current_device_memory_usage(const int dev, const uint64_t usage);
+// void inc_current_device_memory_usage(const int dev, const uint64_t usage);
+// void decl_current_device_memory_usage(const int dev, const uint64_t usage);
 
-//int oom_check(const int dev,int addon);
+// int oom_check(const int dev,int addon);
 
 void lock_shrreg();
 void unlock_shrreg();
@@ -169,9 +167,9 @@ void unlock_shrreg();
 int lock_postinit();  // Returns 1 on success, 0 on timeout
 void unlock_postinit();
 
-//Setspec of the corresponding device
+// Query and cache each device's SM count and max threads per SM
 int setspec();
-//Remove quit process
+// Remove quit process
 
 void suspend_all();
 void resume_all();
@@ -180,7 +178,7 @@ int wait_status_all(int status);
 void print_all();
 
 int load_env_from_file(char *filename);
-int comparelwr(const char *s1,char *s2);
+int comparelwr(const char *s1, char *s2);
 int put_device_info();
 unsigned int nvml_to_cuda_map(unsigned int nvmldev);
 unsigned int cuda_to_nvml_map(unsigned int cudadev);
diff --git a/src/multiprocess/multiprocess_utilization_watcher.c b/src/multiprocess/multiprocess_utilization_watcher.c
index b4620d30..c80095be 100644
--- a/src/multiprocess/multiprocess_utilization_watcher.c
+++ b/src/multiprocess/multiprocess_utilization_watcher.c
@@ -1,29 +1,27 @@
-#include <sys/mman.h>
-#include <sys/types.h>
-#include <sys/time.h>
-#include <sys/stat.h>
-#include <fcntl.h>
-#include <stdio.h>
-#include <stdlib.h>
-#include <errno.h>
-#include <stddef.h>
-#include <stdint.h>
-#include <semaphore.h>
-#include <unistd.h>
-#include <time.h>
-#include <signal.h>
+#include "multiprocess/multiprocess_utilization_watcher.h"
 
 #include <cuda.h>
-#include "include/nvml_prefix.h"
+#include <errno.h>
+#include <fcntl.h>
 #include <nvml.h>
+#include <semaphore.h>
+#include <signal.h>
+#include <stddef.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <sys/mman.h>
+#include <sys/stat.h>
 #include <sys/time.h>
+#include <sys/types.h>
 #include <sys/wait.h>
+#include <time.h>
+#include <unistd.h>
 
-#include "multiprocess/multiprocess_memory_limit.h"
-#include "multiprocess/multiprocess_utilization_watcher.h"
 #include "include/log_utils.h"
 #include "include/nvml_override.h"
-
+#include "include/nvml_prefix.h"
+#include "multiprocess/multiprocess_memory_limit.h"
 
 static int g_sm_num[CUDA_DEVICE_MAX_COUNT];
 static int g_max_thread_per_sm[CUDA_DEVICE_MAX_COUNT];
@@ -37,89 +35,83 @@ static int cached_sm_limit[CUDA_DEVICE_MAX_COUNT] = {0};
 static int cached_util_switch = 0;
 
 void rate_limiter(int grids, int blocks) {
-  CUdevice current_device;
-  CUresult res = cuCtxGetDevice(&current_device);
-  int device_id = (res == CUDA_SUCCESS) ? (int)current_device : 0;
-
-  int64_t before_cuda_cores = 0;
-  int64_t after_cuda_cores = 0;
-  int64_t kernel_size = grids;
-
-  /* Fast exit using cached values — no shared memory access needed */
-  if (cached_sm_limit[device_id] >= 100 || cached_sm_limit[device_id] == 0) {
-      return;
-  }
-  if (cached_util_switch == 0) {
-      return;
-  }
-
-  while (get_recent_kernel()<0) {
-    sleep(1);
-  }
-  set_recent_kernel(2);
-
-  do {
-CHECK:
-      before_cuda_cores = g_cur_cuda_cores[device_id];
-      if (before_cuda_cores < 0) {
-        nanosleep(&g_cycle, NULL);
-        goto CHECK;
-      }
-      after_cuda_cores = before_cuda_cores - kernel_size;
-  } while (!CAS(&g_cur_cuda_cores[device_id], before_cuda_cores, after_cuda_cores));
+    CUdevice current_device;
+    CUresult res = cuCtxGetDevice(&current_device);
+    int device_id = (res == CUDA_SUCCESS) ? (int)current_device : 0;
+
+    int64_t before_cuda_cores = 0;
+    int64_t after_cuda_cores = 0;
+    int64_t kernel_size = grids;
+
+    /* Fast exit using cached values — no shared memory access needed */
+    if (cached_sm_limit[device_id] >= 100 || cached_sm_limit[device_id] == 0) {
+        return;
+    }
+    if (cached_util_switch == 0) {
+        return;
+    }
+
+    while (get_recent_kernel() < 0) {
+        sleep(1);
+    }
+    set_recent_kernel(2);
+
+    do {
+    CHECK:
+        before_cuda_cores = g_cur_cuda_cores[device_id];
+        if (before_cuda_cores < 0) {
+            nanosleep(&g_cycle, NULL);
+            goto CHECK;
+        }
+        after_cuda_cores = before_cuda_cores - kernel_size;
+    } while (!CAS(&g_cur_cuda_cores[device_id], before_cuda_cores, after_cuda_cores));
 }
 
 static void change_token(int64_t delta, int device_id) {
-  int64_t cuda_cores_before = 0, cuda_cores_after = 0;
+    int64_t cuda_cores_before = 0, cuda_cores_after = 0;
 
-  LOG_DEBUG("device %d: delta: %ld, curr: %ld", device_id, delta, g_cur_cuda_cores[device_id]);
-  do {
-    cuda_cores_before = g_cur_cuda_cores[device_id];
-    cuda_cores_after = cuda_cores_before + delta;
+    LOG_DEBUG("device %d: delta: %ld, curr: %ld", device_id, delta, g_cur_cuda_cores[device_id]);
+    do {
+        cuda_cores_before = g_cur_cuda_cores[device_id];
+        cuda_cores_after = cuda_cores_before + delta;
 
-    if (cuda_cores_after > g_total_cuda_cores[device_id]) {
-      cuda_cores_after = g_total_cuda_cores[device_id];
-    }
-  } while (!CAS(&g_cur_cuda_cores[device_id], cuda_cores_before, cuda_cores_after));
+        if (cuda_cores_after > g_total_cuda_cores[device_id]) {
+            cuda_cores_after = g_total_cuda_cores[device_id];
+        }
+    } while (!CAS(&g_cur_cuda_cores[device_id], cuda_cores_before, cuda_cores_after));
 }
 
 static int64_t delta(int up_limit, int user_current, int64_t share, int device_id) {
-  int utilization_diff =
-      abs(up_limit - user_current) < 5 ? 5 : abs(up_limit - user_current);
-  int64_t increment =
-      (int64_t)g_sm_num[device_id] * (int64_t)g_sm_num[device_id] *
-      (int64_t)g_max_thread_per_sm[device_id] * (int64_t)utilization_diff / 2560;
-
-  /* Accelerate cuda cores allocation when utilization vary widely */
-  if (utilization_diff > up_limit / 2) {
-    increment = increment * utilization_diff * 2 / (up_limit + 1);
-  }
-
-  if (user_current <= up_limit) {
-    share = (share + increment) > g_total_cuda_cores[device_id]
-            ? g_total_cuda_cores[device_id]
-            : (share + increment);
-  } else {
-    share = (share - increment) < 0 ? 0 : (share - increment);
-  }
-
-  return share;
+    int utilization_diff = abs(up_limit - user_current) < 5 ? 5 : abs(up_limit - user_current);
+    int64_t increment = (int64_t)g_sm_num[device_id] * (int64_t)g_sm_num[device_id] *
+                        (int64_t)g_max_thread_per_sm[device_id] * (int64_t)utilization_diff / 2560;
+
+    /* Accelerate cuda cores allocation when utilization varies widely */
+    if (utilization_diff > up_limit / 2) {
+        increment = increment * utilization_diff * 2 / (up_limit + 1);
+    }
+
+    if (user_current <= up_limit) {
+        share = (share + increment) > g_total_cuda_cores[device_id] ? g_total_cuda_cores[device_id]
+                                                                    : (share + increment);
+    } else {
+        share = (share - increment) < 0 ? 0 : (share - increment);
+    }
+
+    return share;
 }
 
-unsigned int nvml_to_cuda_map(unsigned int nvmldev){
+unsigned int nvml_to_cuda_map(unsigned int nvmldev) {
     unsigned int devcount;
     CHECK_NVML_API(nvmlDeviceGetCount_v2(&devcount));
-    int i=0;
-    for (i=0;i<devcount;i++){
-        if (cuda_to_nvml_map(i)==nvmldev)
-          return i;
+    // Bound i by CUDA_DEVICE_MAX_COUNT: cuda_to_nvml_map() indexes a fixed-size array.
+    for (unsigned int i = 0; i < devcount && i < CUDA_DEVICE_MAX_COUNT; i++) {
+        if (cuda_to_nvml_map(i) == nvmldev) return i;
     }
     return -1;
 }
 
-unsigned int cuda_to_nvml_map(unsigned int cudadev){
-    return cuda_to_nvml_map_array[cudadev];
-}
+unsigned int cuda_to_nvml_map(unsigned int cudadev) { return cuda_to_nvml_map_array[cudadev]; }
 
 int setspec() {
     unsigned int device_count;
@@ -130,10 +122,10 @@ int setspec() {
     for (unsigned int dev = 0; dev < device_count && dev < CUDA_DEVICE_MAX_COUNT; dev++) {
         CUdevice cu_dev;
         CHECK_CU_RESULT(cuDeviceGet(&cu_dev, dev));
-        CHECK_CU_RESULT(cuDeviceGetAttribute(&g_sm_num[dev],
-            CU_DEVICE_ATTRIBUTE_MULTIPROCESSOR_COUNT, cu_dev));
-        CHECK_CU_RESULT(cuDeviceGetAttribute(&g_max_thread_per_sm[dev],
-            CU_DEVICE_ATTRIBUTE_MAX_THREADS_PER_MULTIPROCESSOR, cu_dev));
+        CHECK_CU_RESULT(
+            cuDeviceGetAttribute(&g_sm_num[dev], CU_DEVICE_ATTRIBUTE_MULTIPROCESSOR_COUNT, cu_dev));
+        CHECK_CU_RESULT(cuDeviceGetAttribute(
+            &g_max_thread_per_sm[dev], CU_DEVICE_ATTRIBUTE_MAX_THREADS_PER_MULTIPROCESSOR, cu_dev));
         g_total_cuda_cores[dev] = g_max_thread_per_sm[dev] * g_sm_num[dev] * FACTOR;
         LOG_INFO("setspec: device %d sm_num=%d max_threads_per_sm=%d total_cores=%ld FACTOR=%d",
                  dev, g_sm_num[dev], g_max_thread_per_sm[dev], g_total_cuda_cores[dev], FACTOR);
@@ -141,7 +133,7 @@ int setspec() {
     return 0;
 }
 
-int get_used_gpu_utilization(int *userutil,int *sysprocnum) {
+int get_used_gpu_utilization(int *userutil, int *sysprocnum) {
     struct timeval cur;
     size_t microsec;
 
@@ -152,63 +144,62 @@ int get_used_gpu_utilization(int *userutil,int *sysprocnum) {
     unsigned int nvmlCounts;
     CHECK_NVML_API(nvmlDeviceGetCount(&nvmlCounts));
 
-    int devi,cudadev;
-    for (devi=0;devi<nvmlCounts;devi++){
-      uint64_t sum=0;
-      infcount = SHARED_REGION_MAX_PROCESS_NUM;
-      shrreg_proc_slot_t *proc;
-      cudadev = nvml_to_cuda_map((unsigned int)(devi));
-      if (cudadev<0)
-        continue;
-      userutil[cudadev] = 0;
-      nvmlDevice_t device;
-      CHECK_NVML_API(nvmlDeviceGetHandleByIndex(cudadev, &device));
-
-      // OPTIMIZATION: Do slow NVML queries WITHOUT holding lock
-      // This prevents blocking memory allocation operations
-
-      //Get Memory for container
-      nvmlReturn_t res = nvmlDeviceGetComputeRunningProcesses(device,&infcount,infos);
-
-      // Get SM util for container
-      gettimeofday(&cur, NULL);
-      microsec = (cur.tv_sec - 1) * 1000UL * 1000UL + cur.tv_usec;
-      nvmlProcessUtilizationSample_t processes_sample[SHARED_REGION_MAX_PROCESS_NUM];
-      unsigned int processes_num = SHARED_REGION_MAX_PROCESS_NUM;
-      nvmlReturn_t res2 = nvmlDeviceGetProcessUtilization(device, processes_sample, &processes_num, microsec);
-
-      // Now acquire lock only for the brief period needed to update shared memory
-      lock_shrreg();
-
-      if (res == NVML_SUCCESS) {
-        for (i=0; i<infcount; i++){
-          proc = find_proc_by_hostpid(infos[i].pid);
-          if (proc != NULL){
-              proc->monitorused[cudadev] = infos[i].usedGpuMemory;
-          }
+    int devi, cudadev;
+    for (devi = 0; devi < nvmlCounts; devi++) {
+        uint64_t sum = 0;
+        infcount = SHARED_REGION_MAX_PROCESS_NUM;
+        shrreg_proc_slot_t *proc;
+        cudadev = nvml_to_cuda_map((unsigned int)(devi));
+        if (cudadev < 0) continue;
+        userutil[cudadev] = 0;
+        nvmlDevice_t device;
+        CHECK_NVML_API(nvmlDeviceGetHandleByIndex(cudadev, &device));
+
+        // OPTIMIZATION: Do slow NVML queries WITHOUT holding lock
+        // This prevents blocking memory allocation operations
+
+        // Get Memory for container
+        nvmlReturn_t res = nvmlDeviceGetComputeRunningProcesses(device, &infcount, infos);
+
+        // Get SM util for container
+        gettimeofday(&cur, NULL);
+        microsec = (cur.tv_sec - 1) * 1000UL * 1000UL + cur.tv_usec;
+        nvmlProcessUtilizationSample_t processes_sample[SHARED_REGION_MAX_PROCESS_NUM];
+        unsigned int processes_num = SHARED_REGION_MAX_PROCESS_NUM;
+        nvmlReturn_t res2 =
+            nvmlDeviceGetProcessUtilization(device, processes_sample, &processes_num, microsec);
+
+        // Now acquire lock only for the brief period needed to update shared memory
+        lock_shrreg();
+
+        if (res == NVML_SUCCESS) {
+            for (i = 0; i < infcount; i++) {
+                proc = find_proc_by_hostpid(infos[i].pid);
+                if (proc != NULL) {
+                    proc->monitorused[cudadev] = infos[i].usedGpuMemory;
+                }
+            }
         }
-      }
-
-      if (res2 == NVML_SUCCESS) {
-        for (i=0; i<processes_num; i++){
-          proc = find_proc_by_hostpid(processes_sample[i].pid);
-          if (proc != NULL){
-              sum += processes_sample[i].smUtil;
-              proc->device_util[cudadev].sm_util = processes_sample[i].smUtil;
-          }
+
+        if (res2 == NVML_SUCCESS) {
+            for (i = 0; i < processes_num; i++) {
+                proc = find_proc_by_hostpid(processes_sample[i].pid);
+                if (proc != NULL) {
+                    sum += processes_sample[i].smUtil;
+                    proc->device_util[cudadev].sm_util = processes_sample[i].smUtil;
+                }
+            }
         }
-      }
 
-      unlock_shrreg();
+        unlock_shrreg();
 
-      if (sum < 0)
-        sum = 0;
-      userutil[cudadev] = sum;
+        if (sum < 0) sum = 0;
+        userutil[cudadev] = sum;
     }
     return 0;
 }
 
-void* utilization_watcher() {
+void *utilization_watcher() {
     nvmlInit();
     int userutil[CUDA_DEVICE_MAX_COUNT];
     int sysprocnum;
@@ -222,17 +213,16 @@ void* utilization_watcher() {
 
     ensure_initialized();
 
-    while (1){
+    while (1) {
         nanosleep(&g_wait, NULL);
-        if (pidfound==0) {
-          update_host_pid();
-          if (pidfound==0)
-            continue;
+        if (pidfound == 0) {
+            update_host_pid();
+            if (pidfound == 0) continue;
         }
         cached_util_switch = get_utilization_switch();
         LOG_INFO("init_utilization_watcher: util_switch=%d", cached_util_switch);
         init_gpu_device_utilization();
-        get_used_gpu_utilization(userutil,&sysprocnum);
+        get_used_gpu_utilization(userutil, &sysprocnum);
 
         // Calculate independently for each device
         for (unsigned int dev = 0; dev < device_count && dev < CUDA_DEVICE_MAX_COUNT; dev++) {
@@ -241,17 +231,17 @@ void* utilization_watcher() {
             }
 
             if ((share[dev] == g_total_cuda_cores[dev]) && (g_cur_cuda_cores[dev] < 0)) {
-              g_total_cuda_cores[dev] *= 2;
-              share[dev] = g_total_cuda_cores[dev];
+                g_total_cuda_cores[dev] *= 2;
+                share[dev] = g_total_cuda_cores[dev];
             }
 
             if ((userutil[dev] <= 100) && (userutil[dev] >= 0)) {
-              share[dev] = delta(cached_sm_limit[dev], userutil[dev], share[dev], dev);
-              change_token(share[dev], dev);
+                share[dev] = delta(cached_sm_limit[dev], userutil[dev], share[dev], dev);
+                change_token(share[dev], dev);
             }
 
-            LOG_INFO("device %d: userutil=%d currentcores=%ld total=%ld limit=%d share=%ld\n",
-                     dev, userutil[dev], g_cur_cuda_cores[dev], g_total_cuda_cores[dev],
+            LOG_INFO("device %d: userutil=%d currentcores=%ld total=%ld limit=%d share=%ld\n", dev,
+                     userutil[dev], g_cur_cuda_cores[dev], g_total_cuda_cores[dev],
                      cached_sm_limit[dev], share[dev]);
         }
     }
@@ -282,4 +272,3 @@ void init_utilization_watcher() {
     }
     return;
 }
-
diff --git a/src/multiprocess/multiprocess_utilization_watcher.h b/src/multiprocess/multiprocess_utilization_watcher.h
index c7411f3b..791a907d 100644
--- a/src/multiprocess/multiprocess_utilization_watcher.h
+++ b/src/multiprocess/multiprocess_utilization_watcher.h
@@ -16,7 +16,6 @@ static const struct timespec g_wait = {
     .tv_nsec = 120 * MILLISEC,
 };
 
-
 void rate_limiter(int grids, int blocks);
 void init_utilization_watcher();
 void* utilization_watcher();
diff --git a/src/multiprocess/shrreg_tool.c b/src/multiprocess/shrreg_tool.c
old mode 100755
new mode 100644
index 78c14016..14f6caca
--- a/src/multiprocess/shrreg_tool.c
+++ b/src/multiprocess/shrreg_tool.c
@@ -1,62 +1,55 @@
+#include <assert.h>
 #include <stdint.h>
-#include <string.h>
-#include <unistd.h>
 #include <stdio.h>
-#include <assert.h>
+#include <string.h>
 #include <sys/stat.h>
+#include <unistd.h>
 
 #include "include/memory_limit.h"
 
-
 void create_new() {
     load_env_from_file(ENV_OVERRIDE_FILE);
     umask(000);
-	char* shrreg_file = getenv(MULTIPROCESS_SHARED_REGION_CACHE_ENV);
+    char* shrreg_file = getenv(MULTIPROCESS_SHARED_REGION_CACHE_ENV);
     if (shrreg_file == NULL) {
         shrreg_file = MULTIPROCESS_SHARED_REGION_CACHE_DEFAULT;
     }
     int fd = open(shrreg_file, O_RDWR | O_CREAT | O_TRUNC, 0666);
     if (fd < 0) {
-    	LOG_ERROR("Fail to create new shrreg file\n");
+        LOG_ERROR("Fail to create new shrreg file\n");
         assert(0);
     }
     close(fd);
     ensure_initialized();
 }
 
-
-void print_shared_region(){
+void print_shared_region() {
     ensure_initialized();
     print_all();
 }
 
-
-void send_stop_signal(){
+void send_stop_signal() {
     ensure_initialized();
     suspend_all();
-    while (1){
-        if (wait_status_all(2))
-            break;
+    while (1) {
+        if (wait_status_all(2)) break;
         sleep(1);
     }
 }
 
-void send_resume_signal(){
+void send_resume_signal() {
     ensure_initialized();
     resume_all();
-    while (1){
-        if (wait_status_all(1))
-            break;
+    while (1) {
+        if (wait_status_all(1)) break;
         sleep(1);
     }
 }
 
 int main(int argc, char* argv[]) {
-	int k;
+    int k;
     if (argc > 1 && strcmp(argv[1], "--help") == 0) {
-        printf(
-            "--create_new    Create new shared region file\n"
-        );
+        printf("--create_new    Create new shared region file\n");
         return 0;
     }
 
@@ -65,25 +58,22 @@ int main(int argc, char* argv[]) {
         if (strcmp(arg, "--create_new") == 0) {
             create_new();
         }
-        if (strcmp(arg, "--suspend") == 0){
+        if (strcmp(arg, "--suspend") == 0) {
             send_stop_signal();
         }
-        if (strcmp(arg, "--print") == 0){
+        if (strcmp(arg, "--print") == 0) {
             print_shared_region();
         }
-        if (strcmp(arg, "--resume") == 0){
+        if (strcmp(arg, "--resume") == 0) {
             send_resume_signal();
         }
-        if (strcmp(arg, "--print") == 0){
+        if (strcmp(arg, "--print") == 0) {
             print_shared_region();
         }
-        if (strcmp(arg, "--version") == 0){
-            printf("shrreg size: %ld, version %d.%d\n", 
-                    sizeof(shared_region_t),
-                    shrreg_major_version(),
-                    shrreg_minor_version());
+        if (strcmp(arg, "--version") == 0) {
+            printf("shrreg size: %zu, version %d.%d\n", sizeof(shared_region_t),
+                   shrreg_major_version(), shrreg_minor_version());
         }
     }
     return 0;
 }
-
diff --git a/src/nvml/hook.c b/src/nvml/hook.c
index 0bc16d31..67ab6046 100644
--- a/src/nvml/hook.c
+++ b/src/nvml/hook.c
@@ -1,6 +1,7 @@
-#include <string.h>
 #include <ctype.h>
 #include <dlfcn.h>
+#include <string.h>
+
 #include "include/libnvml_hook.h"
 #include "include/nvml-subset.h"
 #include "include/utils.h"
@@ -262,24 +263,22 @@ extern fp_dlsym real_dlsym;
 extern int virtual_nvml_devices;
 extern int cuda_to_nvml_map_array[CUDA_DEVICE_MAX_COUNT];
 
-nvmlReturn_t nvmlDeviceGetIndex(nvmlDevice_t device, unsigned int *index) {
+nvmlReturn_t nvmlDeviceGetIndex(nvmlDevice_t device, unsigned int* index) {
     return NVML_OVERRIDE_CALL(nvml_library_entry, nvmlDeviceGetIndex, device, index);
 }
 
-
 void load_nvml_libraries() {
-    void *table = NULL;
+    void* table = NULL;
     char driver_filename[FILENAME_MAX];
 
     if (real_dlsym == NULL) {
-        real_dlsym = dlvsym(RTLD_NEXT,"dlsym","GLIBC_2.2.5");
+        real_dlsym = dlvsym(RTLD_NEXT, "dlsym", "GLIBC_2.2.5");
         if (real_dlsym == NULL) {
-            void *libc_handle = dlopen("libc.so.6", RTLD_LAZY);
+            void* libc_handle = dlopen("libc.so.6", RTLD_LAZY);
             if (libc_handle) {
                 real_dlsym = dlsym(libc_handle, "dlsym");
             }
-            if (real_dlsym == NULL)
-                LOG_ERROR("real dlsym not found");
+            if (real_dlsym == NULL) LOG_ERROR("real dlsym not found");
         }
     }
     snprintf(driver_filename, FILENAME_MAX - 1, "%s", "libnvidia-ml.so.1");
@@ -287,15 +286,14 @@ void load_nvml_libraries() {
 
     table = dlopen(driver_filename, RTLD_NOW | RTLD_NODELETE);
     if (!table) {
-        LOG_WARN("can't find library %s", driver_filename);  
+        LOG_WARN("can't find library %s", driver_filename);
     }
     int i;
     for (i = 0; i < NVML_ENTRY_END; i++) {
-        LOG_DEBUG("loading %s:%d",nvml_library_entry[i].name,i);
+        LOG_DEBUG("loading %s:%d", nvml_library_entry[i].name, i);
         nvml_library_entry[i].fn_ptr = real_dlsym(table, nvml_library_entry[i].name);
         if (!nvml_library_entry[i].fn_ptr) {
-            LOG_INFO("can't find function %s in %s", nvml_library_entry[i].name,
-                driver_filename);
+            LOG_INFO("can't find function %s in %s", nvml_library_entry[i].name, driver_filename);
         }
     }
     LOG_INFO("loaded nvml libraries");
@@ -308,14 +306,12 @@ void nvml_preInit() {
     load_nvml_libraries();
     for (int i = 0; i < CUDA_DEVICE_MAX_COUNT; i++) {
         cuda_to_nvml_map_array[i] = i;
-    }   
+    }
 }
 
-void nvml_postInit() {
-    init_device_info();
-}
+void nvml_postInit() { init_device_info(); }
 
-nvmlReturn_t _nvmlDeviceGetMemoryInfo(nvmlDevice_t device,void* memory,int version) {
+nvmlReturn_t _nvmlDeviceGetMemoryInfo(nvmlDevice_t device, void* memory, int version) {
     LOG_DEBUG("into nvmlDeviceGetMemoryInfo");
     if (memory == NULL) {
         return NVML_SUCCESS;
@@ -324,12 +320,16 @@ nvmlReturn_t _nvmlDeviceGetMemoryInfo(nvmlDevice_t device,void* memory,int versi
 
     switch (version) {
         case 1:
-            CHECK_NVML_API(NVML_OVERRIDE_CALL(nvml_library_entry,nvmlDeviceGetMemoryInfo, device, memory));
-            LOG_DEBUG("origin_free=%lld total=%lld\n", ((nvmlMemory_t*)memory)->free, ((nvmlMemory_t*)memory)->total);
+            CHECK_NVML_API(
+                NVML_OVERRIDE_CALL(nvml_library_entry, nvmlDeviceGetMemoryInfo, device, memory));
+            LOG_DEBUG("origin_free=%lld total=%lld\n", ((nvmlMemory_t*)memory)->free,
+                      ((nvmlMemory_t*)memory)->total);
             break;
         case 2:
-            CHECK_NVML_API(NVML_OVERRIDE_CALL(nvml_library_entry,nvmlDeviceGetMemoryInfo_v2, device, (nvmlMemory_v2_t *)memory));
-            LOG_DEBUG("origin_free=%lld total=%lld\n", ((nvmlMemory_v2_t*)memory)->free, ((nvmlMemory_v2_t*)memory)->total);
+            CHECK_NVML_API(NVML_OVERRIDE_CALL(nvml_library_entry, nvmlDeviceGetMemoryInfo_v2,
+                                              device, (nvmlMemory_v2_t*)memory));
+            LOG_DEBUG("origin_free=%lld total=%lld\n", ((nvmlMemory_v2_t*)memory)->free,
+                      ((nvmlMemory_v2_t*)memory)->total);
             break;
         default:
             return NVML_ERROR_INVALID_ARGUMENT;
@@ -345,138 +345,138 @@ nvmlReturn_t _nvmlDeviceGetMemoryInfo(nvmlDevice_t device,void* memory,int versi
     LOG_DEBUG("usage=%ld limit=%ld monitor=%ld", usage, limit, monitor);
     if (limit == 0) {
         switch (version) {
-        case 1:
-             ((nvmlMemory_t*)memory)->used = usage;
-            return NVML_SUCCESS;
-        case 2:
-            ((nvmlMemory_v2_t *)memory)->used = usage;
-            return NVML_SUCCESS;
+            case 1:
+                ((nvmlMemory_t*)memory)->used = usage;
+                return NVML_SUCCESS;
+            case 2:
+                ((nvmlMemory_v2_t*)memory)->used = usage;
+                return NVML_SUCCESS;
         }
     } else {
         switch (version) {
-        case 1:
-             ((nvmlMemory_t*)memory)->free = (limit-usage);
-             ((nvmlMemory_t*)memory)->total = limit;
-             ((nvmlMemory_t*)memory)->used = usage;
-            return NVML_SUCCESS;
-        case 2:
-            ((nvmlMemory_v2_t *)memory)->free = (limit-usage);
-            ((nvmlMemory_v2_t *)memory)->total = limit;
-            ((nvmlMemory_v2_t *)memory)->used = usage;
-            return NVML_SUCCESS;
-        } 
+            case 1:
+                ((nvmlMemory_t*)memory)->free = (limit - usage);
+                ((nvmlMemory_t*)memory)->total = limit;
+                ((nvmlMemory_t*)memory)->used = usage;
+                return NVML_SUCCESS;
+            case 2:
+                ((nvmlMemory_v2_t*)memory)->free = (limit - usage);
+                ((nvmlMemory_v2_t*)memory)->total = limit;
+                ((nvmlMemory_v2_t*)memory)->used = usage;
+                return NVML_SUCCESS;
+        }
     }
     return NVML_SUCCESS;
 }
 
 nvmlReturn_t nvmlDeviceGetMemoryInfo(nvmlDevice_t device, nvmlMemory_t* memory) {
-    return _nvmlDeviceGetMemoryInfo(device,memory,1); 
+    return _nvmlDeviceGetMemoryInfo(device, memory, 1);
 }
 
 nvmlReturn_t nvmlDeviceGetMemoryInfo_v2(nvmlDevice_t device, nvmlMemory_v2_t* memory) {
-    return _nvmlDeviceGetMemoryInfo(device,memory,2);
+    return _nvmlDeviceGetMemoryInfo(device, memory, 2);
 }
 
-
-nvmlReturn_t nvmlDeviceGetNvLinkRemotePciInfo_v2 ( nvmlDevice_t device, unsigned int  link, nvmlPciInfo_t* pci ) {
-    nvmlReturn_t res = NVML_OVERRIDE_CALL(nvml_library_entry,nvmlDeviceGetNvLinkRemotePciInfo_v2,device,link,pci);
+nvmlReturn_t nvmlDeviceGetNvLinkRemotePciInfo_v2(nvmlDevice_t device, unsigned int link,
+                                                 nvmlPciInfo_t* pci) {
+    nvmlReturn_t res = NVML_OVERRIDE_CALL(nvml_library_entry, nvmlDeviceGetNvLinkRemotePciInfo_v2,
+                                          device, link, pci);
     return res;
 }
 
-nvmlReturn_t nvmlDeviceGetNvLinkRemotePciInfo ( nvmlDevice_t device, unsigned int  link, nvmlPciInfo_t* pci ) {
-    nvmlReturn_t res = NVML_OVERRIDE_CALL(nvml_library_entry,nvmlDeviceGetNvLinkRemotePciInfo,device,link,pci);
+nvmlReturn_t nvmlDeviceGetNvLinkRemotePciInfo(nvmlDevice_t device, unsigned int link,
+                                              nvmlPciInfo_t* pci) {
+    nvmlReturn_t res =
+        NVML_OVERRIDE_CALL(nvml_library_entry, nvmlDeviceGetNvLinkRemotePciInfo, device, link, pci);
     return res;
 }
 
-nvmlReturn_t nvmlDeviceGetHandleByIndex ( unsigned int  index, nvmlDevice_t* device ){
+nvmlReturn_t nvmlDeviceGetHandleByIndex(unsigned int index, nvmlDevice_t* device) {
     nvmlReturn_t res;
-    LOG_DEBUG("nvmlDeviceGetHandleByIndex index=%u",index); 
-    res = NVML_OVERRIDE_CALL_NO_LOG(nvml_library_entry,nvmlDeviceGetHandleByIndex,index,device);
+    LOG_DEBUG("nvmlDeviceGetHandleByIndex index=%u", index);
+    res = NVML_OVERRIDE_CALL_NO_LOG(nvml_library_entry, nvmlDeviceGetHandleByIndex, index, device);
     return res;
 }
 
-nvmlReturn_t nvmlDeviceGetHandleByIndex_v2 ( unsigned int  index, nvmlDevice_t* device ){
+nvmlReturn_t nvmlDeviceGetHandleByIndex_v2(unsigned int index, nvmlDevice_t* device) {
     nvmlReturn_t res;
-    LOG_DEBUG("nvmlDeviceGetHandleByIndex_v2 index=%u",index); 
-    res = NVML_OVERRIDE_CALL_NO_LOG(nvml_library_entry,nvmlDeviceGetHandleByIndex_v2,index,device);
+    LOG_DEBUG("nvmlDeviceGetHandleByIndex_v2 index=%u", index);
+    res =
+        NVML_OVERRIDE_CALL_NO_LOG(nvml_library_entry, nvmlDeviceGetHandleByIndex_v2, index, device);
     return res;
 }
 
-nvmlReturn_t nvmlDeviceGetHandleByPciBusId_v2 ( const char* pciBusId, nvmlDevice_t* device ) {
-    LOG_INFO("NVML DeviceGetHandleByPciBusID_v2 %s",pciBusId);
-    return NVML_OVERRIDE_CALL(nvml_library_entry,nvmlDeviceGetHandleByPciBusId_v2,pciBusId,device);
+nvmlReturn_t nvmlDeviceGetHandleByPciBusId_v2(const char* pciBusId, nvmlDevice_t* device) {
+    LOG_INFO("NVML DeviceGetHandleByPciBusID_v2 %s", pciBusId);
+    return NVML_OVERRIDE_CALL(nvml_library_entry, nvmlDeviceGetHandleByPciBusId_v2, pciBusId,
+                              device);
 }
 
-
-nvmlReturn_t nvmlDeviceGetHandleByPciBusId(const char *pciBusId,
-                                           nvmlDevice_t *device) {
-    LOG_DEBUG("NVML DeviceGetHandleByPciBusId %s",pciBusId);
-    return NVML_OVERRIDE_CALL(nvml_library_entry, nvmlDeviceGetHandleByPciBusId,
-                         pciBusId, device);
+nvmlReturn_t nvmlDeviceGetHandleByPciBusId(const char* pciBusId, nvmlDevice_t* device) {
+    LOG_DEBUG("NVML DeviceGetHandleByPciBusId %s", pciBusId);
+    return NVML_OVERRIDE_CALL(nvml_library_entry, nvmlDeviceGetHandleByPciBusId, pciBusId, device);
 }
 
-nvmlReturn_t nvmlDeviceGetHandleBySerial ( const char* serial, nvmlDevice_t* device ) {
-    LOG_INFO("NVML DeviceGetHandleBySerial Not supported %s",serial);
-    return NVML_OVERRIDE_CALL(nvml_library_entry,nvmlDeviceGetHandleBySerial,serial,device);
+nvmlReturn_t nvmlDeviceGetHandleBySerial(const char* serial, nvmlDevice_t* device) {
+    LOG_INFO("NVML DeviceGetHandleBySerial Not supported %s", serial);
+    return NVML_OVERRIDE_CALL(nvml_library_entry, nvmlDeviceGetHandleBySerial, serial, device);
 }
 
-nvmlReturn_t nvmlDeviceGetHandleByUUID ( const char* uuid, nvmlDevice_t* device ) {
-    nvmlReturn_t res = NVML_OVERRIDE_CALL(nvml_library_entry,nvmlDeviceGetHandleByUUID,uuid,device);
+nvmlReturn_t nvmlDeviceGetHandleByUUID(const char* uuid, nvmlDevice_t* device) {
+    nvmlReturn_t res =
+        NVML_OVERRIDE_CALL(nvml_library_entry, nvmlDeviceGetHandleByUUID, uuid, device);
     return res;
 }
 
-nvmlReturn_t nvmlDeviceGetCount ( unsigned int* deviceCount ) {
-    return NVML_OVERRIDE_CALL(nvml_library_entry,nvmlDeviceGetCount_v2,deviceCount);
+nvmlReturn_t nvmlDeviceGetCount(unsigned int* deviceCount) {
+    return NVML_OVERRIDE_CALL(nvml_library_entry, nvmlDeviceGetCount_v2, deviceCount);
 }
 
-nvmlReturn_t nvmlDeviceGetCount_v2 ( unsigned int* deviceCount ) {
-    return NVML_OVERRIDE_CALL(nvml_library_entry,nvmlDeviceGetCount_v2,deviceCount);
+nvmlReturn_t nvmlDeviceGetCount_v2(unsigned int* deviceCount) {
+    return NVML_OVERRIDE_CALL(nvml_library_entry, nvmlDeviceGetCount_v2, deviceCount);
 }
 
-nvmlReturn_t nvmlInitWithFlags( unsigned int  flags ) {
+nvmlReturn_t nvmlInitWithFlags(unsigned int flags) {
     LOG_DEBUG("nvmlInitWithFlags")
-    pthread_once(&init_virtual_map_pre_flag, (void(*) (void))nvml_preInit);
-    nvmlReturn_t res =  NVML_OVERRIDE_CALL(nvml_library_entry, nvmlInitWithFlags,flags);
-    pthread_once(&init_virtual_map_post_flag,(void (*)(void))nvml_postInit);
+    pthread_once(&init_virtual_map_pre_flag, (void (*)(void))nvml_preInit);
+    nvmlReturn_t res = NVML_OVERRIDE_CALL(nvml_library_entry, nvmlInitWithFlags, flags);
+    pthread_once(&init_virtual_map_post_flag, (void (*)(void))nvml_postInit);
     return res;
 }
 
 nvmlReturn_t nvmlInit(void) {
     LOG_DEBUG("nvmlInit")
-    pthread_once(&init_virtual_map_pre_flag,(void (*)(void))nvml_preInit);
+    pthread_once(&init_virtual_map_pre_flag, (void (*)(void))nvml_preInit);
     nvmlReturn_t res = NVML_OVERRIDE_CALL(nvml_library_entry, nvmlInit_v2);
-    pthread_once(&init_virtual_map_post_flag,(void (*)(void))nvml_postInit);
+    pthread_once(&init_virtual_map_post_flag, (void (*)(void))nvml_postInit);
     return res;
 }
 
 nvmlReturn_t nvmlInit_v2(void) {
     LOG_DEBUG("nvmlInit_v2");
-    pthread_once(&init_virtual_map_pre_flag,(void (*)(void))nvml_preInit);
+    pthread_once(&init_virtual_map_pre_flag, (void (*)(void))nvml_preInit);
     nvmlReturn_t res = NVML_OVERRIDE_CALL(nvml_library_entry, nvmlInit_v2);
-    pthread_once(&init_virtual_map_post_flag,(void (*)(void))nvml_postInit);
+    pthread_once(&init_virtual_map_post_flag, (void (*)(void))nvml_postInit);
     return res;
 }
 
-nvmlReturn_t nvmlDeviceGetPciInfo_v3(nvmlDevice_t device, nvmlPciInfo_t *pci) {
-  nvmlReturn_t res = NVML_OVERRIDE_CALL(nvml_library_entry, nvmlDeviceGetPciInfo_v3, device,
-                         pci);
-  return res;
+nvmlReturn_t nvmlDeviceGetPciInfo_v3(nvmlDevice_t device, nvmlPciInfo_t* pci) {
+    nvmlReturn_t res = NVML_OVERRIDE_CALL(nvml_library_entry, nvmlDeviceGetPciInfo_v3, device, pci);
+    return res;
 }
 
-nvmlReturn_t nvmlDeviceGetPciInfo_v2(nvmlDevice_t device, nvmlPciInfo_t *pci) {
-  nvmlReturn_t res = NVML_OVERRIDE_CALL(nvml_library_entry, nvmlDeviceGetPciInfo_v2, device,
-                         pci);
-  return res;
+nvmlReturn_t nvmlDeviceGetPciInfo_v2(nvmlDevice_t device, nvmlPciInfo_t* pci) {
+    nvmlReturn_t res = NVML_OVERRIDE_CALL(nvml_library_entry, nvmlDeviceGetPciInfo_v2, device, pci);
+    return res;
 }
 
-nvmlReturn_t nvmlDeviceGetPciInfo(nvmlDevice_t device, nvmlPciInfo_t *pci) {
-  nvmlReturn_t res =  NVML_OVERRIDE_CALL(nvml_library_entry, nvmlDeviceGetPciInfo, device, pci);
-  return res;
+nvmlReturn_t nvmlDeviceGetPciInfo(nvmlDevice_t device, nvmlPciInfo_t* pci) {
+    nvmlReturn_t res = NVML_OVERRIDE_CALL(nvml_library_entry, nvmlDeviceGetPciInfo, device, pci);
+    return res;
 }
 
-nvmlReturn_t nvmlDeviceGetUUID(nvmlDevice_t device, char *uuid,
-                               unsigned int length) {
-    nvmlReturn_t res = NVML_OVERRIDE_CALL(nvml_library_entry, nvmlDeviceGetUUID, device, uuid,
-                         length);
+nvmlReturn_t nvmlDeviceGetUUID(nvmlDevice_t device, char* uuid, unsigned int length) {
+    nvmlReturn_t res =
+        NVML_OVERRIDE_CALL(nvml_library_entry, nvmlDeviceGetUUID, device, uuid, length);
     return res;
 }
diff --git a/src/nvml/nvml_entry.c b/src/nvml/nvml_entry.c
index 2019869e..e5a5ca62 100644
--- a/src/nvml/nvml_entry.c
+++ b/src/nvml/nvml_entry.c
@@ -1,21 +1,19 @@
 #include <pthread.h>
-#include "include/nvml_prefix.h"
+
 #include "include/libnvml_hook.h"
+#include "include/nvml_prefix.h"
 #include "include/utils.h"
 
 extern entry_t cuda_library_entry[];
 extern entry_t nvml_library_entry[];
-//extern resource_data_t g_vcuda_config;
+// extern resource_data_t g_vcuda_config;
 
-nvmlReturn_t nvmlShutdown(void) {
-  return NVML_OVERRIDE_CALL(nvml_library_entry, nvmlShutdown);
-}
+nvmlReturn_t nvmlShutdown(void) { return NVML_OVERRIDE_CALL(nvml_library_entry, nvmlShutdown); }
 
 const char *nvmlErrorString(nvmlReturn_t result) {
-  const char *(*_entry)(nvmlReturn_t) =
-      NVML_FIND_ENTRY(nvml_library_entry, nvmlErrorString);
+    const char *(*_entry)(nvmlReturn_t) = NVML_FIND_ENTRY(nvml_library_entry, nvmlErrorString);
 
-  return _entry(result);
+    return _entry(result);
 }
 
 /*
@@ -31,12 +29,10 @@ nvmlReturn_t nvmlDeviceGetHandleByIndex(unsigned int index,
 }
 */
 
-nvmlReturn_t nvmlDeviceGetComputeRunningProcesses(nvmlDevice_t device,
-                                                  unsigned int *infoCount,
+nvmlReturn_t nvmlDeviceGetComputeRunningProcesses(nvmlDevice_t device, unsigned int *infoCount,
                                                   nvmlProcessInfo_t *infos) {
-  return NVML_OVERRIDE_CALL_NO_LOG(nvml_library_entry,
-                         nvmlDeviceGetComputeRunningProcesses, device,
-                         infoCount, infos);
+    return NVML_OVERRIDE_CALL_NO_LOG(nvml_library_entry, nvmlDeviceGetComputeRunningProcesses,
+                                     device, infoCount, infos);
 }
 /*
 nvmlReturn_t nvmlDeviceGetPciInfo_v3(nvmlDevice_t device, nvmlPciInfo_t *pci) {
@@ -53,12 +49,12 @@ nvmlReturn_t nvmlDeviceGetPciInfo(nvmlDevice_t device, nvmlPciInfo_t *pci) {
   return NVML_OVERRIDE_CALL(nvml_library_entry, nvmlDeviceGetPciInfo, device, pci);
 }*/
 
-nvmlReturn_t nvmlDeviceGetProcessUtilization(
-    nvmlDevice_t device, nvmlProcessUtilizationSample_t *utilization,
-    unsigned int *processSamplesCount, unsigned long long lastSeenTimeStamp) {
-  return NVML_OVERRIDE_CALL_NO_LOG(nvml_library_entry, nvmlDeviceGetProcessUtilization,
-                         device, utilization, processSamplesCount,
-                         lastSeenTimeStamp);
+nvmlReturn_t nvmlDeviceGetProcessUtilization(nvmlDevice_t device,
+                                             nvmlProcessUtilizationSample_t *utilization,
+                                             unsigned int *processSamplesCount,
+                                             unsigned long long lastSeenTimeStamp) {
+    return NVML_OVERRIDE_CALL_NO_LOG(nvml_library_entry, nvmlDeviceGetProcessUtilization, device,
+                                     utilization, processSamplesCount, lastSeenTimeStamp);
 }
 /*
 nvmlReturn_t nvmlDeviceGetCount_v2(unsigned int *deviceCount) {
@@ -71,301 +67,245 @@ nvmlReturn_t nvmlDeviceGetCount(unsigned int *deviceCount) {
 }*/
 
 nvmlReturn_t nvmlDeviceClearAccountingPids(nvmlDevice_t device) {
-  return NVML_OVERRIDE_CALL(nvml_library_entry, nvmlDeviceClearAccountingPids,
-                         device);
+    return NVML_OVERRIDE_CALL(nvml_library_entry, nvmlDeviceClearAccountingPids, device);
 }
 
 nvmlReturn_t nvmlDeviceClearCpuAffinity(nvmlDevice_t device) {
-  return NVML_OVERRIDE_CALL(nvml_library_entry, nvmlDeviceClearCpuAffinity,
-                         device);
+    return NVML_OVERRIDE_CALL(nvml_library_entry, nvmlDeviceClearCpuAffinity, device);
 }
 
-nvmlReturn_t nvmlDeviceClearEccErrorCounts(nvmlDevice_t device,
-                                           nvmlEccCounterType_t counterType) {
-  return NVML_OVERRIDE_CALL(nvml_library_entry, nvmlDeviceClearEccErrorCounts,
-                         device, counterType);
+nvmlReturn_t nvmlDeviceClearEccErrorCounts(nvmlDevice_t device, nvmlEccCounterType_t counterType) {
+    return NVML_OVERRIDE_CALL(nvml_library_entry, nvmlDeviceClearEccErrorCounts, device,
+                              counterType);
 }
 
 nvmlReturn_t nvmlDeviceDiscoverGpus(nvmlPciInfo_t *pciInfo) {
-  return NVML_OVERRIDE_CALL(nvml_library_entry, nvmlDeviceDiscoverGpus, pciInfo);
+    return NVML_OVERRIDE_CALL(nvml_library_entry, nvmlDeviceDiscoverGpus, pciInfo);
 }
 
-nvmlReturn_t
-nvmlDeviceFreezeNvLinkUtilizationCounter(nvmlDevice_t device, unsigned int link,
-                                         unsigned int counter,
-                                         nvmlEnableState_t freeze) {
-  return NVML_OVERRIDE_CALL(nvml_library_entry,
-                         nvmlDeviceFreezeNvLinkUtilizationCounter, device, link,
-                         counter, freeze);
+nvmlReturn_t nvmlDeviceFreezeNvLinkUtilizationCounter(nvmlDevice_t device, unsigned int link,
+                                                      unsigned int counter,
+                                                      nvmlEnableState_t freeze) {
+    return NVML_OVERRIDE_CALL(nvml_library_entry, nvmlDeviceFreezeNvLinkUtilizationCounter, device,
+                              link, counter, freeze);
 }
 
-nvmlReturn_t nvmlDeviceGetAccountingBufferSize(nvmlDevice_t device,
-                                               unsigned int *bufferSize) {
-  return NVML_OVERRIDE_CALL(nvml_library_entry, nvmlDeviceGetAccountingBufferSize,
-                         device, bufferSize);
+nvmlReturn_t nvmlDeviceGetAccountingBufferSize(nvmlDevice_t device, unsigned int *bufferSize) {
+    return NVML_OVERRIDE_CALL(nvml_library_entry, nvmlDeviceGetAccountingBufferSize, device,
+                              bufferSize);
 }
 
-nvmlReturn_t nvmlDeviceGetAccountingMode(nvmlDevice_t device,
-                                         nvmlEnableState_t *mode) {
-  return NVML_OVERRIDE_CALL(nvml_library_entry, nvmlDeviceGetAccountingMode,
-                         device, mode);
+nvmlReturn_t nvmlDeviceGetAccountingMode(nvmlDevice_t device, nvmlEnableState_t *mode) {
+    return NVML_OVERRIDE_CALL(nvml_library_entry, nvmlDeviceGetAccountingMode, device, mode);
 }
 
-nvmlReturn_t nvmlDeviceGetAccountingPids(nvmlDevice_t device,
-                                         unsigned int *count,
+nvmlReturn_t nvmlDeviceGetAccountingPids(nvmlDevice_t device, unsigned int *count,
                                          unsigned int *pids) {
-  return NVML_OVERRIDE_CALL(nvml_library_entry, nvmlDeviceGetAccountingPids,
-                         device, count, pids);
+    return NVML_OVERRIDE_CALL(nvml_library_entry, nvmlDeviceGetAccountingPids, device, count, pids);
 }
 
 nvmlReturn_t nvmlDeviceGetAccountingStats(nvmlDevice_t device, unsigned int pid,
                                           nvmlAccountingStats_t *stats) {
-  return NVML_OVERRIDE_CALL(nvml_library_entry, nvmlDeviceGetAccountingStats,
-                         device, pid, stats);
+    return NVML_OVERRIDE_CALL(nvml_library_entry, nvmlDeviceGetAccountingStats, device, pid, stats);
 }
 
-nvmlReturn_t nvmlDeviceGetActiveVgpus(nvmlDevice_t device,
-                                      unsigned int *vgpuCount,
+nvmlReturn_t nvmlDeviceGetActiveVgpus(nvmlDevice_t device, unsigned int *vgpuCount,
                                       nvmlVgpuInstance_t *vgpuInstances) {
-  return NVML_OVERRIDE_CALL(nvml_library_entry, nvmlDeviceGetActiveVgpus, device,
-                         vgpuCount, vgpuInstances);
+    return NVML_OVERRIDE_CALL(nvml_library_entry, nvmlDeviceGetActiveVgpus, device, vgpuCount,
+                              vgpuInstances);
 }
 
-nvmlReturn_t nvmlDeviceGetAPIRestriction(nvmlDevice_t device,
-                                         nvmlRestrictedAPI_t apiType,
+nvmlReturn_t nvmlDeviceGetAPIRestriction(nvmlDevice_t device, nvmlRestrictedAPI_t apiType,
                                          nvmlEnableState_t *isRestricted) {
-  return NVML_OVERRIDE_CALL(nvml_library_entry, nvmlDeviceGetAPIRestriction,
-                         device, apiType, isRestricted);
+    return NVML_OVERRIDE_CALL(nvml_library_entry, nvmlDeviceGetAPIRestriction, device, apiType,
+                              isRestricted);
 }
 
-nvmlReturn_t nvmlDeviceGetApplicationsClock(nvmlDevice_t device,
-                                            nvmlClockType_t clockType,
+nvmlReturn_t nvmlDeviceGetApplicationsClock(nvmlDevice_t device, nvmlClockType_t clockType,
                                             unsigned int *clockMHz) {
-  return NVML_OVERRIDE_CALL(nvml_library_entry, nvmlDeviceGetApplicationsClock,
-                      		device, clockType, clockMHz);
+    return NVML_OVERRIDE_CALL(nvml_library_entry, nvmlDeviceGetApplicationsClock, device, clockType,
+                              clockMHz);
 }
 
-nvmlReturn_t
-nvmlDeviceGetAutoBoostedClocksEnabled(nvmlDevice_t device,
-                                      nvmlEnableState_t *isEnabled,
-                                      nvmlEnableState_t *defaultIsEnabled) {
-  return NVML_OVERRIDE_CALL(nvml_library_entry,
-                         nvmlDeviceGetAutoBoostedClocksEnabled, device,
-                         isEnabled, defaultIsEnabled);
+nvmlReturn_t nvmlDeviceGetAutoBoostedClocksEnabled(nvmlDevice_t device,
+                                                   nvmlEnableState_t *isEnabled,
+                                                   nvmlEnableState_t *defaultIsEnabled) {
+    return NVML_OVERRIDE_CALL(nvml_library_entry, nvmlDeviceGetAutoBoostedClocksEnabled, device,
+                              isEnabled, defaultIsEnabled);
 }
 
-nvmlReturn_t nvmlDeviceGetBAR1MemoryInfo(nvmlDevice_t device,
-                                         nvmlBAR1Memory_t *bar1Memory) {
-  return NVML_OVERRIDE_CALL(nvml_library_entry, nvmlDeviceGetBAR1MemoryInfo,
-                         device, bar1Memory);
+nvmlReturn_t nvmlDeviceGetBAR1MemoryInfo(nvmlDevice_t device, nvmlBAR1Memory_t *bar1Memory) {
+    return NVML_OVERRIDE_CALL(nvml_library_entry, nvmlDeviceGetBAR1MemoryInfo, device, bar1Memory);
 }
 
 nvmlReturn_t nvmlDeviceGetBoardId(nvmlDevice_t device, unsigned int *boardId) {
-  return NVML_OVERRIDE_CALL(nvml_library_entry, nvmlDeviceGetBoardId, device,
-                         boardId);
+    return NVML_OVERRIDE_CALL(nvml_library_entry, nvmlDeviceGetBoardId, device, boardId);
 }
 
 nvmlReturn_t nvmlDeviceGetBoardPartNumber(nvmlDevice_t device, char *partNumber,
                                           unsigned int length) {
-  return NVML_OVERRIDE_CALL(nvml_library_entry, nvmlDeviceGetBoardPartNumber,
-                         device, partNumber, length);
+    return NVML_OVERRIDE_CALL(nvml_library_entry, nvmlDeviceGetBoardPartNumber, device, partNumber,
+                              length);
 }
 
 nvmlReturn_t nvmlDeviceGetBrand(nvmlDevice_t device, nvmlBrandType_t *type) {
-  return NVML_OVERRIDE_CALL(nvml_library_entry, nvmlDeviceGetBrand, device, type);
+    return NVML_OVERRIDE_CALL(nvml_library_entry, nvmlDeviceGetBrand, device, type);
 }
 
-nvmlReturn_t
-nvmlDeviceGetBridgeChipInfo(nvmlDevice_t device,
-                            nvmlBridgeChipHierarchy_t *bridgeHierarchy) {
-  return NVML_OVERRIDE_CALL(nvml_library_entry, nvmlDeviceGetBridgeChipInfo,
-                         device, bridgeHierarchy);
+nvmlReturn_t nvmlDeviceGetBridgeChipInfo(nvmlDevice_t device,
+                                         nvmlBridgeChipHierarchy_t *bridgeHierarchy) {
+    return NVML_OVERRIDE_CALL(nvml_library_entry, nvmlDeviceGetBridgeChipInfo, device,
+                              bridgeHierarchy);
 }
 
 nvmlReturn_t nvmlDeviceGetClock(nvmlDevice_t device, nvmlClockType_t clockType,
                                 nvmlClockId_t clockId, unsigned int *clockMHz) {
-  return NVML_OVERRIDE_CALL(nvml_library_entry, nvmlDeviceGetClock, device,
-                         clockType, clockId, clockMHz);
+    return NVML_OVERRIDE_CALL(nvml_library_entry, nvmlDeviceGetClock, device, clockType, clockId,
+                              clockMHz);
 }
 
 nvmlReturn_t nvmlDeviceGetClockInfo(nvmlDevice_t device, nvmlClockType_t type,
                                     unsigned int *clock) {
-  return NVML_OVERRIDE_CALL(nvml_library_entry, nvmlDeviceGetClockInfo, device,
-                         type, clock);
+    return NVML_OVERRIDE_CALL(nvml_library_entry, nvmlDeviceGetClockInfo, device, type, clock);
 }
 
-nvmlReturn_t nvmlDeviceGetComputeMode(nvmlDevice_t device,
-                                      nvmlComputeMode_t *mode) {
-  return NVML_OVERRIDE_CALL(nvml_library_entry, nvmlDeviceGetComputeMode, device,
-                         mode);
+nvmlReturn_t nvmlDeviceGetComputeMode(nvmlDevice_t device, nvmlComputeMode_t *mode) {
+    return NVML_OVERRIDE_CALL(nvml_library_entry, nvmlDeviceGetComputeMode, device, mode);
 }
 
-nvmlReturn_t nvmlDeviceGetCpuAffinity(nvmlDevice_t device,
-                                      unsigned int cpuSetSize,
+nvmlReturn_t nvmlDeviceGetCpuAffinity(nvmlDevice_t device, unsigned int cpuSetSize,
                                       unsigned long *cpuSet) {
-  return NVML_OVERRIDE_CALL(nvml_library_entry, nvmlDeviceGetCpuAffinity, device,
-                         cpuSetSize, cpuSet);
+    return NVML_OVERRIDE_CALL(nvml_library_entry, nvmlDeviceGetCpuAffinity, device, cpuSetSize,
+                              cpuSet);
 }
 
-nvmlReturn_t nvmlDeviceGetCreatableVgpus(nvmlDevice_t device,
-                                         unsigned int *vgpuCount,
+nvmlReturn_t nvmlDeviceGetCreatableVgpus(nvmlDevice_t device, unsigned int *vgpuCount,
                                          nvmlVgpuTypeId_t *vgpuTypeIds) {
-  return NVML_OVERRIDE_CALL(nvml_library_entry, nvmlDeviceGetCreatableVgpus,
-                         device, vgpuCount, vgpuTypeIds);
+    return NVML_OVERRIDE_CALL(nvml_library_entry, nvmlDeviceGetCreatableVgpus, device, vgpuCount,
+                              vgpuTypeIds);
 }
 
-nvmlReturn_t nvmlDeviceGetCudaComputeCapability(nvmlDevice_t device, int *major,
-                                                int *minor) {
-  return NVML_OVERRIDE_CALL(nvml_library_entry, nvmlDeviceGetCudaComputeCapability,
-                         device, major, minor);
+nvmlReturn_t nvmlDeviceGetCudaComputeCapability(nvmlDevice_t device, int *major, int *minor) {
+    return NVML_OVERRIDE_CALL(nvml_library_entry, nvmlDeviceGetCudaComputeCapability, device, major,
+                              minor);
 }
 
-nvmlReturn_t nvmlDeviceGetCurrentClocksThrottleReasons(
-    nvmlDevice_t device, unsigned long long *clocksThrottleReasons) {
-  return NVML_OVERRIDE_CALL(nvml_library_entry,
-                         nvmlDeviceGetCurrentClocksThrottleReasons, device,
-                         clocksThrottleReasons);
+nvmlReturn_t nvmlDeviceGetCurrentClocksThrottleReasons(nvmlDevice_t device,
+                                                       unsigned long long *clocksThrottleReasons) {
+    return NVML_OVERRIDE_CALL(nvml_library_entry, nvmlDeviceGetCurrentClocksThrottleReasons, device,
+                              clocksThrottleReasons);
 }
 
-nvmlReturn_t nvmlDeviceGetCurrPcieLinkGeneration(nvmlDevice_t device,
-                                                 unsigned int *currLinkGen) {
-  return NVML_OVERRIDE_CALL(nvml_library_entry,
-                         nvmlDeviceGetCurrPcieLinkGeneration, device,
-                         currLinkGen);
+nvmlReturn_t nvmlDeviceGetCurrPcieLinkGeneration(nvmlDevice_t device, unsigned int *currLinkGen) {
+    return NVML_OVERRIDE_CALL(nvml_library_entry, nvmlDeviceGetCurrPcieLinkGeneration, device,
+                              currLinkGen);
 }
 
-nvmlReturn_t nvmlDeviceGetCurrPcieLinkWidth(nvmlDevice_t device,
-                                            unsigned int *currLinkWidth) {
-  return NVML_OVERRIDE_CALL(nvml_library_entry, nvmlDeviceGetCurrPcieLinkWidth,
-                         device, currLinkWidth);
+nvmlReturn_t nvmlDeviceGetCurrPcieLinkWidth(nvmlDevice_t device, unsigned int *currLinkWidth) {
+    return NVML_OVERRIDE_CALL(nvml_library_entry, nvmlDeviceGetCurrPcieLinkWidth, device,
+                              currLinkWidth);
 }
 
-nvmlReturn_t nvmlDeviceGetDecoderUtilization(nvmlDevice_t device,
-                                             unsigned int *utilization,
+nvmlReturn_t nvmlDeviceGetDecoderUtilization(nvmlDevice_t device, unsigned int *utilization,
                                              unsigned int *samplingPeriodUs) {
-  return NVML_OVERRIDE_CALL(nvml_library_entry, nvmlDeviceGetDecoderUtilization,
-                         device, utilization, samplingPeriodUs);
+    return NVML_OVERRIDE_CALL(nvml_library_entry, nvmlDeviceGetDecoderUtilization, device,
+                              utilization, samplingPeriodUs);
 }
 
-nvmlReturn_t nvmlDeviceGetDefaultApplicationsClock(nvmlDevice_t device,
-                                                   nvmlClockType_t clockType,
+nvmlReturn_t nvmlDeviceGetDefaultApplicationsClock(nvmlDevice_t device, nvmlClockType_t clockType,
                                                    unsigned int *clockMHz) {
-  return NVML_OVERRIDE_CALL(nvml_library_entry,
-                         nvmlDeviceGetDefaultApplicationsClock, device,
-                         clockType, clockMHz);
+    return NVML_OVERRIDE_CALL(nvml_library_entry, nvmlDeviceGetDefaultApplicationsClock, device,
+                              clockType, clockMHz);
 }
 
-nvmlReturn_t nvmlDeviceGetDetailedEccErrors(nvmlDevice_t device,
-                                            nvmlMemoryErrorType_t errorType,
+nvmlReturn_t nvmlDeviceGetDetailedEccErrors(nvmlDevice_t device, nvmlMemoryErrorType_t errorType,
                                             nvmlEccCounterType_t counterType,
                                             nvmlEccErrorCounts_t *eccCounts) {
-  return NVML_OVERRIDE_CALL(nvml_library_entry, nvmlDeviceGetDetailedEccErrors,
-                         device, errorType, counterType, eccCounts);
+    return NVML_OVERRIDE_CALL(nvml_library_entry, nvmlDeviceGetDetailedEccErrors, device, errorType,
+                              counterType, eccCounts);
 }
 
-nvmlReturn_t nvmlDeviceGetDisplayActive(nvmlDevice_t device,
-                                        nvmlEnableState_t *isActive) {
-  return NVML_OVERRIDE_CALL(nvml_library_entry, nvmlDeviceGetDisplayActive, device,
-                         isActive);
+nvmlReturn_t nvmlDeviceGetDisplayActive(nvmlDevice_t device, nvmlEnableState_t *isActive) {
+    return NVML_OVERRIDE_CALL(nvml_library_entry, nvmlDeviceGetDisplayActive, device, isActive);
 }
 
-nvmlReturn_t nvmlDeviceGetDisplayMode(nvmlDevice_t device,
-                                      nvmlEnableState_t *display) {
-  return NVML_OVERRIDE_CALL(nvml_library_entry, nvmlDeviceGetDisplayMode, device,
-                         display);
+nvmlReturn_t nvmlDeviceGetDisplayMode(nvmlDevice_t device, nvmlEnableState_t *display) {
+    return NVML_OVERRIDE_CALL(nvml_library_entry, nvmlDeviceGetDisplayMode, device, display);
 }
 
-nvmlReturn_t nvmlDeviceGetDriverModel(nvmlDevice_t device,
-                                      nvmlDriverModel_t *current,
+nvmlReturn_t nvmlDeviceGetDriverModel(nvmlDevice_t device, nvmlDriverModel_t *current,
                                       nvmlDriverModel_t *pending) {
-  return NVML_OVERRIDE_CALL(nvml_library_entry, nvmlDeviceGetDriverModel, device,
-                         current, pending);
+    return NVML_OVERRIDE_CALL(nvml_library_entry, nvmlDeviceGetDriverModel, device, current,
+                              pending);
 }
 
-nvmlReturn_t nvmlDeviceGetEccMode(nvmlDevice_t device,
-                                  nvmlEnableState_t *current,
+nvmlReturn_t nvmlDeviceGetEccMode(nvmlDevice_t device, nvmlEnableState_t *current,
                                   nvmlEnableState_t *pending) {
-  return NVML_OVERRIDE_CALL(nvml_library_entry, nvmlDeviceGetEccMode, device,
-                         current, pending);
+    return NVML_OVERRIDE_CALL(nvml_library_entry, nvmlDeviceGetEccMode, device, current, pending);
 }
 
 nvmlReturn_t nvmlDeviceSetEccMode(nvmlDevice_t device, nvmlEnableState_t ecc) {
-  return NVML_OVERRIDE_CALL(nvml_library_entry, nvmlDeviceSetEccMode, device, ecc);
+    return NVML_OVERRIDE_CALL(nvml_library_entry, nvmlDeviceSetEccMode, device, ecc);
 }
 
-nvmlReturn_t nvmlDeviceGetEncoderCapacity(nvmlDevice_t device,
-                                          nvmlEncoderType_t encoderQueryType,
+nvmlReturn_t nvmlDeviceGetEncoderCapacity(nvmlDevice_t device, nvmlEncoderType_t encoderQueryType,
                                           unsigned int *encoderCapacity) {
-  return NVML_OVERRIDE_CALL(nvml_library_entry, nvmlDeviceGetEncoderCapacity,
-                         device, encoderQueryType, encoderCapacity);
+    return NVML_OVERRIDE_CALL(nvml_library_entry, nvmlDeviceGetEncoderCapacity, device,
+                              encoderQueryType, encoderCapacity);
 }
 
-nvmlReturn_t
-nvmlDeviceGetEncoderSessions(nvmlDevice_t device, unsigned int *sessionCount,
-                             nvmlEncoderSessionInfo_t *sessionInfos) {
-  return NVML_OVERRIDE_CALL(nvml_library_entry, nvmlDeviceGetEncoderSessions,
-                         device, sessionCount, sessionInfos);
+nvmlReturn_t nvmlDeviceGetEncoderSessions(nvmlDevice_t device, unsigned int *sessionCount,
+                                          nvmlEncoderSessionInfo_t *sessionInfos) {
+    return NVML_OVERRIDE_CALL(nvml_library_entry, nvmlDeviceGetEncoderSessions, device,
+                              sessionCount, sessionInfos);
 }
 
-nvmlReturn_t nvmlDeviceGetEncoderStats(nvmlDevice_t device,
-                                       unsigned int *sessionCount,
-                                       unsigned int *averageFps,
-                                       unsigned int *averageLatency) {
-  return NVML_OVERRIDE_CALL(nvml_library_entry, nvmlDeviceGetEncoderStats, device,
-                         sessionCount, averageFps, averageLatency);
+nvmlReturn_t nvmlDeviceGetEncoderStats(nvmlDevice_t device, unsigned int *sessionCount,
+                                       unsigned int *averageFps, unsigned int *averageLatency) {
+    return NVML_OVERRIDE_CALL(nvml_library_entry, nvmlDeviceGetEncoderStats, device, sessionCount,
+                              averageFps, averageLatency);
 }
 
-nvmlReturn_t nvmlDeviceGetEncoderUtilization(nvmlDevice_t device,
-                                             unsigned int *utilization,
+nvmlReturn_t nvmlDeviceGetEncoderUtilization(nvmlDevice_t device, unsigned int *utilization,
                                              unsigned int *samplingPeriodUs) {
-  return NVML_OVERRIDE_CALL(nvml_library_entry, nvmlDeviceGetEncoderUtilization,
-                         device, utilization, samplingPeriodUs);
+    return NVML_OVERRIDE_CALL(nvml_library_entry, nvmlDeviceGetEncoderUtilization, device,
+                              utilization, samplingPeriodUs);
 }
 
-nvmlReturn_t nvmlDeviceGetEnforcedPowerLimit(nvmlDevice_t device,
-                                             unsigned int *limit) {
-  return NVML_OVERRIDE_CALL(nvml_library_entry, nvmlDeviceGetEnforcedPowerLimit,
-                         device, limit);
+nvmlReturn_t nvmlDeviceGetEnforcedPowerLimit(nvmlDevice_t device, unsigned int *limit) {
+    return NVML_OVERRIDE_CALL(nvml_library_entry, nvmlDeviceGetEnforcedPowerLimit, device, limit);
 }
 
 nvmlReturn_t nvmlDeviceGetFanSpeed(nvmlDevice_t device, unsigned int *speed) {
-  return NVML_OVERRIDE_CALL(nvml_library_entry, nvmlDeviceGetFanSpeed, device,
-                         speed);
+    return NVML_OVERRIDE_CALL(nvml_library_entry, nvmlDeviceGetFanSpeed, device, speed);
 }
 
-nvmlReturn_t nvmlDeviceGetFanSpeed_v2(nvmlDevice_t device,
-                                      unsigned int *speed) {
-  return NVML_OVERRIDE_CALL(nvml_library_entry, nvmlDeviceGetFanSpeed_v2, device,
-                         speed);
+nvmlReturn_t nvmlDeviceGetFanSpeed_v2(nvmlDevice_t device, unsigned int *speed) {
+    return NVML_OVERRIDE_CALL(nvml_library_entry, nvmlDeviceGetFanSpeed_v2, device, speed);
 }
 
 nvmlReturn_t nvmlDeviceGetFieldValues(nvmlDevice_t device, int valuesCount,
                                       nvmlFieldValue_t *values) {
-  return NVML_OVERRIDE_CALL(nvml_library_entry, nvmlDeviceGetFieldValues, device,
-                         valuesCount, values);
+    return NVML_OVERRIDE_CALL(nvml_library_entry, nvmlDeviceGetFieldValues, device, valuesCount,
+                              values);
 }
 
-nvmlReturn_t nvmlDeviceGetGpuOperationMode(nvmlDevice_t device,
-                                           nvmlGpuOperationMode_t *current,
+nvmlReturn_t nvmlDeviceGetGpuOperationMode(nvmlDevice_t device, nvmlGpuOperationMode_t *current,
                                            nvmlGpuOperationMode_t *pending) {
-  return NVML_OVERRIDE_CALL(nvml_library_entry, nvmlDeviceGetGpuOperationMode,
-                         device, current, pending);
+    return NVML_OVERRIDE_CALL(nvml_library_entry, nvmlDeviceGetGpuOperationMode, device, current,
+                              pending);
 }
 
-nvmlReturn_t nvmlDeviceGetGraphicsRunningProcesses(nvmlDevice_t device,
-                                                   unsigned int *infoCount,
+nvmlReturn_t nvmlDeviceGetGraphicsRunningProcesses(nvmlDevice_t device, unsigned int *infoCount,
                                                    nvmlProcessInfo_t *infos) {
-  return NVML_OVERRIDE_CALL(nvml_library_entry,
-                         nvmlDeviceGetGraphicsRunningProcesses, device,
-                         infoCount, infos);
+    return NVML_OVERRIDE_CALL(nvml_library_entry, nvmlDeviceGetGraphicsRunningProcesses, device,
+                              infoCount, infos);
 }
 
 nvmlReturn_t nvmlDeviceGetGridLicensableFeatures(
-    nvmlDevice_t device,
-    nvmlGridLicensableFeatures_t *pGridLicensableFeatures) {
-  return NVML_OVERRIDE_CALL(nvml_library_entry,
-                         nvmlDeviceGetGridLicensableFeatures, device,
-                         pGridLicensableFeatures);
+    nvmlDevice_t device, nvmlGridLicensableFeatures_t *pGridLicensableFeatures) {
+    return NVML_OVERRIDE_CALL(nvml_library_entry, nvmlDeviceGetGridLicensableFeatures, device,
+                              pGridLicensableFeatures);
 }
 /*
 nvmlReturn_t nvmlDeviceGetHandleByPciBusId_v2(const char *pciBusId,
@@ -391,61 +331,51 @@ nvmlReturn_t nvmlDeviceGetHandleByUUID(const char *uuid, nvmlDevice_t *device) {
                          device);
 }*/
 
-
 nvmlReturn_t nvmlDeviceGetInforomConfigurationChecksum(nvmlDevice_t device,
                                                        unsigned int *checksum) {
-  return NVML_OVERRIDE_CALL(nvml_library_entry,
-                         nvmlDeviceGetInforomConfigurationChecksum, device,
-                         checksum);
+    return NVML_OVERRIDE_CALL(nvml_library_entry, nvmlDeviceGetInforomConfigurationChecksum, device,
+                              checksum);
 }
 
-nvmlReturn_t nvmlDeviceGetInforomImageVersion(nvmlDevice_t device,
-                                              char *version,
+nvmlReturn_t nvmlDeviceGetInforomImageVersion(nvmlDevice_t device, char *version,
                                               unsigned int length) {
-  return NVML_OVERRIDE_CALL(nvml_library_entry, nvmlDeviceGetInforomImageVersion,
-                         device, version, length);
+    return NVML_OVERRIDE_CALL(nvml_library_entry, nvmlDeviceGetInforomImageVersion, device, version,
+                              length);
 }
 
-nvmlReturn_t nvmlDeviceGetInforomVersion(nvmlDevice_t device,
-                                         nvmlInforomObject_t object,
+nvmlReturn_t nvmlDeviceGetInforomVersion(nvmlDevice_t device, nvmlInforomObject_t object,
                                          char *version, unsigned int length) {
-  return NVML_OVERRIDE_CALL(nvml_library_entry, nvmlDeviceGetInforomVersion,
-                         device, object, version, length);
+    return NVML_OVERRIDE_CALL(nvml_library_entry, nvmlDeviceGetInforomVersion, device, object,
+                              version, length);
 }
 
-nvmlReturn_t nvmlDeviceGetMaxClockInfo(nvmlDevice_t device,
-                                       nvmlClockType_t type,
+nvmlReturn_t nvmlDeviceGetMaxClockInfo(nvmlDevice_t device, nvmlClockType_t type,
                                        unsigned int *clock) {
-  return NVML_OVERRIDE_CALL(nvml_library_entry, nvmlDeviceGetMaxClockInfo, device,
-                         type, clock);
+    return NVML_OVERRIDE_CALL(nvml_library_entry, nvmlDeviceGetMaxClockInfo, device, type, clock);
 }
 
-nvmlReturn_t nvmlDeviceGetMaxCustomerBoostClock(nvmlDevice_t device,
-                                                nvmlClockType_t clockType,
+nvmlReturn_t nvmlDeviceGetMaxCustomerBoostClock(nvmlDevice_t device, nvmlClockType_t clockType,
                                                 unsigned int *clockMHz) {
-  return NVML_OVERRIDE_CALL(nvml_library_entry, nvmlDeviceGetMaxCustomerBoostClock,
-                         device, clockType, clockMHz);
+    return NVML_OVERRIDE_CALL(nvml_library_entry, nvmlDeviceGetMaxCustomerBoostClock, device,
+                              clockType, clockMHz);
 }
 
-nvmlReturn_t nvmlDeviceGetMaxPcieLinkGeneration(nvmlDevice_t device,
-                                                unsigned int *maxLinkGen) {
-  return NVML_OVERRIDE_CALL(nvml_library_entry, nvmlDeviceGetMaxPcieLinkGeneration,
-                         device, maxLinkGen);
+nvmlReturn_t nvmlDeviceGetMaxPcieLinkGeneration(nvmlDevice_t device, unsigned int *maxLinkGen) {
+    return NVML_OVERRIDE_CALL(nvml_library_entry, nvmlDeviceGetMaxPcieLinkGeneration, device,
+                              maxLinkGen);
 }
 
-nvmlReturn_t nvmlDeviceGetMaxPcieLinkWidth(nvmlDevice_t device,
-                                           unsigned int *maxLinkWidth) {
-  return NVML_OVERRIDE_CALL(nvml_library_entry, nvmlDeviceGetMaxPcieLinkWidth,
-                         device, maxLinkWidth);
+nvmlReturn_t nvmlDeviceGetMaxPcieLinkWidth(nvmlDevice_t device, unsigned int *maxLinkWidth) {
+    return NVML_OVERRIDE_CALL(nvml_library_entry, nvmlDeviceGetMaxPcieLinkWidth, device,
+                              maxLinkWidth);
 }
 
-nvmlReturn_t nvmlDeviceGetMemoryErrorCounter(nvmlDevice_t device,
-                                             nvmlMemoryErrorType_t errorType,
+nvmlReturn_t nvmlDeviceGetMemoryErrorCounter(nvmlDevice_t device, nvmlMemoryErrorType_t errorType,
                                              nvmlEccCounterType_t counterType,
                                              nvmlMemoryLocation_t locationType,
                                              unsigned long long *count) {
-  return NVML_OVERRIDE_CALL(nvml_library_entry, nvmlDeviceGetMemoryErrorCounter,
-                         device, errorType, counterType, locationType, count);
+    return NVML_OVERRIDE_CALL(nvml_library_entry, nvmlDeviceGetMemoryErrorCounter, device,
+                              errorType, counterType, locationType, count);
 }
 /*
 nvmlReturn_t nvmlDeviceGetMemoryInfo(nvmlDevice_t device,
@@ -454,47 +384,37 @@ nvmlReturn_t nvmlDeviceGetMemoryInfo(nvmlDevice_t device,
                          memory);
 }
 */
-nvmlReturn_t nvmlDeviceGetMinorNumber(nvmlDevice_t device,
-                                      unsigned int *minorNumber) {
-  return NVML_OVERRIDE_CALL(nvml_library_entry, nvmlDeviceGetMinorNumber, device,
-                         minorNumber);
+nvmlReturn_t nvmlDeviceGetMinorNumber(nvmlDevice_t device, unsigned int *minorNumber) {
+    return NVML_OVERRIDE_CALL(nvml_library_entry, nvmlDeviceGetMinorNumber, device, minorNumber);
 }
 
 // Guessed function proto type
-nvmlReturn_t nvmlDeviceGetMPSComputeRunningProcesses(nvmlDevice_t device,
-                                                     unsigned int *infoCount,
+nvmlReturn_t nvmlDeviceGetMPSComputeRunningProcesses(nvmlDevice_t device, unsigned int *infoCount,
                                                      void *infos) {
-  return NVML_OVERRIDE_CALL(nvml_library_entry,
-                         nvmlDeviceGetMPSComputeRunningProcesses, device,
-                         infoCount, infos);
+    return NVML_OVERRIDE_CALL(nvml_library_entry, nvmlDeviceGetMPSComputeRunningProcesses, device,
+                              infoCount, infos);
 }
 
-nvmlReturn_t nvmlDeviceGetMultiGpuBoard(nvmlDevice_t device,
-                                        unsigned int *multiGpuBool) {
-  return NVML_OVERRIDE_CALL(nvml_library_entry, nvmlDeviceGetMultiGpuBoard, device,
-                         multiGpuBool);
+nvmlReturn_t nvmlDeviceGetMultiGpuBoard(nvmlDevice_t device, unsigned int *multiGpuBool) {
+    return NVML_OVERRIDE_CALL(nvml_library_entry, nvmlDeviceGetMultiGpuBoard, device, multiGpuBool);
 }
 
-nvmlReturn_t nvmlDeviceGetName(nvmlDevice_t device, char *name,
-                               unsigned int length) {
-  return NVML_OVERRIDE_CALL(nvml_library_entry, nvmlDeviceGetName, device, name,
-                         length);
+nvmlReturn_t nvmlDeviceGetName(nvmlDevice_t device, char *name, unsigned int length) {
+    return NVML_OVERRIDE_CALL(nvml_library_entry, nvmlDeviceGetName, device, name, length);
 }
 
-nvmlReturn_t nvmlDeviceGetNvLinkCapability(nvmlDevice_t device,
-                                           unsigned int link,
+nvmlReturn_t nvmlDeviceGetNvLinkCapability(nvmlDevice_t device, unsigned int link,
                                            nvmlNvLinkCapability_t capability,
                                            unsigned int *capResult) {
-  return NVML_OVERRIDE_CALL(nvml_library_entry, nvmlDeviceGetNvLinkCapability,
-                         device, link, capability, capResult);
+    return NVML_OVERRIDE_CALL(nvml_library_entry, nvmlDeviceGetNvLinkCapability, device, link,
+                              capability, capResult);
 }
 
-nvmlReturn_t nvmlDeviceGetNvLinkErrorCounter(nvmlDevice_t device,
-                                             unsigned int link,
+nvmlReturn_t nvmlDeviceGetNvLinkErrorCounter(nvmlDevice_t device, unsigned int link,
                                              nvmlNvLinkErrorCounter_t counter,
                                              unsigned long long *counterValue) {
-  return NVML_OVERRIDE_CALL(nvml_library_entry, nvmlDeviceGetNvLinkErrorCounter,
-                         device, link, counter, counterValue);
+    return NVML_OVERRIDE_CALL(nvml_library_entry, nvmlDeviceGetNvLinkErrorCounter, device, link,
+                              counter, counterValue);
 }
 /*
 nvmlReturn_t nvmlDeviceGetNvLinkRemotePciInfo_v2(nvmlDevice_t device,
@@ -514,1039 +434,867 @@ nvmlReturn_t nvmlDeviceGetNvLinkRemotePciInfo(nvmlDevice_t device,
 
 nvmlReturn_t nvmlDeviceGetNvLinkState(nvmlDevice_t device, unsigned int link,
                                       nvmlEnableState_t *isActive) {
-  return NVML_OVERRIDE_CALL(nvml_library_entry, nvmlDeviceGetNvLinkState, device,
-                         link, isActive);
+    return NVML_OVERRIDE_CALL(nvml_library_entry, nvmlDeviceGetNvLinkState, device, link, isActive);
 }
 
-nvmlReturn_t
-nvmlDeviceGetNvLinkUtilizationControl(nvmlDevice_t device, unsigned int link,
-                                      unsigned int counter,
-                                      nvmlNvLinkUtilizationControl_t *control) {
-  return NVML_OVERRIDE_CALL(nvml_library_entry,
-                         nvmlDeviceGetNvLinkUtilizationControl, device, link,
-                         counter, control);
+nvmlReturn_t nvmlDeviceGetNvLinkUtilizationControl(nvmlDevice_t device, unsigned int link,
+                                                   unsigned int counter,
+                                                   nvmlNvLinkUtilizationControl_t *control) {
+    return NVML_OVERRIDE_CALL(nvml_library_entry, nvmlDeviceGetNvLinkUtilizationControl, device,
+                              link, counter, control);
 }
 
-nvmlReturn_t nvmlDeviceGetNvLinkUtilizationCounter(
-    nvmlDevice_t device, unsigned int link, unsigned int counter,
-    unsigned long long *rxcounter, unsigned long long *txcounter) {
-  return NVML_OVERRIDE_CALL(nvml_library_entry,
-                         nvmlDeviceGetNvLinkUtilizationCounter, device, link,
-                         counter, rxcounter, txcounter);
+nvmlReturn_t nvmlDeviceGetNvLinkUtilizationCounter(nvmlDevice_t device, unsigned int link,
+                                                   unsigned int counter,
+                                                   unsigned long long *rxcounter,
+                                                   unsigned long long *txcounter) {
+    return NVML_OVERRIDE_CALL(nvml_library_entry, nvmlDeviceGetNvLinkUtilizationCounter, device,
+                              link, counter, rxcounter, txcounter);
 }
 
 nvmlReturn_t nvmlDeviceGetNvLinkVersion(nvmlDevice_t device, unsigned int link,
                                         unsigned int *version) {
-  return NVML_OVERRIDE_CALL(nvml_library_entry, nvmlDeviceGetNvLinkVersion, device,
-                         link, version);
+    return NVML_OVERRIDE_CALL(nvml_library_entry, nvmlDeviceGetNvLinkVersion, device, link,
+                              version);
 }
 
 nvmlReturn_t nvmlDeviceGetP2PStatus(nvmlDevice_t device1, nvmlDevice_t device2,
-                                    nvmlGpuP2PCapsIndex_t p2pIndex,
-                                    nvmlGpuP2PStatus_t *p2pStatus) {
-  return NVML_OVERRIDE_CALL(nvml_library_entry, nvmlDeviceGetP2PStatus, device1,
-                         device2, p2pIndex, p2pStatus);
+                                    nvmlGpuP2PCapsIndex_t p2pIndex, nvmlGpuP2PStatus_t *p2pStatus) {
+    return NVML_OVERRIDE_CALL(nvml_library_entry, nvmlDeviceGetP2PStatus, device1, device2,
+                              p2pIndex, p2pStatus);
 }
 
-nvmlReturn_t nvmlDeviceGetPcieReplayCounter(nvmlDevice_t device,
-                                            unsigned int *value) {
-  return NVML_OVERRIDE_CALL(nvml_library_entry, nvmlDeviceGetPcieReplayCounter,
-                         device, value);
+nvmlReturn_t nvmlDeviceGetPcieReplayCounter(nvmlDevice_t device, unsigned int *value) {
+    return NVML_OVERRIDE_CALL(nvml_library_entry, nvmlDeviceGetPcieReplayCounter, device, value);
 }
 
-nvmlReturn_t nvmlDeviceGetPcieThroughput(nvmlDevice_t device,
-                                         nvmlPcieUtilCounter_t counter,
+nvmlReturn_t nvmlDeviceGetPcieThroughput(nvmlDevice_t device, nvmlPcieUtilCounter_t counter,
                                          unsigned int *value) {
-  return NVML_OVERRIDE_CALL(nvml_library_entry, nvmlDeviceGetPcieThroughput,
-                         device, counter, value);
+    return NVML_OVERRIDE_CALL(nvml_library_entry, nvmlDeviceGetPcieThroughput, device, counter,
+                              value);
 }
 
-nvmlReturn_t nvmlDeviceGetPerformanceState(nvmlDevice_t device,
-                                           nvmlPstates_t *pState) {
-  return NVML_OVERRIDE_CALL(nvml_library_entry, nvmlDeviceGetPerformanceState,
-                         device, pState);
+nvmlReturn_t nvmlDeviceGetPerformanceState(nvmlDevice_t device, nvmlPstates_t *pState) {
+    return NVML_OVERRIDE_CALL(nvml_library_entry, nvmlDeviceGetPerformanceState, device, pState);
 }
 
-nvmlReturn_t nvmlDeviceGetPersistenceMode(nvmlDevice_t device,
-                                          nvmlEnableState_t *mode) {
-  return NVML_OVERRIDE_CALL(nvml_library_entry, nvmlDeviceGetPersistenceMode,
-                         device, mode);
+nvmlReturn_t nvmlDeviceGetPersistenceMode(nvmlDevice_t device, nvmlEnableState_t *mode) {
+    return NVML_OVERRIDE_CALL(nvml_library_entry, nvmlDeviceGetPersistenceMode, device, mode);
 }
 
-nvmlReturn_t
-nvmlDeviceGetPowerManagementDefaultLimit(nvmlDevice_t device,
-                                         unsigned int *defaultLimit) {
-  return NVML_OVERRIDE_CALL(nvml_library_entry,
-                         nvmlDeviceGetPowerManagementDefaultLimit, device,
-                         defaultLimit);
+nvmlReturn_t nvmlDeviceGetPowerManagementDefaultLimit(nvmlDevice_t device,
+                                                      unsigned int *defaultLimit) {
+    return NVML_OVERRIDE_CALL(nvml_library_entry, nvmlDeviceGetPowerManagementDefaultLimit, device,
+                              defaultLimit);
 }
 
-nvmlReturn_t nvmlDeviceGetPowerManagementLimit(nvmlDevice_t device,
-                                               unsigned int *limit) {
-  return NVML_OVERRIDE_CALL(nvml_library_entry, nvmlDeviceGetPowerManagementLimit,
-                         device, limit);
+nvmlReturn_t nvmlDeviceGetPowerManagementLimit(nvmlDevice_t device, unsigned int *limit) {
+    return NVML_OVERRIDE_CALL(nvml_library_entry, nvmlDeviceGetPowerManagementLimit, device, limit);
 }
 
-nvmlReturn_t nvmlDeviceGetPowerManagementLimitConstraints(
-    nvmlDevice_t device, unsigned int *minLimit, unsigned int *maxLimit) {
-  return NVML_OVERRIDE_CALL(nvml_library_entry,
-                         nvmlDeviceGetPowerManagementLimitConstraints, device,
-                         minLimit, maxLimit);
+nvmlReturn_t nvmlDeviceGetPowerManagementLimitConstraints(nvmlDevice_t device,
+                                                          unsigned int *minLimit,
+                                                          unsigned int *maxLimit) {
+    return NVML_OVERRIDE_CALL(nvml_library_entry, nvmlDeviceGetPowerManagementLimitConstraints,
+                              device, minLimit, maxLimit);
 }
 
-nvmlReturn_t nvmlDeviceGetPowerManagementMode(nvmlDevice_t device,
-                                              nvmlEnableState_t *mode) {
-  return NVML_OVERRIDE_CALL(nvml_library_entry, nvmlDeviceGetPowerManagementMode,
-                         device, mode);
+nvmlReturn_t nvmlDeviceGetPowerManagementMode(nvmlDevice_t device, nvmlEnableState_t *mode) {
+    return NVML_OVERRIDE_CALL(nvml_library_entry, nvmlDeviceGetPowerManagementMode, device, mode);
 }
 
-nvmlReturn_t nvmlDeviceGetPowerState(nvmlDevice_t device,
-                                     nvmlPstates_t *pState) {
-  return NVML_OVERRIDE_CALL(nvml_library_entry, nvmlDeviceGetPowerState, device,
-                         pState);
+nvmlReturn_t nvmlDeviceGetPowerState(nvmlDevice_t device, nvmlPstates_t *pState) {
+    return NVML_OVERRIDE_CALL(nvml_library_entry, nvmlDeviceGetPowerState, device, pState);
 }
 
 nvmlReturn_t nvmlDeviceGetPowerUsage(nvmlDevice_t device, unsigned int *power) {
-  return NVML_OVERRIDE_CALL(nvml_library_entry, nvmlDeviceGetPowerUsage, device,
-                         power);
+    return NVML_OVERRIDE_CALL(nvml_library_entry, nvmlDeviceGetPowerUsage, device, power);
 }
 
-nvmlReturn_t nvmlDeviceGetRetiredPages(nvmlDevice_t device,
-                                       nvmlPageRetirementCause_t cause,
-                                       unsigned int *pageCount,
-                                       unsigned long long *addresses) {
-  return NVML_OVERRIDE_CALL(nvml_library_entry, nvmlDeviceGetRetiredPages, device,
-                         cause, pageCount, addresses);
+nvmlReturn_t nvmlDeviceGetRetiredPages(nvmlDevice_t device, nvmlPageRetirementCause_t cause,
+                                       unsigned int *pageCount, unsigned long long *addresses) {
+    return NVML_OVERRIDE_CALL(nvml_library_entry, nvmlDeviceGetRetiredPages, device, cause,
+                              pageCount, addresses);
 }
 
-nvmlReturn_t
-nvmlDeviceGetRetiredPagesPendingStatus(nvmlDevice_t device,
-                                       nvmlEnableState_t *isPending) {
-  return NVML_OVERRIDE_CALL(nvml_library_entry,
-                         nvmlDeviceGetRetiredPagesPendingStatus, device,
-                         isPending);
+nvmlReturn_t nvmlDeviceGetRetiredPagesPendingStatus(nvmlDevice_t device,
+                                                    nvmlEnableState_t *isPending) {
+    return NVML_OVERRIDE_CALL(nvml_library_entry, nvmlDeviceGetRetiredPagesPendingStatus, device,
+                              isPending);
 }
 
 nvmlReturn_t nvmlDeviceGetSamples(nvmlDevice_t device, nvmlSamplingType_t type,
                                   unsigned long long lastSeenTimeStamp,
-                                  nvmlValueType_t *sampleValType,
-                                  unsigned int *sampleCount,
+                                  nvmlValueType_t *sampleValType, unsigned int *sampleCount,
                                   nvmlSample_t *samples) {
-  return NVML_OVERRIDE_CALL(nvml_library_entry, nvmlDeviceGetSamples, device, type,
-                         lastSeenTimeStamp, sampleValType, sampleCount,
-                         samples);
+    return NVML_OVERRIDE_CALL(nvml_library_entry, nvmlDeviceGetSamples, device, type,
+                              lastSeenTimeStamp, sampleValType, sampleCount, samples);
 }
 
-nvmlReturn_t nvmlDeviceGetSerial(nvmlDevice_t device, char *serial,
-                                 unsigned int length) {
-  return NVML_OVERRIDE_CALL(nvml_library_entry, nvmlDeviceGetSerial, device,
-                         serial, length);
+nvmlReturn_t nvmlDeviceGetSerial(nvmlDevice_t device, char *serial, unsigned int length) {
+    return NVML_OVERRIDE_CALL(nvml_library_entry, nvmlDeviceGetSerial, device, serial, length);
 }
 
 nvmlReturn_t nvmlDeviceGetSupportedClocksThrottleReasons(
     nvmlDevice_t device, unsigned long long *supportedClocksThrottleReasons) {
-  return NVML_OVERRIDE_CALL(nvml_library_entry,
-                         nvmlDeviceGetSupportedClocksThrottleReasons, device,
-                         supportedClocksThrottleReasons);
+    return NVML_OVERRIDE_CALL(nvml_library_entry, nvmlDeviceGetSupportedClocksThrottleReasons,
+                              device, supportedClocksThrottleReasons);
 }
 
-nvmlReturn_t nvmlDeviceGetSupportedEventTypes(nvmlDevice_t device,
-                                              unsigned long long *eventTypes) {
-  nvmlReturn_t res = NVML_OVERRIDE_CALL(nvml_library_entry, nvmlDeviceGetSupportedEventTypes,
-                         device, eventTypes);
-  // nvmlReturn_t res = NVML_OVERRIDE_CALL(nvml_library_entry, nvmlDeviceGetSupportedEventTypes,
-  //                       device, eventTypes); 
-  return res;
+nvmlReturn_t nvmlDeviceGetSupportedEventTypes(nvmlDevice_t device, unsigned long long *eventTypes) {
+    nvmlReturn_t res = NVML_OVERRIDE_CALL(nvml_library_entry, nvmlDeviceGetSupportedEventTypes,
+                                          device, eventTypes);
+    // nvmlReturn_t res = NVML_OVERRIDE_CALL(nvml_library_entry, nvmlDeviceGetSupportedEventTypes,
+    //                       device, eventTypes);
+    return res;
 }
 
-nvmlReturn_t nvmlDeviceGetSupportedGraphicsClocks(nvmlDevice_t device,
-                                                  unsigned int memoryClockMHz,
-                                                  unsigned int *count,
-                                                  unsigned int *clocksMHz) {
-  return NVML_OVERRIDE_CALL(nvml_library_entry,
-                         nvmlDeviceGetSupportedGraphicsClocks, device,
-                         memoryClockMHz, count, clocksMHz);
+nvmlReturn_t nvmlDeviceGetSupportedGraphicsClocks(nvmlDevice_t device, unsigned int memoryClockMHz,
+                                                  unsigned int *count, unsigned int *clocksMHz) {
+    return NVML_OVERRIDE_CALL(nvml_library_entry, nvmlDeviceGetSupportedGraphicsClocks, device,
+                              memoryClockMHz, count, clocksMHz);
 }
 
-nvmlReturn_t nvmlDeviceGetSupportedMemoryClocks(nvmlDevice_t device,
-                                                unsigned int *count,
+nvmlReturn_t nvmlDeviceGetSupportedMemoryClocks(nvmlDevice_t device, unsigned int *count,
                                                 unsigned int *clocksMHz) {
-  return NVML_OVERRIDE_CALL(nvml_library_entry, nvmlDeviceGetSupportedMemoryClocks,
-                         device, count, clocksMHz);
+    return NVML_OVERRIDE_CALL(nvml_library_entry, nvmlDeviceGetSupportedMemoryClocks, device, count,
+                              clocksMHz);
 }
 
-nvmlReturn_t nvmlDeviceGetSupportedVgpus(nvmlDevice_t device,
-                                         unsigned int *vgpuCount,
+nvmlReturn_t nvmlDeviceGetSupportedVgpus(nvmlDevice_t device, unsigned int *vgpuCount,
                                          nvmlVgpuTypeId_t *vgpuTypeIds) {
-  return NVML_OVERRIDE_CALL(nvml_library_entry, nvmlDeviceGetSupportedVgpus,
-                         device, vgpuCount, vgpuTypeIds);
+    return NVML_OVERRIDE_CALL(nvml_library_entry, nvmlDeviceGetSupportedVgpus, device, vgpuCount,
+                              vgpuTypeIds);
 }
 
-nvmlReturn_t nvmlDeviceGetTemperature(nvmlDevice_t device,
-                                      nvmlTemperatureSensors_t sensorType,
+nvmlReturn_t nvmlDeviceGetTemperature(nvmlDevice_t device, nvmlTemperatureSensors_t sensorType,
                                       unsigned int *temp) {
-  return NVML_OVERRIDE_CALL(nvml_library_entry, nvmlDeviceGetTemperature, device,
-                         sensorType, temp);
+    return NVML_OVERRIDE_CALL(nvml_library_entry, nvmlDeviceGetTemperature, device, sensorType,
+                              temp);
 }
 
-nvmlReturn_t
-nvmlDeviceGetTemperatureThreshold(nvmlDevice_t device,
-                                  nvmlTemperatureThresholds_t thresholdType,
-                                  unsigned int *temp) {
-  return NVML_OVERRIDE_CALL(nvml_library_entry, nvmlDeviceGetTemperatureThreshold,
-                         device, thresholdType, temp);
+nvmlReturn_t nvmlDeviceGetTemperatureThreshold(nvmlDevice_t device,
+                                               nvmlTemperatureThresholds_t thresholdType,
+                                               unsigned int *temp) {
+    return NVML_OVERRIDE_CALL(nvml_library_entry, nvmlDeviceGetTemperatureThreshold, device,
+                              thresholdType, temp);
 }
 
-nvmlReturn_t
-nvmlDeviceGetTopologyCommonAncestor(nvmlDevice_t device1, nvmlDevice_t device2,
-                                    nvmlGpuTopologyLevel_t *pathInfo) {
-  return NVML_OVERRIDE_CALL(nvml_library_entry,
-                         nvmlDeviceGetTopologyCommonAncestor, device1, device2,
-                         pathInfo);
+nvmlReturn_t nvmlDeviceGetTopologyCommonAncestor(nvmlDevice_t device1, nvmlDevice_t device2,
+                                                 nvmlGpuTopologyLevel_t *pathInfo) {
+    return NVML_OVERRIDE_CALL(nvml_library_entry, nvmlDeviceGetTopologyCommonAncestor, device1,
+                              device2, pathInfo);
 }
 
-nvmlReturn_t nvmlDeviceGetTopologyNearestGpus(nvmlDevice_t device,
-                                              nvmlGpuTopologyLevel_t level,
-                                              unsigned int *count,
-                                              nvmlDevice_t *deviceArray) {
-  return NVML_OVERRIDE_CALL(nvml_library_entry, nvmlDeviceGetTopologyNearestGpus,
-                         device, level, count, deviceArray);
+nvmlReturn_t nvmlDeviceGetTopologyNearestGpus(nvmlDevice_t device, nvmlGpuTopologyLevel_t level,
+                                              unsigned int *count, nvmlDevice_t *deviceArray) {
+    return NVML_OVERRIDE_CALL(nvml_library_entry, nvmlDeviceGetTopologyNearestGpus, device, level,
+                              count, deviceArray);
 }
 
-nvmlReturn_t nvmlDeviceGetTotalEccErrors(nvmlDevice_t device,
-                                         nvmlMemoryErrorType_t errorType,
+nvmlReturn_t nvmlDeviceGetTotalEccErrors(nvmlDevice_t device, nvmlMemoryErrorType_t errorType,
                                          nvmlEccCounterType_t counterType,
                                          unsigned long long *eccCounts) {
-  return NVML_OVERRIDE_CALL(nvml_library_entry, nvmlDeviceGetTotalEccErrors,
-                         device, errorType, counterType, eccCounts);
+    return NVML_OVERRIDE_CALL(nvml_library_entry, nvmlDeviceGetTotalEccErrors, device, errorType,
+                              counterType, eccCounts);
 }
 
-nvmlReturn_t nvmlDeviceGetTotalEnergyConsumption(nvmlDevice_t device,
-                                                 unsigned long long *energy) {
-  return NVML_OVERRIDE_CALL(nvml_library_entry,
-                         nvmlDeviceGetTotalEnergyConsumption, device, energy);
+nvmlReturn_t nvmlDeviceGetTotalEnergyConsumption(nvmlDevice_t device, unsigned long long *energy) {
+    return NVML_OVERRIDE_CALL(nvml_library_entry, nvmlDeviceGetTotalEnergyConsumption, device,
+                              energy);
 }
 
-nvmlReturn_t nvmlDeviceGetUtilizationRates(nvmlDevice_t device,
-                                           nvmlUtilization_t *utilization) {
-  return NVML_OVERRIDE_CALL(nvml_library_entry, nvmlDeviceGetUtilizationRates,
-                         device, utilization);
+nvmlReturn_t nvmlDeviceGetUtilizationRates(nvmlDevice_t device, nvmlUtilization_t *utilization) {
+    return NVML_OVERRIDE_CALL(nvml_library_entry, nvmlDeviceGetUtilizationRates, device,
+                              utilization);
 }
 
-nvmlReturn_t nvmlDeviceGetVbiosVersion(nvmlDevice_t device, char *version,
-                                       unsigned int length) {
-  return NVML_OVERRIDE_CALL(nvml_library_entry, nvmlDeviceGetVbiosVersion, device,
-                         version, length);
+nvmlReturn_t nvmlDeviceGetVbiosVersion(nvmlDevice_t device, char *version, unsigned int length) {
+    return NVML_OVERRIDE_CALL(nvml_library_entry, nvmlDeviceGetVbiosVersion, device, version,
+                              length);
 }
 
-nvmlReturn_t nvmlDeviceGetVgpuMetadata(nvmlDevice_t device,
-                                       nvmlVgpuPgpuMetadata_t *pgpuMetadata,
+nvmlReturn_t nvmlDeviceGetVgpuMetadata(nvmlDevice_t device, nvmlVgpuPgpuMetadata_t *pgpuMetadata,
                                        unsigned int *bufferSize) {
-  return NVML_OVERRIDE_CALL(nvml_library_entry, nvmlDeviceGetVgpuMetadata, device,
-                         pgpuMetadata, bufferSize);
+    return NVML_OVERRIDE_CALL(nvml_library_entry, nvmlDeviceGetVgpuMetadata, device, pgpuMetadata,
+                              bufferSize);
 }
 
 nvmlReturn_t nvmlDeviceGetVgpuProcessUtilization(
     nvmlDevice_t device, unsigned long long lastSeenTimeStamp,
-    unsigned int *vgpuProcessSamplesCount,
-    nvmlVgpuProcessUtilizationSample_t *utilizationSamples) {
-  return NVML_OVERRIDE_CALL(
-      nvml_library_entry, nvmlDeviceGetVgpuProcessUtilization, device,
-      lastSeenTimeStamp, vgpuProcessSamplesCount, utilizationSamples);
+    unsigned int *vgpuProcessSamplesCount, nvmlVgpuProcessUtilizationSample_t *utilizationSamples) {
+    return NVML_OVERRIDE_CALL(nvml_library_entry, nvmlDeviceGetVgpuProcessUtilization, device,
+                              lastSeenTimeStamp, vgpuProcessSamplesCount, utilizationSamples);
 }
 
-nvmlReturn_t nvmlDeviceGetVgpuUtilization(
-    nvmlDevice_t device, unsigned long long lastSeenTimeStamp,
-    nvmlValueType_t *sampleValType, unsigned int *vgpuInstanceSamplesCount,
-    nvmlVgpuInstanceUtilizationSample_t *utilizationSamples) {
-  return NVML_OVERRIDE_CALL(nvml_library_entry, nvmlDeviceGetVgpuUtilization,
-                         device, lastSeenTimeStamp, sampleValType,
-                         vgpuInstanceSamplesCount, utilizationSamples);
+nvmlReturn_t nvmlDeviceGetVgpuUtilization(nvmlDevice_t device, unsigned long long lastSeenTimeStamp,
+                                          nvmlValueType_t *sampleValType,
+                                          unsigned int *vgpuInstanceSamplesCount,
+                                          nvmlVgpuInstanceUtilizationSample_t *utilizationSamples) {
+    return NVML_OVERRIDE_CALL(nvml_library_entry, nvmlDeviceGetVgpuUtilization, device,
+                              lastSeenTimeStamp, sampleValType, vgpuInstanceSamplesCount,
+                              utilizationSamples);
 }
 
-nvmlReturn_t nvmlDeviceGetViolationStatus(nvmlDevice_t device,
-                                          nvmlPerfPolicyType_t perfPolicyType,
+nvmlReturn_t nvmlDeviceGetViolationStatus(nvmlDevice_t device, nvmlPerfPolicyType_t perfPolicyType,
                                           nvmlViolationTime_t *violTime) {
-  return NVML_OVERRIDE_CALL(nvml_library_entry, nvmlDeviceGetViolationStatus,
-                         device, perfPolicyType, violTime);
+    return NVML_OVERRIDE_CALL(nvml_library_entry, nvmlDeviceGetViolationStatus, device,
+                              perfPolicyType, violTime);
 }
 
-nvmlReturn_t
-nvmlDeviceGetVirtualizationMode(nvmlDevice_t device,
-                                nvmlGpuVirtualizationMode_t *pVirtualMode) {
-  return NVML_OVERRIDE_CALL(nvml_library_entry, nvmlDeviceGetVirtualizationMode,
-                         device, pVirtualMode);
+nvmlReturn_t nvmlDeviceGetVirtualizationMode(nvmlDevice_t device,
+                                             nvmlGpuVirtualizationMode_t *pVirtualMode) {
+    return NVML_OVERRIDE_CALL(nvml_library_entry, nvmlDeviceGetVirtualizationMode, device,
+                              pVirtualMode);
 }
 
-nvmlReturn_t nvmlDeviceModifyDrainState(nvmlPciInfo_t *pciInfo,
-                                        nvmlEnableState_t newState) {
-  return NVML_OVERRIDE_CALL(nvml_library_entry, nvmlDeviceModifyDrainState,
-                         pciInfo, newState);
+nvmlReturn_t nvmlDeviceModifyDrainState(nvmlPciInfo_t *pciInfo, nvmlEnableState_t newState) {
+    return NVML_OVERRIDE_CALL(nvml_library_entry, nvmlDeviceModifyDrainState, pciInfo, newState);
 }
 
-nvmlReturn_t nvmlDeviceOnSameBoard(nvmlDevice_t device1, nvmlDevice_t device2,
-                                   int *onSameBoard) {
-  return NVML_OVERRIDE_CALL(nvml_library_entry, nvmlDeviceOnSameBoard, device1,
-                         device2, onSameBoard);
+nvmlReturn_t nvmlDeviceOnSameBoard(nvmlDevice_t device1, nvmlDevice_t device2, int *onSameBoard) {
+    return NVML_OVERRIDE_CALL(nvml_library_entry, nvmlDeviceOnSameBoard, device1, device2,
+                              onSameBoard);
 }
 
-nvmlReturn_t nvmlDeviceQueryDrainState(nvmlPciInfo_t *pciInfo,
-                                       nvmlEnableState_t *currentState) {
-  return NVML_OVERRIDE_CALL(nvml_library_entry, nvmlDeviceQueryDrainState, pciInfo,
-                         currentState);
+nvmlReturn_t nvmlDeviceQueryDrainState(nvmlPciInfo_t *pciInfo, nvmlEnableState_t *currentState) {
+    return NVML_OVERRIDE_CALL(nvml_library_entry, nvmlDeviceQueryDrainState, pciInfo, currentState);
 }
 
-nvmlReturn_t nvmlDeviceRegisterEvents(nvmlDevice_t device,
-                                      unsigned long long eventTypes,
+nvmlReturn_t nvmlDeviceRegisterEvents(nvmlDevice_t device, unsigned long long eventTypes,
                                       nvmlEventSet_t set) {
-  return NVML_OVERRIDE_CALL(nvml_library_entry, nvmlDeviceRegisterEvents, device,
-                         eventTypes, set);
+    return NVML_OVERRIDE_CALL(nvml_library_entry, nvmlDeviceRegisterEvents, device, eventTypes,
+                              set);
 }
 
-nvmlReturn_t nvmlDeviceRemoveGpu_v2(nvmlPciInfo_t *pciInfo,
-                                    nvmlDetachGpuState_t gpuState,
+nvmlReturn_t nvmlDeviceRemoveGpu_v2(nvmlPciInfo_t *pciInfo, nvmlDetachGpuState_t gpuState,
                                     nvmlPcieLinkState_t linkState) {
-  return NVML_OVERRIDE_CALL(nvml_library_entry, nvmlDeviceRemoveGpu_v2, pciInfo,
-                         gpuState, linkState);
+    return NVML_OVERRIDE_CALL(nvml_library_entry, nvmlDeviceRemoveGpu_v2, pciInfo, gpuState,
+                              linkState);
 }
 
-nvmlReturn_t nvmlDeviceRemoveGpu(nvmlPciInfo_t *pciInfo,
-                                 nvmlDetachGpuState_t gpuState,
+nvmlReturn_t nvmlDeviceRemoveGpu(nvmlPciInfo_t *pciInfo, nvmlDetachGpuState_t gpuState,
                                  nvmlPcieLinkState_t linkState) {
-  return NVML_OVERRIDE_CALL(nvml_library_entry, nvmlDeviceRemoveGpu, pciInfo,
-                         gpuState, linkState);
+    return NVML_OVERRIDE_CALL(nvml_library_entry, nvmlDeviceRemoveGpu, pciInfo, gpuState,
+                              linkState);
 }
 
 nvmlReturn_t nvmlDeviceResetApplicationsClocks(nvmlDevice_t device) {
-  return NVML_OVERRIDE_CALL(nvml_library_entry, nvmlDeviceResetApplicationsClocks,
-                         device);
+    return NVML_OVERRIDE_CALL(nvml_library_entry, nvmlDeviceResetApplicationsClocks, device);
 }
 
-nvmlReturn_t nvmlDeviceResetNvLinkErrorCounters(nvmlDevice_t device,
-                                                unsigned int link) {
-  return NVML_OVERRIDE_CALL(nvml_library_entry, nvmlDeviceResetNvLinkErrorCounters,
-                         device, link);
+nvmlReturn_t nvmlDeviceResetNvLinkErrorCounters(nvmlDevice_t device, unsigned int link) {
+    return NVML_OVERRIDE_CALL(nvml_library_entry, nvmlDeviceResetNvLinkErrorCounters, device, link);
 }
 
-nvmlReturn_t nvmlDeviceResetNvLinkUtilizationCounter(nvmlDevice_t device,
-                                                     unsigned int link,
+nvmlReturn_t nvmlDeviceResetNvLinkUtilizationCounter(nvmlDevice_t device, unsigned int link,
                                                      unsigned int counter) {
-  return NVML_OVERRIDE_CALL(nvml_library_entry,
-                         nvmlDeviceResetNvLinkUtilizationCounter, device, link,
-                         counter);
+    return NVML_OVERRIDE_CALL(nvml_library_entry, nvmlDeviceResetNvLinkUtilizationCounter, device,
+                              link, counter);
 }
 
-nvmlReturn_t nvmlDeviceSetAccountingMode(nvmlDevice_t device,
-                                         nvmlEnableState_t mode) {
-  return NVML_OVERRIDE_CALL(nvml_library_entry, nvmlDeviceSetAccountingMode,
-                         device, mode);
+nvmlReturn_t nvmlDeviceSetAccountingMode(nvmlDevice_t device, nvmlEnableState_t mode) {
+    return NVML_OVERRIDE_CALL(nvml_library_entry, nvmlDeviceSetAccountingMode, device, mode);
 }
 
-nvmlReturn_t nvmlDeviceSetAPIRestriction(nvmlDevice_t device,
-                                         nvmlRestrictedAPI_t apiType,
+nvmlReturn_t nvmlDeviceSetAPIRestriction(nvmlDevice_t device, nvmlRestrictedAPI_t apiType,
                                          nvmlEnableState_t isRestricted) {
-  return NVML_OVERRIDE_CALL(nvml_library_entry, nvmlDeviceSetAPIRestriction,
-                         device, apiType, isRestricted);
+    return NVML_OVERRIDE_CALL(nvml_library_entry, nvmlDeviceSetAPIRestriction, device, apiType,
+                              isRestricted);
 }
 
-nvmlReturn_t nvmlDeviceSetApplicationsClocks(nvmlDevice_t device,
-                                             unsigned int memClockMHz,
+nvmlReturn_t nvmlDeviceSetApplicationsClocks(nvmlDevice_t device, unsigned int memClockMHz,
                                              unsigned int graphicsClockMHz) {
-  return NVML_OVERRIDE_CALL(nvml_library_entry, nvmlDeviceSetApplicationsClocks,
-                         device, memClockMHz, graphicsClockMHz);
+    return NVML_OVERRIDE_CALL(nvml_library_entry, nvmlDeviceSetApplicationsClocks, device,
+                              memClockMHz, graphicsClockMHz);
 }
 
-nvmlReturn_t nvmlDeviceSetAutoBoostedClocksEnabled(nvmlDevice_t device,
-                                                   nvmlEnableState_t enabled) {
-  return NVML_OVERRIDE_CALL(nvml_library_entry,
-                         nvmlDeviceSetAutoBoostedClocksEnabled, device,
-                         enabled);
+nvmlReturn_t nvmlDeviceSetAutoBoostedClocksEnabled(nvmlDevice_t device, nvmlEnableState_t enabled) {
+    return NVML_OVERRIDE_CALL(nvml_library_entry, nvmlDeviceSetAutoBoostedClocksEnabled, device,
+                              enabled);
 }
 
-nvmlReturn_t nvmlDeviceSetComputeMode(nvmlDevice_t device,
-                                      nvmlComputeMode_t mode) {
-  //if (g_vcuda_config.enable) {
-  //  return NVML_ERROR_NOT_SUPPORTED;
-  //}
+nvmlReturn_t nvmlDeviceSetComputeMode(nvmlDevice_t device, nvmlComputeMode_t mode) {
+    // if (g_vcuda_config.enable) {
+    //   return NVML_ERROR_NOT_SUPPORTED;
+    // }
 
-  return NVML_OVERRIDE_CALL(nvml_library_entry, nvmlDeviceSetComputeMode, device,
-                         mode);
+    return NVML_OVERRIDE_CALL(nvml_library_entry, nvmlDeviceSetComputeMode, device, mode);
 }
 
 nvmlReturn_t nvmlDeviceSetCpuAffinity(nvmlDevice_t device) {
-  return NVML_OVERRIDE_CALL(nvml_library_entry, nvmlDeviceSetCpuAffinity, device);
+    return NVML_OVERRIDE_CALL(nvml_library_entry, nvmlDeviceSetCpuAffinity, device);
 }
 
-nvmlReturn_t nvmlDeviceSetDefaultAutoBoostedClocksEnabled(
-    nvmlDevice_t device, nvmlEnableState_t enabled, unsigned int flags) {
-  return NVML_OVERRIDE_CALL(nvml_library_entry,
-                         nvmlDeviceSetDefaultAutoBoostedClocksEnabled, device,
-                         enabled, flags);
+nvmlReturn_t nvmlDeviceSetDefaultAutoBoostedClocksEnabled(nvmlDevice_t device,
+                                                          nvmlEnableState_t enabled,
+                                                          unsigned int flags) {
+    return NVML_OVERRIDE_CALL(nvml_library_entry, nvmlDeviceSetDefaultAutoBoostedClocksEnabled,
+                              device, enabled, flags);
 }
 
-nvmlReturn_t nvmlDeviceSetDriverModel(nvmlDevice_t device,
-                                      nvmlDriverModel_t driverModel,
+nvmlReturn_t nvmlDeviceSetDriverModel(nvmlDevice_t device, nvmlDriverModel_t driverModel,
                                       unsigned int flags) {
-  return NVML_OVERRIDE_CALL(nvml_library_entry, nvmlDeviceSetDriverModel, device,
-                         driverModel, flags);
+    return NVML_OVERRIDE_CALL(nvml_library_entry, nvmlDeviceSetDriverModel, device, driverModel,
+                              flags);
 }
 
-nvmlReturn_t nvmlDeviceSetGpuOperationMode(nvmlDevice_t device,
-                                           nvmlGpuOperationMode_t mode) {
-  return NVML_OVERRIDE_CALL(nvml_library_entry, nvmlDeviceSetGpuOperationMode,
-                         device, mode);
+nvmlReturn_t nvmlDeviceSetGpuOperationMode(nvmlDevice_t device, nvmlGpuOperationMode_t mode) {
+    return NVML_OVERRIDE_CALL(nvml_library_entry, nvmlDeviceSetGpuOperationMode, device, mode);
 }
 
-nvmlReturn_t nvmlDeviceSetNvLinkUtilizationControl(
-    nvmlDevice_t device, unsigned int link, unsigned int counter,
-    nvmlNvLinkUtilizationControl_t *control, unsigned int reset) {
-  return NVML_OVERRIDE_CALL(nvml_library_entry,
-                         nvmlDeviceSetNvLinkUtilizationControl, device, link,
-                         counter, control, reset);
+nvmlReturn_t nvmlDeviceSetNvLinkUtilizationControl(nvmlDevice_t device, unsigned int link,
+                                                   unsigned int counter,
+                                                   nvmlNvLinkUtilizationControl_t *control,
+                                                   unsigned int reset) {
+    return NVML_OVERRIDE_CALL(nvml_library_entry, nvmlDeviceSetNvLinkUtilizationControl, device,
+                              link, counter, control, reset);
 }
 
-nvmlReturn_t nvmlDeviceSetPersistenceMode(nvmlDevice_t device,
-                                          nvmlEnableState_t mode) {
-  return NVML_OVERRIDE_CALL(nvml_library_entry, nvmlDeviceSetPersistenceMode,
-                         device, mode);
+nvmlReturn_t nvmlDeviceSetPersistenceMode(nvmlDevice_t device, nvmlEnableState_t mode) {
+    return NVML_OVERRIDE_CALL(nvml_library_entry, nvmlDeviceSetPersistenceMode, device, mode);
 }
 
-nvmlReturn_t nvmlDeviceSetPowerManagementLimit(nvmlDevice_t device,
-                                               unsigned int limit) {
-  return NVML_OVERRIDE_CALL(nvml_library_entry, nvmlDeviceSetPowerManagementLimit,
-                         device, limit);
+nvmlReturn_t nvmlDeviceSetPowerManagementLimit(nvmlDevice_t device, unsigned int limit) {
+    return NVML_OVERRIDE_CALL(nvml_library_entry, nvmlDeviceSetPowerManagementLimit, device, limit);
 }
 
-nvmlReturn_t
-nvmlDeviceSetVirtualizationMode(nvmlDevice_t device,
-                                nvmlGpuVirtualizationMode_t virtualMode) {
-  return NVML_OVERRIDE_CALL(nvml_library_entry, nvmlDeviceSetVirtualizationMode,
-                         device, virtualMode);
+nvmlReturn_t nvmlDeviceSetVirtualizationMode(nvmlDevice_t device,
+                                             nvmlGpuVirtualizationMode_t virtualMode) {
+    return NVML_OVERRIDE_CALL(nvml_library_entry, nvmlDeviceSetVirtualizationMode, device,
+                              virtualMode);
 }
 
 nvmlReturn_t nvmlDeviceValidateInforom(nvmlDevice_t device) {
-  return NVML_OVERRIDE_CALL(nvml_library_entry, nvmlDeviceValidateInforom, device);
+    return NVML_OVERRIDE_CALL(nvml_library_entry, nvmlDeviceValidateInforom, device);
 }
 
 nvmlReturn_t nvmlEventSetCreate(nvmlEventSet_t *set) {
-  return NVML_OVERRIDE_CALL(nvml_library_entry, nvmlEventSetCreate, set);
+    return NVML_OVERRIDE_CALL(nvml_library_entry, nvmlEventSetCreate, set);
 }
 
 nvmlReturn_t nvmlEventSetFree(nvmlEventSet_t set) {
-  return NVML_OVERRIDE_CALL(nvml_library_entry, nvmlEventSetFree, set);
+    return NVML_OVERRIDE_CALL(nvml_library_entry, nvmlEventSetFree, set);
 }
 
-nvmlReturn_t nvmlEventSetWait(nvmlEventSet_t set, nvmlEventData_t *data,
-                              unsigned int timeoutms) {
-  return NVML_OVERRIDE_CALL(nvml_library_entry, nvmlEventSetWait, set, data,
-                         timeoutms);
+nvmlReturn_t nvmlEventSetWait(nvmlEventSet_t set, nvmlEventData_t *data, unsigned int timeoutms) {
+    return NVML_OVERRIDE_CALL(nvml_library_entry, nvmlEventSetWait, set, data, timeoutms);
 }
 
-nvmlReturn_t
-nvmlGetVgpuCompatibility(nvmlVgpuMetadata_t *vgpuMetadata,
-                         nvmlVgpuPgpuMetadata_t *pgpuMetadata,
-                         nvmlVgpuPgpuCompatibility_t *compatibilityInfo) {
-  return NVML_OVERRIDE_CALL(nvml_library_entry, nvmlGetVgpuCompatibility,
-                         vgpuMetadata, pgpuMetadata, compatibilityInfo);
+nvmlReturn_t nvmlGetVgpuCompatibility(nvmlVgpuMetadata_t *vgpuMetadata,
+                                      nvmlVgpuPgpuMetadata_t *pgpuMetadata,
+                                      nvmlVgpuPgpuCompatibility_t *compatibilityInfo) {
+    return NVML_OVERRIDE_CALL(nvml_library_entry, nvmlGetVgpuCompatibility, vgpuMetadata,
+                              pgpuMetadata, compatibilityInfo);
 }
 
-nvmlReturn_t nvmlInternalGetExportTable(const void **ppExportTable,
-                                        void *pExportTableId) {
-  return NVML_OVERRIDE_CALL(nvml_library_entry, nvmlInternalGetExportTable,
-                         ppExportTable, pExportTableId);
+nvmlReturn_t nvmlInternalGetExportTable(const void **ppExportTable, void *pExportTableId) {
+    return NVML_OVERRIDE_CALL(nvml_library_entry, nvmlInternalGetExportTable, ppExportTable,
+                              pExportTableId);
 }
 
 nvmlReturn_t nvmlSystemGetCudaDriverVersion(int *cudaDriverVersion) {
-  return NVML_OVERRIDE_CALL(nvml_library_entry, nvmlSystemGetCudaDriverVersion,
-                         cudaDriverVersion);
+    return NVML_OVERRIDE_CALL(nvml_library_entry, nvmlSystemGetCudaDriverVersion,
+                              cudaDriverVersion);
 }
 
 nvmlReturn_t nvmlSystemGetCudaDriverVersion_v2(int *cudaDriverVersion) {
-  return NVML_OVERRIDE_CALL(nvml_library_entry, nvmlSystemGetCudaDriverVersion_v2,
-                         cudaDriverVersion);
+    return NVML_OVERRIDE_CALL(nvml_library_entry, nvmlSystemGetCudaDriverVersion_v2,
+                              cudaDriverVersion);
 }
 
 nvmlReturn_t nvmlSystemGetDriverVersion(char *version, unsigned int length) {
-  return NVML_OVERRIDE_CALL(nvml_library_entry, nvmlSystemGetDriverVersion,
-                         version, length);
+    return NVML_OVERRIDE_CALL(nvml_library_entry, nvmlSystemGetDriverVersion, version, length);
 }
 
-nvmlReturn_t nvmlSystemGetHicVersion(unsigned int *hwbcCount,
-                                     nvmlHwbcEntry_t *hwbcEntries) {
-  return NVML_OVERRIDE_CALL(nvml_library_entry, nvmlSystemGetHicVersion, hwbcCount,
-                         hwbcEntries);
+nvmlReturn_t nvmlSystemGetHicVersion(unsigned int *hwbcCount, nvmlHwbcEntry_t *hwbcEntries) {
+    return NVML_OVERRIDE_CALL(nvml_library_entry, nvmlSystemGetHicVersion, hwbcCount, hwbcEntries);
 }
 
 nvmlReturn_t nvmlSystemGetNVMLVersion(char *version, unsigned int length) {
-  return NVML_OVERRIDE_CALL(nvml_library_entry, nvmlSystemGetNVMLVersion, version,
-                         length);
+    return NVML_OVERRIDE_CALL(nvml_library_entry, nvmlSystemGetNVMLVersion, version, length);
 }
 
-nvmlReturn_t nvmlSystemGetProcessName(unsigned int pid, char *name,
-                                      unsigned int length) {
-  return NVML_OVERRIDE_CALL(nvml_library_entry, nvmlSystemGetProcessName, pid,
-                         name, length);
+nvmlReturn_t nvmlSystemGetProcessName(unsigned int pid, char *name, unsigned int length) {
+    return NVML_OVERRIDE_CALL(nvml_library_entry, nvmlSystemGetProcessName, pid, name, length);
 }
 
-nvmlReturn_t nvmlSystemGetTopologyGpuSet(unsigned int cpuNumber,
-                                         unsigned int *count,
+nvmlReturn_t nvmlSystemGetTopologyGpuSet(unsigned int cpuNumber, unsigned int *count,
                                          nvmlDevice_t *deviceArray) {
-  return NVML_OVERRIDE_CALL(nvml_library_entry, nvmlSystemGetTopologyGpuSet,
-                         cpuNumber, count, deviceArray);
+    return NVML_OVERRIDE_CALL(nvml_library_entry, nvmlSystemGetTopologyGpuSet, cpuNumber, count,
+                              deviceArray);
 }
 
 nvmlReturn_t nvmlUnitGetCount(unsigned int *unitCount) {
-  return NVML_OVERRIDE_CALL(nvml_library_entry, nvmlUnitGetCount, unitCount);
+    return NVML_OVERRIDE_CALL(nvml_library_entry, nvmlUnitGetCount, unitCount);
 }
 
-nvmlReturn_t nvmlUnitGetDevices(nvmlUnit_t unit, unsigned int *deviceCount,
-                                nvmlDevice_t *devices) {
-  return NVML_OVERRIDE_CALL(nvml_library_entry, nvmlUnitGetDevices, unit,
-                         deviceCount, devices);
+nvmlReturn_t nvmlUnitGetDevices(nvmlUnit_t unit, unsigned int *deviceCount, nvmlDevice_t *devices) {
+    return NVML_OVERRIDE_CALL(nvml_library_entry, nvmlUnitGetDevices, unit, deviceCount, devices);
 }
 
-nvmlReturn_t nvmlUnitGetFanSpeedInfo(nvmlUnit_t unit,
-                                     nvmlUnitFanSpeeds_t *fanSpeeds) {
-  return NVML_OVERRIDE_CALL(nvml_library_entry, nvmlUnitGetFanSpeedInfo, unit,
-                         fanSpeeds);
+nvmlReturn_t nvmlUnitGetFanSpeedInfo(nvmlUnit_t unit, nvmlUnitFanSpeeds_t *fanSpeeds) {
+    return NVML_OVERRIDE_CALL(nvml_library_entry, nvmlUnitGetFanSpeedInfo, unit, fanSpeeds);
 }
 
 nvmlReturn_t nvmlUnitGetHandleByIndex(unsigned int index, nvmlUnit_t *unit) {
-  return NVML_OVERRIDE_CALL(nvml_library_entry, nvmlUnitGetHandleByIndex, index,
-                         unit);
+    return NVML_OVERRIDE_CALL(nvml_library_entry, nvmlUnitGetHandleByIndex, index, unit);
 }
 
 nvmlReturn_t nvmlUnitGetLedState(nvmlUnit_t unit, nvmlLedState_t *state) {
-  return NVML_OVERRIDE_CALL(nvml_library_entry, nvmlUnitGetLedState, unit, state);
+    return NVML_OVERRIDE_CALL(nvml_library_entry, nvmlUnitGetLedState, unit, state);
 }
 
 nvmlReturn_t nvmlUnitGetPsuInfo(nvmlUnit_t unit, nvmlPSUInfo_t *psu) {
-  return NVML_OVERRIDE_CALL(nvml_library_entry, nvmlUnitGetPsuInfo, unit, psu);
+    return NVML_OVERRIDE_CALL(nvml_library_entry, nvmlUnitGetPsuInfo, unit, psu);
 }
 
-nvmlReturn_t nvmlUnitGetTemperature(nvmlUnit_t unit, unsigned int type,
-                                    unsigned int *temp) {
-  return NVML_OVERRIDE_CALL(nvml_library_entry, nvmlUnitGetTemperature, unit, type,
-                         temp);
+nvmlReturn_t nvmlUnitGetTemperature(nvmlUnit_t unit, unsigned int type, unsigned int *temp) {
+    return NVML_OVERRIDE_CALL(nvml_library_entry, nvmlUnitGetTemperature, unit, type, temp);
 }
 
 nvmlReturn_t nvmlUnitGetUnitInfo(nvmlUnit_t unit, nvmlUnitInfo_t *info) {
-  return NVML_OVERRIDE_CALL(nvml_library_entry, nvmlUnitGetUnitInfo, unit, info);
+    return NVML_OVERRIDE_CALL(nvml_library_entry, nvmlUnitGetUnitInfo, unit, info);
 }
 
 nvmlReturn_t nvmlUnitSetLedState(nvmlUnit_t unit, nvmlLedColor_t color) {
-  return NVML_OVERRIDE_CALL(nvml_library_entry, nvmlUnitSetLedState, unit, color);
+    return NVML_OVERRIDE_CALL(nvml_library_entry, nvmlUnitSetLedState, unit, color);
 }
 
 nvmlReturn_t nvmlVgpuInstanceGetEncoderCapacity(nvmlVgpuInstance_t vgpuInstance,
                                                 unsigned int *encoderCapacity) {
-  return NVML_OVERRIDE_CALL(nvml_library_entry, nvmlVgpuInstanceGetEncoderCapacity,
-                         vgpuInstance, encoderCapacity);
+    return NVML_OVERRIDE_CALL(nvml_library_entry, nvmlVgpuInstanceGetEncoderCapacity, vgpuInstance,
+                              encoderCapacity);
 }
 
-nvmlReturn_t
-nvmlVgpuInstanceGetEncoderSessions(nvmlVgpuInstance_t vgpuInstance,
-                                   unsigned int *sessionCount,
-                                   nvmlEncoderSessionInfo_t *sessionInfo) {
-  return NVML_OVERRIDE_CALL(nvml_library_entry, nvmlVgpuInstanceGetEncoderSessions,
-                         vgpuInstance, sessionCount, sessionInfo);
+nvmlReturn_t nvmlVgpuInstanceGetEncoderSessions(nvmlVgpuInstance_t vgpuInstance,
+                                                unsigned int *sessionCount,
+                                                nvmlEncoderSessionInfo_t *sessionInfo) {
+    return NVML_OVERRIDE_CALL(nvml_library_entry, nvmlVgpuInstanceGetEncoderSessions, vgpuInstance,
+                              sessionCount, sessionInfo);
 }
 
 nvmlReturn_t nvmlVgpuInstanceGetEncoderStats(nvmlVgpuInstance_t vgpuInstance,
-                                             unsigned int *sessionCount,
-                                             unsigned int *averageFps,
+                                             unsigned int *sessionCount, unsigned int *averageFps,
                                              unsigned int *averageLatency) {
-  return NVML_OVERRIDE_CALL(nvml_library_entry, nvmlVgpuInstanceGetEncoderStats,
-                         vgpuInstance, sessionCount, averageFps,
-                         averageLatency);
+    return NVML_OVERRIDE_CALL(nvml_library_entry, nvmlVgpuInstanceGetEncoderStats, vgpuInstance,
+                              sessionCount, averageFps, averageLatency);
 }
 
 nvmlReturn_t nvmlVgpuInstanceGetFbUsage(nvmlVgpuInstance_t vgpuInstance,
                                         unsigned long long *fbUsage) {
-  return NVML_OVERRIDE_CALL(nvml_library_entry, nvmlVgpuInstanceGetFbUsage,
-                         vgpuInstance, fbUsage);
+    return NVML_OVERRIDE_CALL(nvml_library_entry, nvmlVgpuInstanceGetFbUsage, vgpuInstance,
+                              fbUsage);
 }
 
 nvmlReturn_t nvmlVgpuInstanceGetFrameRateLimit(nvmlVgpuInstance_t vgpuInstance,
                                                unsigned int *frameRateLimit) {
-  return NVML_OVERRIDE_CALL(nvml_library_entry, nvmlVgpuInstanceGetFrameRateLimit,
-                         vgpuInstance, frameRateLimit);
+    return NVML_OVERRIDE_CALL(nvml_library_entry, nvmlVgpuInstanceGetFrameRateLimit, vgpuInstance,
+                              frameRateLimit);
 }
 
 nvmlReturn_t nvmlVgpuInstanceGetLicenseStatus(nvmlVgpuInstance_t vgpuInstance,
                                               unsigned int *licensed) {
-  return NVML_OVERRIDE_CALL(nvml_library_entry, nvmlVgpuInstanceGetLicenseStatus,
-                         vgpuInstance, licensed);
+    return NVML_OVERRIDE_CALL(nvml_library_entry, nvmlVgpuInstanceGetLicenseStatus, vgpuInstance,
+                              licensed);
 }
 
 nvmlReturn_t nvmlVgpuInstanceGetMetadata(nvmlVgpuInstance_t vgpuInstance,
                                          nvmlVgpuMetadata_t *vgpuMetadata,
                                          unsigned int *bufferSize) {
-  return NVML_OVERRIDE_CALL(nvml_library_entry, nvmlVgpuInstanceGetMetadata,
-                         vgpuInstance, vgpuMetadata, bufferSize);
+    return NVML_OVERRIDE_CALL(nvml_library_entry, nvmlVgpuInstanceGetMetadata, vgpuInstance,
+                              vgpuMetadata, bufferSize);
 }
 
 nvmlReturn_t nvmlVgpuInstanceGetType(nvmlVgpuInstance_t vgpuInstance,
                                      nvmlVgpuTypeId_t *vgpuTypeId) {
-  return NVML_OVERRIDE_CALL(nvml_library_entry, nvmlVgpuInstanceGetType,
-                         vgpuInstance, vgpuTypeId);
+    return NVML_OVERRIDE_CALL(nvml_library_entry, nvmlVgpuInstanceGetType, vgpuInstance,
+                              vgpuTypeId);
 }
 
-nvmlReturn_t nvmlVgpuInstanceGetUUID(nvmlVgpuInstance_t vgpuInstance,
-                                     char *uuid, unsigned int size) {
-  return NVML_OVERRIDE_CALL(nvml_library_entry, nvmlVgpuInstanceGetUUID,
-                         vgpuInstance, uuid, size);
+nvmlReturn_t nvmlVgpuInstanceGetUUID(nvmlVgpuInstance_t vgpuInstance, char *uuid,
+                                     unsigned int size) {
+    return NVML_OVERRIDE_CALL(nvml_library_entry, nvmlVgpuInstanceGetUUID, vgpuInstance, uuid,
+                              size);
 }
 
-nvmlReturn_t nvmlVgpuInstanceGetVmDriverVersion(nvmlVgpuInstance_t vgpuInstance,
-                                                char *version,
+nvmlReturn_t nvmlVgpuInstanceGetVmDriverVersion(nvmlVgpuInstance_t vgpuInstance, char *version,
                                                 unsigned int length) {
-  return NVML_OVERRIDE_CALL(nvml_library_entry, nvmlVgpuInstanceGetVmDriverVersion,
-                         vgpuInstance, version, length);
+    return NVML_OVERRIDE_CALL(nvml_library_entry, nvmlVgpuInstanceGetVmDriverVersion, vgpuInstance,
+                              version, length);
 }
 
-nvmlReturn_t nvmlVgpuInstanceGetVmID(nvmlVgpuInstance_t vgpuInstance,
-                                     char *vmId, unsigned int size,
+nvmlReturn_t nvmlVgpuInstanceGetVmID(nvmlVgpuInstance_t vgpuInstance, char *vmId, unsigned int size,
                                      nvmlVgpuVmIdType_t *vmIdType) {
-  return NVML_OVERRIDE_CALL(nvml_library_entry, nvmlVgpuInstanceGetVmID,
-                         vgpuInstance, vmId, size, vmIdType);
+    return NVML_OVERRIDE_CALL(nvml_library_entry, nvmlVgpuInstanceGetVmID, vgpuInstance, vmId, size,
+                              vmIdType);
 }
 
 nvmlReturn_t nvmlVgpuInstanceSetEncoderCapacity(nvmlVgpuInstance_t vgpuInstance,
                                                 unsigned int encoderCapacity) {
-  return NVML_OVERRIDE_CALL(nvml_library_entry, nvmlVgpuInstanceSetEncoderCapacity,
-                         vgpuInstance, encoderCapacity);
+    return NVML_OVERRIDE_CALL(nvml_library_entry, nvmlVgpuInstanceSetEncoderCapacity, vgpuInstance,
+                              encoderCapacity);
 }
 
-nvmlReturn_t nvmlVgpuTypeGetClass(nvmlVgpuTypeId_t vgpuTypeId,
-                                  char *vgpuTypeClass, unsigned int *size) {
-  return NVML_OVERRIDE_CALL(nvml_library_entry, nvmlVgpuTypeGetClass, vgpuTypeId,
-                         vgpuTypeClass, size);
+nvmlReturn_t nvmlVgpuTypeGetClass(nvmlVgpuTypeId_t vgpuTypeId, char *vgpuTypeClass,
+                                  unsigned int *size) {
+    return NVML_OVERRIDE_CALL(nvml_library_entry, nvmlVgpuTypeGetClass, vgpuTypeId, vgpuTypeClass,
+                              size);
 }
 
-nvmlReturn_t nvmlVgpuTypeGetDeviceID(nvmlVgpuTypeId_t vgpuTypeId,
-                                     unsigned long long *deviceID,
+nvmlReturn_t nvmlVgpuTypeGetDeviceID(nvmlVgpuTypeId_t vgpuTypeId, unsigned long long *deviceID,
                                      unsigned long long *subsystemID) {
-  return NVML_OVERRIDE_CALL(nvml_library_entry, nvmlVgpuTypeGetDeviceID,
-                         vgpuTypeId, deviceID, subsystemID);
+    return NVML_OVERRIDE_CALL(nvml_library_entry, nvmlVgpuTypeGetDeviceID, vgpuTypeId, deviceID,
+                              subsystemID);
 }
 
 nvmlReturn_t nvmlVgpuTypeGetFramebufferSize(nvmlVgpuTypeId_t vgpuTypeId,
                                             unsigned long long *fbSize) {
-  return NVML_OVERRIDE_CALL(nvml_library_entry, nvmlVgpuTypeGetFramebufferSize,
-                         vgpuTypeId, fbSize);
+    return NVML_OVERRIDE_CALL(nvml_library_entry, nvmlVgpuTypeGetFramebufferSize, vgpuTypeId,
+                              fbSize);
 }
 
 nvmlReturn_t nvmlVgpuTypeGetFrameRateLimit(nvmlVgpuTypeId_t vgpuTypeId,
                                            unsigned int *frameRateLimit) {
-  return NVML_OVERRIDE_CALL(nvml_library_entry, nvmlVgpuTypeGetFrameRateLimit,
-                         vgpuTypeId, frameRateLimit);
+    return NVML_OVERRIDE_CALL(nvml_library_entry, nvmlVgpuTypeGetFrameRateLimit, vgpuTypeId,
+                              frameRateLimit);
 }
 
-nvmlReturn_t nvmlVgpuTypeGetLicense(nvmlVgpuTypeId_t vgpuTypeId,
-                                    char *vgpuTypeLicenseString,
+nvmlReturn_t nvmlVgpuTypeGetLicense(nvmlVgpuTypeId_t vgpuTypeId, char *vgpuTypeLicenseString,
                                     unsigned int size) {
-  return NVML_OVERRIDE_CALL(nvml_library_entry, nvmlVgpuTypeGetLicense, vgpuTypeId,
-                         vgpuTypeLicenseString, size);
+    return NVML_OVERRIDE_CALL(nvml_library_entry, nvmlVgpuTypeGetLicense, vgpuTypeId,
+                              vgpuTypeLicenseString, size);
 }
 
-nvmlReturn_t nvmlVgpuTypeGetMaxInstances(nvmlDevice_t device,
-                                         nvmlVgpuTypeId_t vgpuTypeId,
+nvmlReturn_t nvmlVgpuTypeGetMaxInstances(nvmlDevice_t device, nvmlVgpuTypeId_t vgpuTypeId,
                                          unsigned int *vgpuInstanceCount) {
-  return NVML_OVERRIDE_CALL(nvml_library_entry, nvmlVgpuTypeGetMaxInstances,
-                         device, vgpuTypeId, vgpuInstanceCount);
+    return NVML_OVERRIDE_CALL(nvml_library_entry, nvmlVgpuTypeGetMaxInstances, device, vgpuTypeId,
+                              vgpuInstanceCount);
 }
 
-nvmlReturn_t nvmlVgpuTypeGetName(nvmlVgpuTypeId_t vgpuTypeId,
-                                 char *vgpuTypeName, unsigned int *size) {
-  return NVML_OVERRIDE_CALL(nvml_library_entry, nvmlVgpuTypeGetName, vgpuTypeId,
-                         vgpuTypeName, size);
+nvmlReturn_t nvmlVgpuTypeGetName(nvmlVgpuTypeId_t vgpuTypeId, char *vgpuTypeName,
+                                 unsigned int *size) {
+    return NVML_OVERRIDE_CALL(nvml_library_entry, nvmlVgpuTypeGetName, vgpuTypeId, vgpuTypeName,
+                              size);
 }
 
 nvmlReturn_t nvmlVgpuTypeGetNumDisplayHeads(nvmlVgpuTypeId_t vgpuTypeId,
                                             unsigned int *numDisplayHeads) {
-  return NVML_OVERRIDE_CALL(nvml_library_entry, nvmlVgpuTypeGetNumDisplayHeads,
-                         vgpuTypeId, numDisplayHeads);
+    return NVML_OVERRIDE_CALL(nvml_library_entry, nvmlVgpuTypeGetNumDisplayHeads, vgpuTypeId,
+                              numDisplayHeads);
 }
 
-nvmlReturn_t nvmlVgpuTypeGetResolution(nvmlVgpuTypeId_t vgpuTypeId,
-                                       unsigned int displayIndex,
+nvmlReturn_t nvmlVgpuTypeGetResolution(nvmlVgpuTypeId_t vgpuTypeId, unsigned int displayIndex,
                                        unsigned int *xdim, unsigned int *ydim) {
-  return NVML_OVERRIDE_CALL(nvml_library_entry, nvmlVgpuTypeGetResolution,
-                         vgpuTypeId, displayIndex, xdim, ydim);
+    return NVML_OVERRIDE_CALL(nvml_library_entry, nvmlVgpuTypeGetResolution, vgpuTypeId,
+                              displayIndex, xdim, ydim);
 }
 
-nvmlReturn_t nvmlDeviceGetFBCSessions(nvmlDevice_t device,
-                                      unsigned int *sessionCount,
+nvmlReturn_t nvmlDeviceGetFBCSessions(nvmlDevice_t device, unsigned int *sessionCount,
                                       nvmlFBCSessionInfo_t *sessionInfo) {
-  return NVML_OVERRIDE_CALL(nvml_library_entry, nvmlDeviceGetFBCSessions, device,
-                         sessionCount, sessionInfo);
+    return NVML_OVERRIDE_CALL(nvml_library_entry, nvmlDeviceGetFBCSessions, device, sessionCount,
+                              sessionInfo);
 }
 
-nvmlReturn_t nvmlDeviceGetFBCStats(nvmlDevice_t device,
-                                   nvmlFBCStats_t *fbcStats) {
-  return NVML_OVERRIDE_CALL(nvml_library_entry, nvmlDeviceGetFBCStats, device,
-                         fbcStats);
+nvmlReturn_t nvmlDeviceGetFBCStats(nvmlDevice_t device, nvmlFBCStats_t *fbcStats) {
+    return NVML_OVERRIDE_CALL(nvml_library_entry, nvmlDeviceGetFBCStats, device, fbcStats);
 }
 
 nvmlReturn_t nvmlDeviceGetGridLicensableFeatures_v2(
-    nvmlDevice_t device,
-    nvmlGridLicensableFeatures_t *pGridLicensableFeatures) {
-  return NVML_OVERRIDE_CALL(nvml_library_entry,
-                         nvmlDeviceGetGridLicensableFeatures_v2, device,
-                         pGridLicensableFeatures);
+    nvmlDevice_t device, nvmlGridLicensableFeatures_t *pGridLicensableFeatures) {
+    return NVML_OVERRIDE_CALL(nvml_library_entry, nvmlDeviceGetGridLicensableFeatures_v2, device,
+                              pGridLicensableFeatures);
 }
 
-nvmlReturn_t nvmlDeviceGetRetiredPages_v2(nvmlDevice_t device,
-                                          nvmlPageRetirementCause_t cause,
-                                          unsigned int *pageCount,
-                                          unsigned long long *addresses) {
-  return NVML_OVERRIDE_CALL(nvml_library_entry, nvmlDeviceGetRetiredPages_v2,
-                         device, cause, pageCount, addresses);
+nvmlReturn_t nvmlDeviceGetRetiredPages_v2(nvmlDevice_t device, nvmlPageRetirementCause_t cause,
+                                          unsigned int *pageCount, unsigned long long *addresses) {
+    return NVML_OVERRIDE_CALL(nvml_library_entry, nvmlDeviceGetRetiredPages_v2, device, cause,
+                              pageCount, addresses);
 }
 
 nvmlReturn_t nvmlDeviceResetGpuLockedClocks(nvmlDevice_t device) {
-  return NVML_OVERRIDE_CALL(nvml_library_entry, nvmlDeviceResetGpuLockedClocks,
-                         device);
+    return NVML_OVERRIDE_CALL(nvml_library_entry, nvmlDeviceResetGpuLockedClocks, device);
 }
 
-nvmlReturn_t nvmlDeviceSetGpuLockedClocks(nvmlDevice_t device,
-                                          unsigned int minGpuClockMHz,
+nvmlReturn_t nvmlDeviceSetGpuLockedClocks(nvmlDevice_t device, unsigned int minGpuClockMHz,
                                           unsigned int maxGpuClockMHz) {
-  return NVML_OVERRIDE_CALL(nvml_library_entry, nvmlDeviceSetGpuLockedClocks,
-                         device, minGpuClockMHz, maxGpuClockMHz);
+    return NVML_OVERRIDE_CALL(nvml_library_entry, nvmlDeviceSetGpuLockedClocks, device,
+                              minGpuClockMHz, maxGpuClockMHz);
 }
 
 nvmlReturn_t nvmlVgpuInstanceGetAccountingMode(nvmlVgpuInstance_t vgpuInstance,
                                                nvmlEnableState_t *mode) {
-  return NVML_OVERRIDE_CALL(nvml_library_entry, nvmlVgpuInstanceGetAccountingMode,
-                         vgpuInstance, mode);
+    return NVML_OVERRIDE_CALL(nvml_library_entry, nvmlVgpuInstanceGetAccountingMode, vgpuInstance,
+                              mode);
 }
 
-nvmlReturn_t nvmlVgpuInstanceGetAccountingPids(nvmlVgpuInstance_t vgpuInstance,
-                                               unsigned int *count,
+nvmlReturn_t nvmlVgpuInstanceGetAccountingPids(nvmlVgpuInstance_t vgpuInstance, unsigned int *count,
                                                unsigned int *pids) {
-  return NVML_OVERRIDE_CALL(nvml_library_entry, nvmlVgpuInstanceGetAccountingPids,
-                         vgpuInstance, count, pids);
+    return NVML_OVERRIDE_CALL(nvml_library_entry, nvmlVgpuInstanceGetAccountingPids, vgpuInstance,
+                              count, pids);
 }
 
-nvmlReturn_t nvmlVgpuInstanceGetAccountingStats(nvmlVgpuInstance_t vgpuInstance,
-                                                unsigned int pid,
+nvmlReturn_t nvmlVgpuInstanceGetAccountingStats(nvmlVgpuInstance_t vgpuInstance, unsigned int pid,
                                                 nvmlAccountingStats_t *stats) {
-  return NVML_OVERRIDE_CALL(nvml_library_entry, nvmlVgpuInstanceGetAccountingStats,
-                         vgpuInstance, pid, stats);
+    return NVML_OVERRIDE_CALL(nvml_library_entry, nvmlVgpuInstanceGetAccountingStats, vgpuInstance,
+                              pid, stats);
 }
 
 nvmlReturn_t nvmlVgpuInstanceGetFBCSessions(nvmlVgpuInstance_t vgpuInstance,
                                             unsigned int *sessionCount,
                                             nvmlFBCSessionInfo_t *sessionInfo) {
-  return NVML_OVERRIDE_CALL(nvml_library_entry, nvmlVgpuInstanceGetFBCSessions,
-                         vgpuInstance, sessionCount, sessionInfo);
+    return NVML_OVERRIDE_CALL(nvml_library_entry, nvmlVgpuInstanceGetFBCSessions, vgpuInstance,
+                              sessionCount, sessionInfo);
 }
 
 nvmlReturn_t nvmlVgpuInstanceGetFBCStats(nvmlVgpuInstance_t vgpuInstance,
                                          nvmlFBCStats_t *fbcStats) {
-  return NVML_OVERRIDE_CALL(nvml_library_entry, nvmlVgpuInstanceGetFBCStats,
-                         vgpuInstance, fbcStats);
+    return NVML_OVERRIDE_CALL(nvml_library_entry, nvmlVgpuInstanceGetFBCStats, vgpuInstance,
+                              fbcStats);
 }
 
-nvmlReturn_t
-nvmlVgpuTypeGetMaxInstancesPerVm(nvmlVgpuTypeId_t vgpuTypeId,
-                                 unsigned int *vgpuInstanceCountPerVm) {
-  return NVML_OVERRIDE_CALL(nvml_library_entry, nvmlVgpuTypeGetMaxInstancesPerVm,
-                         vgpuTypeId, vgpuInstanceCountPerVm);
+nvmlReturn_t nvmlVgpuTypeGetMaxInstancesPerVm(nvmlVgpuTypeId_t vgpuTypeId,
+                                              unsigned int *vgpuInstanceCountPerVm) {
+    return NVML_OVERRIDE_CALL(nvml_library_entry, nvmlVgpuTypeGetMaxInstancesPerVm, vgpuTypeId,
+                              vgpuInstanceCountPerVm);
 }
 
-nvmlReturn_t nvmlGetVgpuVersion(nvmlVgpuVersion_t *supported,
-                                nvmlVgpuVersion_t *current) {
-  return NVML_OVERRIDE_CALL(nvml_library_entry, nvmlGetVgpuVersion, supported,
-                         current);
+nvmlReturn_t nvmlGetVgpuVersion(nvmlVgpuVersion_t *supported, nvmlVgpuVersion_t *current) {
+    return NVML_OVERRIDE_CALL(nvml_library_entry, nvmlGetVgpuVersion, supported, current);
 }
 
 nvmlReturn_t nvmlSetVgpuVersion(nvmlVgpuVersion_t *vgpuVersion) {
-  return NVML_OVERRIDE_CALL(nvml_library_entry, nvmlSetVgpuVersion, vgpuVersion);
+    return NVML_OVERRIDE_CALL(nvml_library_entry, nvmlSetVgpuVersion, vgpuVersion);
 }
 
 nvmlReturn_t nvmlDeviceGetGridLicensableFeatures_v3(
-    nvmlDevice_t device,
-    nvmlGridLicensableFeatures_t *pGridLicensableFeatures) {
-  return NVML_OVERRIDE_CALL(nvml_library_entry,
-                         nvmlDeviceGetGridLicensableFeatures_v3, device,
-                         pGridLicensableFeatures);
+    nvmlDevice_t device, nvmlGridLicensableFeatures_t *pGridLicensableFeatures) {
+    return NVML_OVERRIDE_CALL(nvml_library_entry, nvmlDeviceGetGridLicensableFeatures_v3, device,
+                              pGridLicensableFeatures);
 }
 
-nvmlReturn_t nvmlDeviceGetHostVgpuMode(nvmlDevice_t device,
-                                       nvmlHostVgpuMode_t *pHostVgpuMode) {
-  return NVML_OVERRIDE_CALL(nvml_library_entry, nvmlDeviceGetHostVgpuMode, device,
-                         pHostVgpuMode);
+nvmlReturn_t nvmlDeviceGetHostVgpuMode(nvmlDevice_t device, nvmlHostVgpuMode_t *pHostVgpuMode) {
+    return NVML_OVERRIDE_CALL(nvml_library_entry, nvmlDeviceGetHostVgpuMode, device, pHostVgpuMode);
 }
 
-nvmlReturn_t nvmlDeviceGetPgpuMetadataString(nvmlDevice_t device,
-                                             char *pgpuMetadata,
+nvmlReturn_t nvmlDeviceGetPgpuMetadataString(nvmlDevice_t device, char *pgpuMetadata,
                                              unsigned int *bufferSize) {
-  return NVML_OVERRIDE_CALL(nvml_library_entry, nvmlDeviceGetPgpuMetadataString,
-                         device, pgpuMetadata, bufferSize);
+    return NVML_OVERRIDE_CALL(nvml_library_entry, nvmlDeviceGetPgpuMetadataString, device,
+                              pgpuMetadata, bufferSize);
 }
 
 nvmlReturn_t nvmlVgpuInstanceGetEccMode(nvmlVgpuInstance_t vgpuInstance,
                                         nvmlEnableState_t *eccMode) {
-  return NVML_OVERRIDE_CALL(nvml_library_entry, nvmlVgpuInstanceGetEccMode,
-                         vgpuInstance, eccMode);
+    return NVML_OVERRIDE_CALL(nvml_library_entry, nvmlVgpuInstanceGetEccMode, vgpuInstance,
+                              eccMode);
 }
 
 nvmlReturn_t nvmlComputeInstanceDestroy(nvmlComputeInstance_t computeInstance) {
-  return NVML_OVERRIDE_CALL(nvml_library_entry, nvmlComputeInstanceDestroy,
-                         computeInstance);
+    return NVML_OVERRIDE_CALL(nvml_library_entry, nvmlComputeInstanceDestroy, computeInstance);
 }
 
 nvmlReturn_t nvmlComputeInstanceGetInfo(nvmlComputeInstance_t computeInstance,
                                         nvmlComputeInstanceInfo_t *info) {
-  return NVML_OVERRIDE_CALL(nvml_library_entry, nvmlComputeInstanceGetInfo,
-                         computeInstance, info);
+    return NVML_OVERRIDE_CALL(nvml_library_entry, nvmlComputeInstanceGetInfo, computeInstance,
+                              info);
 }
 
-nvmlReturn_t nvmlDeviceCreateGpuInstance(nvmlDevice_t device,
-                                         unsigned int profileId,
+nvmlReturn_t nvmlDeviceCreateGpuInstance(nvmlDevice_t device, unsigned int profileId,
                                          nvmlGpuInstance_t *gpuInstance) {
-  return NVML_OVERRIDE_CALL(nvml_library_entry, nvmlDeviceCreateGpuInstance,
-                         device, profileId, gpuInstance);
+    return NVML_OVERRIDE_CALL(nvml_library_entry, nvmlDeviceCreateGpuInstance, device, profileId,
+                              gpuInstance);
 }
 
-nvmlReturn_t nvmlDeviceGetArchitecture(nvmlDevice_t device,
-                                       nvmlDeviceArchitecture_t *arch) {
-  return NVML_OVERRIDE_CALL(nvml_library_entry, nvmlDeviceGetArchitecture, device,
-                         arch);
+nvmlReturn_t nvmlDeviceGetArchitecture(nvmlDevice_t device, nvmlDeviceArchitecture_t *arch) {
+    return NVML_OVERRIDE_CALL(nvml_library_entry, nvmlDeviceGetArchitecture, device, arch);
 }
 
-nvmlReturn_t nvmlDeviceGetAttributes(nvmlDevice_t device,
-                                     nvmlDeviceAttributes_t *attributes) {
-  return NVML_OVERRIDE_CALL(nvml_library_entry, nvmlDeviceGetAttributes, device,
-                         attributes);
+nvmlReturn_t nvmlDeviceGetAttributes(nvmlDevice_t device, nvmlDeviceAttributes_t *attributes) {
+    return NVML_OVERRIDE_CALL(nvml_library_entry, nvmlDeviceGetAttributes, device, attributes);
 }
 
-nvmlReturn_t nvmlDeviceGetAttributes_v2(nvmlDevice_t device,
-                                        nvmlDeviceAttributes_t *attributes) {
-  return NVML_OVERRIDE_CALL(nvml_library_entry, nvmlDeviceGetAttributes_v2, device,
-                         attributes);
+nvmlReturn_t nvmlDeviceGetAttributes_v2(nvmlDevice_t device, nvmlDeviceAttributes_t *attributes) {
+    return NVML_OVERRIDE_CALL(nvml_library_entry, nvmlDeviceGetAttributes_v2, device, attributes);
 }
 
-nvmlReturn_t nvmlDeviceGetComputeInstanceId(nvmlDevice_t device,
-                                            unsigned int *id) {
-  return NVML_OVERRIDE_CALL(nvml_library_entry, nvmlDeviceGetComputeInstanceId,
-                         device, id);
+nvmlReturn_t nvmlDeviceGetComputeInstanceId(nvmlDevice_t device, unsigned int *id) {
+    return NVML_OVERRIDE_CALL(nvml_library_entry, nvmlDeviceGetComputeInstanceId, device, id);
 }
 
-nvmlReturn_t nvmlDeviceGetCpuAffinityWithinScope(nvmlDevice_t device,
-                                                 unsigned int cpuSetSize,
-                                                 unsigned long *cpuSet,
-                                                 nvmlAffinityScope_t scope) {
-  return NVML_OVERRIDE_CALL(nvml_library_entry,
-                         nvmlDeviceGetCpuAffinityWithinScope, device,
-                         cpuSetSize, cpuSet, scope);
+nvmlReturn_t nvmlDeviceGetCpuAffinityWithinScope(nvmlDevice_t device, unsigned int cpuSetSize,
+                                                 unsigned long *cpuSet, nvmlAffinityScope_t scope) {
+    return NVML_OVERRIDE_CALL(nvml_library_entry, nvmlDeviceGetCpuAffinityWithinScope, device,
+                              cpuSetSize, cpuSet, scope);
 }
 
-nvmlReturn_t
-nvmlDeviceGetDeviceHandleFromMigDeviceHandle(nvmlDevice_t migDevice,
-                                             nvmlDevice_t *device) {
-  return NVML_OVERRIDE_CALL(nvml_library_entry,
-                         nvmlDeviceGetDeviceHandleFromMigDeviceHandle,
-                         migDevice, device);
+nvmlReturn_t nvmlDeviceGetDeviceHandleFromMigDeviceHandle(nvmlDevice_t migDevice,
+                                                          nvmlDevice_t *device) {
+    return NVML_OVERRIDE_CALL(nvml_library_entry, nvmlDeviceGetDeviceHandleFromMigDeviceHandle,
+                              migDevice, device);
 }
 
 nvmlReturn_t nvmlDeviceGetGpuInstanceById(nvmlDevice_t device, unsigned int id,
                                           nvmlGpuInstance_t *gpuInstance) {
-  return NVML_OVERRIDE_CALL(nvml_library_entry, nvmlDeviceGetGpuInstanceById,
-                         device, id, gpuInstance);
+    return NVML_OVERRIDE_CALL(nvml_library_entry, nvmlDeviceGetGpuInstanceById, device, id,
+                              gpuInstance);
 }
 
 nvmlReturn_t nvmlDeviceGetGpuInstanceId(nvmlDevice_t device, unsigned int *id) {
-  return NVML_OVERRIDE_CALL(nvml_library_entry, nvmlDeviceGetGpuInstanceId, device,
-                         id);
+    return NVML_OVERRIDE_CALL(nvml_library_entry, nvmlDeviceGetGpuInstanceId, device, id);
 }
 
-nvmlReturn_t nvmlDeviceGetGpuInstancePossiblePlacements(
-    nvmlDevice_t device, unsigned int profileId,
-    nvmlGpuInstancePlacement_t *placements, unsigned int *count) {
-  return NVML_OVERRIDE_CALL(nvml_library_entry,
-                         nvmlDeviceGetGpuInstancePossiblePlacements, device,
-                         profileId, placements, count);
+nvmlReturn_t nvmlDeviceGetGpuInstancePossiblePlacements(nvmlDevice_t device, unsigned int profileId,
+                                                        nvmlGpuInstancePlacement_t *placements,
+                                                        unsigned int *count) {
+    return NVML_OVERRIDE_CALL(nvml_library_entry, nvmlDeviceGetGpuInstancePossiblePlacements,
+                              device, profileId, placements, count);
 }
 
-nvmlReturn_t
-nvmlDeviceGetGpuInstanceProfileInfo(nvmlDevice_t device, unsigned int profile,
-                                    nvmlGpuInstanceProfileInfo_t *info) {
-  return NVML_OVERRIDE_CALL(nvml_library_entry,
-                         nvmlDeviceGetGpuInstanceProfileInfo, device, profile,
-                         info);
+nvmlReturn_t nvmlDeviceGetGpuInstanceProfileInfo(nvmlDevice_t device, unsigned int profile,
+                                                 nvmlGpuInstanceProfileInfo_t *info) {
+    return NVML_OVERRIDE_CALL(nvml_library_entry, nvmlDeviceGetGpuInstanceProfileInfo, device,
+                              profile, info);
 }
 
-nvmlReturn_t nvmlDeviceGetGpuInstanceRemainingCapacity(nvmlDevice_t device,
-                                                       unsigned int profileId,
+nvmlReturn_t nvmlDeviceGetGpuInstanceRemainingCapacity(nvmlDevice_t device, unsigned int profileId,
                                                        unsigned int *count) {
-  return NVML_OVERRIDE_CALL(nvml_library_entry,
-                         nvmlDeviceGetGpuInstanceRemainingCapacity, device,
-                         profileId, count);
+    return NVML_OVERRIDE_CALL(nvml_library_entry, nvmlDeviceGetGpuInstanceRemainingCapacity, device,
+                              profileId, count);
 }
 
-nvmlReturn_t nvmlDeviceGetGpuInstances(nvmlDevice_t device,
-                                       unsigned int profileId,
-                                       nvmlGpuInstance_t *gpuInstances,
-                                       unsigned int *count) {
-  return NVML_OVERRIDE_CALL(nvml_library_entry, nvmlDeviceGetGpuInstances, device,
-                         profileId, gpuInstances, count);
+nvmlReturn_t nvmlDeviceGetGpuInstances(nvmlDevice_t device, unsigned int profileId,
+                                       nvmlGpuInstance_t *gpuInstances, unsigned int *count) {
+    return NVML_OVERRIDE_CALL(nvml_library_entry, nvmlDeviceGetGpuInstances, device, profileId,
+                              gpuInstances, count);
 }
 
-nvmlReturn_t nvmlDeviceGetMaxMigDeviceCount(nvmlDevice_t device,
-                                            unsigned int *count) {
-  return NVML_OVERRIDE_CALL(nvml_library_entry, nvmlDeviceGetMaxMigDeviceCount,
-                         device, count);
+nvmlReturn_t nvmlDeviceGetMaxMigDeviceCount(nvmlDevice_t device, unsigned int *count) {
+    return NVML_OVERRIDE_CALL(nvml_library_entry, nvmlDeviceGetMaxMigDeviceCount, device, count);
 }
 
-nvmlReturn_t nvmlDeviceGetMemoryAffinity(nvmlDevice_t device,
-                                         unsigned int nodeSetSize,
-                                         unsigned long *nodeSet,
-                                         nvmlAffinityScope_t scope) {
-  return NVML_OVERRIDE_CALL(nvml_library_entry, nvmlDeviceGetMemoryAffinity,
-                         device, nodeSetSize, nodeSet, scope);
+nvmlReturn_t nvmlDeviceGetMemoryAffinity(nvmlDevice_t device, unsigned int nodeSetSize,
+                                         unsigned long *nodeSet, nvmlAffinityScope_t scope) {
+    return NVML_OVERRIDE_CALL(nvml_library_entry, nvmlDeviceGetMemoryAffinity, device, nodeSetSize,
+                              nodeSet, scope);
 }
 
-nvmlReturn_t nvmlDeviceGetMigDeviceHandleByIndex(nvmlDevice_t device,
-                                                 unsigned int index,
+nvmlReturn_t nvmlDeviceGetMigDeviceHandleByIndex(nvmlDevice_t device, unsigned int index,
                                                  nvmlDevice_t *migDevice) {
-  return NVML_OVERRIDE_CALL(nvml_library_entry,
-                         nvmlDeviceGetMigDeviceHandleByIndex, device, index,
-                         migDevice);
+    return NVML_OVERRIDE_CALL(nvml_library_entry, nvmlDeviceGetMigDeviceHandleByIndex, device,
+                              index, migDevice);
 }
 
-nvmlReturn_t nvmlDeviceGetMigMode(nvmlDevice_t device,
-                                  unsigned int *currentMode,
+nvmlReturn_t nvmlDeviceGetMigMode(nvmlDevice_t device, unsigned int *currentMode,
                                   unsigned int *pendingMode) {
-  return NVML_OVERRIDE_CALL(nvml_library_entry, nvmlDeviceGetMigMode, device,
-                         currentMode, pendingMode);
+    return NVML_OVERRIDE_CALL(nvml_library_entry, nvmlDeviceGetMigMode, device, currentMode,
+                              pendingMode);
 }
 
-nvmlReturn_t nvmlDeviceGetRemappedRows(nvmlDevice_t device,
-                                       unsigned int *corrRows,
-                                       unsigned int *uncRows,
-                                       unsigned int *isPending,
+nvmlReturn_t nvmlDeviceGetRemappedRows(nvmlDevice_t device, unsigned int *corrRows,
+                                       unsigned int *uncRows, unsigned int *isPending,
                                        unsigned int *failureOccurred) {
-  return NVML_OVERRIDE_CALL(nvml_library_entry, nvmlDeviceGetRemappedRows, device,
-                         corrRows, uncRows, isPending, failureOccurred);
+    return NVML_OVERRIDE_CALL(nvml_library_entry, nvmlDeviceGetRemappedRows, device, corrRows,
+                              uncRows, isPending, failureOccurred);
 }
 
-nvmlReturn_t
-nvmlDeviceGetRowRemapperHistogram(nvmlDevice_t device,
-                                  nvmlRowRemapperHistogramValues_t *values) {
-  return NVML_OVERRIDE_CALL(nvml_library_entry, nvmlDeviceGetRowRemapperHistogram,
-                         device, values);
+nvmlReturn_t nvmlDeviceGetRowRemapperHistogram(nvmlDevice_t device,
+                                               nvmlRowRemapperHistogramValues_t *values) {
+    return NVML_OVERRIDE_CALL(nvml_library_entry, nvmlDeviceGetRowRemapperHistogram, device,
+                              values);
 }
 
-nvmlReturn_t nvmlDeviceIsMigDeviceHandle(nvmlDevice_t device,
-                                         unsigned int *isMigDevice) {
-  return NVML_OVERRIDE_CALL(nvml_library_entry, nvmlDeviceIsMigDeviceHandle,
-                         device, isMigDevice);
+nvmlReturn_t nvmlDeviceIsMigDeviceHandle(nvmlDevice_t device, unsigned int *isMigDevice) {
+    return NVML_OVERRIDE_CALL(nvml_library_entry, nvmlDeviceIsMigDeviceHandle, device, isMigDevice);
 }
 
 nvmlReturn_t nvmlDeviceSetMigMode(nvmlDevice_t device, unsigned int mode,
                                   nvmlReturn_t *activationStatus) {
-  return NVML_OVERRIDE_CALL(nvml_library_entry, nvmlDeviceSetMigMode, device, mode,
-                         activationStatus);
+    return NVML_OVERRIDE_CALL(nvml_library_entry, nvmlDeviceSetMigMode, device, mode,
+                              activationStatus);
 }
 
 nvmlReturn_t nvmlEventSetWait_v2(nvmlEventSet_t set, nvmlEventData_t *data,
                                  unsigned int timeoutms) {
-  return NVML_OVERRIDE_CALL(nvml_library_entry, nvmlEventSetWait_v2, set, data,
-                         timeoutms);
+    return NVML_OVERRIDE_CALL(nvml_library_entry, nvmlEventSetWait_v2, set, data, timeoutms);
 }
 
-nvmlReturn_t
-nvmlGpuInstanceCreateComputeInstance(nvmlGpuInstance_t gpuInstance,
-                                     unsigned int profileId,
-                                     nvmlComputeInstance_t *computeInstance) {
-  return NVML_OVERRIDE_CALL(nvml_library_entry,
-                         nvmlGpuInstanceCreateComputeInstance, gpuInstance,
-                         profileId, computeInstance);
+nvmlReturn_t nvmlGpuInstanceCreateComputeInstance(nvmlGpuInstance_t gpuInstance,
+                                                  unsigned int profileId,
+                                                  nvmlComputeInstance_t *computeInstance) {
+    return NVML_OVERRIDE_CALL(nvml_library_entry, nvmlGpuInstanceCreateComputeInstance, gpuInstance,
+                              profileId, computeInstance);
 }
 
 nvmlReturn_t nvmlGpuInstanceDestroy(nvmlGpuInstance_t gpuInstance) {
-  return NVML_OVERRIDE_CALL(nvml_library_entry, nvmlGpuInstanceDestroy,
-                         gpuInstance);
+    return NVML_OVERRIDE_CALL(nvml_library_entry, nvmlGpuInstanceDestroy, gpuInstance);
 }
 
-nvmlReturn_t
-nvmlGpuInstanceGetComputeInstanceById(nvmlGpuInstance_t gpuInstance,
-                                      unsigned int id,
-                                      nvmlComputeInstance_t *computeInstance) {
-  return NVML_OVERRIDE_CALL(nvml_library_entry,
-                         nvmlGpuInstanceGetComputeInstanceById, gpuInstance, id,
-                         computeInstance);
+nvmlReturn_t nvmlGpuInstanceGetComputeInstanceById(nvmlGpuInstance_t gpuInstance, unsigned int id,
+                                                   nvmlComputeInstance_t *computeInstance) {
+    return NVML_OVERRIDE_CALL(nvml_library_entry, nvmlGpuInstanceGetComputeInstanceById,
+                              gpuInstance, id, computeInstance);
 }
 
-nvmlReturn_t nvmlGpuInstanceGetComputeInstanceProfileInfo(
-    nvmlGpuInstance_t gpuInstance, unsigned int profile,
-    unsigned int engProfile, nvmlComputeInstanceProfileInfo_t *info) {
-  return NVML_OVERRIDE_CALL(nvml_library_entry,
-                         nvmlGpuInstanceGetComputeInstanceProfileInfo,
-                         gpuInstance, profile, engProfile, info);
+nvmlReturn_t nvmlGpuInstanceGetComputeInstanceProfileInfo(nvmlGpuInstance_t gpuInstance,
+                                                          unsigned int profile,
+                                                          unsigned int engProfile,
+                                                          nvmlComputeInstanceProfileInfo_t *info) {
+    return NVML_OVERRIDE_CALL(nvml_library_entry, nvmlGpuInstanceGetComputeInstanceProfileInfo,
+                              gpuInstance, profile, engProfile, info);
 }
 
-nvmlReturn_t nvmlGpuInstanceGetComputeInstanceRemainingCapacity(
-    nvmlGpuInstance_t gpuInstance, unsigned int profileId,
-    unsigned int *count) {
-  return NVML_OVERRIDE_CALL(nvml_library_entry,
-                         nvmlGpuInstanceGetComputeInstanceRemainingCapacity,
-                         gpuInstance, profileId, count);
+nvmlReturn_t nvmlGpuInstanceGetComputeInstanceRemainingCapacity(nvmlGpuInstance_t gpuInstance,
+                                                                unsigned int profileId,
+                                                                unsigned int *count) {
+    return NVML_OVERRIDE_CALL(nvml_library_entry,
+                              nvmlGpuInstanceGetComputeInstanceRemainingCapacity, gpuInstance,
+                              profileId, count);
 }
 
-nvmlReturn_t nvmlGpuInstanceGetComputeInstances(
-    nvmlGpuInstance_t gpuInstance, unsigned int profileId,
-    nvmlComputeInstance_t *computeInstances, unsigned int *count) {
-  return NVML_OVERRIDE_CALL(nvml_library_entry, nvmlGpuInstanceGetComputeInstances,
-                         gpuInstance, profileId, computeInstances, count);
+nvmlReturn_t nvmlGpuInstanceGetComputeInstances(nvmlGpuInstance_t gpuInstance,
+                                                unsigned int profileId,
+                                                nvmlComputeInstance_t *computeInstances,
+                                                unsigned int *count) {
+    return NVML_OVERRIDE_CALL(nvml_library_entry, nvmlGpuInstanceGetComputeInstances, gpuInstance,
+                              profileId, computeInstances, count);
 }
 
-nvmlReturn_t nvmlGpuInstanceGetInfo(nvmlGpuInstance_t gpuInstance,
-                                    nvmlGpuInstanceInfo_t *info) {
-  return NVML_OVERRIDE_CALL(nvml_library_entry, nvmlGpuInstanceGetInfo,
-                         gpuInstance, info);
+nvmlReturn_t nvmlGpuInstanceGetInfo(nvmlGpuInstance_t gpuInstance, nvmlGpuInstanceInfo_t *info) {
+    return NVML_OVERRIDE_CALL(nvml_library_entry, nvmlGpuInstanceGetInfo, gpuInstance, info);
 }
 
-nvmlReturn_t
-nvmlVgpuInstanceClearAccountingPids(nvmlVgpuInstance_t vgpuInstance) {
-  return NVML_OVERRIDE_CALL(nvml_library_entry,
-                         nvmlVgpuInstanceClearAccountingPids, vgpuInstance);
+nvmlReturn_t nvmlVgpuInstanceClearAccountingPids(nvmlVgpuInstance_t vgpuInstance) {
+    return NVML_OVERRIDE_CALL(nvml_library_entry, nvmlVgpuInstanceClearAccountingPids,
+                              vgpuInstance);
 }
 
-nvmlReturn_t nvmlVgpuInstanceGetMdevUUID(nvmlVgpuInstance_t vgpuInstance,
-                                         char *mdevUuid, unsigned int size) {
-  return NVML_OVERRIDE_CALL(nvml_library_entry, nvmlVgpuInstanceGetMdevUUID,
-                         vgpuInstance, mdevUuid, size);
+nvmlReturn_t nvmlVgpuInstanceGetMdevUUID(nvmlVgpuInstance_t vgpuInstance, char *mdevUuid,
+                                         unsigned int size) {
+    return NVML_OVERRIDE_CALL(nvml_library_entry, nvmlVgpuInstanceGetMdevUUID, vgpuInstance,
+                              mdevUuid, size);
 }
 
-nvmlReturn_t
-nvmlComputeInstanceGetInfo_v2(nvmlComputeInstance_t computeInstance,
-                              nvmlComputeInstanceInfo_t *info) {
-  return NVML_OVERRIDE_CALL(nvml_library_entry, nvmlComputeInstanceGetInfo_v2,
-                         computeInstance, info);
+nvmlReturn_t nvmlComputeInstanceGetInfo_v2(nvmlComputeInstance_t computeInstance,
+                                           nvmlComputeInstanceInfo_t *info) {
+    return NVML_OVERRIDE_CALL(nvml_library_entry, nvmlComputeInstanceGetInfo_v2, computeInstance,
+                              info);
 }
 
-nvmlReturn_t nvmlDeviceGetComputeRunningProcesses_v2(nvmlDevice_t device,
-                                                     unsigned int *infoCount,
+nvmlReturn_t nvmlDeviceGetComputeRunningProcesses_v2(nvmlDevice_t device, unsigned int *infoCount,
                                                      nvmlProcessInfo_t *infos) {
-  return NVML_OVERRIDE_CALL(nvml_library_entry,
-                         nvmlDeviceGetComputeRunningProcesses_v2, device,
-                         infoCount, infos);
+    return NVML_OVERRIDE_CALL(nvml_library_entry, nvmlDeviceGetComputeRunningProcesses_v2, device,
+                              infoCount, infos);
 }
-nvmlReturn_t nvmlDeviceGetGraphicsRunningProcesses_v2(
-    nvmlDevice_t device, unsigned int *infoCount, nvmlProcessInfo_t *infos) {
-  return NVML_OVERRIDE_CALL(nvml_library_entry,
-                         nvmlDeviceGetGraphicsRunningProcesses_v2, device,
-                         infoCount, infos);
+nvmlReturn_t nvmlDeviceGetGraphicsRunningProcesses_v2(nvmlDevice_t device, unsigned int *infoCount,
+                                                      nvmlProcessInfo_t *infos) {
+    return NVML_OVERRIDE_CALL(nvml_library_entry, nvmlDeviceGetGraphicsRunningProcesses_v2, device,
+                              infoCount, infos);
 }
-nvmlReturn_t nvmlDeviceSetTemperatureThreshold(
-    nvmlDevice_t device, nvmlTemperatureThresholds_t thresholdType, int *temp) {
-  return NVML_OVERRIDE_CALL(nvml_library_entry, nvmlDeviceSetTemperatureThreshold,
-                         device, thresholdType, temp);
+nvmlReturn_t nvmlDeviceSetTemperatureThreshold(nvmlDevice_t device,
+                                               nvmlTemperatureThresholds_t thresholdType,
+                                               int *temp) {
+    return NVML_OVERRIDE_CALL(nvml_library_entry, nvmlDeviceSetTemperatureThreshold, device,
+                              thresholdType, temp);
 }
 
 /** no prototype
@@ -1555,13 +1303,11 @@ nvmlReturn_t nvmlRetry_NvRmControl() {}
 
 nvmlReturn_t nvmlVgpuInstanceGetGpuInstanceId(nvmlVgpuInstance_t vgpuInstance,
                                               unsigned int *gpuInstanceId) {
-  return NVML_OVERRIDE_CALL(nvml_library_entry, nvmlVgpuInstanceGetGpuInstanceId,
-                         vgpuInstance, gpuInstanceId);
+    return NVML_OVERRIDE_CALL(nvml_library_entry, nvmlVgpuInstanceGetGpuInstanceId, vgpuInstance,
+                              gpuInstanceId);
 }
-nvmlReturn_t
-nvmlVgpuTypeGetGpuInstanceProfileId(nvmlVgpuTypeId_t vgpuTypeId,
-                                    unsigned int *gpuInstanceProfileId) {
-  return NVML_OVERRIDE_CALL(nvml_library_entry,
-                         nvmlVgpuTypeGetGpuInstanceProfileId, vgpuTypeId,
-                         gpuInstanceProfileId);
+nvmlReturn_t nvmlVgpuTypeGetGpuInstanceProfileId(nvmlVgpuTypeId_t vgpuTypeId,
+                                                 unsigned int *gpuInstanceProfileId) {
+    return NVML_OVERRIDE_CALL(nvml_library_entry, nvmlVgpuTypeGetGpuInstanceProfileId, vgpuTypeId,
+                              gpuInstanceProfileId);
 }
\ No newline at end of file
diff --git a/src/utils.c b/src/utils.c
old mode 100755
new mode 100644
index 449fa714..2eedbb8b
--- a/src/utils.c
+++ b/src/utils.c
@@ -1,17 +1,19 @@
+#include "include/utils.h"
+
+#include <ctype.h>
+#include <dirent.h>
+#include <nvml.h>
 #include <stdio.h>
 #include <string.h>
-#include <dirent.h>
-#include <ctype.h>
 #include <time.h>
-#include "include/utils.h"
+
+#include "include/libcuda_hook.h"
 #include "include/log_utils.h"
-#include "include/nvml_prefix.h"
-#include <nvml.h>
 #include "include/nvml_override.h"
-#include "include/libcuda_hook.h"
+#include "include/nvml_prefix.h"
 #include "multiprocess/multiprocess_memory_limit.h"
 
-const char* unified_lock="/tmp/vgpulock/lock";
+const char *unified_lock = "/tmp/vgpulock/lock";
 static int lock_fd = -1;
 extern size_t context_size;
 extern int cuda_to_nvml_map_array[CUDA_DEVICE_MAX_COUNT];
@@ -48,20 +50,21 @@ int try_unlock_unified_lock() {
     return res == 0 ? 0 : -1;
 }
 
-int mergepid(unsigned int *prev, unsigned int *current, nvmlProcessInfo_t1 *sub, nvmlProcessInfo_t1 *merged) {
-    int i,j;
-    int found=0;
-    for (i=0;i<*prev;i++){
-        found=0;
-        for (j=0;j<*current;j++) {
-            LOG_INFO("merge pid=%d",sub[i].pid);
+int mergepid(unsigned int *prev, unsigned int *current, nvmlProcessInfo_t1 *sub,
+             nvmlProcessInfo_t1 *merged) {
+    int i, j;
+    int found = 0;
+    for (i = 0; i < *prev; i++) {
+        found = 0;
+        for (j = 0; j < *current; j++) {
+            LOG_INFO("merge pid=%d", sub[i].pid);
             if (sub[i].pid == merged[j].pid) {
                 found = 1;
                 break;
-            } 
+            }
         }
         if (!found) {
-            LOG_DEBUG("merged pid=%d\n",sub[i].pid);
+            LOG_DEBUG("merged pid=%d\n", sub[i].pid);
             merged[*current].pid = sub[i].pid;
             (*current)++;
         }
@@ -69,33 +72,32 @@ int mergepid(unsigned int *prev, unsigned int *current, nvmlProcessInfo_t1 *sub,
     return 0;
 }
 
-int getextrapid(unsigned int prev, unsigned int current, nvmlProcessInfo_t1 *pre_pids_on_device, nvmlProcessInfo_t1 *pids_on_device) {
-    int i,j;
+int getextrapid(unsigned int prev, unsigned int current, nvmlProcessInfo_t1 *pre_pids_on_device,
+                nvmlProcessInfo_t1 *pids_on_device) {
+    int i, j;
     int found = 0;
-    for (i=0; i<prev; i++){
-        LOG_INFO("prev pids[%d]=%d",i,pre_pids_on_device[i].pid);
+    for (i = 0; i < prev; i++) {
+        LOG_INFO("prev pids[%d]=%d", i, pre_pids_on_device[i].pid);
     }
-    for (i=0; i< current; i++) {
-        LOG_INFO("current pids[%d]=%d",i,pids_on_device[i].pid);
+    for (i = 0; i < current; i++) {
+        LOG_INFO("current pids[%d]=%d", i, pids_on_device[i].pid);
     }
-    if (current-prev<=0)
-        return 0;
-    for (i=0; i<current; i++) {
+    if (current <= prev) return 0;  // no new pid; unsigned "current - prev" would wrap instead
+    for (i = 0; i < current; i++) {
         found = 0;
-        for (j=0; j<prev; j++) {
+        for (j = 0; j < prev; j++) {
             if (pids_on_device[i].pid == pre_pids_on_device[j].pid) {
                 found = 1;
                 break;
             }
         }
-        if (!found)
-            return pids_on_device[i].pid;
+        if (!found) return pids_on_device[i].pid;  // first pid not in pre_pids_on_device
     }
     return 0;
 }
 
 nvmlReturn_t set_task_pid() {
-    unsigned int running_processes=0,previous=0,merged_num=0;
+    unsigned int running_processes = 0, previous = 0, merged_num = 0;
     nvmlProcessInfo_v1_t tmp_pids_on_device[SHARED_REGION_MAX_PROCESS_NUM];
     nvmlProcessInfo_t1 pre_pids_on_device[SHARED_REGION_MAX_PROCESS_NUM];
     nvmlProcessInfo_t1 pids_on_device[SHARED_REGION_MAX_PROCESS_NUM];
@@ -105,70 +107,74 @@ nvmlReturn_t set_task_pid() {
     int i;
     CHECK_NVML_API(nvmlInit());
     CHECK_NVML_API(nvmlDeviceGetHandleByIndex(0, &device));
-    
+
     unsigned int nvmlCounts;
     CHECK_NVML_API(nvmlDeviceGetCount(&nvmlCounts));
-    
+
     int cudaDev;
-    for (i=0;i<nvmlCounts;i++){
-        cudaDev=nvml_to_cuda_map(i);
-        if (cudaDev<0) {
+    for (i = 0; i < nvmlCounts; i++) {
+        cudaDev = nvml_to_cuda_map(i);
+        if (cudaDev < 0) {
             continue;
         }
         CHECK_NVML_API(nvmlDeviceGetHandleByIndex(i, &device));
-        do{
+        do {
             res = nvmlDeviceGetComputeRunningProcesses(device, &previous, tmp_pids_on_device);
             if ((res != NVML_SUCCESS) && (res != NVML_ERROR_INSUFFICIENT_SIZE)) {
-                LOG_ERROR("Device2GetComputeRunningProcesses failed %d,%d\n",res,i);
+                LOG_ERROR("Device2GetComputeRunningProcesses failed %d,%d\n", res, i);
                 return res;
             }
-        }while(res==NVML_ERROR_INSUFFICIENT_SIZE); 
-        mergepid(&previous,&merged_num,(nvmlProcessInfo_t1 *)tmp_pids_on_device,pre_pids_on_device);
+        } while (res == NVML_ERROR_INSUFFICIENT_SIZE);
+        mergepid(&previous, &merged_num, (nvmlProcessInfo_t1 *)tmp_pids_on_device,
+                 pre_pids_on_device);
         break;
     }
     previous = merged_num;
     merged_num = 0;
-    memset(tmp_pids_on_device,0,sizeof(nvmlProcessInfo_v1_t)*SHARED_REGION_MAX_PROCESS_NUM);
-    CHECK_CU_RESULT(cuDevicePrimaryCtxRetain(&pctx,0));
-    for (i=0;i<nvmlCounts;i++) {
-        cudaDev=nvml_to_cuda_map(i);
-        if (cudaDev<0) {
+    memset(tmp_pids_on_device, 0, sizeof(nvmlProcessInfo_v1_t) * SHARED_REGION_MAX_PROCESS_NUM);
+    CHECK_CU_RESULT(cuDevicePrimaryCtxRetain(&pctx, 0));
+    for (i = 0; i < nvmlCounts; i++) {
+        cudaDev = nvml_to_cuda_map(i);
+        if (cudaDev < 0) {
             continue;
         }
-        CHECK_NVML_API(nvmlDeviceGetHandleByIndex (i, &device)); 
-        do{
-            res = nvmlDeviceGetComputeRunningProcesses(device, &running_processes, tmp_pids_on_device);
+        CHECK_NVML_API(nvmlDeviceGetHandleByIndex(i, &device));
+        do {
+            res = nvmlDeviceGetComputeRunningProcesses(device, &running_processes,
+                                                       tmp_pids_on_device);
             if ((res != NVML_SUCCESS) && (res != NVML_ERROR_INSUFFICIENT_SIZE)) {
-                LOG_ERROR("Device2GetComputeRunningProcesses failed %d\n",res);
+                LOG_ERROR("Device2GetComputeRunningProcesses failed %d\n", res);
                 return res;
             }
-        }while(res == NVML_ERROR_INSUFFICIENT_SIZE);
-        mergepid(&running_processes,&merged_num,(nvmlProcessInfo_t1 *)tmp_pids_on_device,pids_on_device);
+        } while (res == NVML_ERROR_INSUFFICIENT_SIZE);
+        mergepid(&running_processes, &merged_num, (nvmlProcessInfo_t1 *)tmp_pids_on_device,
+                 pids_on_device);
         break;
     }
     running_processes = merged_num;
-    LOG_INFO("current processes num = %u %u",previous,running_processes);
-    for (i=0;i<merged_num;i++){
-        LOG_INFO("current pid in use is %d %d",i,pids_on_device[i].pid);
-        //tmp_pids_on_device[i].pid=0;
-    }
-    unsigned int hostpid = getextrapid(previous,running_processes,pre_pids_on_device,pids_on_device); 
-    if (hostpid==0) {
+    LOG_INFO("current processes num = %u %u", previous, running_processes);
+    for (i = 0; i < merged_num; i++) {
+        LOG_INFO("current pid in use is %d %d", i, pids_on_device[i].pid);
+        // tmp_pids_on_device[i].pid=0;
+    }
+    unsigned int hostpid =
+        getextrapid(previous, running_processes, pre_pids_on_device, pids_on_device);
+    if (hostpid == 0) {
         LOG_ERROR("host pid is error!");
         return NVML_ERROR_DRIVER_NOT_LOADED;
     }
-    LOG_INFO("hostPid=%d",hostpid);
-    if (set_host_pid(hostpid)==0) {
-        for (i=0;i<running_processes;i++) {
-            if (pids_on_device[i].pid==hostpid) {
-                LOG_INFO("Primary Context Size==%lld",tmp_pids_on_device[i].usedGpuMemory);
-                context_size = tmp_pids_on_device[i].usedGpuMemory; 
+    LOG_INFO("hostPid=%d", hostpid);
+    if (set_host_pid(hostpid) == 0) {
+        for (i = 0; i < running_processes; i++) {
+            if (pids_on_device[i].pid == hostpid) {
+                LOG_INFO("Primary Context Size==%lld", tmp_pids_on_device[i].usedGpuMemory);
+                context_size = tmp_pids_on_device[i].usedGpuMemory;
                 break;
             }
         }
     }
     CHECK_CU_RESULT(cuDevicePrimaryCtxRelease(0));
-    return NVML_SUCCESS; 
+    return NVML_SUCCESS;
 }
 
 int parse_cuda_visible_env() {
@@ -181,14 +187,14 @@ int parse_cuda_visible_env() {
     if (need_cuda_virtualize()) {
         for (int i = 0; i < strlen(s); i++) {
             if ((s[i] == ',') || (i == 0)) {
-                int tmp = (i==0) ? atoi(s) : atoi(s + i +1);
-                cuda_to_nvml_map_array[count] = tmp; 
+                int tmp = (i == 0) ? atoi(s) : atoi(s + i + 1);
+                cuda_to_nvml_map_array[count] = tmp;
                 count++;
             }
-        } 
+        }
     }
     for (int i = 0; i < CUDA_DEVICE_MAX_COUNT; i++) {
-        LOG_INFO("device %d -> %d",i,cuda_to_nvml_map(i));
+        LOG_INFO("device %d -> %d", i, cuda_to_nvml_map(i));
     }
     LOG_INFO("get default cuda from %s", getenv("CUDA_VISIBLE_DEVICES"));
     return count;
@@ -201,31 +207,30 @@ int map_cuda_visible_devices() {
 
 int getenvcount() {
     char *s = getenv("CUDA_VISIBLE_DEVICES");
-    if ((s == NULL) || (strlen(s)==0)){
+    if ((s == NULL) || (strlen(s) == 0)) {  // unset or empty variable
         return -1;
     }
-    LOG_DEBUG("get from env %s",s);
-    int i,count=0;
-    for (i=0;i<strlen(s);i++){
-        if (s[i]==',')
-            count++;
+    LOG_DEBUG("get from env %s", s);
+    int i, count = 0;
+    for (i = 0; s[i] != '\0'; i++) {  // stop at the terminator; no strlen() call per iteration
+        if (s[i] == ',') count++;
     }
-    return count+1;
+    return count + 1;  // N commas separate N + 1 entries
 }
 
 int need_cuda_virtualize() {
     int count1 = -1;
     char *s = getenv("CUDA_VISIBLE_DEVICES");
-    if ((s == NULL) || (strlen(s)==0)){
+    if ((s == NULL) || (strlen(s) == 0)) {
         return 0;
     }
     int fromenv = getenvcount();
-    CUresult res = CUDA_OVERRIDE_CALL(cuda_library_entry,cuDeviceGetCount,&count1);
+    CUresult res = CUDA_OVERRIDE_CALL(cuda_library_entry, cuDeviceGetCount, &count1);
     if (res != CUDA_SUCCESS) {
         return 1;
     }
-    LOG_DEBUG("count1=%d",count1);
-    if (fromenv ==count1) {
+    LOG_DEBUG("count1=%d", count1);
+    if (fromenv == count1) {
         return 1;
     }
     return 0;