DSFans2014 · DSFans2014 · May 29, 2026 · May 29, 2026
diff --git a/.github/workflows/style.yaml b/.github/workflows/style.yaml
@@ -6,11 +6,12 @@ on:
   pull_request:
 
 jobs:
-  cpplint:
+  lint:
     runs-on: ubuntu-latest
     steps:
-    - uses: actions/checkout@master
-    - uses: reviewdog/action-cpplint@master
+    - uses: actions/checkout@v4
+    - name: Run clang-format style check for C/C++ programs.
+      uses: jidicula/clang-format-action@v4.18.0
       with:
-        github_token: ${{ secrets.github_token }}
-        args: --linelength=120
+        clang-format-version: '18'
+        check-path: 'src'
diff --git a/src/.clang-format b/src/.clang-format
@@ -0,0 +1,3 @@
+BasedOnStyle: Google
+IndentWidth: 4
+ColumnLimit: 100
diff --git a/src/allocator/allocator.c b/src/allocator/allocator.c
@@ -1,44 +1,42 @@
 #include "allocator.h"
-#include "include/log_utils.h"
+
 #include "include/libcuda_hook.h"
+#include "include/log_utils.h"
 #include "multiprocess/multiprocess_memory_limit.h"
 
-
 size_t BITSIZE = 512;
 size_t IPCSIZE = 2097152;
 size_t OVERSIZE = 134217728;
-//int pidfound;
+// int pidfound;
 
 region_list *r_list;
 allocated_list *device_overallocated;
 allocated_list *device_allocasync;
 
-#define ALIGN       2097152
+#define ALIGN 2097152
 #define MULTI_PARAM 1
 
-#define CHUNK_SIZE  (OVERSIZE/BITSIZE)
-#define __CHUNK_SIZE__  CHUNK_SIZE
+#define CHUNK_SIZE (OVERSIZE / BITSIZE)
+#define __CHUNK_SIZE__ CHUNK_SIZE
 
 extern size_t initial_offset;
-extern CUresult
-    cuMemoryAllocate(CUdeviceptr* dptr, size_t bytesize, void* data);
+extern CUresult cuMemoryAllocate(CUdeviceptr *dptr, size_t bytesize, void *data);
 extern CUresult cuMemoryFree(CUdeviceptr dptr);
 
 pthread_once_t allocator_allocate_flag = PTHREAD_ONCE_INIT;
 pthread_mutex_t mutex = PTHREAD_MUTEX_INITIALIZER;
 
 size_t round_up(size_t size, size_t unit) {
-    if (size & (unit-1))
-        return ((size / unit) + 1 ) * unit;
+    if (size % unit) return ((size / unit) + 1) * unit;  // any non-zero unit, not only powers of 2
     return size;
 }
 
 int oom_check(const int dev, size_t addon) {
     CUdevice d;
-    if (dev==-1)
+    if (dev == -1)
         cuCtxGetDevice(&d);
     else
-        d=dev;
+        d = dev;
     uint64_t limit = get_current_device_memory_limit(d);
     size_t _usage = get_gpu_memory_usage(d);
 
@@ -47,12 +45,11 @@ int oom_check(const int dev, size_t addon) {
     }
 
     size_t new_allocated = _usage + addon;
-    LOG_INFO("_usage=%lu limit=%lu new_allocated=%lu",_usage,limit,new_allocated);
+    LOG_INFO("_usage=%lu limit=%lu new_allocated=%lu", _usage, limit, new_allocated);
     if (new_allocated > limit) {
         LOG_ERROR("Device %d OOM %lu / %lu", d, new_allocated, limit);
 
-        if (clear_proc_slot_nolock(1) > 0)
-            return oom_check(dev,addon);
+        if (clear_proc_slot_nolock(1) > 0) return oom_check(dev, addon);
         return 1;
     }
     return 0;
@@ -61,27 +58,27 @@ int oom_check(const int dev, size_t addon) {
 CUresult view_vgpu_allocator() {
     allocated_list_entry *al;
     size_t total;
-    total=0;
+    total = 0;
     LOG_INFO("[view1]:overallocated:");
-    for (al=device_overallocated->head;al!=NULL;al=al->next){
-        LOG_INFO("(%p %lu)\t",(void *)al->entry->address,al->entry->length);
-        total+=al->entry->length;
+    for (al = device_overallocated->head; al != NULL; al = al->next) {
+        LOG_INFO("(%p %lu)\t", (void *)al->entry->address, al->entry->length);
+        total += al->entry->length;
     }
-    LOG_INFO("total=%lu",total);
+    LOG_INFO("total=%lu", total);
     size_t t = get_current_device_memory_usage(0);
-    LOG_INFO("current_device_memory_usage:%lu",t);
+    LOG_INFO("current_device_memory_usage:%lu", t);
     return 0;
 }
 
 CUresult get_listsize(allocated_list *al, size_t *size) {
-    if (al->length == 0){
+    if (al->length == 0) {
         *size = 0;
         return CUDA_SUCCESS;
     }
-    size_t count=0;
+    size_t count = 0;
     allocated_list_entry *val;
-    for (val=al->head;val!=NULL;val=val->next){
-        count+=val->entry->length;
+    for (val = al->head; val != NULL; val = val->next) {
+        count += val->entry->length;
     }
     *size = count;
     return CUDA_SUCCESS;
@@ -92,10 +89,10 @@ void allocator_init() {
 
     device_overallocated = malloc(sizeof(allocated_list));
     LIST_INIT(device_overallocated);
-    device_allocasync=malloc(sizeof(allocated_list));
+    device_allocasync = malloc(sizeof(allocated_list));
     LIST_INIT(device_allocasync);
 
-    pthread_mutex_init(&mutex,NULL);
+    pthread_mutex_init(&mutex, NULL);
 }
 
 int add_chunk(CUdeviceptr *address, size_t size) {
@@ -105,8 +102,7 @@ int add_chunk(CUdeviceptr *address, size_t size) {
     cuCtxGetDevice(&dev);
 
     /* OOM pre-check without lock */
-    if (oom_check(dev, size))
-        return CUDA_ERROR_OUT_OF_MEMORY;
+    if (oom_check(dev, size)) return CUDA_ERROR_OUT_OF_MEMORY;
 
     /* GPU allocation outside lock — the expensive part */
     if (size <= IPCSIZE) {
@@ -141,17 +137,17 @@ int add_chunk(CUdeviceptr *address, size_t size) {
 
 int add_chunk_only(CUdeviceptr address, size_t size, CUdevice dev) {
     pthread_mutex_lock(&mutex);
-    size_t addr=0;
+    size_t addr = 0;
     size_t allocsize;
-    if (oom_check(dev,size)){
+    if (oom_check(dev, size)) {
         pthread_mutex_unlock(&mutex);
         return -1;
     }
     allocated_list_entry *e;
     INIT_ALLOCATED_LIST_ENTRY(e, addr, size, dev);
-    LIST_ADD(device_overallocated,e);
-    //uint64_t t_size;
-    e->entry->address=address;
+    LIST_ADD(device_overallocated, e);
+    // uint64_t t_size;
+    e->entry->address = address;
     allocsize = size;
     add_gpu_device_memory_usage(getpid(), dev, allocsize, 2);
     pthread_mutex_unlock(&mutex);
@@ -161,8 +157,9 @@ int add_chunk_only(CUdeviceptr address, size_t size, CUdevice dev) {
 int check_memory_type(CUdeviceptr address) {
     allocated_list_entry *cursor;
     cursor = device_overallocated->head;
-    for (cursor=device_overallocated->head;cursor!=NULL;cursor=cursor->next){
-        if ((cursor->entry->address <= address) && (cursor->entry->address+cursor->entry->length>=address))
+    for (cursor = device_overallocated->head; cursor != NULL; cursor = cursor->next) {
+        if ((cursor->entry->address <= address) &&
+            (cursor->entry->address + cursor->entry->length >= address))
             return CU_MEMORYTYPE_DEVICE;
     }
     return CU_MEMORYTYPE_HOST;
@@ -218,30 +215,25 @@ int remove_chunk_only(CUdeviceptr dptr) {
     return -1;
 }
 
-int allocate_raw(CUdeviceptr *dptr, size_t size) {
-    return add_chunk(dptr, size);
-}
+int allocate_raw(CUdeviceptr *dptr, size_t size) { return add_chunk(dptr, size); }
 
-int free_raw(CUdeviceptr dptr) {
-    return remove_chunk(device_overallocated, dptr);
-}
+int free_raw(CUdeviceptr dptr) { return remove_chunk(device_overallocated, dptr); }
 
-int remove_chunk_async(
-    allocated_list *a_list, CUdeviceptr dptr, CUstream hStream) {
+int remove_chunk_async(allocated_list *a_list, CUdeviceptr dptr, CUstream hStream) {
     size_t t_size;
     if (a_list->length == 0) {
         return -1;
     }
     allocated_list_entry *val;
     for (val = a_list->head; val != NULL; val = val->next) {
         if (val->entry->address == dptr) {
-            t_size=val->entry->length;
-            CUDA_OVERRIDE_CALL(cuda_library_entry,cuMemFreeAsync,dptr,hStream);
-            LIST_REMOVE(a_list,val);
-            a_list->limit-=t_size;
+            t_size = val->entry->length;
+            CUDA_OVERRIDE_CALL(cuda_library_entry, cuMemFreeAsync, dptr, hStream);
+            LIST_REMOVE(a_list, val);
+            a_list->limit -= t_size;
             CUdevice dev;
             cuCtxGetDevice(&dev);
-            rm_gpu_device_memory_usage(getpid(),dev,t_size,2);
+            rm_gpu_device_memory_usage(getpid(), dev, t_size, 2);
             return 0;
         }
     }
@@ -256,53 +248,56 @@ int free_raw_async(CUdeviceptr dptr, CUstream hStream) {
 }
 
 int add_chunk_async(CUdeviceptr *address, size_t size, CUstream hStream) {
-    size_t addr=0;
+    size_t addr = 0;
     size_t allocsize;
     CUresult res = CUDA_SUCCESS;
     CUdevice dev;
     cuCtxGetDevice(&dev);
-    if (oom_check(dev,size))
-        return -1;
+    if (oom_check(dev, size)) return -1;  // request would exceed the device memory limit
 
     allocated_list_entry *e;
     INIT_ALLOCATED_LIST_ENTRY(e, addr, size, dev);
-    res = CUDA_OVERRIDE_CALL(cuda_library_entry,cuMemAllocAsync,&e->entry->address,size,hStream);
+    res =
+        CUDA_OVERRIDE_CALL(cuda_library_entry, cuMemAllocAsync, &e->entry->address, size, hStream);
     if (res != CUDA_SUCCESS) {
-        LOG_ERROR("cuMemoryAllocate failed res=%d",res);
+        LOG_ERROR("cuMemAllocAsync failed res=%d", res);
         return res;
     }
     *address = e->entry->address;
     CUmemoryPool pool;
-    res = CUDA_OVERRIDE_CALL(cuda_library_entry,cuDeviceGetMemPool,&pool,dev);
+    res = CUDA_OVERRIDE_CALL(cuda_library_entry, cuDeviceGetMemPool, &pool, dev);
     if (res != CUDA_SUCCESS) {
-        LOG_ERROR("cuDeviceGetMemPool failed res=%d",res);
+        LOG_ERROR("cuDeviceGetMemPool failed res=%d", res);
         return res;
     }
     size_t poollimit;
-    res = CUDA_OVERRIDE_CALL(cuda_library_entry,cuMemPoolGetAttribute,pool,CU_MEMPOOL_ATTR_RESERVED_MEM_HIGH,&poollimit);
+    res = CUDA_OVERRIDE_CALL(cuda_library_entry, cuMemPoolGetAttribute, pool,
+                             CU_MEMPOOL_ATTR_RESERVED_MEM_HIGH, &poollimit);
     if (res != CUDA_SUCCESS) {
-        LOG_ERROR("cuMemPoolGetAttribute failed res=%d",res);
+        LOG_ERROR("cuMemPoolGetAttribute failed res=%d", res);
         return res;
     }
     if (poollimit != 0) {
-        if (poollimit> device_allocasync->limit) {
-            allocsize = (poollimit-device_allocasync->limit < size)? poollimit-device_allocasync->limit : size;
+        if (poollimit > device_allocasync->limit) {
+            allocsize = (poollimit - device_allocasync->limit < size)
+                            ? poollimit - device_allocasync->limit
+                            : size;
             cuCtxGetDevice(&dev);
             add_gpu_device_memory_usage(getpid(), dev, allocsize, 2);
-            device_allocasync->limit=device_allocasync->limit+allocsize;
-            e->entry->length=allocsize;
-        }else{
-            e->entry->length=0;
+            device_allocasync->limit = device_allocasync->limit + allocsize;
+            e->entry->length = allocsize;
+        } else {
+            e->entry->length = 0;
         }
     }
-    LIST_ADD(device_allocasync,e);
+    LIST_ADD(device_allocasync, e);
     return 0;
 }
 
 int allocate_async_raw(CUdeviceptr *dptr, size_t size, CUstream hStream) {
     int tmp;
     pthread_mutex_lock(&mutex);
-    tmp = add_chunk_async(dptr,size,hStream);
+    tmp = add_chunk_async(dptr, size, hStream);
     pthread_mutex_unlock(&mutex);
     return tmp;
 }