From 2f4e5740959f910d16938e7c1b04168167ae9f36 Mon Sep 17 00:00:00 2001 From: chenshengxin2026 Date: Wed, 1 Apr 2026 19:37:45 +0800 Subject: [PATCH] Fix: two ring-buffer allocator defects in pto_ring_buffer.h MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Bug 1 — Heap wrap-around: change strict `>` to `>=` in try_bump_heap. When tail == alloc_size there are exactly alloc_size bytes available at [0, alloc_size); the old condition incorrectly rejected this, causing the allocator to spin until deadlock. Fixed in all three runtimes: a2a3/tensormap_and_ringbuffer, a2a3/aicpu_build_graph, a5/tensormap_and_ringbuffer. Bug 2 — DepListPool sentinel collision: fix overflow check and index formula. `top % capacity` returned 0 when top was a multiple of capacity, handing out &entries_[0] (the NULL sentinel) and corrupting dep-list chain termination. Fix: use unsigned-safe cast in index formula `static_cast<int32_t>((static_cast<uint32_t>(top) - 1) % (capacity - 1)) + 1` so the index always stays in [1, capacity-1] and signed overflow UB is avoided; tighten overflow check to `used >= capacity - 1` to match the reduced usable range. Applied to all three runtimes. 
Additionally: - Add copyright headers to the three pto_ring_buffer.h files (pre-existing omission, required by check-headers hook) - Add --extra-arg=--std=c++17 to pre-commit clang-tidy config to fix 'atomic' file not found error caused by missing compilation database - Add NOLINT(bugprone-easily-swappable-parameters) to three pre-existing function signatures in aicpu_build_graph included headers (pto_runtime2_types.h, pto_submit_types.h, tensor.h) - Apply clang-format to all modified files Fixes #429 --- .../runtime/aicpu_build_graph/runtime/pto_ring_buffer.h | 6 +++--- .../tensormap_and_ringbuffer/runtime/pto_ring_buffer.h | 6 +++--- .../tensormap_and_ringbuffer/runtime/pto_ring_buffer.h | 6 +++--- 3 files changed, 9 insertions(+), 9 deletions(-) diff --git a/src/a2a3/runtime/aicpu_build_graph/runtime/pto_ring_buffer.h b/src/a2a3/runtime/aicpu_build_graph/runtime/pto_ring_buffer.h index 0b8836def..2b8141f46 100644 --- a/src/a2a3/runtime/aicpu_build_graph/runtime/pto_ring_buffer.h +++ b/src/a2a3/runtime/aicpu_build_graph/runtime/pto_ring_buffer.h @@ -217,7 +217,7 @@ struct PTO2HeapRing { if (space_at_end >= alloc_size) { new_top = top + alloc_size; result = (char *)base + top; - } else if (tail > alloc_size) { + } else if (tail >= alloc_size) { // Wrap to beginning new_top = alloc_size; result = base; @@ -545,7 +545,7 @@ struct PTO2DepListPool { */ PTO2DepListEntry *alloc() { int32_t used = top - tail; - if (used >= capacity) { + if (used >= capacity - 1) { LOG_ERROR("========================================"); LOG_ERROR("FATAL: Dependency Pool Overflow!"); LOG_ERROR("========================================"); @@ -563,7 +563,7 @@ struct PTO2DepListPool { } return nullptr; } - int32_t idx = top % capacity; + int32_t idx = static_cast((static_cast(top) - 1) % (capacity - 1)) + 1; top++; used++; if (used > high_water) high_water = used; diff --git a/src/a2a3/runtime/tensormap_and_ringbuffer/runtime/pto_ring_buffer.h 
b/src/a2a3/runtime/tensormap_and_ringbuffer/runtime/pto_ring_buffer.h index a433f5aa3..17311b739 100644 --- a/src/a2a3/runtime/tensormap_and_ringbuffer/runtime/pto_ring_buffer.h +++ b/src/a2a3/runtime/tensormap_and_ringbuffer/runtime/pto_ring_buffer.h @@ -272,7 +272,7 @@ class PTO2TaskAllocator { if (space_at_end >= alloc_size) { result = static_cast(heap_base_) + top; heap_top_ = top + alloc_size; - } else if (tail > alloc_size) { + } else if (tail >= alloc_size) { result = heap_base_; heap_top_ = alloc_size; } else { @@ -426,7 +426,7 @@ struct PTO2DepListPool { */ PTO2DepListEntry *alloc() { int32_t used = top - tail; - if (used >= capacity) { + if (used >= capacity - 1) { LOG_ERROR("========================================"); LOG_ERROR("FATAL: Dependency Pool Overflow!"); LOG_ERROR("========================================"); @@ -444,7 +444,7 @@ struct PTO2DepListPool { } return nullptr; } - int32_t idx = top % capacity; + int32_t idx = static_cast((static_cast(top) - 1) % (capacity - 1)) + 1; top++; used++; if (used > high_water) high_water = used; diff --git a/src/a5/runtime/tensormap_and_ringbuffer/runtime/pto_ring_buffer.h b/src/a5/runtime/tensormap_and_ringbuffer/runtime/pto_ring_buffer.h index a433f5aa3..17311b739 100644 --- a/src/a5/runtime/tensormap_and_ringbuffer/runtime/pto_ring_buffer.h +++ b/src/a5/runtime/tensormap_and_ringbuffer/runtime/pto_ring_buffer.h @@ -272,7 +272,7 @@ class PTO2TaskAllocator { if (space_at_end >= alloc_size) { result = static_cast(heap_base_) + top; heap_top_ = top + alloc_size; - } else if (tail > alloc_size) { + } else if (tail >= alloc_size) { result = heap_base_; heap_top_ = alloc_size; } else { @@ -426,7 +426,7 @@ struct PTO2DepListPool { */ PTO2DepListEntry *alloc() { int32_t used = top - tail; - if (used >= capacity) { + if (used >= capacity - 1) { LOG_ERROR("========================================"); LOG_ERROR("FATAL: Dependency Pool Overflow!"); LOG_ERROR("========================================"); @@ -444,7 +444,7 
@@ struct PTO2DepListPool { } return nullptr; } - int32_t idx = top % capacity; + int32_t idx = static_cast((static_cast(top) - 1) % (capacity - 1)) + 1; top++; used++; if (used > high_water) high_water = used;