Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 7 additions & 3 deletions src/a2a3/runtime/host_build_graph/aicore/aicore_executor.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -20,14 +20,18 @@ __aicore__ __attribute__((always_inline)) static void execute_task(__gm__ Task*
__aicore__ __attribute__((weak)) void aicore_execute(__gm__ Runtime* runtime, int block_idx, CoreType core_type) {
__gm__ Handshake* my_hank = (__gm__ Handshake*)(&runtime->workers[block_idx]);

+// In multi-round execution the DeviceRunner singleton keeps AICore threads alive
+// across rounds. DATA_MAIN_BASE still holds the EXIT_SIGNAL from the previous
+// round, so clear it before the handshake wait. Clearing after the wait would
+// race with AICPU, which may finish all tasks and write a new EXIT_SIGNAL while
+// this thread is descheduled between the wait and the clear.
+write_reg(RegId::DATA_MAIN_BASE, 0);
+
// Phase 1: Wait for AICPU initialization signal
while (my_hank->aicpu_ready == 0) {
dcci(my_hank, ENTIRE_DATA_CACHE, CACHELINE_OUT);
}

-// Clear stale EXIT_SIGNAL from previous round before entering main loop
-write_reg(RegId::DATA_MAIN_BASE, 0);
-
// Report physical core ID and core type for AICPU
my_hank->physical_core_id = get_physical_core_id();
my_hank->core_type = core_type;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -52,14 +52,18 @@ __aicore__ __attribute__((always_inline)) static void execute_task(__gm__ void*
__aicore__ __attribute__((weak)) void aicore_execute(__gm__ Runtime* runtime, int block_idx, CoreType core_type) {
__gm__ Handshake* my_hank = (__gm__ Handshake*)(&runtime->workers[block_idx]);

+// In multi-round execution the DeviceRunner singleton keeps AICore threads alive
+// across rounds. DATA_MAIN_BASE still holds the EXIT_SIGNAL from the previous
+// round, so clear it before the handshake wait. Clearing after the wait would
+// race with AICPU, which may finish all tasks and write a new EXIT_SIGNAL while
+// this thread is descheduled between the wait and the clear.
+write_reg(RegId::DATA_MAIN_BASE, 0);
+
// Phase 1: Wait for AICPU initialization signal
while (my_hank->aicpu_ready == 0) {
dcci(my_hank, SINGLE_CACHE_LINE);
}

-// Clear stale EXIT_SIGNAL from previous round before entering main loop
-write_reg(RegId::DATA_MAIN_BASE, 0);
-
// Phase 2: Report physical core ID and core type, signal ready
my_hank->physical_core_id = get_physical_core_id();
my_hank->core_type = core_type;
Expand Down
7 changes: 7 additions & 0 deletions src/a5/runtime/host_build_graph/aicore/aicore_executor.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,13 @@ __aicore__ __attribute__((always_inline)) static void execute_task(__gm__ Task*
__aicore__ __attribute__((weak)) void aicore_execute(__gm__ Runtime* runtime, int core_idx, CoreType core_type) {
__gm__ Handshake* my_hank = (__gm__ Handshake*)(&runtime->workers[core_idx]);

+// In multi-round execution the DeviceRunner singleton keeps AICore threads alive
+// across rounds. DATA_MAIN_BASE still holds the EXIT_SIGNAL from the previous
+// round, so clear it before the handshake wait. Clearing after the wait would
+// race with AICPU, which may finish all tasks and write a new EXIT_SIGNAL while
+// this thread is descheduled between the wait and the clear.
+write_reg(RegId::DATA_MAIN_BASE, 0);
+
// Phase 1: Wait for AICPU initialization signal
while (my_hank->aicpu_ready == 0) {
dcci(my_hank, ENTIRE_DATA_CACHE, CACHELINE_OUT);
Expand Down