1+ /*
2+ * Copyright (c) PyPTO Contributors.
3+ * This program is free software, you can redistribute it and/or modify it under the terms and conditions of
4+ * CANN Open Software License Agreement Version 2.0 (the "License").
5+ * Please refer to the License for details. You may not use this file except in compliance with the License.
6+ * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, EITHER EXPRESS OR IMPLIED,
7+ * INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, MERCHANTABILITY, OR FITNESS FOR A PARTICULAR PURPOSE.
8+ * See LICENSE in the root of the software repository for the full text of the License.
9+ * -----------------------------------------------------------------------------------------------------------
10+ */
111/* *
212 * AICore Kernel Wrapper for Simulation
313 *
414 * Provides a wrapper around aicore_execute for dlsym lookup.
515 * Sets up per-thread simulated register base before calling the executor.
616 */
717
18+ #include < dlfcn.h>
19+
820#include < cstdint>
21+
922#include " aicore/aicore.h"
1023#include " common/core_type.h"
1124#include " common/platform_config.h"
@@ -20,9 +33,20 @@ thread_local uint32_t g_sim_physical_core_id = 0;
2033// Declare the original function (defined in aicore_executor.cpp with weak linkage)
2134void aicore_execute (__gm__ Runtime* runtime, int block_idx, CoreType core_type);
2235
namespace {
/// Pointer type of the optional CPU-simulator hook: three uint32_t arguments, no return value.
using CpuSimSetExecutionContextHook = void (*)(uint32_t, uint32_t, uint32_t);

/**
 * Resolves the optional `pto_cpu_sim_set_execution_context` symbol.
 *
 * The symbol is looked up with dlsym(RTLD_DEFAULT, ...) exactly once, when the function-local
 * static below is first initialised; every later call returns that cached pointer. A failed
 * lookup (nullptr) is cached as well, so the lookup is never retried.
 *
 * @return The hook's address, or nullptr when dlsym did not find the symbol on the first call.
 */
CpuSimSetExecutionContextHook resolve_cpu_sim_set_execution_context_hook() {
    // The symbol name must match exactly: no leading or trailing whitespace inside the literal.
    static const auto hook =
        reinterpret_cast<CpuSimSetExecutionContextHook>(dlsym(RTLD_DEFAULT, "pto_cpu_sim_set_execution_context"));
    return hook;
}
}  // namespace
45+
2346// Wrapper with extern "C" for dlsym lookup
2447// NOTE: physical_core_id stays in wrapper signature (DeviceRunner passes it for register indexing)
25- extern " C" void aicore_execute_wrapper (__gm__ Runtime* runtime, int block_idx, CoreType core_type, uint32_t physical_core_id, uint64_t regs) {
48+ extern " C" void aicore_execute_wrapper (
49+ __gm__ Runtime* runtime, int block_idx, CoreType core_type, uint32_t physical_core_id, uint64_t regs) {
2650 // Set up simulated register base for this thread.
2751 // regs points to an array of uint64_t base addresses (one per core).
2852 // physical_core_id indexes into it to get this core's register block.
@@ -32,6 +56,22 @@ extern "C" void aicore_execute_wrapper(__gm__ Runtime* runtime, int block_idx, C
3256 }
3357
3458 g_sim_physical_core_id = physical_core_id;
59+ const uint32_t num_aic = static_cast <uint32_t >(runtime->worker_count / PLATFORM_CORES_PER_BLOCKDIM);
60+ uint32_t cpu_block_idx = static_cast <uint32_t >(block_idx);
61+ uint32_t subblock_id = 0 ;
62+ uint32_t subblock_dim = 1 ;
63+
64+ if (core_type == CoreType::AIV && physical_core_id >= num_aic) {
65+ const uint32_t aiv_offset = physical_core_id - num_aic;
66+ cpu_block_idx = aiv_offset / PLATFORM_AIV_CORES_PER_BLOCKDIM;
67+ subblock_id = aiv_offset % PLATFORM_AIV_CORES_PER_BLOCKDIM;
68+ subblock_dim = PLATFORM_AIV_CORES_PER_BLOCKDIM;
69+ } else {
70+ cpu_block_idx = physical_core_id;
71+ }
3572
73+ if (auto hook = resolve_cpu_sim_set_execution_context_hook (); hook != nullptr ) {
74+ hook (cpu_block_idx, subblock_id, subblock_dim);
75+ }
3676 aicore_execute (runtime, block_idx, core_type);
3777}
0 commit comments