From 84e4292ca16210a90aa609c7a1d2f37c88de7693 Mon Sep 17 00:00:00 2001 From: Chao Wang <26245345+ChaoWao@users.noreply.github.com> Date: Sat, 4 Apr 2026 11:38:52 +0800 Subject: [PATCH] Fix(sim): pass context helpers to AICore SO instead of dlsym(RTLD_DEFAULT) The AICore kernel SO used dlsym(RTLD_DEFAULT, "pto_cpu_sim_*") to resolve sim context functions. This fails when libhost_runtime.so is loaded with RTLD_LOCAL, which is needed for multi-runtime in-process isolation. Replace with explicit function pointer injection via set_sim_context_helpers. Co-Authored-By: Claude Sonnet 4.6 (1M context) --- src/a2a3/platform/sim/aicore/inner_kernel.h | 31 +++++++++++--------- src/a2a3/platform/sim/aicore/kernel.cpp | 12 ++++++++ src/a2a3/platform/sim/host/cpu_sim_context.h | 14 +++++++++ src/a2a3/platform/sim/host/device_runner.cpp | 13 ++++++++ src/a5/platform/sim/aicore/inner_kernel.h | 26 ++++++++-------- src/a5/platform/sim/aicore/kernel.cpp | 12 ++++++++ src/a5/platform/sim/host/cpu_sim_context.h | 14 +++++++++ src/a5/platform/sim/host/device_runner.cpp | 13 ++++++++ 8 files changed, 107 insertions(+), 28 deletions(-) diff --git a/src/a2a3/platform/sim/aicore/inner_kernel.h b/src/a2a3/platform/sim/aicore/inner_kernel.h index f250d946f..4dd6ef375 100644 --- a/src/a2a3/platform/sim/aicore/inner_kernel.h +++ b/src/a2a3/platform/sim/aicore/inner_kernel.h @@ -186,31 +186,34 @@ inline uint32_t get_physical_core_id() { return g_sim_physical_core_id; } // ============================================================================= // CPU Simulation Context Hooks // ============================================================================= +// +// These functions bridge the AICore kernel SO → host runtime SO gap. +// The host runtime's DeviceRunner calls set_sim_context_helpers() after +// dlopen to pass function pointers, avoiding dlsym(RTLD_DEFAULT) which +// fails when the host SO is loaded with RTLD_LOCAL. 
+ +// Function pointer storage — set by set_sim_context_helpers(), used by macros below. +using SimSetExecCtxFn = void (*)(uint32_t, uint32_t, uint32_t); +using SimSetTaskCookieFn = void (*)(uint64_t); +using SimGetTaskCookieFn = uint64_t (*)(uint32_t, uint32_t); + +extern SimSetExecCtxFn g_sim_set_exec_ctx_fn; +extern SimSetTaskCookieFn g_sim_set_task_cookie_fn; +extern SimGetTaskCookieFn g_sim_get_task_cookie_fn; -// CPU_SIM_SET_EXECUTION_CONTEXT — set block/subblock context for CANN intrinsic emulation. -// Resolves pto_cpu_sim_set_execution_context (defined in cpu_sim_context.cpp) via dlsym. inline void cpu_sim_set_execution_context(uint32_t block_idx, uint32_t subblock_id, uint32_t subblock_dim) { - using Fn = void (*)(uint32_t, uint32_t, uint32_t); - static auto fn = reinterpret_cast(dlsym(RTLD_DEFAULT, "pto_cpu_sim_set_execution_context")); - if (fn != nullptr) fn(block_idx, subblock_id, subblock_dim); + if (g_sim_set_exec_ctx_fn != nullptr) g_sim_set_exec_ctx_fn(block_idx, subblock_id, subblock_dim); } #define CPU_SIM_SET_EXECUTION_CONTEXT(block_idx, subblock_id, subblock_dim) \ cpu_sim_set_execution_context(block_idx, subblock_id, subblock_dim) -// CPU_SIM_SET_TASK_COOKIE — set task cookie for simulation tracing. -// Resolves pto_cpu_sim_set_task_cookie (defined in cpu_sim_context.cpp) via dlsym. inline void cpu_sim_set_task_cookie(uint64_t cookie) { - using Fn = void (*)(uint64_t); - static auto fn = reinterpret_cast(dlsym(RTLD_DEFAULT, "pto_cpu_sim_set_task_cookie")); - if (fn != nullptr) fn(cookie); + if (g_sim_set_task_cookie_fn != nullptr) g_sim_set_task_cookie_fn(cookie); } #define CPU_SIM_SET_TASK_COOKIE(cookie) cpu_sim_set_task_cookie(cookie) -// platform_get_cpu_sim_task_cookie — resolve per-dispatch logical task identity. 
inline uint64_t platform_get_cpu_sim_task_cookie(uint32_t core_id, uint32_t reg_task_id) { - using Fn = uint64_t (*)(uint32_t, uint32_t); - static auto fn = reinterpret_cast(dlsym(RTLD_DEFAULT, "platform_get_cpu_sim_task_cookie")); - return (fn != nullptr) ? fn(core_id, reg_task_id) : 0; + return (g_sim_get_task_cookie_fn != nullptr) ? g_sim_get_task_cookie_fn(core_id, reg_task_id) : 0; } #endif // PLATFORM_A2A3SIM_AICORE_INNER_KERNEL_H_ diff --git a/src/a2a3/platform/sim/aicore/kernel.cpp b/src/a2a3/platform/sim/aicore/kernel.cpp index e7bf8eef9..5da3aded2 100644 --- a/src/a2a3/platform/sim/aicore/kernel.cpp +++ b/src/a2a3/platform/sim/aicore/kernel.cpp @@ -16,6 +16,7 @@ */ #include +#include "inner_kernel.h" // NOLINT(build/include_subdir) #include "aicore/aicore.h" #include "common/core_type.h" #include "common/platform_config.h" @@ -27,6 +28,17 @@ thread_local volatile uint8_t *g_sim_reg_base = nullptr; // Thread-local simulated physical core ID (declared in inner_kernel.h) thread_local uint32_t g_sim_physical_core_id = 0; +// Sim context function pointers — set by DeviceRunner after dlopen. 
+SimSetExecCtxFn g_sim_set_exec_ctx_fn = nullptr; +SimSetTaskCookieFn g_sim_set_task_cookie_fn = nullptr; +SimGetTaskCookieFn g_sim_get_task_cookie_fn = nullptr; + +extern "C" void set_sim_context_helpers(void *set_exec_ctx, void *set_task_cookie, void *get_task_cookie) { + g_sim_set_exec_ctx_fn = reinterpret_cast(set_exec_ctx); + g_sim_set_task_cookie_fn = reinterpret_cast(set_task_cookie); + g_sim_get_task_cookie_fn = reinterpret_cast(get_task_cookie); +} + // Declare the original function (defined in aicore_executor.cpp with weak linkage) void aicore_execute(__gm__ Runtime *runtime, int block_idx, CoreType core_type); diff --git a/src/a2a3/platform/sim/host/cpu_sim_context.h b/src/a2a3/platform/sim/host/cpu_sim_context.h index fc328c457..611953306 100644 --- a/src/a2a3/platform/sim/host/cpu_sim_context.h +++ b/src/a2a3/platform/sim/host/cpu_sim_context.h @@ -19,9 +19,23 @@ #pragma once +#include + /** * Free all entries in the CPU simulation shared storage map and reset the * pthread-backed per-thread execution context store. * Called by DeviceRunner::run() at start and DeviceRunner::finalize() at end. 
*/ void clear_cpu_sim_shared_storage(); + +#ifdef __cplusplus +extern "C" { +#endif + +void pto_cpu_sim_set_execution_context(uint32_t block_idx, uint32_t subblock_id, uint32_t subblock_dim); +void pto_cpu_sim_set_task_cookie(uint64_t task_cookie); +uint64_t platform_get_cpu_sim_task_cookie(uint32_t core_id, uint32_t reg_task_id); + +#ifdef __cplusplus +} +#endif diff --git a/src/a2a3/platform/sim/host/device_runner.cpp b/src/a2a3/platform/sim/host/device_runner.cpp index dd51727d2..bd23cc05e 100644 --- a/src/a2a3/platform/sim/host/device_runner.cpp +++ b/src/a2a3/platform/sim/host/device_runner.cpp @@ -167,6 +167,19 @@ int DeviceRunner::ensure_binaries_loaded( return -1; } LOG_INFO("DeviceRunner(sim): Loaded aicore_execute_wrapper from %s", aicore_so_path_.c_str()); + + // Pass sim context function pointers to the AICore SO so it doesn't + // need dlsym(RTLD_DEFAULT) — which fails when the host runtime SO + // is loaded with RTLD_LOCAL. + auto set_helpers = + reinterpret_cast(dlsym(aicore_so_handle_, "set_sim_context_helpers")); + if (set_helpers != nullptr) { + set_helpers( + reinterpret_cast(pto_cpu_sim_set_execution_context), + reinterpret_cast(pto_cpu_sim_set_task_cookie), + reinterpret_cast(platform_get_cpu_sim_task_cookie) + ); + } } return 0; diff --git a/src/a5/platform/sim/aicore/inner_kernel.h b/src/a5/platform/sim/aicore/inner_kernel.h index 5352cfdc2..77ceb1fd7 100644 --- a/src/a5/platform/sim/aicore/inner_kernel.h +++ b/src/a5/platform/sim/aicore/inner_kernel.h @@ -195,30 +195,28 @@ inline uint32_t get_physical_core_id() { return g_sim_physical_core_id; } // CPU Simulation Context Hooks // ============================================================================= -// CPU_SIM_SET_EXECUTION_CONTEXT — set block/subblock context for CANN intrinsic emulation. -// Resolves pto_cpu_sim_set_execution_context (defined in cpu_sim_context.cpp) via dlsym. +// Function pointer storage — set by set_sim_context_helpers(), used by macros below. 
+using SimSetExecCtxFn = void (*)(uint32_t, uint32_t, uint32_t); +using SimSetTaskCookieFn = void (*)(uint64_t); +using SimGetTaskCookieFn = uint64_t (*)(uint32_t, uint32_t); + +extern SimSetExecCtxFn g_sim_set_exec_ctx_fn; +extern SimSetTaskCookieFn g_sim_set_task_cookie_fn; +extern SimGetTaskCookieFn g_sim_get_task_cookie_fn; + inline void cpu_sim_set_execution_context(uint32_t block_idx, uint32_t subblock_id, uint32_t subblock_dim) { - using Fn = void (*)(uint32_t, uint32_t, uint32_t); - static auto fn = reinterpret_cast(dlsym(RTLD_DEFAULT, "pto_cpu_sim_set_execution_context")); - if (fn != nullptr) fn(block_idx, subblock_id, subblock_dim); + if (g_sim_set_exec_ctx_fn != nullptr) g_sim_set_exec_ctx_fn(block_idx, subblock_id, subblock_dim); } #define CPU_SIM_SET_EXECUTION_CONTEXT(block_idx, subblock_id, subblock_dim) \ cpu_sim_set_execution_context(block_idx, subblock_id, subblock_dim) -// CPU_SIM_SET_TASK_COOKIE — set task cookie for simulation tracing. -// Resolves pto_cpu_sim_set_task_cookie (defined in cpu_sim_context.cpp) via dlsym. inline void cpu_sim_set_task_cookie(uint64_t cookie) { - using Fn = void (*)(uint64_t); - static auto fn = reinterpret_cast(dlsym(RTLD_DEFAULT, "pto_cpu_sim_set_task_cookie")); - if (fn != nullptr) fn(cookie); + if (g_sim_set_task_cookie_fn != nullptr) g_sim_set_task_cookie_fn(cookie); } #define CPU_SIM_SET_TASK_COOKIE(cookie) cpu_sim_set_task_cookie(cookie) -// platform_get_cpu_sim_task_cookie — resolve per-dispatch logical task identity. inline uint64_t platform_get_cpu_sim_task_cookie(uint32_t core_id, uint32_t reg_task_id) { - using Fn = uint64_t (*)(uint32_t, uint32_t); - static auto fn = reinterpret_cast(dlsym(RTLD_DEFAULT, "platform_get_cpu_sim_task_cookie")); - return (fn != nullptr) ? fn(core_id, reg_task_id) : 0; + return (g_sim_get_task_cookie_fn != nullptr) ? 
g_sim_get_task_cookie_fn(core_id, reg_task_id) : 0; } #endif // PLATFORM_A5SIM_AICORE_INNER_KERNEL_H_ diff --git a/src/a5/platform/sim/aicore/kernel.cpp b/src/a5/platform/sim/aicore/kernel.cpp index e7bf8eef9..5da3aded2 100644 --- a/src/a5/platform/sim/aicore/kernel.cpp +++ b/src/a5/platform/sim/aicore/kernel.cpp @@ -16,6 +16,7 @@ */ #include +#include "inner_kernel.h" // NOLINT(build/include_subdir) #include "aicore/aicore.h" #include "common/core_type.h" #include "common/platform_config.h" @@ -27,6 +28,17 @@ thread_local volatile uint8_t *g_sim_reg_base = nullptr; // Thread-local simulated physical core ID (declared in inner_kernel.h) thread_local uint32_t g_sim_physical_core_id = 0; +// Sim context function pointers — set by DeviceRunner after dlopen. +SimSetExecCtxFn g_sim_set_exec_ctx_fn = nullptr; +SimSetTaskCookieFn g_sim_set_task_cookie_fn = nullptr; +SimGetTaskCookieFn g_sim_get_task_cookie_fn = nullptr; + +extern "C" void set_sim_context_helpers(void *set_exec_ctx, void *set_task_cookie, void *get_task_cookie) { + g_sim_set_exec_ctx_fn = reinterpret_cast(set_exec_ctx); + g_sim_set_task_cookie_fn = reinterpret_cast(set_task_cookie); + g_sim_get_task_cookie_fn = reinterpret_cast(get_task_cookie); +} + // Declare the original function (defined in aicore_executor.cpp with weak linkage) void aicore_execute(__gm__ Runtime *runtime, int block_idx, CoreType core_type); diff --git a/src/a5/platform/sim/host/cpu_sim_context.h b/src/a5/platform/sim/host/cpu_sim_context.h index fc328c457..611953306 100644 --- a/src/a5/platform/sim/host/cpu_sim_context.h +++ b/src/a5/platform/sim/host/cpu_sim_context.h @@ -19,9 +19,23 @@ #pragma once +#include + /** * Free all entries in the CPU simulation shared storage map and reset the * pthread-backed per-thread execution context store. * Called by DeviceRunner::run() at start and DeviceRunner::finalize() at end. 
*/ void clear_cpu_sim_shared_storage(); + +#ifdef __cplusplus +extern "C" { +#endif + +void pto_cpu_sim_set_execution_context(uint32_t block_idx, uint32_t subblock_id, uint32_t subblock_dim); +void pto_cpu_sim_set_task_cookie(uint64_t task_cookie); +uint64_t platform_get_cpu_sim_task_cookie(uint32_t core_id, uint32_t reg_task_id); + +#ifdef __cplusplus +} +#endif diff --git a/src/a5/platform/sim/host/device_runner.cpp b/src/a5/platform/sim/host/device_runner.cpp index f15106f3d..08dc01b07 100644 --- a/src/a5/platform/sim/host/device_runner.cpp +++ b/src/a5/platform/sim/host/device_runner.cpp @@ -167,6 +167,19 @@ int DeviceRunner::ensure_binaries_loaded( return -1; } LOG_INFO("DeviceRunner(sim): Loaded aicore_execute_wrapper from %s", aicore_so_path_.c_str()); + + // Pass sim context function pointers to the AICore SO so it doesn't + // need dlsym(RTLD_DEFAULT) — which fails when the host runtime SO + // is loaded with RTLD_LOCAL. + auto set_helpers = + reinterpret_cast(dlsym(aicore_so_handle_, "set_sim_context_helpers")); + if (set_helpers != nullptr) { + set_helpers( + reinterpret_cast(pto_cpu_sim_set_execution_context), + reinterpret_cast(pto_cpu_sim_set_task_cookie), + reinterpret_cast(platform_get_cpu_sim_task_cookie) + ); + } } return 0;