Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
31 changes: 17 additions & 14 deletions src/a2a3/platform/sim/aicore/inner_kernel.h
Original file line number Diff line number Diff line change
Expand Up @@ -186,31 +186,34 @@ inline uint32_t get_physical_core_id() { return g_sim_physical_core_id; }
// =============================================================================
// CPU Simulation Context Hooks
// =============================================================================
//
// These functions bridge the AICore kernel SO → host runtime SO gap.
// The host runtime's DeviceRunner calls set_sim_context_helpers() after
// dlopen to pass function pointers, avoiding dlsym(RTLD_DEFAULT) which
// fails when the host SO is loaded with RTLD_LOCAL.

// Hook signatures and their storage. Every pointer below is assigned by
// set_sim_context_helpers() and read by the inline wrappers/macros that follow.

// Execution-context setter: (block_idx, subblock_id, subblock_dim).
using SimSetExecCtxFn = void (*)(uint32_t, uint32_t, uint32_t);
extern SimSetExecCtxFn g_sim_set_exec_ctx_fn;

// Task-cookie setter: (cookie).
using SimSetTaskCookieFn = void (*)(uint64_t);
extern SimSetTaskCookieFn g_sim_set_task_cookie_fn;

// Task-cookie getter: (core_id, reg_task_id) -> cookie.
using SimGetTaskCookieFn = uint64_t (*)(uint32_t, uint32_t);
extern SimGetTaskCookieFn g_sim_get_task_cookie_fn;

// CPU_SIM_SET_EXECUTION_CONTEXT — set block/subblock context for CANN intrinsic emulation.
// Resolves pto_cpu_sim_set_execution_context (defined in cpu_sim_context.cpp) via dlsym.
inline void cpu_sim_set_execution_context(uint32_t block_idx, uint32_t subblock_id, uint32_t subblock_dim) {
using Fn = void (*)(uint32_t, uint32_t, uint32_t);
static auto fn = reinterpret_cast<Fn>(dlsym(RTLD_DEFAULT, "pto_cpu_sim_set_execution_context"));
if (fn != nullptr) fn(block_idx, subblock_id, subblock_dim);
if (g_sim_set_exec_ctx_fn != nullptr) g_sim_set_exec_ctx_fn(block_idx, subblock_id, subblock_dim);
}
#define CPU_SIM_SET_EXECUTION_CONTEXT(block_idx, subblock_id, subblock_dim) \
cpu_sim_set_execution_context(block_idx, subblock_id, subblock_dim)

// CPU_SIM_SET_TASK_COOKIE — set task cookie for simulation tracing.
// Resolves pto_cpu_sim_set_task_cookie (defined in cpu_sim_context.cpp) via dlsym.
inline void cpu_sim_set_task_cookie(uint64_t cookie) {
using Fn = void (*)(uint64_t);
static auto fn = reinterpret_cast<Fn>(dlsym(RTLD_DEFAULT, "pto_cpu_sim_set_task_cookie"));
if (fn != nullptr) fn(cookie);
if (g_sim_set_task_cookie_fn != nullptr) g_sim_set_task_cookie_fn(cookie);
}
#define CPU_SIM_SET_TASK_COOKIE(cookie) cpu_sim_set_task_cookie(cookie)

// platform_get_cpu_sim_task_cookie — resolve per-dispatch logical task identity.
inline uint64_t platform_get_cpu_sim_task_cookie(uint32_t core_id, uint32_t reg_task_id) {
using Fn = uint64_t (*)(uint32_t, uint32_t);
static auto fn = reinterpret_cast<Fn>(dlsym(RTLD_DEFAULT, "platform_get_cpu_sim_task_cookie"));
return (fn != nullptr) ? fn(core_id, reg_task_id) : 0;
return (g_sim_get_task_cookie_fn != nullptr) ? g_sim_get_task_cookie_fn(core_id, reg_task_id) : 0;
}

#endif // PLATFORM_A2A3SIM_AICORE_INNER_KERNEL_H_
12 changes: 12 additions & 0 deletions src/a2a3/platform/sim/aicore/kernel.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@
*/

#include <cstdint>
#include "inner_kernel.h" // NOLINT(build/include_subdir)
#include "aicore/aicore.h"
#include "common/core_type.h"
#include "common/platform_config.h"
Expand All @@ -27,6 +28,17 @@ thread_local volatile uint8_t *g_sim_reg_base = nullptr;
// Thread-local simulated physical core ID (declared in inner_kernel.h)
thread_local uint32_t g_sim_physical_core_id = 0;

// Definitions of the sim context hook pointers declared in inner_kernel.h.
// All three start out null; DeviceRunner fills them in after dlopen.
SimSetExecCtxFn g_sim_set_exec_ctx_fn{nullptr};
SimSetTaskCookieFn g_sim_set_task_cookie_fn{nullptr};
SimGetTaskCookieFn g_sim_get_task_cookie_fn{nullptr};

// C-linkage entry point that installs the three hooks.
// Each argument arrives as an untyped pointer and is cast back to the matching
// hook signature before being stored; a null argument leaves that hook disabled.
extern "C" void set_sim_context_helpers(void *set_exec_ctx, void *set_task_cookie, void *get_task_cookie) {
    const auto exec_ctx_hook = reinterpret_cast<SimSetExecCtxFn>(set_exec_ctx);
    const auto set_cookie_hook = reinterpret_cast<SimSetTaskCookieFn>(set_task_cookie);
    const auto get_cookie_hook = reinterpret_cast<SimGetTaskCookieFn>(get_task_cookie);

    g_sim_set_exec_ctx_fn = exec_ctx_hook;
    g_sim_set_task_cookie_fn = set_cookie_hook;
    g_sim_get_task_cookie_fn = get_cookie_hook;
}

// Declare the original function (defined in aicore_executor.cpp with weak linkage)
void aicore_execute(__gm__ Runtime *runtime, int block_idx, CoreType core_type);

Expand Down
14 changes: 14 additions & 0 deletions src/a2a3/platform/sim/host/cpu_sim_context.h
Original file line number Diff line number Diff line change
Expand Up @@ -19,9 +19,23 @@

#pragma once

#include <cstdint>

/**
 * Free all entries in the CPU simulation shared storage map and reset the
 * pthread-backed per-thread execution context store.
 * Called by DeviceRunner::run() at start and DeviceRunner::finalize() at end.
 */
void clear_cpu_sim_shared_storage();

// The hooks below have C linkage. DeviceRunner takes their addresses and hands
// them to the AICore SO through set_sim_context_helpers() (see device_runner.cpp).
#ifdef __cplusplus
extern "C" {
#endif

// Set block/subblock context for CANN intrinsic emulation.
void pto_cpu_sim_set_execution_context(uint32_t block_idx, uint32_t subblock_id, uint32_t subblock_dim);
// Set the task cookie used for simulation tracing.
void pto_cpu_sim_set_task_cookie(uint64_t task_cookie);
// Resolve the per-dispatch logical task identity for (core_id, reg_task_id).
// NOTE(review): the AICore-side inner_kernel.h defines an inline C++ wrapper with
// this same name and parameter list — confirm the two headers never meet in one TU.
uint64_t platform_get_cpu_sim_task_cookie(uint32_t core_id, uint32_t reg_task_id);

#ifdef __cplusplus
}
#endif
13 changes: 13 additions & 0 deletions src/a2a3/platform/sim/host/device_runner.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -167,6 +167,19 @@ int DeviceRunner::ensure_binaries_loaded(
return -1;
}
LOG_INFO("DeviceRunner(sim): Loaded aicore_execute_wrapper from %s", aicore_so_path_.c_str());

// Pass sim context function pointers to the AICore SO so it doesn't
// need dlsym(RTLD_DEFAULT) — which fails when the host runtime SO
// is loaded with RTLD_LOCAL.
auto set_helpers =
reinterpret_cast<void (*)(void *, void *, void *)>(dlsym(aicore_so_handle_, "set_sim_context_helpers"));
if (set_helpers != nullptr) {
set_helpers(
reinterpret_cast<void *>(pto_cpu_sim_set_execution_context),
reinterpret_cast<void *>(pto_cpu_sim_set_task_cookie),
reinterpret_cast<void *>(platform_get_cpu_sim_task_cookie)
);
}
}

return 0;
Expand Down
26 changes: 12 additions & 14 deletions src/a5/platform/sim/aicore/inner_kernel.h
Original file line number Diff line number Diff line change
Expand Up @@ -195,30 +195,28 @@ inline uint32_t get_physical_core_id() { return g_sim_physical_core_id; }
// CPU Simulation Context Hooks
// =============================================================================

// These hooks bridge the AICore kernel SO and the host runtime SO.
// The host runtime's DeviceRunner passes the function pointers in through
// set_sim_context_helpers() after loading the AICore SO, so nothing here has to
// rely on dlsym(RTLD_DEFAULT), which fails when the host runtime SO is loaded
// with RTLD_LOCAL.
//
// Hook signatures and their storage — assigned by set_sim_context_helpers(),
// read by the inline wrappers/macros below.

// Execution-context setter: (block_idx, subblock_id, subblock_dim).
using SimSetExecCtxFn = void (*)(uint32_t, uint32_t, uint32_t);
extern SimSetExecCtxFn g_sim_set_exec_ctx_fn;

// Task-cookie setter: (cookie).
using SimSetTaskCookieFn = void (*)(uint64_t);
extern SimSetTaskCookieFn g_sim_set_task_cookie_fn;

// Task-cookie getter: (core_id, reg_task_id) -> cookie.
using SimGetTaskCookieFn = uint64_t (*)(uint32_t, uint32_t);
extern SimGetTaskCookieFn g_sim_get_task_cookie_fn;

inline void cpu_sim_set_execution_context(uint32_t block_idx, uint32_t subblock_id, uint32_t subblock_dim) {
using Fn = void (*)(uint32_t, uint32_t, uint32_t);
static auto fn = reinterpret_cast<Fn>(dlsym(RTLD_DEFAULT, "pto_cpu_sim_set_execution_context"));
if (fn != nullptr) fn(block_idx, subblock_id, subblock_dim);
if (g_sim_set_exec_ctx_fn != nullptr) g_sim_set_exec_ctx_fn(block_idx, subblock_id, subblock_dim);
}
#define CPU_SIM_SET_EXECUTION_CONTEXT(block_idx, subblock_id, subblock_dim) \
cpu_sim_set_execution_context(block_idx, subblock_id, subblock_dim)

// CPU_SIM_SET_TASK_COOKIE — set task cookie for simulation tracing.
// Resolves pto_cpu_sim_set_task_cookie (defined in cpu_sim_context.cpp) via dlsym.
inline void cpu_sim_set_task_cookie(uint64_t cookie) {
using Fn = void (*)(uint64_t);
static auto fn = reinterpret_cast<Fn>(dlsym(RTLD_DEFAULT, "pto_cpu_sim_set_task_cookie"));
if (fn != nullptr) fn(cookie);
if (g_sim_set_task_cookie_fn != nullptr) g_sim_set_task_cookie_fn(cookie);
}
#define CPU_SIM_SET_TASK_COOKIE(cookie) cpu_sim_set_task_cookie(cookie)

// platform_get_cpu_sim_task_cookie — resolve per-dispatch logical task identity.
inline uint64_t platform_get_cpu_sim_task_cookie(uint32_t core_id, uint32_t reg_task_id) {
using Fn = uint64_t (*)(uint32_t, uint32_t);
static auto fn = reinterpret_cast<Fn>(dlsym(RTLD_DEFAULT, "platform_get_cpu_sim_task_cookie"));
return (fn != nullptr) ? fn(core_id, reg_task_id) : 0;
return (g_sim_get_task_cookie_fn != nullptr) ? g_sim_get_task_cookie_fn(core_id, reg_task_id) : 0;
}

#endif // PLATFORM_A5SIM_AICORE_INNER_KERNEL_H_
12 changes: 12 additions & 0 deletions src/a5/platform/sim/aicore/kernel.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@
*/

#include <cstdint>
#include "inner_kernel.h" // NOLINT(build/include_subdir)
#include "aicore/aicore.h"
#include "common/core_type.h"
#include "common/platform_config.h"
Expand All @@ -27,6 +28,17 @@ thread_local volatile uint8_t *g_sim_reg_base = nullptr;
// Thread-local simulated physical core ID (declared in inner_kernel.h)
thread_local uint32_t g_sim_physical_core_id = 0;

// Definitions of the sim context hook pointers declared in inner_kernel.h.
// All three start out null; DeviceRunner fills them in after dlopen.
SimSetExecCtxFn g_sim_set_exec_ctx_fn{nullptr};
SimSetTaskCookieFn g_sim_set_task_cookie_fn{nullptr};
SimGetTaskCookieFn g_sim_get_task_cookie_fn{nullptr};

// C-linkage entry point that installs the three hooks.
// Each argument arrives as an untyped pointer and is cast back to the matching
// hook signature before being stored; a null argument leaves that hook disabled.
extern "C" void set_sim_context_helpers(void *set_exec_ctx, void *set_task_cookie, void *get_task_cookie) {
    const auto exec_ctx_hook = reinterpret_cast<SimSetExecCtxFn>(set_exec_ctx);
    const auto set_cookie_hook = reinterpret_cast<SimSetTaskCookieFn>(set_task_cookie);
    const auto get_cookie_hook = reinterpret_cast<SimGetTaskCookieFn>(get_task_cookie);

    g_sim_set_exec_ctx_fn = exec_ctx_hook;
    g_sim_set_task_cookie_fn = set_cookie_hook;
    g_sim_get_task_cookie_fn = get_cookie_hook;
}

// Declare the original function (defined in aicore_executor.cpp with weak linkage)
void aicore_execute(__gm__ Runtime *runtime, int block_idx, CoreType core_type);

Expand Down
14 changes: 14 additions & 0 deletions src/a5/platform/sim/host/cpu_sim_context.h
Original file line number Diff line number Diff line change
Expand Up @@ -19,9 +19,23 @@

#pragma once

#include <cstdint>

/**
 * Free all entries in the CPU simulation shared storage map and reset the
 * pthread-backed per-thread execution context store.
 * Called by DeviceRunner::run() at start and DeviceRunner::finalize() at end.
 */
void clear_cpu_sim_shared_storage();

// The hooks below have C linkage. DeviceRunner takes their addresses and hands
// them to the AICore SO through set_sim_context_helpers() (see device_runner.cpp).
#ifdef __cplusplus
extern "C" {
#endif

// Set the block/subblock execution context (block_idx, subblock_id, subblock_dim).
void pto_cpu_sim_set_execution_context(uint32_t block_idx, uint32_t subblock_id, uint32_t subblock_dim);
// Set the task cookie used for simulation tracing.
void pto_cpu_sim_set_task_cookie(uint64_t task_cookie);
// Resolve the per-dispatch logical task identity for (core_id, reg_task_id).
// NOTE(review): the AICore-side inner_kernel.h defines an inline C++ wrapper with
// this same name and parameter list — confirm the two headers never meet in one TU.
uint64_t platform_get_cpu_sim_task_cookie(uint32_t core_id, uint32_t reg_task_id);

#ifdef __cplusplus
}
#endif
13 changes: 13 additions & 0 deletions src/a5/platform/sim/host/device_runner.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -167,6 +167,19 @@ int DeviceRunner::ensure_binaries_loaded(
return -1;
}
LOG_INFO("DeviceRunner(sim): Loaded aicore_execute_wrapper from %s", aicore_so_path_.c_str());

// Pass sim context function pointers to the AICore SO so it doesn't
// need dlsym(RTLD_DEFAULT) — which fails when the host runtime SO
// is loaded with RTLD_LOCAL.
auto set_helpers =
reinterpret_cast<void (*)(void *, void *, void *)>(dlsym(aicore_so_handle_, "set_sim_context_helpers"));
if (set_helpers != nullptr) {
set_helpers(
reinterpret_cast<void *>(pto_cpu_sim_set_execution_context),
reinterpret_cast<void *>(pto_cpu_sim_set_task_cookie),
reinterpret_cast<void *>(platform_get_cpu_sim_task_cookie)
);
}
}

return 0;
Expand Down
Loading