diff --git a/src/a2a3/platform/include/host/pto_runtime_c_api.h b/src/a2a3/platform/include/host/pto_runtime_c_api.h deleted file mode 100644 index 6dda41eba..000000000 --- a/src/a2a3/platform/include/host/pto_runtime_c_api.h +++ /dev/null @@ -1,86 +0,0 @@ -/* - * Copyright (c) PyPTO Contributors. - * This program is free software, you can redistribute it and/or modify it under the terms and conditions of - * CANN Open Software License Agreement Version 2.0 (the "License"). - * Please refer to the License for details. You may not use this file except in compliance with the License. - * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, EITHER EXPRESS OR IMPLIED, - * INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, MERCHANTABILITY, OR FITNESS FOR A PARTICULAR PURPOSE. - * See LICENSE in the root of the software repository for the full text of the License. - * ----------------------------------------------------------------------------------------------------------- - */ -/** - * PTO Runtime C API — Platform Header - * - * Declares all C-linkage functions exported by the host runtime .so: - * - * - Public API (resolved by ChipWorker via dlsym): - * get_runtime_size, set_device, run_runtime, finalize_device - * → see src/common/worker/pto_runtime_c_api.h for the canonical spec. - * - * - Internal API (called by orchestration code via function pointers - * or direct linking within the .so): - * record_tensor_pair - * - * Memory management: caller allocates a buffer of get_runtime_size() bytes - * and passes it to run_runtime(). Error codes: 0 = success, negative = error. 
- */ - -#ifndef SRC_A2A3_PLATFORM_INCLUDE_HOST_PTO_RUNTIME_C_API_H_ -#define SRC_A2A3_PLATFORM_INCLUDE_HOST_PTO_RUNTIME_C_API_H_ - -#include <stddef.h> -#include <stdint.h> - -#include "callable.h" // NOLINT(build/include_subdir) -#include "common/compile_strategy.h" -#include "task_args.h" // NOLINT(build/include_subdir) - -#ifdef __cplusplus -extern "C" { -#endif - -typedef void *RuntimeHandle; - -/* =========================================================================== - * Public API (resolved by ChipWorker via dlsym) - * =========================================================================== */ - -/** Return sizeof(Runtime) for caller buffer allocation. */ -size_t get_runtime_size(void); - -/** Set the target device. Must be called before the first run_runtime(). */ -int set_device(int device_id); - -/** - * Build the task graph, execute on device, copy results back, and clean up. - * - * Combines the former init_runtime + enable_runtime_profiling + - * launch_runtime + finalize_runtime into a single call. - */ -int run_runtime( - RuntimeHandle runtime, const void *callable, const void *args, int block_dim, int aicpu_thread_num, - int orch_thread_num, int device_id, const uint8_t *aicpu_binary, size_t aicpu_size, const uint8_t *aicore_binary, - size_t aicore_size, int enable_profiling -); - -/** - * Finalize the DeviceRunner, releasing all device resources. - * Must be called before dlclose() to avoid static destruction order issues. - */ -int finalize_device(void); - -/* =========================================================================== - * Internal API (used by orchestration code within the .so) - * =========================================================================== */ - -/** - * Record a host-device tensor pair for copy-back during finalize. - * Called by orchestration to track memory mappings. 
- */ -void record_tensor_pair(RuntimeHandle runtime, void *host_ptr, void *dev_ptr, size_t size); - -#ifdef __cplusplus -} -#endif - -#endif // SRC_A2A3_PLATFORM_INCLUDE_HOST_PTO_RUNTIME_C_API_H_ diff --git a/src/a2a3/platform/onboard/host/CMakeLists.txt b/src/a2a3/platform/onboard/host/CMakeLists.txt index 0edd624fc..12c86f4fd 100644 --- a/src/a2a3/platform/onboard/host/CMakeLists.txt +++ b/src/a2a3/platform/onboard/host/CMakeLists.txt @@ -1,3 +1,11 @@ +# Copyright (c) PyPTO Contributors. +# This program is free software, you can redistribute it and/or modify it under the terms and conditions of +# CANN Open Software License Agreement Version 2.0 (the "License"). +# Please refer to the License for details. You may not use this file except in compliance with the License. +# THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, EITHER EXPRESS OR IMPLIED, +# INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, MERCHANTABILITY, OR FITNESS FOR A PARTICULAR PURPOSE. +# See LICENSE in the root of the software repository for the full text of the License. 
+# ----------------------------------------------------------------------------------------------------------- # Build Host runtime using host/* + common/* + custom includes and sources (for `Runtime`) # CMAKE_C_COMPILER and CMAKE_CXX_COMPILER are set when calling cmake # CUSTOM_INCLUDE_DIRS and CUSTOM_SOURCE_DIRS are set when calling cmake @@ -10,6 +18,7 @@ project(host_runtime LANGUAGES C CXX) set(CMAKE_CUSTOM_INCLUDE_DIRS "") list(APPEND CMAKE_CUSTOM_INCLUDE_DIRS "${CMAKE_CURRENT_SOURCE_DIR}/../../include") list(APPEND CMAKE_CUSTOM_INCLUDE_DIRS "${CMAKE_CURRENT_SOURCE_DIR}/../../../../common/task_interface") +list(APPEND CMAKE_CUSTOM_INCLUDE_DIRS "${CMAKE_CURRENT_SOURCE_DIR}/../../../../common/worker") if(DEFINED CUSTOM_INCLUDE_DIRS) foreach(INC_DIR ${CUSTOM_INCLUDE_DIRS}) list(APPEND CMAKE_CUSTOM_INCLUDE_DIRS "${INC_DIR}") diff --git a/src/a2a3/platform/onboard/host/pto_runtime_c_api.cpp b/src/a2a3/platform/onboard/host/pto_runtime_c_api.cpp index e8420171b..6fd750c85 100644 --- a/src/a2a3/platform/onboard/host/pto_runtime_c_api.cpp +++ b/src/a2a3/platform/onboard/host/pto_runtime_c_api.cpp @@ -15,7 +15,10 @@ * src/common/worker/pto_runtime_c_api.h. Uses real Ascend device execution. 
*/ -#include "host/pto_runtime_c_api.h" +#include "pto_runtime_c_api.h" + +#include "callable.h" +#include "task_args.h" #include diff --git a/src/a2a3/platform/sim/host/CMakeLists.txt b/src/a2a3/platform/sim/host/CMakeLists.txt index 161781b62..af138c945 100644 --- a/src/a2a3/platform/sim/host/CMakeLists.txt +++ b/src/a2a3/platform/sim/host/CMakeLists.txt @@ -23,6 +23,7 @@ project(host_runtime_sim LANGUAGES C CXX) set(CMAKE_CUSTOM_INCLUDE_DIRS "") list(APPEND CMAKE_CUSTOM_INCLUDE_DIRS "${CMAKE_CURRENT_SOURCE_DIR}/../../include") list(APPEND CMAKE_CUSTOM_INCLUDE_DIRS "${CMAKE_CURRENT_SOURCE_DIR}/../../../../common/task_interface") +list(APPEND CMAKE_CUSTOM_INCLUDE_DIRS "${CMAKE_CURRENT_SOURCE_DIR}/../../../../common/worker") if(DEFINED CUSTOM_INCLUDE_DIRS) foreach(INC_DIR ${CUSTOM_INCLUDE_DIRS}) diff --git a/src/a2a3/platform/sim/host/pto_runtime_c_api.cpp b/src/a2a3/platform/sim/host/pto_runtime_c_api.cpp index b338d0629..dc3c2e3be 100644 --- a/src/a2a3/platform/sim/host/pto_runtime_c_api.cpp +++ b/src/a2a3/platform/sim/host/pto_runtime_c_api.cpp @@ -15,7 +15,10 @@ * src/common/worker/pto_runtime_c_api.h. Uses thread-based simulation. */ -#include "host/pto_runtime_c_api.h" +#include "pto_runtime_c_api.h" + +#include "callable.h" +#include "task_args.h" #include #include diff --git a/src/a5/platform/include/host/pto_runtime_c_api.h b/src/a5/platform/include/host/pto_runtime_c_api.h deleted file mode 100644 index b3346ffbe..000000000 --- a/src/a5/platform/include/host/pto_runtime_c_api.h +++ /dev/null @@ -1,86 +0,0 @@ -/* - * Copyright (c) PyPTO Contributors. - * This program is free software, you can redistribute it and/or modify it under the terms and conditions of - * CANN Open Software License Agreement Version 2.0 (the "License"). - * Please refer to the License for details. You may not use this file except in compliance with the License. 
- * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, EITHER EXPRESS OR IMPLIED, - * INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, MERCHANTABILITY, OR FITNESS FOR A PARTICULAR PURPOSE. - * See LICENSE in the root of the software repository for the full text of the License. - * ----------------------------------------------------------------------------------------------------------- - */ -/** - * PTO Runtime C API — Platform Header - * - * Declares all C-linkage functions exported by the host runtime .so: - * - * - Public API (resolved by ChipWorker via dlsym): - * get_runtime_size, set_device, run_runtime, finalize_device - * → see src/common/worker/pto_runtime_c_api.h for the canonical spec. - * - * - Internal API (called by orchestration code via function pointers - * or direct linking within the .so): - * record_tensor_pair - * - * Memory management: caller allocates a buffer of get_runtime_size() bytes - * and passes it to run_runtime(). Error codes: 0 = success, negative = error. - */ - -#ifndef SRC_A5_PLATFORM_INCLUDE_HOST_PTO_RUNTIME_C_API_H_ -#define SRC_A5_PLATFORM_INCLUDE_HOST_PTO_RUNTIME_C_API_H_ - -#include <stddef.h> -#include <stdint.h> - -#include "callable.h" // NOLINT(build/include_subdir) -#include "common/compile_strategy.h" -#include "task_args.h" // NOLINT(build/include_subdir) - -#ifdef __cplusplus -extern "C" { -#endif - -typedef void *RuntimeHandle; - -/* =========================================================================== - * Public API (resolved by ChipWorker via dlsym) - * =========================================================================== */ - -/** Return sizeof(Runtime) for caller buffer allocation. */ -size_t get_runtime_size(void); - -/** Set the target device. Must be called before the first run_runtime(). */ -int set_device(int device_id); - -/** - * Build the task graph, execute on device, copy results back, and clean up. 
- * - * Combines the former init_runtime + enable_runtime_profiling + - * launch_runtime + finalize_runtime into a single call. - */ -int run_runtime( - RuntimeHandle runtime, const void *callable, const void *args, int block_dim, int aicpu_thread_num, - int orch_thread_num, int device_id, const uint8_t *aicpu_binary, size_t aicpu_size, const uint8_t *aicore_binary, - size_t aicore_size, int enable_profiling -); - -/** - * Finalize the DeviceRunner, releasing all device resources. - * Must be called before dlclose() to avoid static destruction order issues. - */ -int finalize_device(void); - -/* =========================================================================== - * Internal API (used by orchestration code within the .so) - * =========================================================================== */ - -/** - * Record a host-device tensor pair for copy-back during finalize. - * Called by orchestration to track memory mappings. - */ -void record_tensor_pair(RuntimeHandle runtime, void *host_ptr, void *dev_ptr, size_t size); - -#ifdef __cplusplus -} -#endif - -#endif // SRC_A5_PLATFORM_INCLUDE_HOST_PTO_RUNTIME_C_API_H_ diff --git a/src/a5/platform/onboard/host/CMakeLists.txt b/src/a5/platform/onboard/host/CMakeLists.txt index 0edd624fc..12c86f4fd 100644 --- a/src/a5/platform/onboard/host/CMakeLists.txt +++ b/src/a5/platform/onboard/host/CMakeLists.txt @@ -1,3 +1,11 @@ +# Copyright (c) PyPTO Contributors. +# This program is free software, you can redistribute it and/or modify it under the terms and conditions of +# CANN Open Software License Agreement Version 2.0 (the "License"). +# Please refer to the License for details. You may not use this file except in compliance with the License. +# THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, EITHER EXPRESS OR IMPLIED, +# INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, MERCHANTABILITY, OR FITNESS FOR A PARTICULAR PURPOSE. 
+# See LICENSE in the root of the software repository for the full text of the License. +# ----------------------------------------------------------------------------------------------------------- # Build Host runtime using host/* + common/* + custom includes and sources (for `Runtime`) # CMAKE_C_COMPILER and CMAKE_CXX_COMPILER are set when calling cmake # CUSTOM_INCLUDE_DIRS and CUSTOM_SOURCE_DIRS are set when calling cmake @@ -10,6 +18,7 @@ project(host_runtime LANGUAGES C CXX) set(CMAKE_CUSTOM_INCLUDE_DIRS "") list(APPEND CMAKE_CUSTOM_INCLUDE_DIRS "${CMAKE_CURRENT_SOURCE_DIR}/../../include") list(APPEND CMAKE_CUSTOM_INCLUDE_DIRS "${CMAKE_CURRENT_SOURCE_DIR}/../../../../common/task_interface") +list(APPEND CMAKE_CUSTOM_INCLUDE_DIRS "${CMAKE_CURRENT_SOURCE_DIR}/../../../../common/worker") if(DEFINED CUSTOM_INCLUDE_DIRS) foreach(INC_DIR ${CUSTOM_INCLUDE_DIRS}) list(APPEND CMAKE_CUSTOM_INCLUDE_DIRS "${INC_DIR}") diff --git a/src/a5/platform/onboard/host/pto_runtime_c_api.cpp b/src/a5/platform/onboard/host/pto_runtime_c_api.cpp index e8420171b..6fd750c85 100644 --- a/src/a5/platform/onboard/host/pto_runtime_c_api.cpp +++ b/src/a5/platform/onboard/host/pto_runtime_c_api.cpp @@ -15,7 +15,10 @@ * src/common/worker/pto_runtime_c_api.h. Uses real Ascend device execution. 
*/ -#include "host/pto_runtime_c_api.h" +#include "pto_runtime_c_api.h" + +#include "callable.h" +#include "task_args.h" #include diff --git a/src/a5/platform/sim/host/CMakeLists.txt b/src/a5/platform/sim/host/CMakeLists.txt index 2f8965fee..f56769897 100644 --- a/src/a5/platform/sim/host/CMakeLists.txt +++ b/src/a5/platform/sim/host/CMakeLists.txt @@ -23,6 +23,7 @@ project(host_runtime_sim LANGUAGES C CXX) set(CMAKE_CUSTOM_INCLUDE_DIRS "") list(APPEND CMAKE_CUSTOM_INCLUDE_DIRS "${CMAKE_CURRENT_SOURCE_DIR}/../../include") list(APPEND CMAKE_CUSTOM_INCLUDE_DIRS "${CMAKE_CURRENT_SOURCE_DIR}/../../../../common/task_interface") +list(APPEND CMAKE_CUSTOM_INCLUDE_DIRS "${CMAKE_CURRENT_SOURCE_DIR}/../../../../common/worker") if(DEFINED CUSTOM_INCLUDE_DIRS) foreach(INC_DIR ${CUSTOM_INCLUDE_DIRS}) diff --git a/src/a5/platform/sim/host/pto_runtime_c_api.cpp b/src/a5/platform/sim/host/pto_runtime_c_api.cpp index b338d0629..dc3c2e3be 100644 --- a/src/a5/platform/sim/host/pto_runtime_c_api.cpp +++ b/src/a5/platform/sim/host/pto_runtime_c_api.cpp @@ -15,7 +15,10 @@ * src/common/worker/pto_runtime_c_api.h. Uses thread-based simulation. */ -#include "host/pto_runtime_c_api.h" +#include "pto_runtime_c_api.h" + +#include "callable.h" +#include "task_args.h" #include #include diff --git a/src/common/worker/pto_runtime_c_api.h b/src/common/worker/pto_runtime_c_api.h index f1f9846aa..e163cc529 100644 --- a/src/common/worker/pto_runtime_c_api.h +++ b/src/common/worker/pto_runtime_c_api.h @@ -8,18 +8,18 @@ * See LICENSE in the root of the software repository for the full text of the License. * ----------------------------------------------------------------------------------------------------------- */ - /** - * PTO Runtime Public C API + * PTO Runtime C API — canonical header + * + * Declares all C-linkage functions exported by the host runtime .so. 
+ * Both the ChipWorker (consumer, resolves public symbols via dlsym) and the + * platform implementations (producers, define all symbols) include this file. * - * Declares the symbols that ChipWorker resolves via dlsym from the host - * runtime shared library. Each platform (sim / onboard × a2a3 / a5) - * provides its own implementation. + * Public API — resolved by ChipWorker via dlsym: + * get_runtime_size, set_device, run_runtime, finalize_device * - * Internal functions used by orchestration code (device_malloc, device_free, - * copy_to_device, copy_from_device, upload_kernel_binary_wrapper, - * remove_kernel_binary_wrapper) are NOT part of this public interface — - * they are passed via Runtime.host_api function pointers within the .so. + * Memory management: caller allocates a buffer of get_runtime_size() bytes + * and passes it to run_runtime(). Error codes: 0 = success, negative = error. */ #ifndef SRC_COMMON_WORKER_PTO_RUNTIME_C_API_H_ @@ -34,40 +34,20 @@ extern "C" { typedef void *RuntimeHandle; -/** - * Return the size (in bytes) of the Runtime structure. - * The caller allocates a buffer of this size and passes it to run_runtime(). - */ +/* =========================================================================== + * Public API (resolved by ChipWorker via dlsym) + * =========================================================================== */ + +/** Return sizeof(Runtime) for caller buffer allocation. */ size_t get_runtime_size(void); -/** - * Set the target device for subsequent operations. - * Must be called before the first run_runtime() call. - * - * @param device_id Logical device identifier - * @return 0 on success, negative on error - */ +/** Set the target device. Must be called before the first run_runtime(). */ int set_device(int device_id); /** * Build the task graph, execute on device, copy results back, and clean up. 
* - * Combines the former init_runtime + enable_runtime_profiling + - * launch_runtime + finalize_runtime into a single call. - * - * @param runtime Caller-allocated buffer (size from get_runtime_size()) - * @param callable Opaque ChipCallable pointer (orchestration + kernel binaries) - * @param args Opaque ChipStorageTaskArgs pointer (tensor/scalar arguments) - * @param block_dim Number of AICore blocks - * @param aicpu_thread_num Number of AICPU scheduler threads - * @param orch_thread_num Number of orchestrator threads - * @param device_id Target device - * @param aicpu_binary AICPU executor binary blob - * @param aicpu_size Size of AICPU binary - * @param aicore_binary AICore executor binary blob - * @param aicore_size Size of AICore binary - * @param enable_profiling 1 to enable profiling, 0 to disable - * @return 0 on success, negative on error + * Combines init_runtime + launch_runtime + finalize_runtime into one call. */ int run_runtime( RuntimeHandle runtime, const void *callable, const void *args, int block_dim, int aicpu_thread_num, @@ -76,13 +56,8 @@ int run_runtime( ); /** - * Finalize the DeviceRunner, releasing all device resources. - * - * Must be called before dlclose() to avoid static destruction order segfaults. - * After this call, the next set_device() + run_runtime() cycle will - * re-initialize from scratch. - * - * @return 0 on success, negative on error + * Release all device resources. + * Must be called before dlclose() to avoid static destruction order issues. */ int finalize_device(void);