From 2290ff16e72647e1826b5ef037fafeaefd7df75c Mon Sep 17 00:00:00 2001 From: Liu Zhengyun Date: Tue, 20 Jan 2026 13:03:17 +0800 Subject: [PATCH 1/3] optimize model loading --- .../iotdb/ainode/core/inference/pool_controller.py | 9 +++++++++ .../iotdb/ainode/core/manager/inference_manager.py | 6 +----- 2 files changed, 10 insertions(+), 5 deletions(-) diff --git a/iotdb-core/ainode/iotdb/ainode/core/inference/pool_controller.py b/iotdb-core/ainode/iotdb/ainode/core/inference/pool_controller.py index 1eb07adfde44..7d8403318c36 100644 --- a/iotdb-core/ainode/iotdb/ainode/core/inference/pool_controller.py +++ b/iotdb-core/ainode/iotdb/ainode/core/inference/pool_controller.py @@ -443,6 +443,15 @@ def has_request_pools(self, model_id: str, device_id: torch.device = None) -> bo return device_id in self._request_pool_map[model_id] return True + def has_running_pools(self, model_id: str) -> bool: + """ + Check if there are running pools for the given model_id. + """ + for device_id, pool_group in self._request_pool_map[model_id].items(): + if pool_group.get_running_pool_count(): + return True + return False + def get_request_pools_group( self, model_id: str, device_id: torch.device ) -> Optional[PoolGroup]: diff --git a/iotdb-core/ainode/iotdb/ainode/core/manager/inference_manager.py b/iotdb-core/ainode/iotdb/ainode/core/manager/inference_manager.py index ebbb036a9dca..7b88a08a98b8 100644 --- a/iotdb-core/ainode/iotdb/ainode/core/manager/inference_manager.py +++ b/iotdb-core/ainode/iotdb/ainode/core/manager/inference_manager.py @@ -18,7 +18,6 @@ import threading import time -from typing import Dict import torch import torch.multiprocessing as mp @@ -69,9 +68,6 @@ class InferenceManager: def __init__(self): self._model_manager = ModelManager() self._backend = DeviceManager() - self._model_mem_usage_map: Dict[str, int] = ( - {} - ) # store model memory usage for each model self._result_queue = mp.Queue() self._result_wrapper_map = {} self._result_wrapper_lock = threading.RLock() @@ -214,7 +210,7 @@ def _run( output_length, ) - if self._pool_controller.has_request_pools(model_id=model_id): + if self._pool_controller.has_running_pools(model_id): infer_req = InferenceRequest( req_id=generate_req_id(), model_id=model_id, From b858ebfb670ea0e936ccbde9e5e00217d0613981 Mon Sep 17 00:00:00 2001 From: Liu Zhengyun Date: Tue, 20 Jan 2026 17:07:37 +0800 Subject: [PATCH 2/3] bug fix --- .../ainode/iotdb/ainode/core/inference/pool_controller.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/iotdb-core/ainode/iotdb/ainode/core/inference/pool_controller.py b/iotdb-core/ainode/iotdb/ainode/core/inference/pool_controller.py index 7d8403318c36..5a6db12edded 100644 --- a/iotdb-core/ainode/iotdb/ainode/core/inference/pool_controller.py +++ b/iotdb-core/ainode/iotdb/ainode/core/inference/pool_controller.py @@ -447,6 +447,8 @@ def has_running_pools(self, model_id: str) -> bool: """ Check if there are running pools for the given model_id. """ + if model_id not in self._request_pool_map: + return False for device_id, pool_group in self._request_pool_map[model_id].items(): if pool_group.get_running_pool_count(): return True From 8cd0d0526846a4ade5106573e4103d3e3338be50 Mon Sep 17 00:00:00 2001 From: Liu Zhengyun Date: Tue, 20 Jan 2026 17:57:37 +0800 Subject: [PATCH 3/3] fix exception --- .../ainode/iotdb/ainode/core/manager/inference_manager.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/iotdb-core/ainode/iotdb/ainode/core/manager/inference_manager.py b/iotdb-core/ainode/iotdb/ainode/core/manager/inference_manager.py index 7b88a08a98b8..180cc00ff492 100644 --- a/iotdb-core/ainode/iotdb/ainode/core/manager/inference_manager.py +++ b/iotdb-core/ainode/iotdb/ainode/core/manager/inference_manager.py @@ -203,11 +203,11 @@ def _run( ): raise NumericalRangeException( "output_length", + output_length, 1, AINodeDescriptor() .get_config() .get_ain_inference_max_output_length(), - output_length, ) if self._pool_controller.has_running_pools(model_id):