From 9cee5cd7772bfb2a0dcdf972b528ce80d01b4778 Mon Sep 17 00:00:00 2001 From: Mykola Kobets Date: Mon, 16 Feb 2026 19:14:30 +0200 Subject: [PATCH 1/3] cm: launcher: wait for node instance status before running (re)balancing Signed-off-by: Mykola Kobets Reviewed-by: Oleksandr Grytsov Reviewed-by: Mykola Solianko --- src/core/cm/launcher/launcher.cpp | 5 +++- src/core/cm/launcher/node.cpp | 4 +++ src/core/cm/launcher/node.hpp | 16 ++++++++++- src/core/cm/launcher/nodemanager.cpp | 4 ++- src/core/cm/launcher/tests/launcher.cpp | 28 +++++++++++++++++++ .../tests/stubs/instancerunnerstub.hpp | 7 +++++ 6 files changed, 61 insertions(+), 3 deletions(-) diff --git a/src/core/cm/launcher/launcher.cpp b/src/core/cm/launcher/launcher.cpp index 06b90ff87..905de05ea 100644 --- a/src/core/cm/launcher/launcher.cpp +++ b/src/core/cm/launcher/launcher.cpp @@ -464,7 +464,7 @@ void Launcher::ProcessUpdate() void Launcher::WaitAllNodesConnected(UniqueLock& lock) { auto allNodesConnected = [this]() { - auto notConnected = [](const Node& node) { return !node.GetInfo().mIsConnected; }; + auto notConnected = [](const Node& node) { return !node.IsConnected(); }; return !mNodeManager.GetNodes().ContainsIf(notConnected) || !mIsRunning; }; @@ -615,6 +615,9 @@ Error Launcher::OnNodeInstancesStatusesReceived(const String& nodeID, const Arra } mProcessUpdatesCondVar.NotifyAll(); // Node is not connected until it receives instance statuses. // So, we need to trigger notification for waiting nodes after we handled statuses. 
+ mAllNodesConnectedCondVar.NotifyAll(); return ErrorEnum::eNone; } diff --git a/src/core/cm/launcher/node.cpp b/src/core/cm/launcher/node.cpp index 24faeb1ec..f4c77142e 100644 --- a/src/core/cm/launcher/node.cpp +++ b/src/core/cm/launcher/node.cpp @@ -189,6 +189,10 @@ bool Node::UpdateInfo(const UnitNodeInfo& info) mInfo = info; } + if (!info.mIsConnected) { + mIsNodeStatusReceived = false; + } + return nodeChanged; } diff --git a/src/core/cm/launcher/node.hpp b/src/core/cm/launcher/node.hpp index 0f8f51dbe..6ab7f8f69 100644 --- a/src/core/cm/launcher/node.hpp +++ b/src/core/cm/launcher/node.hpp @@ -149,6 +149,18 @@ class Node : public NodeItf { */ void UpdateConfig(); + /** + * Indicates whether node is connected. + * + * @return bool. + */ + bool IsConnected() const { return mInfo.mIsConnected && mIsNodeStatusReceived; } + + /** + * Notifies the node that its instance status has been received. + */ + void NotifyInstanceStatusReceived() { mIsNodeStatusReceived = true; } + private: // Returns CPU usage without Aos service instances. 
size_t GetSystemCPUUsage(const monitoring::NodeMonitoringData& monitoringData) const; @@ -165,7 +177,9 @@ class Node : public NodeItf { InstanceRunnerItf* mInstanceRunner {}; UnitNodeInfo mInfo {}; - bool mNeedBalancing {}; + bool mIsNodeStatusReceived {}; + + bool mNeedBalancing {}; size_t mTotalCPUUsage {}; size_t mTotalRAMUsage {}; diff --git a/src/core/cm/launcher/nodemanager.cpp b/src/core/cm/launcher/nodemanager.cpp index 9ca3aafdd..da51ced15 100644 --- a/src/core/cm/launcher/nodemanager.cpp +++ b/src/core/cm/launcher/nodemanager.cpp @@ -143,7 +143,9 @@ Error NodeManager::NotifyNodeStatusReceived(const String& nodeID) return AOS_ERROR_WRAP(Error(ErrorEnum::eNotFound, "node not found")); } - if (node->GetInfo().mIsConnected && node->GetInfo().mState == NodeStateEnum::eProvisioned) { + node->NotifyInstanceStatusReceived(); + + if (node->IsConnected() && node->GetInfo().mState == NodeStateEnum::eProvisioned) { if (mNodesExpectedToSendStatus.Remove(nodeID) != 0) { mStatusUpdateCondVar.NotifyAll(); } diff --git a/src/core/cm/launcher/tests/launcher.cpp b/src/core/cm/launcher/tests/launcher.cpp index 87587554b..6bc7ea576 100644 --- a/src/core/cm/launcher/tests/launcher.cpp +++ b/src/core/cm/launcher/tests/launcher.cpp @@ -682,6 +682,10 @@ TEST_F(CMLauncherTest, CacheInstances) ASSERT_TRUE(mLauncher.Start().IsNone()); + for (const auto& nodeID : {cNodeIDLocalSM, cNodeIDRemoteSM1, cNodeIDRemoteSM2}) { + mInstanceRunner.SendInitialStatuses(nodeID); + } + // Run instances 1 auto runRequest1 = std::make_unique>(); @@ -779,6 +783,10 @@ TEST_F(CMLauncherTest, Components) ASSERT_TRUE(mLauncher.Start().IsNone()); + for (const auto& nodeID : {cNodeIDLocalSM, cNodeIDRemoteSM1}) { + mInstanceRunner.SendInitialStatuses(nodeID); + } + auto instanceStatusListener = std::make_unique(); mLauncher.SubscribeListener(*instanceStatusListener); @@ -1475,6 +1483,10 @@ TEST_F(CMLauncherTest, Balancing) ASSERT_TRUE(mLauncher.Start().IsNone()); + for (const auto& nodeID : nodeIDs) { + 
mInstanceRunner.SendInitialStatuses(nodeID); + } + // Run instances auto runStatuses = std::make_unique>(); ASSERT_TRUE(mLauncher.RunInstances(testItem.mRunRequests, *runStatuses).IsNone()); @@ -1578,6 +1590,10 @@ TEST_F(CMLauncherTest, PlatformFiltering) ASSERT_TRUE(mLauncher.Start().IsNone()); + for (const auto& nodeID : {cNodeIDLocalSM, cNodeIDRemoteSM1, cNodeIDRemoteSM2}) { + mInstanceRunner.SendInitialStatuses(nodeID); + } + InstanceStatusListenerStub instanceStatusListener; mLauncher.SubscribeListener(instanceStatusListener); @@ -1672,6 +1688,8 @@ TEST_F(CMLauncherTest, ResendInstancesOnMismatchedNodeStatus) ASSERT_TRUE(mLauncher.Start().IsNone()); + mInstanceRunner.SendInitialStatuses(cNodeIDLocalSM); + InstanceStatusListenerStub instanceStatusListener; mLauncher.SubscribeListener(instanceStatusListener); @@ -1752,6 +1770,8 @@ TEST_F(CMLauncherTest, SubjectChanged) ASSERT_TRUE(mLauncher.Start().IsNone()); + mInstanceRunner.SendInitialStatuses(cNodeIDLocalSM); + // 1) Run a single instance with a single subject. auto runRequest = std::make_unique>(); runRequest->PushBack(CreateRunRequest(cService1, cSubject1, 50, 1)); @@ -1824,6 +1844,8 @@ TEST_F(CMLauncherTest, PrepareNetworkParamsFails) ASSERT_TRUE(mLauncher.Start().IsNone()); + mInstanceRunner.SendInitialStatuses(cNodeIDLocalSM); + // Run a single instance. 
auto runRequest = std::make_unique>(); runRequest->PushBack(CreateRunRequest(cService1, cSubject1, 50, 1)); @@ -1896,6 +1918,8 @@ TEST_F(CMLauncherTest, TestSentInstanceInfo) ASSERT_TRUE(mLauncher.Start().IsNone()); + mInstanceRunner.SendInitialStatuses(cNodeIDLocalSM); + InstanceStatusListenerStub instanceStatusListener; mLauncher.SubscribeListener(instanceStatusListener); @@ -1980,6 +2004,8 @@ TEST_F(CMLauncherTest, PreinstalledComponents) ASSERT_TRUE(mLauncher.Start().IsNone()); + mInstanceRunner.SendInitialStatuses(cNodeIDLocalSM); + InstanceStatusListenerStub instanceStatusListener; mLauncher.SubscribeListener(instanceStatusListener); @@ -2075,6 +2101,8 @@ TEST_F(CMLauncherTest, SetStatusOnStart) ASSERT_TRUE(mLauncher.Start().IsNone()); + mInstanceRunner.SendInitialStatuses(cNodeIDLocalSM); + // Verify that both instances are activating InstanceStatus expectedStatus1 = CreateInstanceStatus(CreateInstanceIdent(cService1, cSubject1, 0), cNodeIDLocalSM, cRunnerRunc, aos::InstanceStateEnum::eActivating, ErrorEnum::eNone, "1.0.0", false, manifestDigest.CStr()); diff --git a/src/core/cm/launcher/tests/stubs/instancerunnerstub.hpp b/src/core/cm/launcher/tests/stubs/instancerunnerstub.hpp index ac98b09d6..00b9b43d3 100644 --- a/src/core/cm/launcher/tests/stubs/instancerunnerstub.hpp +++ b/src/core/cm/launcher/tests/stubs/instancerunnerstub.hpp @@ -70,6 +70,13 @@ class InstanceRunnerStub : public InstanceRunnerItf { mPreinstalledComponents = preinstalledComponents; } + void SendInitialStatuses(const String& nodeID) + { + if (mStatusReceiver != nullptr) { + mStatusReceiver->OnNodeInstancesStatusesReceived(nodeID, Array()); + } + } + MOCK_METHOD(void, OnRunRequest, ()); // InstanceRunnerItf From 76a9812d68dcbf739a65f192b539f73b46e4233e Mon Sep 17 00:00:00 2001 From: Mykola Kobets Date: Tue, 17 Feb 2026 12:56:55 +0200 Subject: [PATCH 2/3] cm: launcher: ignore load SM data for active instances error Signed-off-by: Mykola Kobets Reviewed-by: Oleksandr Grytsov Reviewed-by: 
Mykola Solianko --- src/core/cm/launcher/launcher.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/core/cm/launcher/launcher.cpp b/src/core/cm/launcher/launcher.cpp index 905de05ea..56a905310 100644 --- a/src/core/cm/launcher/launcher.cpp +++ b/src/core/cm/launcher/launcher.cpp @@ -127,7 +127,7 @@ Error Launcher::Start() UpdateInstanceStatuses(); if (auto err = mBalancer.LoadSMDataForActiveInstances(); !err.IsNone()) { - return AOS_ERROR_WRAP(err); + LOG_ERR() << "Can't load SM data for active instances" << Log::Field(err); } if (auto err = mWorkerThread.Run([this](void*) { ProcessUpdate(); }); !err.IsNone()) { From 474faa3483e74255f5b571909013795631b1b5ec Mon Sep 17 00:00:00 2001 From: Mykola Kobets Date: Mon, 16 Feb 2026 19:26:57 +0200 Subject: [PATCH 3/3] cm: launcher: skip checking resources and runtimes on start Resources/runtimes are not available on start; because of that, a rebalance is triggered on every start. Skip checking them when connection status has changed. Signed-off-by: Mykola Kobets Reviewed-by: Oleksandr Grytsov Reviewed-by: Mykola Solianko --- src/core/cm/launcher/node.cpp | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/src/core/cm/launcher/node.cpp b/src/core/cm/launcher/node.cpp index f4c77142e..d5bafe3e7 100644 --- a/src/core/cm/launcher/node.cpp +++ b/src/core/cm/launcher/node.cpp @@ -182,8 +182,16 @@ void Node::UpdateMonitoringData(const monitoring::NodeMonitoringData& monitoring bool Node::UpdateInfo(const UnitNodeInfo& info) { + // Skip checking resources and runtimes if connection status is changed. + if (mInfo.mIsConnected != info.mIsConnected) { + mInfo.mResources = info.mResources; + mInfo.mRuntimes = info.mRuntimes; + } + + // Skip connection status change. mInfo.mIsConnected = info.mIsConnected; + // Check if node info has changed. bool nodeChanged = mInfo != info; if (nodeChanged) { mInfo = info;