From e00188949d2ea96d68547a174a4971fd6bd5e8b9 Mon Sep 17 00:00:00 2001 From: ieQu1 <99872536+ieQu1@users.noreply.github.com> Date: Tue, 12 May 2026 19:37:49 +0200 Subject: [PATCH] feat(autoheal): Add autoheal callback --- README.md | 1 + src/mria_autoheal.erl | 8 +++++--- src/mria_config.erl | 1 + test/mria_autoheal_SUITE.erl | 3 ++- test/mria_ct.erl | 7 +++++++ 5 files changed, 16 insertions(+), 4 deletions(-) diff --git a/README.md b/README.md index 5eed5a7..0021039 100644 --- a/README.md +++ b/README.md @@ -171,5 +171,6 @@ They can be registered using `mria:register_callback/2` function. - `stop`: This callback is executed when the DB stops or restarts. - `start`: This callback is executed when the DB starts or restarts. +- `heal_partition`: This callback is executed after mria heals a network partition by rebooting the losing nodes. Note that the DB restarts when the node joins the cluster. diff --git a/src/mria_autoheal.erl b/src/mria_autoheal.erl index 247c71c..b7c826d 100644 --- a/src/mria_autoheal.erl +++ b/src/mria_autoheal.erl @@ -1,5 +1,5 @@ %%-------------------------------------------------------------------- -%% Copyright (c) 2019-2023 EMQ Technologies Co., Ltd. All Rights Reserved. +%% Copyright (c) 2019-2026 EMQ Technologies Co., Ltd. All Rights Reserved. %% %% Licensed under the Apache License, Version 2.0 (the "License"); %% you may not use this file except in compliance with the License. @@ -142,8 +142,10 @@ coordinator([Majority | _]) -> heal_partition([[_Majority]]) -> %% There are no partitions: ok; -heal_partition([_Majority|Minorities]) -> - reboot_minority(lists:append(Minorities)). +heal_partition([Majority|Minorities]) -> + Result = reboot_minority(lists:append(Minorities)), + mria_lib:exec_callback(heal_partition, {Majority, Minorities}), + Result. 
reboot_minority(Minority) -> ?tp(info, "Rebooting minority", #{nodes => Minority}), diff --git a/src/mria_config.erl b/src/mria_config.erl index c0771b7..51e3c57 100644 --- a/src/mria_config.erl +++ b/src/mria_config.erl @@ -73,6 +73,7 @@ -type callback() :: start | stop | {start | stop, mria_rlog:shard()} + | heal_partition | core_node_discovery | lb_custom_info | lb_custom_info_check. diff --git a/test/mria_autoheal_SUITE.erl b/test/mria_autoheal_SUITE.erl index 3c48b30..f0221d6 100644 --- a/test/mria_autoheal_SUITE.erl +++ b/test/mria_autoheal_SUITE.erl @@ -38,6 +38,7 @@ t_autoheal(Config) when is_list(Config) -> ?assertMatch({[N3], [N1, N2, N4]}, view(N3)), ?assertMatch({[N4], [N1, N2, N3]}, view(N4)), %% Wait for autoheal, it should happen automatically: + ?block_until(#{?snk_kind := mria_ct_heal_partition}), ?retry(1000, 20, begin ?assertMatch({Nodes, []}, view(N1)), @@ -209,7 +210,7 @@ prop_callbacks(Trace0) -> )) || N <- Minority], %% Check that ONLY the minority nodes have been restarted: - Restarted = lists:usort([Node || #{?snk_kind := mria_exec_callback, ?snk_meta := #{node := Node}} <- AfterHeal]), + Restarted = lists:usort([Node || #{?snk_kind := mria_exec_callback, type := stop, ?snk_meta := #{node := Node}} <- AfterHeal]), ?assertEqual(lists:sort(Minority), Restarted), true. diff --git a/test/mria_ct.erl b/test/mria_ct.erl index cf276a6..0ff3e85 100644 --- a/test/mria_ct.erl +++ b/test/mria_ct.erl @@ -80,6 +80,7 @@ cluster(Specs0, CommonEnv, ClusterOpts) -> , env => [ {mria, core_nodes, CoreNodes} , {mria, node_role, Role} , {mria, rlog_replica_reconnect_interval, 100} % For faster response times + , {mria, {callback, heal_partition}, fun heal_callback/1} , {gen_rpc, tcp_server_port, BaseGenRpcPort + Number} , {gen_rpc, client_config_per_node, {internal, GenRpcPorts}} | Env] @@ -273,6 +274,12 @@ get_txid() -> TID end. +heal_callback({Majority, Minority}) -> + ?tp(mria_ct_heal_partition, + #{ majority => Majority + , minority => Minority + }). 
+ -if(?OTP_RELEASE >= 25). start_dist() -> ensure_epmd(),