Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -171,5 +171,6 @@ They can be registered using `mria:register_callback/2` function.

- `stop`: This callback is executed when the DB stops or restarts.
- `start`: This callback is executed when the DB starts or restarts.
- `heal_partition`: This callback is executed after mria heals a network partition by rebooting the losing nodes.

Note that the DB restarts when the node joins the cluster.
8 changes: 5 additions & 3 deletions src/mria_autoheal.erl
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
%%--------------------------------------------------------------------
%% Copyright (c) 2019-2023 EMQ Technologies Co., Ltd. All Rights Reserved.
%% Copyright (c) 2019-2026 EMQ Technologies Co., Ltd. All Rights Reserved.
%%
%% Licensed under the Apache License, Version 2.0 (the "License");
%% you may not use this file except in compliance with the License.
Expand Down Expand Up @@ -142,8 +142,10 @@ coordinator([Majority | _]) ->
heal_partition([[_Majority]]) ->
%% There are no partitions:
ok;
heal_partition([_Majority|Minorities]) ->
reboot_minority(lists:append(Minorities)).
heal_partition([Majority|Minorities]) ->
Result = reboot_minority(lists:append(Minorities)),
mria_lib:exec_callback(heal_partition, {Majority, Minorities}),
Comment thread
thalesmg marked this conversation as resolved.
Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

nit: add a comment to doc that the callback result is discarded.

Copy link
Copy Markdown
Member Author

@ieQu1 ieQu1 May 12, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is pretty much the case with all mria callbacks. mria_lib prints warnings in the log if the callback crashes and always returns ok.

Result.

reboot_minority(Minority) ->
?tp(info, "Rebooting minority", #{nodes => Minority}),
Expand Down
1 change: 1 addition & 0 deletions src/mria_config.erl
Original file line number Diff line number Diff line change
Expand Up @@ -73,6 +73,7 @@
%% Names under which a callback can be registered with mria.
%%
%% `start' / `stop' are executed when the DB starts / stops (or restarts).
%% `heal_partition' is executed by the autoheal logic after the minority
%% nodes have been rebooted.
%% NOTE(review): `{start | stop, Shard}' presumably are the per-shard
%% variants of `start'/`stop' -- confirm against the rlog code.
-type callback() :: start
| stop
| {start | stop, mria_rlog:shard()}
| heal_partition
| core_node_discovery
| lb_custom_info
| lb_custom_info_check.
Expand Down
3 changes: 2 additions & 1 deletion test/mria_autoheal_SUITE.erl
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,7 @@ t_autoheal(Config) when is_list(Config) ->
?assertMatch({[N3], [N1, N2, N4]}, view(N3)),
?assertMatch({[N4], [N1, N2, N3]}, view(N4)),
%% Wait for autoheal, it should happen automatically:
?block_until(#{?snk_kind := mria_ct_heal_partition}),
?retry(1000, 20,
begin
?assertMatch({Nodes, []}, view(N1)),
Expand Down Expand Up @@ -209,7 +210,7 @@ prop_callbacks(Trace0) ->
))
|| N <- Minority],
%% Check that ONLY the minority nodes have been restarted:
Restarted = lists:usort([Node || #{?snk_kind := mria_exec_callback, ?snk_meta := #{node := Node}} <- AfterHeal]),
Restarted = lists:usort([Node || #{?snk_kind := mria_exec_callback, type := stop, ?snk_meta := #{node := Node}} <- AfterHeal]),
?assertEqual(lists:sort(Minority),
Restarted),
true.
Expand Down
7 changes: 7 additions & 0 deletions test/mria_ct.erl
Original file line number Diff line number Diff line change
Expand Up @@ -80,6 +80,7 @@ cluster(Specs0, CommonEnv, ClusterOpts) ->
, env => [ {mria, core_nodes, CoreNodes}
, {mria, node_role, Role}
, {mria, rlog_replica_reconnect_interval, 100} % For faster response times
, {mria, {callback, heal_partition}, fun heal_callback/1}
, {gen_rpc, tcp_server_port, BaseGenRpcPort + Number}
, {gen_rpc, client_config_per_node, {internal, GenRpcPorts}}
| Env]
Expand Down Expand Up @@ -273,6 +274,12 @@ get_txid() ->
TID
end.

%% Test hook installed as the `heal_partition' callback of the cluster nodes.
%% Emits a `mria_ct_heal_partition' trace point carrying the partition layout,
%% so test cases can wait for the autoheal to complete.
%%
%% The argument is the `{Majority, Minorities}' tuple passed by the autoheal
%% code; the second element is the list of minority partitions.
heal_callback({Majority, Minorities}) ->
    Event = #{ majority => Majority
             , minority => Minorities
             },
    ?tp(mria_ct_heal_partition, Event).

-if(?OTP_RELEASE >= 25).
start_dist() ->
ensure_epmd(),
Expand Down