Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
14 changes: 14 additions & 0 deletions platformio.ini
Original file line number Diff line number Diff line change
Expand Up @@ -166,3 +166,17 @@ build_src_filter =
+<../src/Utils.cpp>
lib_deps =
google/googletest @ 1.17.0

; Standalone unit tests for reliability changes (issues #1775, #1489).
; These tests have no Mesh-stack dependencies and compile without the
; Arduino framework or crypto libraries.
[env:native_reliability]
platform = native
build_flags = -std=c++17
test_filter =
test_path_gating
test_flood_ack
build_src_filter =
-<*>
lib_deps =
google/googletest @ 1.17.0
13 changes: 9 additions & 4 deletions src/Mesh.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -457,11 +457,16 @@ Packet* Mesh::createPathReturn(const uint8_t* dest_hash, const uint8_t* secret,
if (extra_len > 0) {
data[data_len++] = extra_type;
memcpy(&data[data_len], extra, extra_len); data_len += extra_len;
} else {
// append a timestamp, or random blob (to make packet_hash unique)
data[data_len++] = 0xFF; // dummy payload type
getRNG()->random(&data[data_len], 4); data_len += 4;
}
// Always append a random nonce regardless of extra presence.
// AES-ECB is deterministic: without a nonce, identical plaintext produces
// identical ciphertext → identical calculatePacketHash() → hasSeen() treats
// every retransmission of the same PATH+ACK as a duplicate and drops it.
// The nonce is ignored by the receiver (it reads only the typed extra fields).
// This mirrors the original no-extra handling above (issue: nonce was missing
// for the extra_len > 0 branch, breaking flood-ACK retry reliability).
data[data_len++] = 0xFF; // dummy payload type (ignored by receiver)
getRNG()->random(&data[data_len], 4); data_len += 4;

len += Utils::encryptThenMAC(secret, &packet->payload[len], data, data_len);
}
Expand Down
76 changes: 71 additions & 5 deletions src/helpers/BaseChatMesh.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,24 @@
#define TXT_ACK_DELAY 200
#endif

// Path-stickiness window (seconds): once a path is stored, ANY replacement is
// blocked for this long. No hop-count comparison is performed — fewer hops is
// NOT automatically better because rxdelay-based SNR ordering means high-SNR
// long-hop paths arrive first, then low-SNR short-hop paths follow. A blanket
// first-arrived lock avoids that bias entirely.
// 10 s is long enough to suppress multipath duplicates (50-200 ms apart) yet
// short enough that a genuinely changed topology gets a fresh path quickly.
// Addresses issue #1775.
#ifndef PATH_STICKINESS_WINDOW_SECS
#define PATH_STICKINESS_WINDOW_SECS 10u
#endif

// Number of independent flood-ACK transmissions at increasing delays.
// Addresses issue #1489 (single flood ACK lost in noisy RF environments).
#ifndef FLOOD_ACK_RETRY_COUNT
#define FLOOD_ACK_RETRY_COUNT 3
#endif

void BaseChatMesh::sendFloodScoped(const ContactInfo& recipient, mesh::Packet* pkt, uint32_t delay_millis) {
sendFlood(pkt, delay_millis);
}
Expand Down Expand Up @@ -40,8 +58,14 @@ mesh::Packet* BaseChatMesh::createSelfAdvert(const char* name, double lat, doubl

void BaseChatMesh::sendAckTo(const ContactInfo& dest, uint32_t ack_hash) {
if (dest.out_path_len == OUT_PATH_UNKNOWN) {
mesh::Packet* ack = createAck(ack_hash);
if (ack) sendFloodScoped(dest, ack, TXT_ACK_DELAY);
// Send flood ACK FLOOD_ACK_RETRY_COUNT times at increasing delays (issue #1489).
// Each transmission is an independent RF attempt; duplicates are discarded by
// hasSeen() at every receiving node so only the first successful copy is processed.
static const uint32_t flood_ack_delays[FLOOD_ACK_RETRY_COUNT] = { TXT_ACK_DELAY, 800, 2000 };
for (int i = 0; i < FLOOD_ACK_RETRY_COUNT; i++) {
mesh::Packet* ack = createAck(ack_hash);
if (ack) sendFloodScoped(dest, ack, flood_ack_delays[i]);
}
} else {
uint32_t d = TXT_ACK_DELAY;
if (getExtraAckTransmitCount() > 0) {
Expand Down Expand Up @@ -226,6 +250,12 @@ void BaseChatMesh::onPeerDataRecv(mesh::Packet* packet, uint8_t type, int sender
mesh::Packet* path = createPathReturn(from.id, secret, packet->path, packet->path_len,
PAYLOAD_TYPE_ACK, (uint8_t *) &ack_hash, 4);
if (path) sendFloodScoped(from, path, TXT_ACK_DELAY);
// Also send a standalone flood ACK as backup: the PATH+ACK above is a single packet
// whose ciphertext (AES-ECB) was deterministic before the nonce fix. Even with the
// nonce, RF loss can drop the PATH+ACK entirely. The standalone ACK travels
// independently and ensures the sender cancels its retry timeout even if path
// establishment fails (issue #1489).
sendAckTo(from, ack_hash);
} else {
sendAckTo(from, ack_hash);
}
Expand Down Expand Up @@ -253,6 +283,8 @@ void BaseChatMesh::onPeerDataRecv(mesh::Packet* packet, uint8_t type, int sender
mesh::Packet* path = createPathReturn(from.id, secret, packet->path, packet->path_len,
PAYLOAD_TYPE_ACK, (uint8_t *) &ack_hash, 4);
if (path) sendFloodScoped(from, path, TXT_ACK_DELAY);
// Standalone flood ACK backup — same rationale as TXT_TYPE_PLAIN above
sendAckTo(from, ack_hash);
} else {
sendAckTo(from, ack_hash);
}
Expand Down Expand Up @@ -302,10 +334,37 @@ bool BaseChatMesh::onPeerPathRecv(mesh::Packet* packet, int sender_idx, const ui
}

bool BaseChatMesh::onContactPathRecv(ContactInfo& from, uint8_t* in_path, uint8_t in_path_len, uint8_t* out_path, uint8_t out_path_len, uint8_t extra_type, uint8_t* extra, uint8_t extra_len) {
// NOTE: default impl, we just replace the current 'out_path' regardless, whenever sender sends us a new out_path.
// FUTURE: could store multiple out_paths per contact, and try to find which is the 'best'(?)
uint32_t now = getRTCClock()->getCurrentTime();

// Path-stickiness lock (issue #1775): block ANY replacement of a freshly
// stored path for PATH_STICKINESS_WINDOW_SECS seconds. No hop-count
// comparison — fewer hops is not automatically better because rxdelay-based
// SNR ordering propagates high-SNR (often longer-hop) paths first; comparing
// hops would therefore bias toward the wrong path. A blanket first-arrived
// lock is simpler and avoids that bias.
// The embedded ACK/response is always processed regardless of the lock.
if (from.out_path_len != OUT_PATH_UNKNOWN && from.out_path_timestamp != 0) {
uint32_t age_secs = now - from.out_path_timestamp;
if (age_secs < PATH_STICKINESS_WINDOW_SECS) {
// Path is still within the lock window — keep it, but still process any
// embedded ACK or response so the sender's retry timer is cancelled.
onContactPathUpdated(from);
if (extra_type == PAYLOAD_TYPE_ACK && extra_len >= 4) {
if (processAck(extra) != NULL) {
txt_send_timeout = 0;
}
} else if (extra_type == PAYLOAD_TYPE_RESPONSE && extra_len > 0) {
onContactResponse(from, extra, extra_len);
}
return true;
}
}

// Accept the new path.
from.out_path_len = mesh::Packet::copyPath(from.out_path, out_path, out_path_len); // store a copy of path, for sendDirect()
from.lastmod = getRTCClock()->getCurrentTime();
from.out_path_timestamp = now; // record when this path was accepted
from.path_ack_count = 0; // reset delivery counter for the new path
from.lastmod = now;

onContactPathUpdated(from);

Expand All @@ -326,6 +385,13 @@ void BaseChatMesh::onAckRecv(mesh::Packet* packet, uint32_t ack_crc) {
txt_send_timeout = 0; // matched one we're waiting for, cancel timeout timer
packet->markDoNotRetransmit(); // ACK was for this node, so don't retransmit

// Track per-contact direct-path delivery success (issue #1775).
// Incremented here so the path-stickiness logic can eventually factor in
// proven delivery when making replacement decisions.
if (!packet->isRouteFlood() && from->out_path_len != OUT_PATH_UNKNOWN) {
if (from->path_ack_count < 255) from->path_ack_count++;
}

if (packet->isRouteFlood() && from->out_path_len != OUT_PATH_UNKNOWN) {
// we have direct path, but other node is still sending flood, so maybe they didn't receive reciprocal path properly(?)
handleReturnPathRetry(*from, packet->path, packet->path_len);
Expand Down
2 changes: 2 additions & 0 deletions src/helpers/ContactInfo.h
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,8 @@ struct ContactInfo {
uint32_t lastmod; // by OUR clock
int32_t gps_lat, gps_lon; // 6 dec places
uint32_t sync_since;
uint32_t out_path_timestamp; // RTC time (secs) when out_path was last accepted; 0 = never set
uint8_t path_ack_count; // saturating count of direct-path ACKs received (delivery tracking)

const uint8_t* getSharedSecret(const mesh::LocalIdentity& self_id) const {
if (!shared_secret_valid) {
Expand Down
212 changes: 212 additions & 0 deletions test/test_flood_ack/test_flood_ack.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,212 @@
/**
* test_flood_ack.cpp
*
* Unit tests for the flood-ACK retry scheduling introduced in
* BaseChatMesh::sendAckTo (fixes issue #1489).
*
* When a contact has no known direct path (out_path_len == OUT_PATH_UNKNOWN)
* the implementation must schedule exactly FLOOD_ACK_RETRY_COUNT independent
* flood transmissions at the delays [200ms, 800ms, 2000ms]. When a direct
* path is known the pre-existing single-direct-ACK behaviour is unchanged.
*
* Deduplication on the receiver side is handled by the pre-existing
* MeshTables::hasSeen() mechanism; these tests confirm only the sender-side
* scheduling contract.
*
* See docs/reliability_changes.md for the full rationale.
*/

#include <gtest/gtest.h>
#include <stdint.h>
#include <string.h>

// ---- Constants mirrored from BaseChatMesh.cpp --------------------------------
#define OUT_PATH_UNKNOWN 0xFF
#define TXT_ACK_DELAY 200u
#define FLOOD_ACK_RETRY_COUNT 3

// Expected delay schedule — must match the static array in sendAckTo.
static const uint32_t EXPECTED_FLOOD_DELAYS[FLOOD_ACK_RETRY_COUNT] = {
TXT_ACK_DELAY, // 200 ms
800u,
2000u,
};

// ---- Minimal send recorder ---------------------------------------------------

struct ScheduledPacket {
bool is_flood;
uint32_t delay_ms;
};

static ScheduledPacket sched_buf[16];
static int sched_count;

static void reset_sched() {
sched_count = 0;
memset(sched_buf, 0, sizeof(sched_buf));
}

static void mock_sendFloodScoped(uint32_t delay) {
if (sched_count < 16) sched_buf[sched_count++] = { true, delay };
}

static void mock_sendDirect(uint32_t delay) {
if (sched_count < 16) sched_buf[sched_count++] = { false, delay };
}

// ---- sendAckTo logic transcription ------------------------------------------
// This is a direct copy of the changed sendAckTo body (excluding the Mesh
// packet-creation calls that are irrelevant to scheduling counts and delays).
// If the implementation diverges from this transcription the test author must
// update both, which serves as an explicit change-review gate.
static void testSendAckTo(uint8_t out_path_len, uint32_t /*ack_hash*/) {
if (out_path_len == OUT_PATH_UNKNOWN) {
static const uint32_t flood_ack_delays[FLOOD_ACK_RETRY_COUNT] = {
TXT_ACK_DELAY, 800, 2000
};
for (int i = 0; i < FLOOD_ACK_RETRY_COUNT; i++) {
mock_sendFloodScoped(flood_ack_delays[i]);
}
} else {
// Direct path: single ACK at TXT_ACK_DELAY (getExtraAckTransmitCount()==0 default)
mock_sendDirect(TXT_ACK_DELAY);
}
}

// Flood path (out_path_len == OUT_PATH_UNKNOWN)

TEST(FloodAck, UnknownPath_SendsExactlyThreeTimes) {
reset_sched();
testSendAckTo(OUT_PATH_UNKNOWN, 0xDEADBEEF);
EXPECT_EQ(FLOOD_ACK_RETRY_COUNT, sched_count);
}

TEST(FloodAck, UnknownPath_AllSendsAreFlood) {
reset_sched();
testSendAckTo(OUT_PATH_UNKNOWN, 0xDEADBEEF);
for (int i = 0; i < sched_count; i++) {
EXPECT_TRUE(sched_buf[i].is_flood)
<< "Packet " << i << " should be a flood send";
}
}

TEST(FloodAck, UnknownPath_DelaysMatch200_800_2000ms) {
reset_sched();
testSendAckTo(OUT_PATH_UNKNOWN, 0xDEADBEEF);
ASSERT_EQ(FLOOD_ACK_RETRY_COUNT, sched_count);
for (int i = 0; i < FLOOD_ACK_RETRY_COUNT; i++) {
EXPECT_EQ(EXPECTED_FLOOD_DELAYS[i], sched_buf[i].delay_ms)
<< "Delay " << i << " should be " << EXPECTED_FLOOD_DELAYS[i] << " ms";
}
}

TEST(FloodAck, UnknownPath_DelaysAreStrictlyIncreasing) {
reset_sched();
testSendAckTo(OUT_PATH_UNKNOWN, 0xDEADBEEF);
for (int i = 1; i < sched_count; i++) {
EXPECT_GT(sched_buf[i].delay_ms, sched_buf[i - 1].delay_ms)
<< "Delay " << i << " must be > delay " << (i - 1)
<< " to ensure temporally spread retries";
}
}

TEST(FloodAck, UnknownPath_FirstDelayIsAckDelay) {
reset_sched();
testSendAckTo(OUT_PATH_UNKNOWN, 0xDEADBEEF);
ASSERT_GE(sched_count, 1);
EXPECT_EQ(TXT_ACK_DELAY, sched_buf[0].delay_ms);
}

// Direct path (out_path_len != OUT_PATH_UNKNOWN) — pre-existing behaviour

TEST(FloodAck, KnownPath_SendsExactlyOnce) {
reset_sched();
testSendAckTo(/*out_path_len=*/1, 0xDEADBEEF);
EXPECT_EQ(1, sched_count);
}

TEST(FloodAck, KnownPath_SendIsDirect) {
reset_sched();
testSendAckTo(/*out_path_len=*/1, 0xDEADBEEF);
ASSERT_EQ(1, sched_count);
EXPECT_FALSE(sched_buf[0].is_flood);
}

TEST(FloodAck, KnownPath_DelayIsAckDelay) {
reset_sched();
testSendAckTo(/*out_path_len=*/2, 0xDEADBEEF);
ASSERT_EQ(1, sched_count);
EXPECT_EQ(TXT_ACK_DELAY, sched_buf[0].delay_ms);
}

TEST(FloodAck, KnownPath_MultiHop_SendsOnce) {
reset_sched();
testSendAckTo(/*out_path_len=*/5, 0x12345678);
EXPECT_EQ(1, sched_count);
}

// Retry count configuration contract

TEST(FloodAck, RetryCountIs3) {
// Explicitly documents the expected constant value.
EXPECT_EQ(3, FLOOD_ACK_RETRY_COUNT);
}

TEST(FloodAck, ExpectedDelaysTableMatchesConstants) {
EXPECT_EQ(TXT_ACK_DELAY, EXPECTED_FLOOD_DELAYS[0]);
EXPECT_EQ(800u, EXPECTED_FLOOD_DELAYS[1]);
EXPECT_EQ(2000u, EXPECTED_FLOOD_DELAYS[2]);
}

// =============================================================================
// Backup ACK for flood-received messages
//
// When a flood message is received, the responder sends a PATH+ACK (one packet
// that serves both path-establishment and ACK delivery). If the PATH+ACK is
// lost, the sender never gets the ACK. The fix: also call sendAckTo() to send
// independent flood ACKs as a backup. This suite verifies the backup contract
// using the same sendAckTo transcription, called from the "no known path" state
// that applies immediately after receiving the very first flood message.
// =============================================================================

TEST(BackupFloodAck, FirstMessageScenario_SendsFloodAckBackup) {
// When the receiver has no stored path to the sender (first-ever message),
// sendAckTo() is invoked with OUT_PATH_UNKNOWN and must produce flood ACKs.
reset_sched();
testSendAckTo(OUT_PATH_UNKNOWN, 0xCAFEBABE);
EXPECT_EQ(FLOOD_ACK_RETRY_COUNT, sched_count);
for (int i = 0; i < sched_count; i++) {
EXPECT_TRUE(sched_buf[i].is_flood)
<< "Backup ACK " << i << " must be flood (no path known yet)";
}
}

TEST(BackupFloodAck, BackupAndPathAck_AreIndependent) {
// The backup standalone ACK (sendAckTo) and the PATH+ACK are independent
// packets. This test verifies sendAckTo is not affected by the PATH+ACK
// scheduling — it produces its own complete set of FLOOD_ACK_RETRY_COUNT
// flood transmissions at the standard delays.
reset_sched();
testSendAckTo(OUT_PATH_UNKNOWN, 0x11223344);
ASSERT_EQ(FLOOD_ACK_RETRY_COUNT, sched_count);
EXPECT_EQ(TXT_ACK_DELAY, sched_buf[0].delay_ms);
EXPECT_EQ(800u, sched_buf[1].delay_ms);
EXPECT_EQ(2000u, sched_buf[2].delay_ms);
}

TEST(BackupFloodAck, KnownReturnPath_SendsDirectAckBackup) {
// If the receiver already has a stored direct path back to the sender
// (e.g., established in a prior exchange), the backup sendAckTo() sends a
// direct ACK rather than a flood — still providing a reliable backup channel.
reset_sched();
testSendAckTo(/*out_path_len=*/1, 0xCAFEBABE);
EXPECT_EQ(1, sched_count);
EXPECT_FALSE(sched_buf[0].is_flood);
}


int main(int argc, char** argv) {
::testing::InitGoogleTest(&argc, argv);
return RUN_ALL_TESTS();
}
Loading