From af67f5a0687a8ced804c84055adc9821ceb2b9d0 Mon Sep 17 00:00:00 2001 From: nightjoker7 Date: Sun, 26 Apr 2026 14:47:11 -0500 Subject: [PATCH 1/2] fix: null-check packetPool/queueStatusPool allocCopy at 11 router/module callers Adds null-checks at 11 call sites that take a pointer from packetPool.allocCopy() or queueStatusPool.allocCopy() and immediately dereference (or pass to a function that derefs) without verifying the allocation succeeded. Allocator::allocCopy is documented to return nullptr on pool exhaustion; these call sites assumed "always succeeds". Sites fixed: src/mesh/MeshService.cpp:113 sendToPhone(allocCopy(*mp)) src/mesh/MeshService.cpp:209 sendToMesh(allocCopy(p)) src/mesh/MeshService.cpp:233 queueStatusPool.allocCopy(qs); copied->res=res src/mesh/MeshService.cpp:271 sendToPhone(allocCopy(*p)) src/mesh/NextHopRouter.cpp:35 startRetransmission(allocCopy(*p)) src/mesh/NextHopRouter.cpp:151 tosend = allocCopy(*p); tosend->hop_limit=0 src/mesh/NextHopRouter.cpp:329,331,336 three retx allocCopy sites src/mesh/ReliableRouter.cpp:21 want_ack copy = allocCopy(*p) src/mesh/Router.cpp:376 p_decoded = allocCopy(*p); MQTT::onSend(*p_decoded) src/modules/NodeInfoModule.cpp:63 packetCopy = allocCopy(mp) src/modules/AtakPluginModule.cpp:199 decompressedCopy = allocCopy(mp) Sites with existing guards (left alone): Router.cpp:744 legacy null-check from #9136 Router.cpp:813 existing if (p_encrypted_new) check Telemetry/*.cpp lastMeasurementPacket consumers all check Reproduction (deterministic null-injection on Heltec V3, static-pool board, ~50 MeshPacket slots, no PSRAM): Build A (origin/develop + 1-in-5 allocCopy nullptr inject) rebootCount: 4 -> 26 (+22 panics in 90 s under sendText load) Build B (this patch + 1-in-5 allocCopy nullptr inject) rebootCount: 3 -> 4 (+1 close+reopen probe artifact only; zero hammer-induced crashes) Same hardware, identical workload, same injection rate. The only delta is these 11 null-checks. Field reports whose backtraces / call graphs land in these sites: #4410 (T-Beam reboot loop after REPEATER config) - GUVWAF decoded the backtrace in-thread; panic literally inside MeshService::sendToPhone (the call site of packetPool.allocCopy(*mp) at MeshService.cpp:113) #1355 (T-Lora v2 1.6 MQTT bridge crash) - filed by caveman99; Guru Meditation LoadProhibited with EXCVADDR=0x00000004 (textbook null-deref of struct field) inside Router::send -> MQTT::onSend #3725 (Heltec V3 reboot loop with ATAK plugin) - thebentern landed partial fix #3922 noting "there's still some underlying issue"; AtakPluginModule.cpp:199 path #2264 (heap-too-low reboot loop with NodeInfo + sendToPhone storm) #6772 (T-Lora v2 1.6 abort at operator new during PhoneAPI::handleToRadio) #8066 - fifieldt: "In 2.7.9 we fixed a range of heap exhaustion issues that could be the cause" (Heltec V3 reboot acknowledgment) This patch does not claim to fix those reports - it null-checks the specific call sites whose deref-on-null is in their backtraces. Mirrors the merge pattern of #10262 (13 allocZeroed null-checks in one focused PR). Same shape, adjacent class. --- src/mesh/MeshService.cpp | 19 ++++++++++++++++--- src/mesh/NextHopRouter.cpp | 17 ++++++++++++----- src/mesh/ReliableRouter.cpp | 4 +++- src/mesh/Router.cpp | 9 ++++++--- src/modules/AtakPluginModule.cpp | 4 ++++ src/modules/NodeInfoModule.cpp | 13 ++++++++----- 6 files changed, 49 insertions(+), 17 deletions(-) diff --git a/src/mesh/MeshService.cpp b/src/mesh/MeshService.cpp index 952a6d2be37..e080f05e890 100644 --- a/src/mesh/MeshService.cpp +++ b/src/mesh/MeshService.cpp @@ -110,7 +110,10 @@ int MeshService::handleFromRadio(const meshtastic_MeshPacket *mp) } printPacket("Forwarding to phone", mp); - sendToPhone(packetPool.allocCopy(*mp)); + if (auto *c = packetPool.allocCopy(*mp)) + sendToPhone(c); + else + LOG_WARN("phone-forward dropped: packetPool exhausted"); return 0; } @@ -205,7 +208,10 @@ void MeshService::handleToRadio(meshtastic_MeshPacket &p) DEBUG_HEAP_BEFORE; auto a = packetPool.allocCopy(p); DEBUG_HEAP_AFTER("MeshService::handleToRadio", a); - sendToMesh(a, RX_SRC_USER); + if (a) + sendToMesh(a, RX_SRC_USER); + else + LOG_WARN("handleToRadio: packetPool exhausted, dropping ToRadio packet"); bool loopback = false; // if true send any packet the phone sends back itself (for testing) if (loopback) { @@ -225,6 +231,10 @@ bool MeshService::cancelSending(PacketId id) ErrorCode MeshService::sendQueueStatusToPhone(const meshtastic_QueueStatus &qs, ErrorCode res, uint32_t mesh_packet_id) { meshtastic_QueueStatus *copied = queueStatusPool.allocCopy(qs); + if (!copied) { + LOG_WARN("sendQueueStatusToPhone: queueStatusPool exhausted, skipping"); + return ERRNO_UNKNOWN; + } copied->res = res; copied->mesh_packet_id = mesh_packet_id; @@ -265,7 +275,10 @@ void MeshService::sendToMesh(meshtastic_MeshPacket *p, RxSource src, bool ccToPh auto a = packetPool.allocCopy(*p); DEBUG_HEAP_AFTER("MeshService::sendToMesh", a); - sendToPhone(a); + if (a) + sendToPhone(a); + // else: pool exhausted; phone will see this packet again via the + // normal RX-to-phone path on a future cycle. } // Router may ask us to release the packet if it wasn't sent diff --git a/src/mesh/NextHopRouter.cpp b/src/mesh/NextHopRouter.cpp index e8613d45729..c58a5c306de 100644 --- a/src/mesh/NextHopRouter.cpp +++ b/src/mesh/NextHopRouter.cpp @@ -31,8 +31,11 @@ ErrorCode NextHopRouter::send(meshtastic_MeshPacket *p) // If it's from us, ReliableRouter already handles retransmissions if want_ack is set. If a next hop is set and hop limit is // not 0 or want_ack is set, start retransmissions - if ((!isFromUs(p) || !p->want_ack) && p->next_hop != NO_NEXT_HOP_PREFERENCE && (p->hop_limit > 0 || p->want_ack)) - startRetransmission(packetPool.allocCopy(*p)); // start retransmission for relayed packet + if ((!isFromUs(p) || !p->want_ack) && p->next_hop != NO_NEXT_HOP_PREFERENCE && (p->hop_limit > 0 || p->want_ack)) { + if (auto *retxCopy = packetPool.allocCopy(*p)) + startRetransmission(retxCopy); // start retransmission for relayed packet + // else: pool exhausted; relayed packet is still sent below — just no retransmission + } return Router::send(p); } @@ -146,6 +149,10 @@ bool NextHopRouter::perhapsRebroadcast(const meshtastic_MeshPacket *p) if (isRebroadcaster()) { if (p->next_hop == NO_NEXT_HOP_PREFERENCE || p->next_hop == nodeDB->getLastByteOfNodeNum(getNodeNum())) { meshtastic_MeshPacket *tosend = packetPool.allocCopy(*p); // keep a copy because we will be sending it + if (!tosend) { + LOG_WARN("Rebroadcast skipped: packetPool exhausted (from=%x)", p->relay_node); + return false; + } LOG_INFO("Rebroadcast received message coming from %x", p->relay_node); // If exhausting hops, force hop_limit = 0 regardless of other logic @@ -319,14 +326,14 @@ int32_t NextHopRouter::doRetransmissions() LOG_INFO("Resetting next hop for packet with dest 0x%x\n", p.packet->to); sentTo->next_hop = NO_NEXT_HOP_PREFERENCE; } - FloodingRouter::send(packetPool.allocCopy(*p.packet)); + if (auto *c = packetPool.allocCopy(*p.packet)) FloodingRouter::send(c); } else { - NextHopRouter::send(packetPool.allocCopy(*p.packet)); + if (auto *c = packetPool.allocCopy(*p.packet)) NextHopRouter::send(c); } } else { // Note: we call the superclass version because we don't want to have our version of send() add a new // retransmission record - FloodingRouter::send(packetPool.allocCopy(*p.packet)); + if (auto *c = packetPool.allocCopy(*p.packet)) FloodingRouter::send(c); } // Queue again diff --git a/src/mesh/ReliableRouter.cpp b/src/mesh/ReliableRouter.cpp index 42c24c783f6..d1571b422bb 100644 --- a/src/mesh/ReliableRouter.cpp +++ b/src/mesh/ReliableRouter.cpp @@ -21,7 +21,9 @@ ErrorCode ReliableRouter::send(meshtastic_MeshPacket *p) auto copy = packetPool.allocCopy(*p); DEBUG_HEAP_AFTER("ReliableRouter::send", copy); - startRetransmission(copy, NUM_RELIABLE_RETX); + if (copy) + startRetransmission(copy, NUM_RELIABLE_RETX); + // else: pool exhausted; the packet itself is still sent — just no retry } /* If we have pending retransmissions, add the airtime of this packet to it, because during that time we cannot receive an diff --git a/src/mesh/Router.cpp b/src/mesh/Router.cpp index e0473a14e14..f8ff5d77407 100644 --- a/src/mesh/Router.cpp +++ b/src/mesh/Router.cpp @@ -384,12 +384,15 @@ ErrorCode Router::send(meshtastic_MeshPacket *p) return encodeResult; // FIXME - this isn't a valid ErrorCode } #if !MESHTASTIC_EXCLUDE_MQTT - // Only publish to MQTT if we're the original transmitter of the packet - if (moduleConfig.mqtt.enabled && isFromUs(p) && mqtt) { + // Only publish to MQTT if we're the original transmitter of the packet. + // Skip on packetPool exhaustion (p_decoded == nullptr) — onSend would + // dereference the reference and crash; better to drop the MQTT publish + // for this one packet than panic the radio. + if (p_decoded && moduleConfig.mqtt.enabled && isFromUs(p) && mqtt) { mqtt->onSend(*p, *p_decoded, chIndex); } #endif - packetPool.release(p_decoded); + packetPool.release(p_decoded); // release() handles nullptr } #if HAS_UDP_MULTICAST diff --git a/src/modules/AtakPluginModule.cpp b/src/modules/AtakPluginModule.cpp index bddb6276bb3..059380a63f2 100644 --- a/src/modules/AtakPluginModule.cpp +++ b/src/modules/AtakPluginModule.cpp @@ -197,6 +197,10 @@ void AtakPluginModule::alterReceivedProtobuf(meshtastic_MeshPacket &mp, meshtast } } auto decompressedCopy = packetPool.allocCopy(mp); + if (!decompressedCopy) { + LOG_WARN("AtakPluginModule: packetPool exhausted, skipping decompressed phone forward"); + return; + } decompressedCopy->decoded.payload.size = pb_encode_to_bytes(decompressedCopy->decoded.payload.bytes, sizeof(decompressedCopy->decoded.payload), meshtastic_TAKPacket_fields, &uncompressed); diff --git a/src/modules/NodeInfoModule.cpp b/src/modules/NodeInfoModule.cpp index 4de4792416a..74687cc638b 100644 --- a/src/modules/NodeInfoModule.cpp +++ b/src/modules/NodeInfoModule.cpp @@ -61,12 +61,15 @@ bool NodeInfoModule::handleReceivedProtobuf(const meshtastic_MeshPacket &mp, mes // if user has changed while packet was not for us, inform phone if (hasChanged && !wasBroadcast && !isToUs(&mp)) { auto packetCopy = packetPool.allocCopy(mp); // Keep a copy of the packet for later analysis + if (packetCopy) { + // Re-encode the user protobuf, as we have stripped out the user.id + packetCopy->decoded.payload.size = pb_encode_to_bytes( + packetCopy->decoded.payload.bytes, sizeof(packetCopy->decoded.payload.bytes), &meshtastic_User_msg, &p); - // Re-encode the user protobuf, as we have stripped out the user.id - packetCopy->decoded.payload.size = pb_encode_to_bytes( - packetCopy->decoded.payload.bytes, sizeof(packetCopy->decoded.payload.bytes), &meshtastic_User_msg, &p); - - service->sendToPhone(packetCopy); + service->sendToPhone(packetCopy); + } else { + LOG_WARN("NodeInfoModule: packetPool exhausted, skipping phone forward"); + } } pruneLastNodeInfoCache(); From 67bc2665d859ac896bff20732a1ae0253e7b34af Mon Sep 17 00:00:00 2001 From: nightjoker7 Date: Thu, 30 Apr 2026 18:57:07 -0500 Subject: [PATCH 2/2] Address Copilot review: fix sizeof bug + downgrade duplicate warnings MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit AtakPluginModule.cpp:205 — pb_encode_to_bytes was passed sizeof(payload) instead of sizeof(payload.bytes). PB_BYTES_ARRAY_T is a struct with a size field plus the bytes array, so the previous form overstated the buffer by the size-field width and could let pb_encode_to_bytes write past payload.bytes into adjacent fields under a worst-case TAKPacket re-encode. Use sizeof(...payload.bytes) to match the canonical form already used at NodeInfoModule.cpp:67. Caller-side LOG_WARN on packetPool/queueStatusPool exhaustion was producing 3 lines per failure (caller WARN + Allocator::allocCopy WARN at MemoryPool.h:45 + static-pool alloc WARN at MemoryPool.h:158). Downgrade the caller-side message to LOG_DEBUG so the contextual breadcrumb is preserved for debug builds without spamming production logs under burst load. Affects: - MeshService.cpp:116 (handleFromRadio phone-forward) - MeshService.cpp:235 (sendQueueStatusToPhone) - AtakPluginModule.cpp:201 (decompressed TAK phone-forward) - NodeInfoModule.cpp:71 (NodeInfo phone-forward) MeshService.cpp:281 — reword the cc-to-phone exhaustion comment. Previous wording implied the phone would see the packet again via the normal RX-to-phone path; that is not guaranteed for non-broadcast or non-local sends. New comment is accurate about what the drop actually means. Resolves the Copilot review on commit 0984d68. --- src/mesh/MeshService.cpp | 10 ++++++---- src/modules/AtakPluginModule.cpp | 4 ++-- src/modules/NodeInfoModule.cpp | 2 +- 3 files changed, 9 insertions(+), 7 deletions(-) diff --git a/src/mesh/MeshService.cpp b/src/mesh/MeshService.cpp index e080f05e890..b07a3bd47b1 100644 --- a/src/mesh/MeshService.cpp +++ b/src/mesh/MeshService.cpp @@ -113,7 +113,7 @@ int MeshService::handleFromRadio(const meshtastic_MeshPacket *mp) if (auto *c = packetPool.allocCopy(*mp)) sendToPhone(c); else - LOG_WARN("phone-forward dropped: packetPool exhausted"); + LOG_DEBUG("phone-forward dropped: packetPool exhausted"); return 0; } @@ -232,7 +232,7 @@ ErrorCode MeshService::sendQueueStatusToPhone(const meshtastic_QueueStatus &qs, { meshtastic_QueueStatus *copied = queueStatusPool.allocCopy(qs); if (!copied) { - LOG_WARN("sendQueueStatusToPhone: queueStatusPool exhausted, skipping"); + LOG_DEBUG("sendQueueStatusToPhone: queueStatusPool exhausted, skipping"); return ERRNO_UNKNOWN; } @@ -277,8 +277,10 @@ void MeshService::sendToMesh(meshtastic_MeshPacket *p, RxSource src, bool ccToPh if (a) sendToPhone(a); - // else: pool exhausted; phone will see this packet again via the - // normal RX-to-phone path on a future cycle. + // else: packetPool exhausted; the cc-to-phone copy is dropped here. + // The original packet is still queued to the mesh; phone visibility + // of cc'd local sends depends on whether the RX path forwards it, + // which is not guaranteed for non-broadcast / non-local-dest sends. } // Router may ask us to release the packet if it wasn't sent diff --git a/src/modules/AtakPluginModule.cpp b/src/modules/AtakPluginModule.cpp index 059380a63f2..f86322f1d3f 100644 --- a/src/modules/AtakPluginModule.cpp +++ b/src/modules/AtakPluginModule.cpp @@ -198,11 +198,11 @@ void AtakPluginModule::alterReceivedProtobuf(meshtastic_MeshPacket &mp, meshtast } auto decompressedCopy = packetPool.allocCopy(mp); if (!decompressedCopy) { - LOG_WARN("AtakPluginModule: packetPool exhausted, skipping decompressed phone forward"); + LOG_DEBUG("AtakPluginModule: packetPool exhausted, skipping decompressed phone forward"); return; } decompressedCopy->decoded.payload.size = - pb_encode_to_bytes(decompressedCopy->decoded.payload.bytes, sizeof(decompressedCopy->decoded.payload), + pb_encode_to_bytes(decompressedCopy->decoded.payload.bytes, sizeof(decompressedCopy->decoded.payload.bytes), meshtastic_TAKPacket_fields, &uncompressed); service->sendToPhone(decompressedCopy); diff --git a/src/modules/NodeInfoModule.cpp b/src/modules/NodeInfoModule.cpp index 74687cc638b..a29aeef9d93 100644 --- a/src/modules/NodeInfoModule.cpp +++ b/src/modules/NodeInfoModule.cpp @@ -68,7 +68,7 @@ bool NodeInfoModule::handleReceivedProtobuf(const meshtastic_MeshPacket &mp, mes service->sendToPhone(packetCopy); } else { - LOG_WARN("NodeInfoModule: packetPool exhausted, skipping phone forward"); + LOG_DEBUG("NodeInfoModule: packetPool exhausted, skipping phone forward"); } }