From 86e2228493b3ed56296cfaf46d3fc6c1c43b74b5 Mon Sep 17 00:00:00 2001 From: Andy Aragon Date: Fri, 29 May 2026 12:38:55 -0700 Subject: [PATCH] =?UTF-8?q?fix(firmware):=20get=20Wi-Fi=20online=20?= =?UTF-8?q?=E2=80=94=20BLE/Wi-Fi=20mutual=20exclusion=20+=20socket=20pool?= =?UTF-8?q?=20sizing?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Wi-Fi never came up once a real STACKCHAN.RON was loaded — start_async failed with InternalError(NoMem). Root cause: only ~1.5 KiB of internal SRAM was free at Wi-Fi start. PSRAM (8 MiB free) can't back the Wi-Fi MAC's DMA buffers, the bootloader-reclaimed heap is hard-capped at ~72 KiB (full), and the only other internal heap shares the DRAM segment with the stack (growing it overflowed the stack in the coex path). Make BLE and Wi-Fi mutually exclusive instead: with a Wi-Fi SSID configured, skip BLE bring-up entirely. That frees ~36 KiB of internal RAM (1.5 KiB -> 37 KiB) for the Wi-Fi stack and removes the coex stack pressure. BLE (Hardware Buddy) still runs when no SSID is set. Wi-Fi connecting then exposed a second latent bug: STACK_SOCKETS was 6, predating the 4-worker HTTP pool, so smoltcp panicked ("adding a socket to a full SocketSet") the instant the link came up. Size it to the real demand (HTTP workers + mDNS + SNTP + DHCP/DNS + optional clients). Also reflows one write_config line that merged slightly un-formatted. On-device (CoreS3): boots once (no loop), Wi-Fi connects to the AP, DHCP lease acquired, SNTP synced via pool.ntp.org (full internet reachability), HTTP server listening; no NoMem, stack-guard, or SocketSet panics. --- crates/stackchan-firmware/src/main.rs | 141 +++++++++++---------- crates/stackchan-firmware/src/net/stack.rs | 14 +- crates/stackchan-firmware/src/storage.rs | 3 +- 3 files changed, 86 insertions(+), 72 deletions(-) diff --git a/crates/stackchan-firmware/src/main.rs b/crates/stackchan-firmware/src/main.rs index 8cae991f..cdec83db 100644 --- a/crates/stackchan-firmware/src/main.rs +++ b/crates/stackchan-firmware/src/main.rs @@ -1571,76 +1571,83 @@ async fn main(spawner: Spawner) -> ! { ); } - // BLE peripheral. Shares the same `&'static esp_radio::Controller` - // as Wi-Fi — coex (enabled via the `coex` feature on `esp-radio`) - // schedules airtime between the two stacks. The BLE address + - // local name are derived from the chip's eFuse MAC so a single - // device shows up consistently across reboots, and matches the - // discovery handle visible on the LAN over mDNS. - #[allow(clippy::items_after_statements)] - static BLE_NAME: StaticCell> = StaticCell::new(); - let ble_mac = esp_hal::efuse::Efuse::mac_address(); - // First, try the operator-supplied name from - // `/sd/DEVICE.NAM` (set via the Hardware Buddy `cmd:name` - // message). Fall back to the MAC-suffixed default. The name - // must start with `Claude` so the Hardware Buddy picker filters - // us in; the default does, and a desktop-set override is - // assumed to as well — we don't enforce the prefix. - let persisted_name = stackchan_firmware::storage::with_storage( - stackchan_firmware::storage::FirmwareStorage::read_device_name, - ) - .await - .and_then(|r| match r { - Ok(opt) => opt, - Err(e) => { - defmt::warn!( - "ble: device name read failed ({}); using MAC-suffixed default", - defmt::Debug2Format(&e) - ); - None - } - }); - let ble_name: &'static str = { - use core::fmt::Write as _; - let mut s: heapless::String<32> = heapless::String::new(); - if let Some(name) = persisted_name.as_deref() { - // Cap at heapless::String<32>'s capacity. Operator - // names longer than the BLE GAP 22-byte limit will be - // rejected by the advertise layer; here we just keep - // the buffer write infallible. - let n = name.len().min(32); - if let Err(e) = s.push_str(&name[..n]) { - defmt::panic!("ble: name copy failed: {}", defmt::Debug2Format(&e)); + // BLE peripheral — brought up ONLY when no Wi-Fi SSID is configured. + // BLE (Hardware Buddy bridge) and Wi-Fi (HTTP/MCP plane) are mutually + // exclusive: running both needs radio coexistence, whose buffers plus + // BLE's own exhaust the scarce internal-SRAM heap and make the Wi-Fi MAC + // fail to start with `NoMem`. With an SSID present we skip BLE so the + // Wi-Fi stack gets that internal RAM. Address + name derive from the + // eFuse MAC for a stable cross-reboot identity. + if net_config.wifi.ssid.trim().is_empty() { + #[allow(clippy::items_after_statements)] + static BLE_NAME: StaticCell> = StaticCell::new(); + let ble_mac = esp_hal::efuse::Efuse::mac_address(); + // First, try the operator-supplied name from + // `/sd/DEVICE.NAM` (set via the Hardware Buddy `cmd:name` + // message). Fall back to the MAC-suffixed default. The name + // must start with `Claude` so the Hardware Buddy picker filters + // us in; the default does, and a desktop-set override is + // assumed to as well — we don't enforce the prefix. + let persisted_name = stackchan_firmware::storage::with_storage( + stackchan_firmware::storage::FirmwareStorage::read_device_name, + ) + .await + .and_then(|r| match r { + Ok(opt) => opt, + Err(e) => { + defmt::warn!( + "ble: device name read failed ({}); using MAC-suffixed default", + defmt::Debug2Format(&e) + ); + None + } + }); + let ble_name: &'static str = { + use core::fmt::Write as _; + let mut s: heapless::String<32> = heapless::String::new(); + if let Some(name) = persisted_name.as_deref() { + // Cap at heapless::String<32>'s capacity. Operator + // names longer than the BLE GAP 22-byte limit will be + // rejected by the advertise layer; here we just keep + // the buffer write infallible. + let n = name.len().min(32); + if let Err(e) = s.push_str(&name[..n]) { + defmt::panic!("ble: name copy failed: {}", defmt::Debug2Format(&e)); + } + } else if let Err(e) = write!( + &mut s, + "Claude stk-{:02x}{:02x}{:02x}", + ble_mac[3], ble_mac[4], ble_mac[5] + ) { + defmt::panic!("ble: name format failed: {}", defmt::Debug2Format(&e)); } - } else if let Err(e) = write!( - &mut s, - "Claude stk-{:02x}{:02x}{:02x}", - ble_mac[3], ble_mac[4], ble_mac[5] + BLE_NAME.init(s).as_str() + }; + defmt::info!("ble: advertising as {=str}", ble_name); + let ble_connector = match esp_radio::ble::controller::BleConnector::new( + radio, + peripherals.BT, + esp_radio::ble::Config::default(), ) { - defmt::panic!("ble: name format failed: {}", defmt::Debug2Format(&e)); + Ok(c) => c, + Err(e) => defmt::panic!( + "ble: BleConnector::new failed ({})", + defmt::Debug2Format(&e) + ), + }; + let ble_controller: trouble_host::prelude::ExternalController<_, 20> = + trouble_host::prelude::ExternalController::new(ble_connector); + if let Err(e) = spawner.spawn(ble::ble_task(ble::BleTaskConfig { + controller: ble_controller, + address_bytes: ble_mac, + local_name: ble_name, + })) { + defmt::panic!("spawn(ble_task) failed: {}", defmt::Debug2Format(&e)); } - BLE_NAME.init(s).as_str() - }; - defmt::info!("ble: advertising as {=str}", ble_name); - let ble_connector = match esp_radio::ble::controller::BleConnector::new( - radio, - peripherals.BT, - esp_radio::ble::Config::default(), - ) { - Ok(c) => c, - Err(e) => defmt::panic!( - "ble: BleConnector::new failed ({})", - defmt::Debug2Format(&e) - ), - }; - let ble_controller: trouble_host::prelude::ExternalController<_, 20> = - trouble_host::prelude::ExternalController::new(ble_connector); - if let Err(e) = spawner.spawn(ble::ble_task(ble::BleTaskConfig { - controller: ble_controller, - address_bytes: ble_mac, - local_name: ble_name, - })) { - defmt::panic!("spawn(ble_task) failed: {}", defmt::Debug2Format(&e)); + } else { + defmt::info!( + "ble: skipped — Wi-Fi SSID configured; BLE and Wi-Fi are mutually exclusive to fit internal SRAM" + ); } if let Err(e) = spawner.spawn(stackchan_firmware::desktop_permission::desktop_permission_task()) diff --git a/crates/stackchan-firmware/src/net/stack.rs b/crates/stackchan-firmware/src/net/stack.rs index 3df16809..1c828c75 100644 --- a/crates/stackchan-firmware/src/net/stack.rs +++ b/crates/stackchan-firmware/src/net/stack.rs @@ -6,10 +6,16 @@ use embassy_net::{Runner, StackResources}; use esp_radio::wifi::WifiDevice; use static_cell::StaticCell; -/// Maximum number of concurrent sockets the firmware ever needs: -/// one for SNTP (UDP), one or two for the HTTP server (listen + -/// accepted), one for mDNS responder, plus a couple of slack. -pub const STACK_SOCKETS: usize = 6; +/// Maximum number of concurrent sockets the firmware ever needs. +/// +/// `smoltcp` panics ("adding a socket to a full `SocketSet`") the moment +/// demand exceeds this, so it must cover every consumer that can be live at +/// once: `HTTP_WORKER_COUNT` HTTP listeners (each owns its own socket), plus +/// mDNS, SNTP, the DHCP + DNS clients, and the optional outbound clients +/// (agent sidecar, `VoiceVox`, audio debug). The old value of 6 pre-dated the +/// 4-worker HTTP pool and overflowed as soon as Wi-Fi linked. Slots are cheap +/// static metadata, so over-provisioning costs only a few `.bss` bytes. +pub const STACK_SOCKETS: usize = crate::net::http::HTTP_WORKER_COUNT + 8; /// Static cell for the embassy-net resource pool. Sized at compile /// time to avoid heap fragmentation under SNTP/HTTP/mDNS churn. diff --git a/crates/stackchan-firmware/src/storage.rs b/crates/stackchan-firmware/src/storage.rs index 91618d42..9db652a2 100644 --- a/crates/stackchan-firmware/src/storage.rs +++ b/crates/stackchan-firmware/src/storage.rs @@ -457,7 +457,8 @@ where let dst = root .open_file_in_dir(&short, Mode::ReadWriteCreateOrTruncate) .map_err(|_| StorageError::Write)?; - dst.write(rendered.as_bytes()).map_err(|_| StorageError::Write)?; + dst.write(rendered.as_bytes()) + .map_err(|_| StorageError::Write)?; dst.flush().map_err(|_| StorageError::Write)?; } // Staging served its purpose (a crash-safe copy existed before we