From 8b6689d37b68acf3cdf1f78dbf0a5dcb29031c23 Mon Sep 17 00:00:00 2001 From: Nick Dunklee Date: Sat, 23 May 2026 22:15:24 -0600 Subject: [PATCH] fix: power brownout and dirty write flush on restart or shutdown This is probably another "needs soaking" fix, as it touches power. Backstory on this one: I noticed the sensor firmware build was constantly sending "Battery is low" messages when a RAK19007 was below 50%. (These messages are sent to all node admins but only show up in third-party clients, as the stock MeshCore mobile client doesn't let one see messages from a sensor node. Sending that message seems like yet another power draw, but that is not part of this PR.) Meanwhile, people on the local mesh have been talking on and off about nodes of certain types randomly losing their contact lists, and others were talking about Heltec V4 brownouts. I also observed a Heltec V4 die prematurely at around 50%, and I started thinking these issues were all related. I started digging into the code and found a few potential leads: - MeshCore does a "lazy" write on `dirty_contacts_expiry` in a 5-second window. - The shutdown/restart paths do not clean this up. - The low-battery check is a poll every 8 seconds with no awareness of other things going on in the node. **On the power piece:** Heltec V4 and other higher-powered nodes can hit the battery harder when transmitting. Below 50%, lithium batteries sag more dramatically than they do at higher charge states. If the power check happens at the same time as a transmit, the shutdown code gets called prematurely and shuts down the node. **On the file write piece:** If the shutdown or restart paths are called, the code just calls `shutdown()` or `reboot()` without checking for a pending write and calling `saveContacts()`. There do not appear to be any other file writes that act this way. **The Fix** The change keeps using AUTO_SHUTDOWN_MILLIVOLTS, so it respects previous power threshold decisions across all node types. 
With this change, all restart or shutdown paths will make sure to call `saveContacts()` before shutting down to stop the list from becoming corrupted. It also suspends reading the battery level during transmit and for 250 ms afterwards (adjustable via `POST_TX_BATT_SETTLE_MS`) so a power sag doesn't trigger an early shutdown. On the Heltec V4 at least, the MeshCore software power threshold is much higher than the board's internal brownout/shutdown threshold. **Tested on** - Heltec V4 - RAK 19007 - Heltec T096 - RAK 19003 On the Heltec V4, I can now pass 50% and get down to 36% before it shuts down. That said, at that voltage the reported 36% should probably read closer to 5%, [based on some voltage curve sites like this one](https://voltagebasics.com/lithium-polymer-battery-voltage-chart/). That is probably an idea for future mobile app improvements: the MCU temp and battery voltage could be used in the app itself to calculate the battery percent, and it would likely seem a bit more "accurate" on all board types without having to add math in the node code. 
--- examples/companion_radio/AbstractUITask.h | 7 ++++ examples/companion_radio/MyMesh.h | 10 ++++- examples/companion_radio/main.cpp | 1 + examples/companion_radio/ui-new/UITask.cpp | 43 +++++++++++++--------- examples/simple_repeater/MyMesh.h | 8 ++++ examples/simple_repeater/main.cpp | 1 + examples/simple_sensor/SensorMesh.cpp | 4 ++ src/MeshCore.h | 21 +++++++++++ src/helpers/radiolib/RadioLibWrappers.cpp | 3 ++ 9 files changed, 79 insertions(+), 19 deletions(-) diff --git a/examples/companion_radio/AbstractUITask.h b/examples/companion_radio/AbstractUITask.h index 0eee45aef3..b9baa4e82c 100644 --- a/examples/companion_radio/AbstractUITask.h +++ b/examples/companion_radio/AbstractUITask.h @@ -22,17 +22,24 @@ enum class UIEventType { ack }; +class ShutdownHandler { +public: + virtual void onBeforeShutdown() = 0; +}; + class AbstractUITask { protected: mesh::MainBoard* _board; BaseSerialInterface* _serial; bool _connected; + ShutdownHandler* _shutdown_handler = nullptr; AbstractUITask(mesh::MainBoard* board, BaseSerialInterface* serial) : _board(board), _serial(serial) { _connected = false; } public: + void setShutdownHandler(ShutdownHandler* h) { _shutdown_handler = h; } void setHasConnection(bool connected) { _connected = connected; } bool hasConnection() const { return _connected; } uint16_t getBattMilliVolts() const { return _board->getBattMilliVolts(); } diff --git a/examples/companion_radio/MyMesh.h b/examples/companion_radio/MyMesh.h index f6a4ce40e9..dc0606a752 100644 --- a/examples/companion_radio/MyMesh.h +++ b/examples/companion_radio/MyMesh.h @@ -84,7 +84,7 @@ struct AdvertPath { uint8_t path[MAX_PATH_SIZE]; }; -class MyMesh : public BaseChatMesh, public DataStoreHost { +class MyMesh : public BaseChatMesh, public DataStoreHost, public ShutdownHandler { public: MyMesh(mesh::Radio &radio, mesh::RNG &rng, mesh::RTCClock &rtc, SimpleMeshTables &tables, DataStore& store, AbstractUITask* ui=NULL); @@ -166,6 +166,14 @@ class MyMesh : public BaseChatMesh, 
public DataStoreHost { public: void savePrefs() { _store->savePrefs(_prefs, sensors.node_lat, sensors.node_lon); } + void onBeforeShutdown() override { + if (dirty_contacts_expiry) { + saveContacts(); + dirty_contacts_expiry = 0; + } + savePrefs(); + } + #if ENV_INCLUDE_GPS == 1 void applyGpsPrefs() { sensors.setSettingValue("gps", _prefs.gps_enabled ? "1" : "0"); diff --git a/examples/companion_radio/main.cpp b/examples/companion_radio/main.cpp index 4395c5b388..b3dc148d4f 100644 --- a/examples/companion_radio/main.cpp +++ b/examples/companion_radio/main.cpp @@ -237,6 +237,7 @@ void setup() { #ifdef DISPLAY_CLASS ui_task.begin(disp, &sensors, the_mesh.getNodePrefs()); // still want to pass this in as dependency, as prefs might be moved + ui_task.setShutdownHandler(&the_mesh); #endif } diff --git a/examples/companion_radio/ui-new/UITask.cpp b/examples/companion_radio/ui-new/UITask.cpp index 6f363d7f96..02e32dd8b2 100644 --- a/examples/companion_radio/ui-new/UITask.cpp +++ b/examples/companion_radio/ui-new/UITask.cpp @@ -694,6 +694,8 @@ void UITask::shutdown(bool restart){ #endif // PIN_BUZZER + if (_shutdown_handler) _shutdown_handler->onBeforeShutdown(); + if (restart) { _board->reboot(); } else { @@ -821,26 +823,31 @@ void UITask::loop() { #ifdef AUTO_SHUTDOWN_MILLIVOLTS if (millis() > next_batt_chck) { - uint16_t milliVolts = getBattMilliVolts(); - if (milliVolts > 0 && milliVolts < AUTO_SHUTDOWN_MILLIVOLTS) { - - // show low battery shutdown alert - // we should only do this for eink displays, which will persist after power loss - #if defined(THINKNODE_M1) || defined(LILYGO_TECHO) - if (_display != NULL) { - _display->startFrame(); - _display->setTextSize(2); - _display->setColor(DisplayDriver::RED); - _display->drawTextCentered(_display->width() / 2, 20, "Low Battery."); - _display->drawTextCentered(_display->width() / 2, 40, "Shutting Down!"); - _display->endFrame(); - } - #endif + uint32_t now = millis(); + if (!_board->isBattReadSafe(now)) { + // TX just 
completed — voltage hasn't recovered yet; retry after settle window. + next_batt_chck = now + POST_TX_BATT_SETTLE_MS + 50; + } else { + uint16_t milliVolts = getBattMilliVolts(); + if (milliVolts > 0 && milliVolts < AUTO_SHUTDOWN_MILLIVOLTS) { + + // show low battery shutdown alert + // we should only do this for eink displays, which will persist after power loss + #if defined(THINKNODE_M1) || defined(LILYGO_TECHO) + if (_display != NULL) { + _display->startFrame(); + _display->setTextSize(2); + _display->setColor(DisplayDriver::RED); + _display->drawTextCentered(_display->width() / 2, 20, "Low Battery."); + _display->drawTextCentered(_display->width() / 2, 40, "Shutting Down!"); + _display->endFrame(); + } + #endif - shutdown(); + shutdown(); - } - next_batt_chck = millis() + 8000; + } + next_batt_chck = millis() + 8000; } #endif } diff --git a/examples/simple_repeater/MyMesh.h b/examples/simple_repeater/MyMesh.h index 8ed0317e69..39651d2d62 100644 --- a/examples/simple_repeater/MyMesh.h +++ b/examples/simple_repeater/MyMesh.h @@ -192,6 +192,14 @@ class MyMesh : public mesh::Mesh, public CommonCLICallbacks { _cli.savePrefs(_fs); } + void flushPending() { + if (dirty_contacts_expiry) { + acl.save(_fs); + dirty_contacts_expiry = 0; + } + savePrefs(); + } + void sendFloodScoped(const TransportKey& scope, mesh::Packet* pkt, uint32_t delay_millis, uint8_t path_hash_size); // CommonCLICallbacks diff --git a/examples/simple_repeater/main.cpp b/examples/simple_repeater/main.cpp index 7fad801b98..aa3b7974e3 100644 --- a/examples/simple_repeater/main.cpp +++ b/examples/simple_repeater/main.cpp @@ -140,6 +140,7 @@ void loop() { userBtnDownAt = millis(); } else if ((unsigned long)(millis() - userBtnDownAt) >= USER_BTN_HOLD_OFF_MILLIS) { Serial.println("Powering off..."); + the_mesh.flushPending(); board.powerOff(); // does not return } } else { diff --git a/examples/simple_sensor/SensorMesh.cpp b/examples/simple_sensor/SensorMesh.cpp index 879fcbf026..cf78dc6e96 100644 --- 
a/examples/simple_sensor/SensorMesh.cpp +++ b/examples/simple_sensor/SensorMesh.cpp @@ -920,6 +920,10 @@ void SensorMesh::loop() { uint32_t curr = getRTCClock()->getCurrentTime(); if (curr >= last_read_time + SENSOR_READ_INTERVAL_SECS) { + // Skip this cycle if TX just completed — the current spike sags battery + // voltage enough to falsely trigger low-battery alerts on weaker cells. + // The loop runs again in milliseconds, so the read is only deferred briefly. + if (!board.isBattReadSafe(millis())) return; telemetry.reset(); telemetry.addVoltage(TELEM_CHANNEL_SELF, (float)board.getBattMilliVolts() / 1000.0f); // query other sensors -- target specific diff --git a/src/MeshCore.h b/src/MeshCore.h index 2db1d4c3ec..72ea3e0829 100644 --- a/src/MeshCore.h +++ b/src/MeshCore.h @@ -41,8 +41,29 @@ namespace mesh { #define BD_STARTUP_NORMAL 0 // getStartupReason() codes #define BD_STARTUP_RX_PACKET 1 +// Milliseconds to wait after TX completes before trusting battery ADC readings. +// LoRa TX causes a current spike that sags battery terminal voltage; on LiPo +// cells below ~50% SoC the sag is large enough to cross shutdown/alert thresholds. +#ifndef POST_TX_BATT_SETTLE_MS +#define POST_TX_BATT_SETTLE_MS 250 +#endif + class MainBoard { + + bool _tx_active = false; + uint32_t _last_tx_complete_ms = 0; + public: + // Called by the radio layer — not meant to be overridden. + void notifyTxStart() { _tx_active = true; } + void notifyTxComplete(uint32_t now_ms) { _tx_active = false; _last_tx_complete_ms = now_ms; } + + // Returns true when it is safe to read the battery ADC (TX not in progress + // and enough time has elapsed since the last transmission for voltage to recover). 
+ bool isBattReadSafe(uint32_t now_ms, uint32_t settle_ms = POST_TX_BATT_SETTLE_MS) const { + return !_tx_active && (now_ms - _last_tx_complete_ms >= settle_ms); + } + virtual uint16_t getBattMilliVolts() = 0; virtual float getMCUTemperature() { return NAN; } virtual bool setAdcMultiplier(float multiplier) { return false; }; diff --git a/src/helpers/radiolib/RadioLibWrappers.cpp b/src/helpers/radiolib/RadioLibWrappers.cpp index b6519aefa7..eb354c3e67 100644 --- a/src/helpers/radiolib/RadioLibWrappers.cpp +++ b/src/helpers/radiolib/RadioLibWrappers.cpp @@ -151,6 +151,7 @@ uint32_t RadioLibWrapper::getEstAirtimeFor(int len_bytes) { } bool RadioLibWrapper::startSendRaw(const uint8_t* bytes, int len) { + _board->notifyTxStart(); _board->onBeforeTransmit(); int err = _radio->startTransmit((uint8_t *) bytes, len); if (err == RADIOLIB_ERR_NONE) { @@ -160,6 +161,7 @@ bool RadioLibWrapper::startSendRaw(const uint8_t* bytes, int len) { MESH_DEBUG_PRINTLN("RadioLibWrapper: error: startTransmit(%d)", err); idle(); // trigger another startRecv() _board->onAfterTransmit(); + _board->notifyTxComplete(millis()); return false; } @@ -175,6 +177,7 @@ bool RadioLibWrapper::isSendComplete() { void RadioLibWrapper::onSendFinished() { _radio->finishTransmit(); _board->onAfterTransmit(); + _board->notifyTxComplete(millis()); state = STATE_IDLE; }