Fixes for #4395: nrf52 flash filesystem reliability (#4406)

* bug #4184: fix config file loss due to filesystem write errors
* Use SafeFile for atomic file writing (with xor checksum readback)
* Write db.proto last because it could be the largest file on the FS (and less critical)
* Don't keep a tmp file around while writing db.proto (because too big to fit two files in the filesystem)
* generate a new critial fault if we encounter errors writing to flash
either CriticalErrorCode_FLASH_CORRUPTION_RECOVERABLE or CriticalErrorCode_FLASH_CORRUPTION_UNRECOVERABLE
(depending on if the second write attempt worked)
* reformat the filesystem if we detect it is corrupted (then rewrite our config files) (only on nrf52 - not sure
yet if we should bother on ESP32)
* If we have to format the FS, make sure to preserve the oem.proto if it exists

* add logLegacy() so old C code in libs can log via our logging

* move filesList() to a better location (used only in developer builds)

* Reformat with "trunk fmt" to match our coding conventions

* for #4395: don't use .exists() to before attempting file open
If a LFS filesystem is corrupted, .exists() can fail when a mere .open()
attempt would have succeeded.  Therefore better to do the .open() in hopes that
we can read the file (in case we need to reformat to fix the FS).
(Seen and confirmed in stress testing)

* for #4395 more fixes, see below for details:
* check for LFS assertion failures during file operations (needs customized lfs_util.h to provide suitable hooks)
* Remove fsCheck() because checking filesystem by writing to it is very high risk, it makes likelyhood that we will
be able to read the config protobufs quite low.
* Update the LFS inside of adafruitnrf52 to 1.7.2 (from their old 1.6.1) to get the following fix:
97d8d5e96a

* use disable_adafruit_usb.py now that we are (temporarily?) using a forked adafruit lib
We need to reach inside the adafruit project and turn off USE_TINYUSB, just doing that
from platformio.ini is no longer sufficient.

Tested on a wio-sdk-wm1110 board (which is the only board that had this problem)

---------

Co-authored-by: Ben Meadors <benmmeadors@gmail.com>
This commit is contained in:
geeksville
2024-08-13 04:45:39 -07:00
committed by GitHub
parent 6e8300287b
commit 62a0321c7d
11 changed files with 285 additions and 155 deletions

View File

@@ -26,6 +26,16 @@ SOFTWARE.*/
#include "DebugConfiguration.h"
/// A C wrapper for LOG_DEBUG that can be used from arduino C libs that don't know about C++ or meshtastic
extern "C" void logLegacy(const char *level, const char *fmt, ...)
{
va_list args;
va_start(args, fmt);
if (console)
console->vprintf(level, fmt, args);
va_end(args);
}
#if HAS_NETWORKING
Syslog::Syslog(UDP &client)
@@ -169,4 +179,4 @@ inline bool Syslog::_sendLog(uint16_t pri, const char *appName, const char *mess
return true;
}
#endif
#endif

View File

@@ -62,6 +62,9 @@
#endif
#endif
/// A C wrapper for LOG_DEBUG that can be used from arduino C libs that don't know about C++ or meshtastic
extern "C" void logLegacy(const char *level, const char *fmt, ...);
#define SYSLOG_NILVALUE "-"
#define SYSLOG_CRIT 2 /* critical conditions */

View File

@@ -48,6 +48,15 @@ void OSFS::writeNBytes(uint16_t address, unsigned int num, const byte *input)
}
#endif
bool lfs_assert_failed =
false; // Note: we use this global on all platforms, though it can only be set true on nrf52 (in our modified lfs_util.h)
extern "C" void lfs_assert(const char *reason)
{
LOG_ERROR("LFS assert: %s\n", reason);
lfs_assert_failed = true;
}
/**
* @brief Copies a file from one location to another.
*
@@ -199,7 +208,7 @@ std::vector<meshtastic_FileInfo> getFiles(const char *dirname, uint8_t levels)
* @param levels The number of levels of subdirectories to list.
* @param del Whether or not to delete the contents of the directory after listing.
*/
void listDir(const char *dirname, uint8_t levels, bool del = false)
void listDir(const char *dirname, uint8_t levels, bool del)
{
#ifdef FSCom
#if (defined(ARCH_ESP32) || defined(ARCH_RP2040) || defined(ARCH_PORTDUINO))
@@ -214,7 +223,9 @@ void listDir(const char *dirname, uint8_t levels, bool del = false)
}
File file = root.openNextFile();
while (file) {
while (
file &&
file.name()[0]) { // This file.name() check is a workaround for a bug in the Adafruit LittleFS nrf52 glue (see issue 4395)
if (file.isDirectory() && !String(file.name()).endsWith(".")) {
if (levels) {
#ifdef ARCH_ESP32
@@ -313,62 +324,6 @@ void rmDir(const char *dirname)
#endif
}
bool fsCheck()
{
#if defined(ARCH_NRF52)
size_t write_size = 0;
size_t read_size = 0;
char buf[32] = {0};
Adafruit_LittleFS_Namespace::File file(FSCom);
const char *text = "meshtastic fs test";
size_t text_length = strlen(text);
const char *filename = "/meshtastic.txt";
LOG_DEBUG("Try create file .\n");
if (file.open(filename, FILE_O_WRITE)) {
write_size = file.write(text);
} else {
LOG_DEBUG("Open file failed .\n");
goto FORMAT_FS;
}
if (write_size != text_length) {
LOG_DEBUG("Text bytes do not match .\n");
file.close();
goto FORMAT_FS;
}
file.close();
if (!file.open(filename, FILE_O_READ)) {
LOG_DEBUG("Open file failed .\n");
goto FORMAT_FS;
}
read_size = file.readBytes(buf, text_length);
if (read_size != text_length) {
LOG_DEBUG("Text bytes do not match .\n");
file.close();
goto FORMAT_FS;
}
if (memcmp(buf, text, text_length) != 0) {
LOG_DEBUG("The written bytes do not match the read bytes .\n");
file.close();
goto FORMAT_FS;
}
return true;
FORMAT_FS:
LOG_DEBUG("Format FS ....\n");
FSCom.format();
FSCom.begin();
return false;
#else
return true;
#endif
}
void fsInit()
{
#ifdef FSCom
@@ -378,35 +333,6 @@ void fsInit()
}
#if defined(ARCH_ESP32)
LOG_DEBUG("Filesystem files (%d/%d Bytes):\n", FSCom.usedBytes(), FSCom.totalBytes());
#elif defined(ARCH_NRF52)
/*
* nRF52840 has a certain chance of automatic formatting failure.
* Try to create a file after initializing the file system. If the creation fails,
* it means that the file system is not working properly. Please format it manually again.
* To check the normality of the file system, you need to disable the LFS_NO_ASSERT assertion.
* Otherwise, the assertion will be entered at the moment of reading or opening, and the FS will not be formatted.
* */
bool ret = false;
uint8_t retry = 3;
while (retry--) {
ret = fsCheck();
if (ret) {
LOG_DEBUG("File system check is OK.\n");
break;
}
delay(10);
}
// It may not be possible to reach this step.
// Add a loop here to prevent unpredictable situations from happening.
// Can add a screen to display error status later.
if (!ret) {
while (1) {
LOG_ERROR("The file system is damaged and cannot proceed to the next step.\n");
delay(1000);
}
}
#else
LOG_DEBUG("Filesystem files:\n");
#endif

View File

@@ -51,9 +51,13 @@ using namespace Adafruit_LittleFS_Namespace;
#endif
void fsInit();
void fsListFiles();
bool copyFile(const char *from, const char *to);
bool renameFile(const char *pathFrom, const char *pathTo);
std::vector<meshtastic_FileInfo> getFiles(const char *dirname, uint8_t levels);
void listDir(const char *dirname, uint8_t levels, bool del);
void listDir(const char *dirname, uint8_t levels, bool del = false);
void rmDir(const char *dirname);
void setupSDCard();
void setupSDCard();
extern bool lfs_assert_failed; // Note: we use this global on all platforms, though it can only be set true on nrf52 (in our
// modified lfs_util.h)

View File

@@ -11,6 +11,9 @@ static File openFile(const char *filename, bool fullAtomic)
String filenameTmp = filename;
filenameTmp += ".tmp";
// clear any previous LFS errors
lfs_assert_failed = false;
return FSCom.open(filenameTmp.c_str(), FILE_O_WRITE);
}
@@ -73,6 +76,9 @@ bool SafeFile::close()
/// Read our (closed) tempfile back in and compare the hash
bool SafeFile::testReadback()
{
bool lfs_failed = lfs_assert_failed;
lfs_assert_failed = false;
String filenameTmp = filename;
filenameTmp += ".tmp";
auto f2 = FSCom.open(filenameTmp.c_str(), FILE_O_READ);
@@ -93,7 +99,7 @@ bool SafeFile::testReadback()
return false;
}
return true;
return !lfs_failed;
}
#endif

View File

@@ -56,27 +56,6 @@ meshtastic_ChannelFile channelFile;
meshtastic_OEMStore oemStore;
static bool hasOemStore = false;
// These are not publically exposed - copied from InternalFileSystem.cpp
// #define FLASH_NRF52_PAGE_SIZE 4096
// #define LFS_FLASH_TOTAL_SIZE (7*FLASH_NRF52_PAGE_SIZE)
// #define LFS_BLOCK_SIZE 128
/// List all files in the FS and test write and readback.
/// Useful for filesystem stress testing - normally stripped from build by the linker.
void flashTest()
{
auto filesManifest = getFiles("/", 5);
uint32_t totalSize = 0;
for (size_t i = 0; i < filesManifest.size(); i++) {
LOG_INFO("File %s (size %d)\n", filesManifest[i].file_name, filesManifest[i].size_bytes);
totalSize += filesManifest[i].size_bytes;
}
LOG_INFO("%d files (total size %u)\n", filesManifest.size(), totalSize);
// LOG_INFO("Filesystem block size %u, total bytes %u", LFS_FLASH_TOTAL_SIZE, LFS_BLOCK_SIZE);
nodeDB->saveToDisk();
}
bool meshtastic_DeviceState_callback(pb_istream_t *istream, pb_ostream_t *ostream, const pb_field_iter_t *field)
{
if (ostream) {
@@ -617,11 +596,6 @@ LoadFileResult NodeDB::loadProto(const char *filename, size_t protoSize, size_t
LoadFileResult state = LoadFileResult::OTHER_FAILURE;
#ifdef FSCom
if (!FSCom.exists(filename)) {
LOG_INFO("File %s not found\n", filename);
return LoadFileResult::NOT_FOUND;
}
auto f = FSCom.open(filename, FILE_O_READ);
if (f) {