From 2c68710e8c252c798281ccdf8775c96e9f643d61 Mon Sep 17 00:00:00 2001 From: Jason P Date: Sat, 27 Dec 2025 11:17:55 -0600 Subject: [PATCH] Improve sanitizeString function for Node Names (#9086) --- src/graphics/SharedUIDisplay.cpp | 37 +++++++++++++++++++++++++++++--- 1 file changed, 34 insertions(+), 3 deletions(-) diff --git a/src/graphics/SharedUIDisplay.cpp b/src/graphics/SharedUIDisplay.cpp index 5660810e6..f5ca6ed03 100644 --- a/src/graphics/SharedUIDisplay.cpp +++ b/src/graphics/SharedUIDisplay.cpp @@ -470,18 +470,49 @@ bool isAllowedPunctuation(char c) return allowed.find(c) != std::string::npos; } +static void replaceAll(std::string &s, const std::string &from, const std::string &to) +{ + if (from.empty()) + return; + size_t pos = 0; + while ((pos = s.find(from, pos)) != std::string::npos) { + s.replace(pos, from.size(), to); + pos += to.size(); + } +} + std::string sanitizeString(const std::string &input) { std::string output; bool inReplacement = false; - for (char c : input) { - if (std::isalnum(static_cast<unsigned char>(c)) || isAllowedPunctuation(c)) { + // Make a mutable copy so we can normalize UTF-8 “smart punctuation” into ASCII first. + std::string s = input; + + // Curly single quotes: ‘ ’ + replaceAll(s, "\xE2\x80\x98", "'"); // U+2018 + replaceAll(s, "\xE2\x80\x99", "'"); // U+2019 + + // Curly double quotes: “ ” + replaceAll(s, "\xE2\x80\x9C", "\""); // U+201C + replaceAll(s, "\xE2\x80\x9D", "\""); // U+201D + + // En dash / Em dash: – — + replaceAll(s, "\xE2\x80\x93", "-"); // U+2013 + replaceAll(s, "\xE2\x80\x94", "-"); // U+2014 + + // Non-breaking space + replaceAll(s, "\xC2\xA0", " "); // U+00A0 + + // Now do your original sanitize pass over the normalized string. 
+ for (unsigned char uc : s) { + char c = static_cast<char>(uc); + if (std::isalnum(uc) || isAllowedPunctuation(c)) { output += c; inReplacement = false; } else { if (!inReplacement) { - output += 0xbf; // ISO-8859-1 for inverted question mark + output += static_cast<char>(0xBF); // ISO-8859-1 for inverted question mark inReplacement = true; } }