Files
firmware/src/graphics/niche/InkHUD/AppletFont.cpp

735 lines
34 KiB
C++

#ifdef MESHTASTIC_INCLUDE_INKHUD
#include "./AppletFont.h"
#include <assert.h>
using namespace NicheGraphics;
InkHUD::AppletFont::AppletFont()
{
// Default constructor uses the in-built AdafruitGFX font (not recommended)
}
InkHUD::AppletFont::AppletFont(const GFXfont &adafruitGFXFont, Encoding encoding, int8_t paddingTop, int8_t paddingBottom)
: gfxFont(&adafruitGFXFont), encoding(encoding)
{
// AdafruitGFX fonts are drawn relative to a "cursor line";
// they print as if the glyphs are resting on the line of piece of ruled paper.
// The glyphs also each have a different height.
// To simplify drawing, we will scan the entire font now, and determine an appropriate height for a line of text
// We also need to know where that "cursor line" sits inside this "line height";
// we need this additional info in order to align text by top-left, bottom-right, etc
// AdafruitGFX fonts do declare a line-height, but this seems to include a certain amount of padding,
// which we'd rather not deal with. If we want padding, we'll add it manually.
this->ascenderHeight = 0;
this->descenderHeight = 0;
this->height = 0;
// Scan each glyph in the AdafruitGFX font
for (uint16_t i = 0; i <= (gfxFont->last - gfxFont->first); i++) {
uint8_t glyphHeight = gfxFont->glyph[i].height; // Height of glyph
this->height = max(this->height, glyphHeight); // Store if it's a new max
// Calculate how far the glyph rises the cursor line
// Store if new max value
// Caution: signed and unsigned types
int8_t glyphAscender = 0 - gfxFont->glyph[i].yOffset;
if (glyphAscender > 0)
this->ascenderHeight = max(this->ascenderHeight, (uint8_t)glyphAscender);
int8_t glyphDescender = gfxFont->glyph[i].height + gfxFont->glyph[i].yOffset;
if (glyphDescender > 0)
this->descenderHeight = max(this->descenderHeight, (uint8_t)glyphDescender);
}
// Apply any manual padding to grow or shrink the line size
// Helpful if a font has one or two exceptionally large characters, which would make the lines ridiculously tall
ascenderHeight += paddingTop;
descenderHeight += paddingBottom;
// Find how far the cursor advances when we "print" a space character
spaceCharWidth = gfxFont->glyph[(uint8_t)' ' - gfxFont->first].xAdvance;
}
/*
▲ ##### # ▲
│ # # │
lineHeight │ ### # │
│ # # # # │ heightAboveCursor
│ # # # # │
│ # # #### │
│ -----------------#----
│ # │ heightBelowCursor
▼ ### ▼
*/
uint8_t InkHUD::AppletFont::lineHeight()
{
return this->height;
}
// AdafruitGFX fonts print characters so that they nicely on an imaginary line (think: ruled paper).
// This value is the height of the font, above that imaginary line.
// Used to calculate the true height of the font
uint8_t InkHUD::AppletFont::heightAboveCursor()
{
return this->ascenderHeight;
}
// AdafruitGFX fonts print characters so that they nicely on an imaginary line (think: ruled paper).
// This value is the height of the font, below that imaginary line.
// Used to calculate the true height of the font
uint8_t InkHUD::AppletFont::heightBelowCursor()
{
return this->descenderHeight;
}
// Width of the space character
// Used with Applet::printWrapped
uint8_t InkHUD::AppletFont::widthBetweenWords()
{
return this->spaceCharWidth;
}
// Convert a unicode char from set of UTF-8 bytes to UTF-32
// Used by AppletFont::applyEncoding, which remaps unicode chars for extended ASCII fonts, based on their UTF-32 value
uint32_t InkHUD::AppletFont::toUtf32(std::string utf8)
{
uint32_t utf32 = 0;
switch (utf8.length()) {
case 2:
// 5 bits + 6 bits
utf32 |= (utf8.at(0) & 0b00011111) << 6;
utf32 |= (utf8.at(1) & 0b00111111);
break;
case 3:
// 4 bits + 6 bits + 6 bits
utf32 |= (utf8.at(0) & 0b00001111) << (6 + 6);
utf32 |= (utf8.at(1) & 0b00111111) << 6;
utf32 |= (utf8.at(2) & 0b00111111);
break;
case 4:
// 3 bits + 6 bits + 6 bits + 6 bits
utf32 |= (utf8.at(0) & 0b00000111) << (6 + 6 + 6);
utf32 |= (utf8.at(1) & 0b00111111) << (6 + 6);
utf32 |= (utf8.at(2) & 0b00111111) << 6;
utf32 |= (utf8.at(3) & 0b00111111);
break;
default:
return 0;
}
return utf32;
}
// Process a string, collating UTF-8 bytes, and sending them off for re-encoding to extended ASCII
// Not all InkHUD text is passed through here, only text which could potentially contain non-ASCII chars
std::string InkHUD::AppletFont::decodeUTF8(std::string encoded)
{
// Final processed output
std::string decoded;
// Holds bytes for one UTF-8 char during parsing
std::string utf8Char;
uint8_t utf8CharSize = 0;
for (char &c : encoded) {
// If first byte
if (utf8Char.empty()) {
// If MSB is unset, byte is an ASCII char
// If MSB is set, byte is part of a UTF-8 char. Counting number of higher-order bits tells how many bytes in char
if ((c & 0x80)) {
char c1 = c;
while (c1 & 0x80) {
c1 <<= 1;
utf8CharSize++;
}
}
}
// Append the byte to the UTF-8 char we're building
utf8Char += c;
// More bytes left to collect. Iterate.
if (utf8Char.length() < utf8CharSize)
continue;
// Now collected all bytes for this char
// Remap the value to match the encoding of our 8-bit AppletFont
decoded += applyEncoding(utf8Char);
// Reset, ready to build next UTF-8 char from the encoded bytes
utf8Char.clear();
utf8CharSize = 0;
} // For each char
// All chars processed, return result
return decoded;
}
// Re-encode a single UTF-8 character to extended ASCII
// Target encoding depends on the font
char InkHUD::AppletFont::applyEncoding(std::string utf8)
{
// ##################################################### Syntactic Sugar #####################################################
#define REMAP(in, out) \
case in: \
return out;
// ###########################################################################################################################
// Latin - Central Europe
// https://www.unicode.org/Public/MAPPINGS/VENDORS/MICSFT/WINDOWS/CP1250.TXT
if (encoding == WINDOWS_1250) {
// 1-Byte chars: no remapping
if (utf8.length() == 1)
return utf8.at(0);
// Multi-byte chars:
switch (toUtf32(utf8)) {
REMAP(0x20AC, 0x80); // EURO SIGN
REMAP(0x201A, 0x82); // SINGLE LOW-9 QUOTATION MARK
REMAP(0x201E, 0x84); // DOUBLE LOW-9 QUOTATION MARK
REMAP(0x2026, 0x85); // HORIZONTAL ELLIPSIS
REMAP(0x2020, 0x86); // DAGGER
REMAP(0x2021, 0x87); // DOUBLE DAGGER
REMAP(0x2030, 0x89); // PER MILLE SIGN
REMAP(0x0160, 0x8A); // LATIN CAPITAL LETTER S WITH CARON
REMAP(0x2039, 0x8B); // SINGLE LEFT-POINTING ANGLE QUOTATION MARK
REMAP(0x015A, 0x8C); // LATIN CAPITAL LETTER S WITH ACUTE
REMAP(0x0164, 0x8D); // LATIN CAPITAL LETTER T WITH CARON
REMAP(0x017D, 0x8E); // LATIN CAPITAL LETTER Z WITH CARON
REMAP(0x0179, 0x8F); // LATIN CAPITAL LETTER Z WITH ACUTE
REMAP(0x2018, 0x91); // LEFT SINGLE QUOTATION MARK
REMAP(0x2019, 0x92); // RIGHT SINGLE QUOTATION MARK
REMAP(0x201C, 0x93); // LEFT DOUBLE QUOTATION MARK
REMAP(0x201D, 0x94); // RIGHT DOUBLE QUOTATION MARK
REMAP(0x2022, 0x95); // BULLET
REMAP(0x2013, 0x96); // EN DASH
REMAP(0x2014, 0x97); // EM DASH
REMAP(0x2122, 0x99); // TRADE MARK SIGN
REMAP(0x0161, 0x9A); // LATIN SMALL LETTER S WITH CARON
REMAP(0x203A, 0x9B); // SINGLE RIGHT-POINTING ANGLE QUOTATION MARK
REMAP(0x015B, 0x9C); // LATIN SMALL LETTER S WITH ACUTE
REMAP(0x0165, 0x9D); // LATIN SMALL LETTER T WITH CARON
REMAP(0x017E, 0x9E); // LATIN SMALL LETTER Z WITH CARON
REMAP(0x017A, 0x9F); // LATIN SMALL LETTER Z WITH ACUTE
REMAP(0x00A0, 0xA0); // NO-BREAK SPACE
REMAP(0x02C7, 0xA1); // CARON
REMAP(0x02D8, 0xA2); // BREVE
REMAP(0x0141, 0xA3); // LATIN CAPITAL LETTER L WITH STROKE
REMAP(0x00A4, 0xA4); // CURRENCY SIGN
REMAP(0x0104, 0xA5); // LATIN CAPITAL LETTER A WITH OGONEK
REMAP(0x00A6, 0xA6); // BROKEN BAR
REMAP(0x00A7, 0xA7); // SECTION SIGN
REMAP(0x00A8, 0xA8); // DIAERESIS
REMAP(0x00A9, 0xA9); // COPYRIGHT SIGN
REMAP(0x015E, 0xAA); // LATIN CAPITAL LETTER S WITH CEDILLA
REMAP(0x00AB, 0xAB); // LEFT-POINTING DOUBLE ANGLE QUOTATION MARK
REMAP(0x00AC, 0xAC); // NOT SIGN
REMAP(0x00AD, 0xAD); // SOFT HYPHEN
REMAP(0x00AE, 0xAE); // REGISTERED SIGN
REMAP(0x017B, 0xAF); // LATIN CAPITAL LETTER Z WITH DOT ABOVE
REMAP(0x00B0, 0xB0); // DEGREE SIGN
REMAP(0x00B1, 0xB1); // PLUS-MINUS SIGN
REMAP(0x02DB, 0xB2); // OGONEK
REMAP(0x0142, 0xB3); // LATIN SMALL LETTER L WITH STROKE
REMAP(0x00B4, 0xB4); // ACUTE ACCENT
REMAP(0x00B5, 0xB5); // MICRO SIGN
REMAP(0x00B6, 0xB6); // PILCROW SIGN
REMAP(0x00B7, 0xB7); // MIDDLE DOT
REMAP(0x00B8, 0xB8); // CEDILLA
REMAP(0x0105, 0xB9); // LATIN SMALL LETTER A WITH OGONEK
REMAP(0x015F, 0xBA); // LATIN SMALL LETTER S WITH CEDILLA
REMAP(0x00BB, 0xBB); // RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK
REMAP(0x013D, 0xBC); // LATIN CAPITAL LETTER L WITH CARON
REMAP(0x02DD, 0xBD); // DOUBLE ACUTE ACCENT
REMAP(0x013E, 0xBE); // LATIN SMALL LETTER L WITH CARON
REMAP(0x017C, 0xBF); // LATIN SMALL LETTER Z WITH DOT ABOVE
REMAP(0x0154, 0xC0); // LATIN CAPITAL LETTER R WITH ACUTE
REMAP(0x00C1, 0xC1); // LATIN CAPITAL LETTER A WITH ACUTE
REMAP(0x00C2, 0xC2); // LATIN CAPITAL LETTER A WITH CIRCUMFLEX
REMAP(0x0102, 0xC3); // LATIN CAPITAL LETTER A WITH BREVE
REMAP(0x00C4, 0xC4); // LATIN CAPITAL LETTER A WITH DIAERESIS
REMAP(0x0139, 0xC5); // LATIN CAPITAL LETTER L WITH ACUTE
REMAP(0x0106, 0xC6); // LATIN CAPITAL LETTER C WITH ACUTE
REMAP(0x00C7, 0xC7); // LATIN CAPITAL LETTER C WITH CEDILLA
REMAP(0x010C, 0xC8); // LATIN CAPITAL LETTER C WITH CARON
REMAP(0x00C9, 0xC9); // LATIN CAPITAL LETTER E WITH ACUTE
REMAP(0x0118, 0xCA); // LATIN CAPITAL LETTER E WITH OGONEK
REMAP(0x00CB, 0xCB); // LATIN CAPITAL LETTER E WITH DIAERESIS
REMAP(0x011A, 0xCC); // LATIN CAPITAL LETTER E WITH CARON
REMAP(0x00CD, 0xCD); // LATIN CAPITAL LETTER I WITH ACUTE
REMAP(0x00CE, 0xCE); // LATIN CAPITAL LETTER I WITH CIRCUMFLEX
REMAP(0x010E, 0xCF); // LATIN CAPITAL LETTER D WITH CARON
REMAP(0x0110, 0xD0); // LATIN CAPITAL LETTER D WITH STROKE
REMAP(0x0143, 0xD1); // LATIN CAPITAL LETTER N WITH ACUTE
REMAP(0x0147, 0xD2); // LATIN CAPITAL LETTER N WITH CARON
REMAP(0x00D3, 0xD3); // LATIN CAPITAL LETTER O WITH ACUTE
REMAP(0x00D4, 0xD4); // LATIN CAPITAL LETTER O WITH CIRCUMFLEX
REMAP(0x0150, 0xD5); // LATIN CAPITAL LETTER O WITH DOUBLE ACUTE
REMAP(0x00D6, 0xD6); // LATIN CAPITAL LETTER O WITH DIAERESIS
REMAP(0x00D7, 0xD7); // MULTIPLICATION SIGN
REMAP(0x0158, 0xD8); // LATIN CAPITAL LETTER R WITH CARON
REMAP(0x016E, 0xD9); // LATIN CAPITAL LETTER U WITH RING ABOVE
REMAP(0x00DA, 0xDA); // LATIN CAPITAL LETTER U WITH ACUTE
REMAP(0x0170, 0xDB); // LATIN CAPITAL LETTER U WITH DOUBLE ACUTE
REMAP(0x00DC, 0xDC); // LATIN CAPITAL LETTER U WITH DIAERESIS
REMAP(0x00DD, 0xDD); // LATIN CAPITAL LETTER Y WITH ACUTE
REMAP(0x0162, 0xDE); // LATIN CAPITAL LETTER T WITH CEDILLA
REMAP(0x00DF, 0xDF); // LATIN SMALL LETTER SHARP S
REMAP(0x0155, 0xE0); // LATIN SMALL LETTER R WITH ACUTE
REMAP(0x00E1, 0xE1); // LATIN SMALL LETTER A WITH ACUTE
REMAP(0x00E2, 0xE2); // LATIN SMALL LETTER A WITH CIRCUMFLEX
REMAP(0x0103, 0xE3); // LATIN SMALL LETTER A WITH BREVE
REMAP(0x00E4, 0xE4); // LATIN SMALL LETTER A WITH DIAERESIS
REMAP(0x013A, 0xE5); // LATIN SMALL LETTER L WITH ACUTE
REMAP(0x0107, 0xE6); // LATIN SMALL LETTER C WITH ACUTE
REMAP(0x00E7, 0xE7); // LATIN SMALL LETTER C WITH CEDILLA
REMAP(0x010D, 0xE8); // LATIN SMALL LETTER C WITH CARON
REMAP(0x00E9, 0xE9); // LATIN SMALL LETTER E WITH ACUTE
REMAP(0x0119, 0xEA); // LATIN SMALL LETTER E WITH OGONEK
REMAP(0x00EB, 0xEB); // LATIN SMALL LETTER E WITH DIAERESIS
REMAP(0x011B, 0xEC); // LATIN SMALL LETTER E WITH CARON
REMAP(0x00ED, 0xED); // LATIN SMALL LETTER I WITH ACUTE
REMAP(0x00EE, 0xEE); // LATIN SMALL LETTER I WITH CIRCUMFLEX
REMAP(0x010F, 0xEF); // LATIN SMALL LETTER D WITH CARON
REMAP(0x0111, 0xF0); // LATIN SMALL LETTER D WITH STROKE
REMAP(0x0144, 0xF1); // LATIN SMALL LETTER N WITH ACUTE
REMAP(0x0148, 0xF2); // LATIN SMALL LETTER N WITH CARON
REMAP(0x00F3, 0xF3); // LATIN SMALL LETTER O WITH ACUTE
REMAP(0x00F4, 0xF4); // LATIN SMALL LETTER O WITH CIRCUMFLEX
REMAP(0x0151, 0xF5); // LATIN SMALL LETTER O WITH DOUBLE ACUTE
REMAP(0x00F6, 0xF6); // LATIN SMALL LETTER O WITH DIAERESIS
REMAP(0x00F7, 0xF7); // DIVISION SIGN
REMAP(0x0159, 0xF8); // LATIN SMALL LETTER R WITH CARON
REMAP(0x016F, 0xF9); // LATIN SMALL LETTER U WITH RING ABOVE
REMAP(0x00FA, 0xFA); // LATIN SMALL LETTER U WITH ACUTE
REMAP(0x0171, 0xFB); // LATIN SMALL LETTER U WITH DOUBLE ACUTE
REMAP(0x00FC, 0xFC); // LATIN SMALL LETTER U WITH DIAERESIS
REMAP(0x00FD, 0xFD); // LATIN SMALL LETTER Y WITH ACUTE
REMAP(0x0163, 0xFE); // LATIN SMALL LETTER T WITH CEDILLA
REMAP(0x02D9, 0xFF); // DOT ABOVE
}
}
// Latin - Cyrillic
// https://www.unicode.org/Public/MAPPINGS/VENDORS/MICSFT/WINDOWS/CP1251.TXT
else if (encoding == WINDOWS_1251) {
// 1-Byte chars: no remapping
if (utf8.length() == 1)
return utf8.at(0);
// Multi-byte chars:
switch (toUtf32(utf8)) {
REMAP(0x0402, 0x80); // CYRILLIC CAPITAL LETTER DJE
REMAP(0x0403, 0x81); // CYRILLIC CAPITAL LETTER GJE
REMAP(0x201A, 0x82); // SINGLE LOW-9 QUOTATION MARK
REMAP(0x0453, 0x83); // CYRILLIC SMALL LETTER GJE
REMAP(0x201E, 0x84); // DOUBLE LOW-9 QUOTATION MARK
REMAP(0x2026, 0x85); // HORIZONTAL ELLIPSIS
REMAP(0x2020, 0x86); // DAGGER
REMAP(0x2021, 0x87); // DOUBLE DAGGER
REMAP(0x20AC, 0x88); // EURO SIGN
REMAP(0x2030, 0x89); // PER MILLE SIGN
REMAP(0x0409, 0x8A); // CYRILLIC CAPITAL LETTER LJE
REMAP(0x2039, 0x8B); // SINGLE LEFT-POINTING ANGLE QUOTATION MARK
REMAP(0x040A, 0x8C); // CYRILLIC CAPITAL LETTER NJE
REMAP(0x040C, 0x8D); // CYRILLIC CAPITAL LETTER KJE
REMAP(0x040B, 0x8E); // CYRILLIC CAPITAL LETTER TSHE
REMAP(0x040F, 0x8F); // CYRILLIC CAPITAL LETTER DZHE
REMAP(0x0452, 0x90); // CYRILLIC SMALL LETTER DJE
REMAP(0x2018, 0x91); // LEFT SINGLE QUOTATION MARK
REMAP(0x2019, 0x92); // RIGHT SINGLE QUOTATION MARK
REMAP(0x201C, 0x93); // LEFT DOUBLE QUOTATION MARK
REMAP(0x201D, 0x94); // RIGHT DOUBLE QUOTATION MARK
REMAP(0x2022, 0x95); // BULLET
REMAP(0x2013, 0x96); // EN DASH
REMAP(0x2014, 0x97); // EM DASH
REMAP(0x2122, 0x99); // TRADE MARK SIGN
REMAP(0x0459, 0x9A); // CYRILLIC SMALL LETTER LJE
REMAP(0x203A, 0x9B); // SINGLE RIGHT-POINTING ANGLE QUOTATION MARK
REMAP(0x045A, 0x9C); // CYRILLIC SMALL LETTER NJE
REMAP(0x045C, 0x9D); // CYRILLIC SMALL LETTER KJE
REMAP(0x045B, 0x9E); // CYRILLIC SMALL LETTER TSHE
REMAP(0x045F, 0x9F); // CYRILLIC SMALL LETTER DZHE
REMAP(0x00A0, 0xA0); // NO-BREAK SPACE
REMAP(0x040E, 0xA1); // CYRILLIC CAPITAL LETTER SHORT U
REMAP(0x045E, 0xA2); // CYRILLIC SMALL LETTER SHORT U
REMAP(0x0408, 0xA3); // CYRILLIC CAPITAL LETTER JE
REMAP(0x00A4, 0xA4); // CURRENCY SIGN
REMAP(0x0490, 0xA5); // CYRILLIC CAPITAL LETTER GHE WITH UPTURN
REMAP(0x00A6, 0xA6); // BROKEN BAR
REMAP(0x00A7, 0xA7); // SECTION SIGN
REMAP(0x0401, 0xA8); // CYRILLIC CAPITAL LETTER IO
REMAP(0x00A9, 0xA9); // COPYRIGHT SIGN
REMAP(0x0404, 0xAA); // CYRILLIC CAPITAL LETTER UKRAINIAN IE
REMAP(0x00AB, 0xAB); // LEFT-POINTING DOUBLE ANGLE QUOTATION MARK
REMAP(0x00AC, 0xAC); // NOT SIGN
REMAP(0x00AD, 0xAD); // SOFT HYPHEN
REMAP(0x00AE, 0xAE); // REGISTERED SIGN
REMAP(0x0407, 0xAF); // CYRILLIC CAPITAL LETTER YI
REMAP(0x00B0, 0xB0); // DEGREE SIGN
REMAP(0x00B1, 0xB1); // PLUS-MINUS SIGN
REMAP(0x0406, 0xB2); // CYRILLIC CAPITAL LETTER BYELORUSSIAN-UKRAINIAN I
REMAP(0x0456, 0xB3); // CYRILLIC SMALL LETTER BYELORUSSIAN-UKRAINIAN I
REMAP(0x0491, 0xB4); // CYRILLIC SMALL LETTER GHE WITH UPTURN
REMAP(0x00B5, 0xB5); // MICRO SIGN
REMAP(0x00B6, 0xB6); // PILCROW SIGN
REMAP(0x00B7, 0xB7); // MIDDLE DOT
REMAP(0x0451, 0xB8); // CYRILLIC SMALL LETTER IO
REMAP(0x2116, 0xB9); // NUMERO SIGN
REMAP(0x0454, 0xBA); // CYRILLIC SMALL LETTER UKRAINIAN IE
REMAP(0x00BB, 0xBB); // RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK
REMAP(0x0458, 0xBC); // CYRILLIC SMALL LETTER JE
REMAP(0x0405, 0xBD); // CYRILLIC CAPITAL LETTER DZE
REMAP(0x0455, 0xBE); // CYRILLIC SMALL LETTER DZE
REMAP(0x0457, 0xBF); // CYRILLIC SMALL LETTER YI
REMAP(0x0410, 0xC0); // CYRILLIC CAPITAL LETTER A
REMAP(0x0411, 0xC1); // CYRILLIC CAPITAL LETTER BE
REMAP(0x0412, 0xC2); // CYRILLIC CAPITAL LETTER VE
REMAP(0x0413, 0xC3); // CYRILLIC CAPITAL LETTER GHE
REMAP(0x0414, 0xC4); // CYRILLIC CAPITAL LETTER DE
REMAP(0x0415, 0xC5); // CYRILLIC CAPITAL LETTER IE
REMAP(0x0416, 0xC6); // CYRILLIC CAPITAL LETTER ZHE
REMAP(0x0417, 0xC7); // CYRILLIC CAPITAL LETTER ZE
REMAP(0x0418, 0xC8); // CYRILLIC CAPITAL LETTER I
REMAP(0x0419, 0xC9); // CYRILLIC CAPITAL LETTER SHORT I
REMAP(0x041A, 0xCA); // CYRILLIC CAPITAL LETTER KA
REMAP(0x041B, 0xCB); // CYRILLIC CAPITAL LETTER EL
REMAP(0x041C, 0xCC); // CYRILLIC CAPITAL LETTER EM
REMAP(0x041D, 0xCD); // CYRILLIC CAPITAL LETTER EN
REMAP(0x041E, 0xCE); // CYRILLIC CAPITAL LETTER O
REMAP(0x041F, 0xCF); // CYRILLIC CAPITAL LETTER PE
REMAP(0x0420, 0xD0); // CYRILLIC CAPITAL LETTER ER
REMAP(0x0421, 0xD1); // CYRILLIC CAPITAL LETTER ES
REMAP(0x0422, 0xD2); // CYRILLIC CAPITAL LETTER TE
REMAP(0x0423, 0xD3); // CYRILLIC CAPITAL LETTER U
REMAP(0x0424, 0xD4); // CYRILLIC CAPITAL LETTER EF
REMAP(0x0425, 0xD5); // CYRILLIC CAPITAL LETTER HA
REMAP(0x0426, 0xD6); // CYRILLIC CAPITAL LETTER TSE
REMAP(0x0427, 0xD7); // CYRILLIC CAPITAL LETTER CHE
REMAP(0x0428, 0xD8); // CYRILLIC CAPITAL LETTER SHA
REMAP(0x0429, 0xD9); // CYRILLIC CAPITAL LETTER SHCHA
REMAP(0x042A, 0xDA); // CYRILLIC CAPITAL LETTER HARD SIGN
REMAP(0x042B, 0xDB); // CYRILLIC CAPITAL LETTER YERU
REMAP(0x042C, 0xDC); // CYRILLIC CAPITAL LETTER SOFT SIGN
REMAP(0x042D, 0xDD); // CYRILLIC CAPITAL LETTER E
REMAP(0x042E, 0xDE); // CYRILLIC CAPITAL LETTER YU
REMAP(0x042F, 0xDF); // CYRILLIC CAPITAL LETTER YA
REMAP(0x0430, 0xE0); // CYRILLIC SMALL LETTER A
REMAP(0x0431, 0xE1); // CYRILLIC SMALL LETTER BE
REMAP(0x0432, 0xE2); // CYRILLIC SMALL LETTER VE
REMAP(0x0433, 0xE3); // CYRILLIC SMALL LETTER GHE
REMAP(0x0434, 0xE4); // CYRILLIC SMALL LETTER DE
REMAP(0x0435, 0xE5); // CYRILLIC SMALL LETTER IE
REMAP(0x0436, 0xE6); // CYRILLIC SMALL LETTER ZHE
REMAP(0x0437, 0xE7); // CYRILLIC SMALL LETTER ZE
REMAP(0x0438, 0xE8); // CYRILLIC SMALL LETTER I
REMAP(0x0439, 0xE9); // CYRILLIC SMALL LETTER SHORT I
REMAP(0x043A, 0xEA); // CYRILLIC SMALL LETTER KA
REMAP(0x043B, 0xEB); // CYRILLIC SMALL LETTER EL
REMAP(0x043C, 0xEC); // CYRILLIC SMALL LETTER EM
REMAP(0x043D, 0xED); // CYRILLIC SMALL LETTER EN
REMAP(0x043E, 0xEE); // CYRILLIC SMALL LETTER O
REMAP(0x043F, 0xEF); // CYRILLIC SMALL LETTER PE
REMAP(0x0440, 0xF0); // CYRILLIC SMALL LETTER ER
REMAP(0x0441, 0xF1); // CYRILLIC SMALL LETTER ES
REMAP(0x0442, 0xF2); // CYRILLIC SMALL LETTER TE
REMAP(0x0443, 0xF3); // CYRILLIC SMALL LETTER U
REMAP(0x0444, 0xF4); // CYRILLIC SMALL LETTER EF
REMAP(0x0445, 0xF5); // CYRILLIC SMALL LETTER HA
REMAP(0x0446, 0xF6); // CYRILLIC SMALL LETTER TSE
REMAP(0x0447, 0xF7); // CYRILLIC SMALL LETTER CHE
REMAP(0x0448, 0xF8); // CYRILLIC SMALL LETTER SHA
REMAP(0x0449, 0xF9); // CYRILLIC SMALL LETTER SHCHA
REMAP(0x044A, 0xFA); // CYRILLIC SMALL LETTER HARD SIGN
REMAP(0x044B, 0xFB); // CYRILLIC SMALL LETTER YERU
REMAP(0x044C, 0xFC); // CYRILLIC SMALL LETTER SOFT SIGN
REMAP(0x044D, 0xFD); // CYRILLIC SMALL LETTER E
REMAP(0x044E, 0xFE); // CYRILLIC SMALL LETTER YU
REMAP(0x044F, 0xFF); // CYRILLIC SMALL LETTER YA
}
}
// Latin - Western Europe
// https://www.unicode.org/Public/MAPPINGS/VENDORS/MICSFT/WINDOWS/CP1252.TXT
else if (encoding == WINDOWS_1252) {
// 1-Byte chars: no remapping
if (utf8.length() == 1)
return utf8.at(0);
// Multi-byte chars:
switch (toUtf32(utf8)) {
REMAP(0x20AC, 0x80) // EURO SIGN
REMAP(0x201A, 0x82) // SINGLE LOW-9 QUOTATION MARK
REMAP(0x0192, 0x83) // LATIN SMALL LETTER F WITH HOOK
REMAP(0x201E, 0x84) // DOUBLE LOW-9 QUOTATION MARK
REMAP(0x2026, 0x85) // HORIZONTAL ELLIPSIS
REMAP(0x2020, 0x86) // DAGGER
REMAP(0x2021, 0x87) // DOUBLE DAGGER
REMAP(0x02C6, 0x88) // MODIFIER LETTER CIRCUMFLEX ACCENT
REMAP(0x2030, 0x89) // PER MILLE SIGN
REMAP(0x0160, 0x8A) // LATIN CAPITAL LETTER S WITH CARON
REMAP(0x2039, 0x8B) // SINGLE LEFT-POINTING ANGLE QUOTATION MARK
REMAP(0x0152, 0x8C) // LATIN CAPITAL LIGATURE OE
REMAP(0x017D, 0x8E) // LATIN CAPITAL LETTER Z WITH CARON
REMAP(0x2018, 0x91) // LEFT SINGLE QUOTATION MARK
REMAP(0x2019, 0x92) // RIGHT SINGLE QUOTATION MARK
REMAP(0x201C, 0x93) // LEFT DOUBLE QUOTATION MARK
REMAP(0x201D, 0x94) // RIGHT DOUBLE QUOTATION MARK
REMAP(0x2022, 0x95) // BULLET
REMAP(0x2013, 0x96) // EN DASH
REMAP(0x2014, 0x97) // EM DASH
REMAP(0x02DC, 0x98) // SMALL TILDE
REMAP(0x2122, 0x99) // TRADE MARK SIGN
REMAP(0x0161, 0x9A) // LATIN SMALL LETTER S WITH CARON
REMAP(0x203A, 0x9B) // SINGLE RIGHT-POINTING ANGLE QUOTATION MARK
REMAP(0x0153, 0x9C) // LATIN SMALL LIGATURE OE
REMAP(0x017E, 0x9E) // LATIN SMALL LETTER Z WITH CARON
REMAP(0x0178, 0x9F) // LATIN CAPITAL LETTER Y WITH DIAERESIS
REMAP(0x00A0, 0xA0) // NO-BREAK SPACE
REMAP(0x00A1, 0xA1) // INVERTED EXCLAMATION MARK
REMAP(0x00A2, 0xA2) // CENT SIGN
REMAP(0x00A3, 0xA3) // POUND SIGN
REMAP(0x00A4, 0xA4) // CURRENCY SIGN
REMAP(0x00A5, 0xA5) // YEN SIGN
REMAP(0x00A6, 0xA6) // BROKEN BAR
REMAP(0x00A7, 0xA7) // SECTION SIGN
REMAP(0x00A8, 0xA8) // DIAERESIS
REMAP(0x00A9, 0xA9) // COPYRIGHT SIGN
REMAP(0x00AA, 0xAA) // FEMININE ORDINAL INDICATOR
REMAP(0x00AB, 0xAB) // LEFT-POINTING DOUBLE ANGLE QUOTATION MARK
REMAP(0x00AC, 0xAC) // NOT SIGN
REMAP(0x00AD, 0xAD) // SOFT HYPHEN
REMAP(0x00AE, 0xAE) // REGISTERED SIGN
REMAP(0x00AF, 0xAF) // MACRON
REMAP(0x00B0, 0xB0) // DEGREE SIGN
REMAP(0x00B1, 0xB1) // PLUS-MINUS SIGN
REMAP(0x00B2, 0xB2) // SUPERSCRIPT TWO
REMAP(0x00B3, 0xB3) // SUPERSCRIPT THREE
REMAP(0x00B4, 0xB4) // ACUTE ACCENT
REMAP(0x00B5, 0xB5) // MICRO SIGN
REMAP(0x00B6, 0xB6) // PILCROW SIGN
REMAP(0x00B7, 0xB7) // MIDDLE DOT
REMAP(0x00B8, 0xB8) // CEDILLA
REMAP(0x00B9, 0xB9) // SUPERSCRIPT ONE
REMAP(0x00BA, 0xBA) // MASCULINE ORDINAL INDICATOR
REMAP(0x00BB, 0xBB) // RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK
REMAP(0x00BC, 0xBC) // VULGAR FRACTION ONE QUARTER
REMAP(0x00BD, 0xBD) // VULGAR FRACTION ONE HALF
REMAP(0x00BE, 0xBE) // VULGAR FRACTION THREE QUARTERS
REMAP(0x00BF, 0xBF) // INVERTED QUESTION MARK
REMAP(0x00C0, 0xC0) // LATIN CAPITAL LETTER A WITH GRAVE
REMAP(0x00C1, 0xC1) // LATIN CAPITAL LETTER A WITH ACUTE
REMAP(0x00C2, 0xC2) // LATIN CAPITAL LETTER A WITH CIRCUMFLEX
REMAP(0x00C3, 0xC3) // LATIN CAPITAL LETTER A WITH TILDE
REMAP(0x00C4, 0xC4) // LATIN CAPITAL LETTER A WITH DIAERESIS
REMAP(0x00C5, 0xC5) // LATIN CAPITAL LETTER A WITH RING ABOVE
REMAP(0x00C6, 0xC6) // LATIN CAPITAL LETTER AE
REMAP(0x00C7, 0xC7) // LATIN CAPITAL LETTER C WITH CEDILLA
REMAP(0x00C8, 0xC8) // LATIN CAPITAL LETTER E WITH GRAVE
REMAP(0x00C9, 0xC9) // LATIN CAPITAL LETTER E WITH ACUTE
REMAP(0x00CA, 0xCA) // LATIN CAPITAL LETTER E WITH CIRCUMFLEX
REMAP(0x00CB, 0xCB) // LATIN CAPITAL LETTER E WITH DIAERESIS
REMAP(0x00CC, 0xCC) // LATIN CAPITAL LETTER I WITH GRAVE
REMAP(0x00CD, 0xCD) // LATIN CAPITAL LETTER I WITH ACUTE
REMAP(0x00CE, 0xCE) // LATIN CAPITAL LETTER I WITH CIRCUMFLEX
REMAP(0x00CF, 0xCF) // LATIN CAPITAL LETTER I WITH DIAERESIS
REMAP(0x00D0, 0xD0) // LATIN CAPITAL LETTER ETH
REMAP(0x00D1, 0xD1) // LATIN CAPITAL LETTER N WITH TILDE
REMAP(0x00D2, 0xD2) // LATIN CAPITAL LETTER O WITH GRAVE
REMAP(0x00D3, 0xD3) // LATIN CAPITAL LETTER O WITH ACUTE
REMAP(0x00D4, 0xD4) // LATIN CAPITAL LETTER O WITH CIRCUMFLEX
REMAP(0x00D5, 0xD5) // LATIN CAPITAL LETTER O WITH TILDE
REMAP(0x00D6, 0xD6) // LATIN CAPITAL LETTER O WITH DIAERESIS
REMAP(0x00D7, 0xD7) // MULTIPLICATION SIGN
REMAP(0x00D8, 0xD8) // LATIN CAPITAL LETTER O WITH STROKE
REMAP(0x00D9, 0xD9) // LATIN CAPITAL LETTER U WITH GRAVE
REMAP(0x00DA, 0xDA) // LATIN CAPITAL LETTER U WITH ACUTE
REMAP(0x00DB, 0xDB) // LATIN CAPITAL LETTER U WITH CIRCUMFLEX
REMAP(0x00DC, 0xDC) // LATIN CAPITAL LETTER U WITH DIAERESIS
REMAP(0x00DD, 0xDD) // LATIN CAPITAL LETTER Y WITH ACUTE
REMAP(0x00DE, 0xDE) // LATIN CAPITAL LETTER THORN
REMAP(0x00DF, 0xDF) // LATIN SMALL LETTER SHARP S
REMAP(0x00E0, 0xE0) // LATIN SMALL LETTER A WITH GRAVE
REMAP(0x00E1, 0xE1) // LATIN SMALL LETTER A WITH ACUTE
REMAP(0x00E2, 0xE2) // LATIN SMALL LETTER A WITH CIRCUMFLEX
REMAP(0x00E3, 0xE3) // LATIN SMALL LETTER A WITH TILDE
REMAP(0x00E4, 0xE4) // LATIN SMALL LETTER A WITH DIAERESIS
REMAP(0x00E5, 0xE5) // LATIN SMALL LETTER A WITH RING ABOVE
REMAP(0x00E6, 0xE6) // LATIN SMALL LETTER AE
REMAP(0x00E7, 0xE7) // LATIN SMALL LETTER C WITH CEDILLA
REMAP(0x00E8, 0xE8) // LATIN SMALL LETTER E WITH GRAVE
REMAP(0x00E9, 0xE9) // LATIN SMALL LETTER E WITH ACUTE
REMAP(0x00EA, 0xEA) // LATIN SMALL LETTER E WITH CIRCUMFLEX
REMAP(0x00EB, 0xEB) // LATIN SMALL LETTER E WITH DIAERESIS
REMAP(0x00EC, 0xEC) // LATIN SMALL LETTER I WITH GRAVE
REMAP(0x00ED, 0xED) // LATIN SMALL LETTER I WITH ACUTE
REMAP(0x00EE, 0xEE) // LATIN SMALL LETTER I WITH CIRCUMFLEX
REMAP(0x00EF, 0xEF) // LATIN SMALL LETTER I WITH DIAERESIS
REMAP(0x00F0, 0xF0) // LATIN SMALL LETTER ETH
REMAP(0x00F1, 0xF1) // LATIN SMALL LETTER N WITH TILDE
REMAP(0x00F2, 0xF2) // LATIN SMALL LETTER O WITH GRAVE
REMAP(0x00F3, 0xF3) // LATIN SMALL LETTER O WITH ACUTE
REMAP(0x00F4, 0xF4) // LATIN SMALL LETTER O WITH CIRCUMFLEX
REMAP(0x00F5, 0xF5) // LATIN SMALL LETTER O WITH TILDE
REMAP(0x00F6, 0xF6) // LATIN SMALL LETTER O WITH DIAERESIS
REMAP(0x00F7, 0xF7) // DIVISION SIGN
REMAP(0x00F8, 0xF8) // LATIN SMALL LETTER O WITH STROKE
REMAP(0x00F9, 0xF9) // LATIN SMALL LETTER U WITH GRAVE
REMAP(0x00FA, 0xFA) // LATIN SMALL LETTER U WITH ACUTE
REMAP(0x00FB, 0xFB) // LATIN SMALL LETTER U WITH CIRCUMFLEX
REMAP(0x00FC, 0xFC) // LATIN SMALL LETTER U WITH DIAERESIS
REMAP(0x00FD, 0xFD) // LATIN SMALL LETTER Y WITH ACUTE
REMAP(0x00FE, 0xFE) // LATIN SMALL LETTER THORN
REMAP(0x00FF, 0xFF) // LATIN SMALL LETTER Y WITH DIAERESIS
}
}
else /*ASCII or Unhandled*/ {
if (utf8.length() == 1)
return utf8.at(0);
}
// All single-byte (ASCII) characters should have been handled by now
// Only unhandled multi-byte UTF8 characters should remain
assert(utf8.length() > 1);
// Parse emoji
// Strip emoji modifiers
switch (toUtf32(utf8)) {
REMAP(0x1F44D, 0x01) // 👍 Thumbs Up
REMAP(0x1F44E, 0x02) // 👎 Thumbs Down
REMAP(0x1F60A, 0x03) // 😊 Smiling Face with Smiling Eyes
REMAP(0x1F642, 0x03) // 🙂 Slightly Smiling Face
REMAP(0x1F601, 0x03) // 😁 Grinning Face with Smiling Eye
REMAP(0x1F602, 0x04) // 😂 Face with Tears of Joy
REMAP(0x1F923, 0x04) // 🤣 Rolling on the Floor Laughing
REMAP(0x1F606, 0x04) // 😆 Smiling with Open Mouth and Closed Eyes
REMAP(0x1F44B, 0x05) // 👋 Waving Hand
REMAP(0x02600, 0x06) // ☀ Sun
REMAP(0x1F31E, 0x06) // 🌞 Sun with Face
// 0x07 - Bell character (unused)
REMAP(0x1F327, 0x08) // 🌧️ Cloud with Rain
REMAP(0x02601, 0x09) // ☁️ Cloud
REMAP(0x1F32B, 0x09) // Fog
REMAP(0x1F9E1, 0x0B) // 🧡 Orange Heart
REMAP(0x02763, 0x0B) // ❣ Heart Exclamation
REMAP(0x02764, 0x0B) // ❤ Heart
REMAP(0x1F495, 0x0B) // 💕 Two Hearts
REMAP(0x1F496, 0x0B) // 💖 Sparkling Heart
REMAP(0x1F497, 0x0B) // 💗 Growing Heart
REMAP(0x1F498, 0x0B) // 💘 Heart with Arrow
REMAP(0x1F4A9, 0x0C) // 💩 Pile of Poo
// 0x0D - Carriage return (unused)
REMAP(0x1F514, 0x0E) // 🔔 Bell
REMAP(0x1F62D, 0x0F) // 😭 Loudly Crying Face
REMAP(0x1F622, 0x0F) // 😢 Crying Face
REMAP(0x1F64F, 0x10) // 🙏 Person with Folded Hands
REMAP(0x1F618, 0x11) // 😘 Face Throwing a Kiss
REMAP(0x1F389, 0x12) // 🎉 Party Popper
REMAP(0x1F600, 0x13) // 😀 Grinning Face
REMAP(0x1F603, 0x13) // 😃 Smiling Face with Open Mouth
REMAP(0x1F604, 0x13) // 😄 Smiling Face with Open Mouth and Smiling Eyes
REMAP(0x1F97A, 0x14) // 🥺 Face with Pleading Eyes
REMAP(0x1F605, 0x15) // 😅 Smiling with Sweat
REMAP(0x1F525, 0x16) // 🔥 Fire
REMAP(0x1F926, 0x17) // 🤦 Face Palm
REMAP(0x1F937, 0x18) // 🤷 Shrug
REMAP(0x1F644, 0x19) // 🙄 Face with Rolling Eyes
// 0x1A Substitution (unused)
REMAP(0x1F917, 0x1B) // 🤗 Hugging Face
REMAP(0x1F609, 0x1C) // 😉 Winking Face
REMAP(0x1F61C, 0x1C) // 😜 Face with Stuck-Out Tongue and Winking Eye
REMAP(0x1F60F, 0x1C) // 😏 Smirking Face
REMAP(0x1F914, 0x1D) // 🤔 Thinking Face
REMAP(0x1FAE1, 0x1E) // 🫡 Saluting Face
REMAP(0x1F44C, 0x1F) // 👌 OK Hand Sign
REMAP(0x02755, '!') // ❕
REMAP(0x02757, '!') // ❗
REMAP(0x0203C, '!') // ‼
REMAP(0x02753, '?') // ❓
REMAP(0x02754, '?') // ❔
REMAP(0x02049, '?') // ⁉
// Modifiers (deleted)
REMAP(0x02640, 0x7F) // Gender
REMAP(0x02642, 0x7F)
REMAP(0x1F3FB, 0x7F) // Skin Tones
REMAP(0x1F3FC, 0x7F)
REMAP(0x1F3FD, 0x7F)
REMAP(0x1F3FE, 0x7F)
REMAP(0x1F3FF, 0x7F)
REMAP(0x0FE00, 0x7F) // Variation Selectors
REMAP(0x0FE01, 0x7F)
REMAP(0x0FE02, 0x7F)
REMAP(0x0FE03, 0x7F)
REMAP(0x0FE04, 0x7F)
REMAP(0x0FE05, 0x7F)
REMAP(0x0FE06, 0x7F)
REMAP(0x0FE07, 0x7F)
REMAP(0x0FE08, 0x7F)
REMAP(0x0FE09, 0x7F)
REMAP(0x0FE0A, 0x7F)
REMAP(0x0FE0B, 0x7F)
REMAP(0x0FE0C, 0x7F)
REMAP(0x0FE0D, 0x7F)
REMAP(0x0FE0E, 0x7F)
REMAP(0x0FE0F, 0x7F)
REMAP(0x0200D, 0x7F) // Zero Width Joiner
}
// If not handled, return SUB
return '\x1A';
// Sweep up the syntactic sugar
// Don't want ants in the house
#undef REMAP
}
#endif