[U-Boot] [PATCH 6/8] video/console: Convert UTF-8 codes to CP437 code points
Andre Przywara
andre.przywara at arm.com
Sat Mar 23 01:30:00 UTC 2019
The character set used by U-Boot's built-in fonts is the old "code
page 437" (from the original IBM PC).
However, people would probably expect UTF-8 on a terminal these days; the
UEFI code definitely does.
Provide a conversion routine to convert a UTF-8 byte stream into a CP437
character code. This uses a combination of arrays and switch/case
statements to provide an efficient way of translating the large Unicode
character range to the 8 bits used for CP437.
This fixes UEFI display on the DM_VIDEO console, which was garbled for
any non-ASCII characters, for instance for the block graphic characters
used by Grub to display the menu.
Signed-off-by: Andre Przywara <andre.przywara at arm.com>
---
drivers/video/Makefile | 1 +
drivers/video/utf8_cp437.c | 170 ++++++++++++++++++++++++++++++++++++++
drivers/video/vidconsole-uclass.c | 8 +-
include/video_console.h | 9 ++
4 files changed, 186 insertions(+), 2 deletions(-)
create mode 100644 drivers/video/utf8_cp437.c
diff --git a/drivers/video/Makefile b/drivers/video/Makefile
index 671f037c35..8decf407bb 100644
--- a/drivers/video/Makefile
+++ b/drivers/video/Makefile
@@ -12,6 +12,7 @@ obj-$(CONFIG_CONSOLE_TRUETYPE) += console_truetype.o fonts/
obj-$(CONFIG_DISPLAY) += display-uclass.o
obj-$(CONFIG_DM_VIDEO) += backlight-uclass.o
obj-$(CONFIG_DM_VIDEO) += panel-uclass.o simple_panel.o
+obj-$(CONFIG_DM_VIDEO) += utf8_cp437.o
obj-$(CONFIG_DM_VIDEO) += video-uclass.o vidconsole-uclass.o
obj-$(CONFIG_DM_VIDEO) += video_bmp.o
endif
diff --git a/drivers/video/utf8_cp437.c b/drivers/video/utf8_cp437.c
new file mode 100644
index 0000000000..983da39406
--- /dev/null
+++ b/drivers/video/utf8_cp437.c
@@ -0,0 +1,170 @@
+/*
+ * Convert UTF-8 bytes into a code page 437 character.
+ * Based on the table in the Code_page_437 Wikipedia page.
+ */
+
+#include <stdint.h>
+
+/*
+ * CP437 codes for the Unicode code points 0x00a0..0x00ff. The table has
+ * 96 entries and is indexed with (code - 0xa0) by convert_uc16_to_cp437().
+ * NOTE(review): the many entries holding 7 look like a placeholder glyph
+ * for characters that have no CP437 equivalent - confirm.
+ */
+static uint8_t code_points_00a0[] = {
+ 255, 173, 155, 156, 7, 157, 7, 21,
+ 7, 7, 166, 174, 170, 7, 7, 7,
+ 248, 241, 253, 7, 7, 230, 20, 250,
+ 7, 7, 167, 175, 172, 171, 7, 168,
+ 7, 7, 7, 7, 142, 143, 146, 128,
+ 7, 144, 7, 7, 7, 7, 7, 7,
+ 7, 165, 7, 7, 7, 7, 153, 7,
+ 7, 7, 7, 7, 154, 7, 7, 225,
+ 133, 160, 131, 7, 132, 134, 145, 135,
+ 138, 130, 136, 137, 141, 161, 140, 139,
+ 7, 164, 149, 162, 147, 7, 148, 246,
+ 7, 151, 163, 150, 129, 7, 7, 152,
+};
+
+/*
+ * CP437 codes for the Unicode code points 0x2550..0x256c (29 entries).
+ * Indexed with (code - 0x2550) by convert_uc16_to_cp437(), which labels
+ * this range "block graphics".
+ */
+static uint8_t code_points_2550[] = {
+ 205, 186, 213, 214, 201, 184, 183, 187,
+ 212, 211, 200, 190, 189, 188, 198, 199,
+ 204, 181, 182, 185, 209, 210, 203, 207,
+ 208, 202, 216, 215, 206
+};
+
+/*
+ * Translate the individually listed code points below 0x800 to their
+ * CP437 code. convert_uc16_to_cp437() calls this for codes in the range
+ * 0x100..0x7ff.
+ *
+ * Returns the CP437 code, or 0 if the code point is not listed.
+ */
+static uint8_t utf8_convert_11bit(uint16_t code)
+{
+ switch (code) {
+ case 0x0192: return 159;
+ case 0x0393: return 226;
+ case 0x0398: return 233;
+ case 0x03A3: return 228;
+ case 0x03A6: return 232;
+ case 0x03A9: return 234;
+ case 0x03B1: return 224;
+ case 0x03B4: return 235;
+ case 0x03B5: return 238;
+ case 0x03C0: return 227;
+ case 0x03C3: return 229;
+ case 0x03C4: return 231;
+ case 0x03C6: return 237;
+ }
+
+ /* no CP437 equivalent listed above */
+ return 0;
+/* NOTE(review): the ';' after the closing brace is a stray empty declaration. */
+};
+
+/*
+ * Translate the individually listed code points between 0x2022 and 0x266b
+ * to their CP437 code. convert_uc16_to_cp437() calls this for every code
+ * >= 0x800 that is not covered by the code_points_2550 table.
+ *
+ * Returns the CP437 code, or 0 if the code point is not listed.
+ */
+static uint8_t utf8_convert_2xxx(uint16_t code)
+{
+ switch (code) {
+ case 0x2022: return 7;
+ case 0x203C: return 19;
+ case 0x207F: return 252;
+ case 0x20A7: return 158;
+ case 0x2190: return 27;
+ case 0x2191: return 24;
+ case 0x2192: return 26;
+ case 0x2193: return 25;
+ case 0x2194: return 29;
+ case 0x2195: return 18;
+ case 0x21A8: return 23;
+ case 0x2219: return 249;
+ case 0x221A: return 251;
+ case 0x221E: return 236;
+ case 0x221F: return 28;
+ case 0x2229: return 239;
+ case 0x2248: return 247;
+ case 0x2261: return 240;
+ case 0x2264: return 243;
+ case 0x2265: return 242;
+ case 0x2310: return 169;
+ case 0x2320: return 244;
+ case 0x2321: return 245;
+ case 0x2500: return 196;
+ case 0x2502: return 179;
+ case 0x250C: return 218;
+ case 0x2510: return 191;
+ case 0x2514: return 192;
+ case 0x2518: return 217;
+ case 0x251C: return 195;
+ case 0x2524: return 180;
+ case 0x252C: return 194;
+ case 0x2534: return 193;
+ case 0x253C: return 197;
+ case 0x2580: return 223;
+ case 0x2584: return 220;
+ case 0x2588: return 219;
+ case 0x258C: return 221;
+ case 0x2590: return 222;
+ case 0x2591: return 176;
+ case 0x2592: return 177;
+ case 0x2593: return 178;
+ case 0x25A0: return 254;
+ case 0x25AC: return 22;
+ case 0x25B2: return 30;
+ case 0x25BA: return 16;
+ case 0x25BC: return 31;
+ case 0x25C4: return 17;
+ case 0x25CB: return 9;
+ case 0x25D8: return 8;
+ case 0x25D9: return 10;
+ case 0x263A: return 1;
+ case 0x263B: return 2;
+ case 0x263C: return 15;
+ case 0x2640: return 12;
+ case 0x2642: return 11;
+ case 0x2660: return 6;
+ case 0x2663: return 5;
+ case 0x2665: return 3;
+ case 0x2666: return 4;
+ case 0x266A: return 13;
+ case 0x266B: return 14;
+ }
+
+ /* no CP437 equivalent listed above */
+ return 0;
+}
+
+/*
+ * Translate a 16-bit Unicode code point to its CP437 code.
+ *
+ * Codes below 0xa0 are passed through unchanged; 0xa0..0xff and
+ * 0x2550..0x256c are looked up in tables; everything else goes through
+ * the switch-based helpers, which return 0 for unlisted code points.
+ */
+uint8_t convert_uc16_to_cp437(uint16_t code)
+{
+	/* ASCII and the high control characters map onto themselves */
+	if (code < 0xa0)
+		return code;
+
+	/* international characters */
+	if (code <= 0xff)
+		return code_points_00a0[code - 0xa0];
+
+	/* remaining code points that fit into a two-byte UTF-8 sequence */
+	if (code <= 0x7ff)
+		return utf8_convert_11bit(code);
+
+	/* everything outside the contiguous block graphics range */
+	if (code < 0x2550 || code > 0x256c)
+		return utf8_convert_2xxx(code);
+
+	/* block graphics */
+	return code_points_2550[code - 0x2550];
+}
+
+/*
+ * Feed one byte of a UTF-8 stream into the decoder and translate each
+ * completed character to code page 437.
+ *
+ * @c:   next byte of the UTF-8 stream
+ * @esc: decoder state kept between calls. Bits 31..24 hold the number of
+ *       continuation bytes still expected, bits 23..0 the code point bits
+ *       collected so far. Should be 0 when no sequence is in progress.
+ *
+ * Returns the CP437 code of the completed character, or 0 if more bytes
+ * are needed, the byte is not valid at this point of the stream, or the
+ * character has no CP437 mapping.
+ */
+uint8_t convert_utf8_to_cp437(uint8_t c, uint32_t *esc)
+{
+	uint32_t pending;
+	uint32_t ucp;
+
+	if (c < 127)		// ASCII
+		return c;
+	if (c == 127)
+		return 8;	// DEL (?)
+
+	if (c >= 0xf8) {	// 0xf8..0xff never occur in UTF-8
+		*esc = 0;
+		return 0;
+	}
+	if (c >= 0xf0) {	// four bytes sequence
+		*esc = (3U << 24) | ((uint32_t)(c & 0x07) << 18);
+		return 0;
+	}
+	if (c >= 0xe0) {	// three bytes sequence
+		*esc = (2U << 24) | ((uint32_t)(c & 0x0f) << 12);
+		return 0;
+	}
+	if (c >= 0xc0) {	// two bytes sequence
+		*esc = (1U << 24) | ((uint32_t)(c & 0x1f) << 6);
+		return 0;
+	}
+
+	// 0x80..0xbf: continuation byte
+	pending = *esc >> 24;
+	if (pending == 0 || pending > 3) {
+		/*
+		 * No lead byte seen (or corrupt state): drop the byte instead
+		 * of shifting by a negative or oversized amount.
+		 */
+		*esc = 0;
+		return 0;
+	}
+	pending--;
+	ucp = (*esc & 0xffffff) | ((uint32_t)(c & 0x3f) << (pending * 6));
+	if (pending) {		// more continuation bytes to come
+		*esc = (pending << 24) | ucp;
+		return 0;
+	}
+	*esc = 0;
+
+	/*
+	 * CP437 has no characters outside the 16-bit range; do not let
+	 * larger code points be truncated into an unrelated character.
+	 */
+	if (ucp > 0xffff)
+		return 0;
+
+	return convert_uc16_to_cp437(ucp);
+}
diff --git a/drivers/video/vidconsole-uclass.c b/drivers/video/vidconsole-uclass.c
index e16567029a..275c6c05c8 100644
--- a/drivers/video/vidconsole-uclass.c
+++ b/drivers/video/vidconsole-uclass.c
@@ -457,7 +457,7 @@ error:
priv->escape = 0;
}
-/* Put that actual character on the screen (using the CP437 code page). */
+/* Put that actual character on the screen (using the font native code page). */
static int vidconsole_output_glyph(struct udevice *dev, char ch)
{
struct vidconsole_priv *priv = dev_get_uclass_priv(dev);
@@ -486,6 +486,7 @@ static int vidconsole_output_glyph(struct udevice *dev, char ch)
int vidconsole_put_char(struct udevice *dev, char ch)
{
struct vidconsole_priv *priv = dev_get_uclass_priv(dev);
+ uint8_t glyph_idx;
int ret;
if (priv->escape) {
@@ -520,7 +521,10 @@ int vidconsole_put_char(struct udevice *dev, char ch)
priv->last_ch = 0;
break;
default:
- ret = vidconsole_output_glyph(dev, ch);
+ glyph_idx = convert_utf8_to_cp437(ch, &priv->ucs);
+ if (glyph_idx == 0) /* UTF-8 continuation */
+ return 0;
+ ret = vidconsole_output_glyph(dev, glyph_idx);
if (ret < 0)
return ret;
break;
diff --git a/include/video_console.h b/include/video_console.h
index 52a41ac200..07e5fd0226 100644
--- a/include/video_console.h
+++ b/include/video_console.h
@@ -81,6 +81,7 @@ struct vidconsole_priv {
int escape_len;
int row_saved;
int col_saved;
+ u32 ucs;
char escape_buf[32];
};
@@ -240,6 +241,14 @@ void vidconsole_position_cursor(struct udevice *dev, unsigned col,
*/
u32 vid_console_color(struct video_priv *priv, unsigned int idx);
+/*
+ * Convert a UTF-8 byte into the corresponding character in the CP437
+ * code page. Returns 0 if that byte is part of a multi-byte sequence
+ * that is not complete yet; *esc holds the state of that sequence.
+ * Repeatedly feed in more bytes until the return value is not 0 anymore.
+ */
+uint8_t convert_utf8_to_cp437(uint8_t c, uint32_t *esc);
+
#endif
#endif
--
2.14.5
More information about the U-Boot
mailing list