[U-Boot] [PATCH 6/8] video/console: Convert UTF-8 codes to CP437 code points

Andre Przywara andre.przywara at arm.com
Sat Mar 23 01:30:00 UTC 2019


The character set used by U-Boot's built-in fonts is the old "code
page 437" (from the original IBM PC).
However, people would probably expect UTF-8 on a terminal these days; the
UEFI code definitely does.

Provide a conversion routine to convert a UTF-8 byte stream into a CP437
character code. This uses a combination of arrays and switch/case
statements to provide an efficient way of translating the large Unicode
character range to the 8 bits used for CP437.

This fixes the UEFI display on the DM_VIDEO console, which was garbled for
any non-ASCII characters, for instance the block graphic characters
used by GRUB to display the menu.

Signed-off-by: Andre Przywara <andre.przywara at arm.com>
---
 drivers/video/Makefile            |   1 +
 drivers/video/utf8_cp437.c        | 170 ++++++++++++++++++++++++++++++++++++++
 drivers/video/vidconsole-uclass.c |   8 +-
 include/video_console.h           |   9 ++
 4 files changed, 186 insertions(+), 2 deletions(-)
 create mode 100644 drivers/video/utf8_cp437.c

diff --git a/drivers/video/Makefile b/drivers/video/Makefile
index 671f037c35..8decf407bb 100644
--- a/drivers/video/Makefile
+++ b/drivers/video/Makefile
@@ -12,6 +12,7 @@ obj-$(CONFIG_CONSOLE_TRUETYPE) += console_truetype.o fonts/
 obj-$(CONFIG_DISPLAY) += display-uclass.o
 obj-$(CONFIG_DM_VIDEO) += backlight-uclass.o
 obj-$(CONFIG_DM_VIDEO) += panel-uclass.o simple_panel.o
+obj-$(CONFIG_DM_VIDEO) += utf8_cp437.o
 obj-$(CONFIG_DM_VIDEO) += video-uclass.o vidconsole-uclass.o
 obj-$(CONFIG_DM_VIDEO) += video_bmp.o
 endif
diff --git a/drivers/video/utf8_cp437.c b/drivers/video/utf8_cp437.c
new file mode 100644
index 0000000000..983da39406
--- /dev/null
+++ b/drivers/video/utf8_cp437.c
@@ -0,0 +1,170 @@
+/*
+ * Convert UTF-8 bytes into a code page 437 character.
+ * Based on the table in the Code_page_437 Wikipedia page.
+ */
+
+#include <stdint.h>
+
+static uint8_t code_points_00a0[] = {
+	255, 173, 155, 156,   7, 157,   7,  21,
+	  7,   7, 166, 174, 170,   7,   7,   7,
+	248, 241, 253,   7,   7, 230,  20, 250,
+	  7,   7, 167, 175, 172, 171,   7, 168,
+	  7,   7,   7,   7, 142, 143, 146, 128,
+	  7, 144,   7,   7,   7,   7,   7,   7,
+	  7, 165,   7,   7,   7,   7, 153,   7,
+	  7,   7,   7,   7, 154,   7,   7, 225,
+	133, 160, 131,   7, 132, 134, 145, 135,
+	138, 130, 136, 137, 141, 161, 140, 139,
+	  7, 164, 149, 162, 147,   7, 148, 246,
+	  7, 151, 163, 150, 129,   7,   7, 152,
+};
+
+static uint8_t code_points_2550[] = {
+	205, 186, 213, 214, 201, 184, 183, 187,
+	212, 211, 200, 190, 189, 188, 198, 199,
+	204, 181, 182, 185, 209, 210, 203, 207,
+	208, 202, 216, 215, 206
+};
+
+static uint8_t utf8_convert_11bit(uint16_t code)
+{
+	switch (code) {
+	case 0x0192: return 159;
+	case 0x0393: return 226;
+	case 0x0398: return 233;
+	case 0x03A3: return 228;
+	case 0x03A6: return 232;
+	case 0x03A9: return 234;
+	case 0x03B1: return 224;
+	case 0x03B4: return 235;
+	case 0x03B5: return 238;
+	case 0x03C0: return 227;
+	case 0x03C3: return 229;
+	case 0x03C4: return 231;
+	case 0x03C6: return 237;
+	}
+
+	return 0;
+};
+
+static uint8_t utf8_convert_2xxx(uint16_t code)
+{
+	switch (code) {
+	case 0x2022: return 7;
+	case 0x203C: return 19;
+	case 0x207F: return 252;
+	case 0x20A7: return 158;
+	case 0x2190: return 27;
+	case 0x2191: return 24;
+	case 0x2192: return 26;
+	case 0x2193: return 25;
+	case 0x2194: return 29;
+	case 0x2195: return 18;
+	case 0x21A8: return 23;
+	case 0x2219: return 249;
+	case 0x221A: return 251;
+	case 0x221E: return 236;
+	case 0x221F: return 28;
+	case 0x2229: return 239;
+	case 0x2248: return 247;
+	case 0x2261: return 240;
+	case 0x2264: return 243;
+	case 0x2265: return 242;
+	case 0x2310: return 169;
+	case 0x2320: return 244;
+	case 0x2321: return 245;
+	case 0x2500: return 196;
+	case 0x2502: return 179;
+	case 0x250C: return 218;
+	case 0x2510: return 191;
+	case 0x2514: return 192;
+	case 0x2518: return 217;
+	case 0x251C: return 195;
+	case 0x2524: return 180;
+	case 0x252C: return 194;
+	case 0x2534: return 193;
+	case 0x253C: return 197;
+	case 0x2580: return 223;
+	case 0x2584: return 220;
+	case 0x2588: return 219;
+	case 0x258C: return 221;
+	case 0x2590: return 222;
+	case 0x2591: return 176;
+	case 0x2592: return 177;
+	case 0x2593: return 178;
+	case 0x25A0: return 254;
+	case 0x25AC: return 22;
+	case 0x25B2: return 30;
+	case 0x25BA: return 16;
+	case 0x25BC: return 31;
+	case 0x25C4: return 17;
+	case 0x25CB: return 9;
+	case 0x25D8: return 8;
+	case 0x25D9: return 10;
+	case 0x263A: return 1;
+	case 0x263B: return 2;
+	case 0x263C: return 15;
+	case 0x2640: return 12;
+	case 0x2642: return 11;
+	case 0x2660: return 6;
+	case 0x2663: return 5;
+	case 0x2665: return 3;
+	case 0x2666: return 4;
+	case 0x266A: return 13;
+	case 0x266B: return 14;
+	}
+
+	return 0;
+}
+
+uint8_t convert_uc16_to_cp437(uint16_t code)
+{
+	if (code < 0x7f)		// ASCII
+		return code;
+	if (code < 0xa0)		// high control characters
+		return code;
+	if (code < 0x100)		// international characters
+		return code_points_00a0[code - 0xa0];
+	if (code < 0x800)
+		return utf8_convert_11bit(code);
+	if (code >= 0x2550 && code < 0x256d)	// block graphics
+		return code_points_2550[code - 0x2550];
+
+	return utf8_convert_2xxx(code);
+}
+
+uint8_t convert_utf8_to_cp437(uint8_t c, uint32_t *esc)
+{
+	int shift;
+	uint32_t ucp;
+
+	if (c < 127)			// ASCII
+		return c;
+	if (c == 127)
+		return 8;		// DEL (?)
+
+	switch (c & 0xf0) {
+	case 0xc0: case 0xd0:		// two bytes sequence
+		*esc = (1U << 24) | ((c & 0x1f) << 6);
+		return 0;
+	case 0xe0:			// three bytes sequence
+		*esc = (2U << 24) | ((c & 0x0f) << 12);
+		return 0;
+	case 0xf0:			// four bytes sequence
+		*esc = (3U << 24) | ((c & 0x07) << 18);
+		return 0;
+	case 0x80: case 0x90: case 0xa0: case 0xb0:	// continuation
+		shift = (*esc >> 24) - 1;
+		ucp = *esc & 0xffffff;
+		if (shift) {
+			*esc = (shift << 24) | ucp | (c & 0x3f) << (shift * 6);
+			return 0;
+		}
+		*esc = 0;
+
+		return convert_uc16_to_cp437(ucp | (c & 0x3f));
+	}
+
+	return 0;
+}
diff --git a/drivers/video/vidconsole-uclass.c b/drivers/video/vidconsole-uclass.c
index e16567029a..275c6c05c8 100644
--- a/drivers/video/vidconsole-uclass.c
+++ b/drivers/video/vidconsole-uclass.c
@@ -457,7 +457,7 @@ error:
 	priv->escape = 0;
 }
 
-/* Put that actual character on the screen (using the CP437 code page). */
+/* Put that actual character on the screen (using the font native code page). */
 static int vidconsole_output_glyph(struct udevice *dev, char ch)
 {
 	struct vidconsole_priv *priv = dev_get_uclass_priv(dev);
@@ -486,6 +486,7 @@ static int vidconsole_output_glyph(struct udevice *dev, char ch)
 int vidconsole_put_char(struct udevice *dev, char ch)
 {
 	struct vidconsole_priv *priv = dev_get_uclass_priv(dev);
+	uint8_t glyph_idx;
 	int ret;
 
 	if (priv->escape) {
@@ -520,7 +521,10 @@ int vidconsole_put_char(struct udevice *dev, char ch)
 		priv->last_ch = 0;
 		break;
 	default:
-		ret = vidconsole_output_glyph(dev, ch);
+		glyph_idx = convert_utf8_to_cp437(ch, &priv->ucs);
+		if (glyph_idx == 0)	/* UTF-8 continuation */
+			return 0;
+		ret = vidconsole_output_glyph(dev, glyph_idx);
 		if (ret < 0)
 			return ret;
 		break;
diff --git a/include/video_console.h b/include/video_console.h
index 52a41ac200..07e5fd0226 100644
--- a/include/video_console.h
+++ b/include/video_console.h
@@ -81,6 +81,7 @@ struct vidconsole_priv {
 	int escape_len;
 	int row_saved;
 	int col_saved;
+	u32 ucs;
 	char escape_buf[32];
 };
 
@@ -240,6 +241,14 @@ void vidconsole_position_cursor(struct udevice *dev, unsigned col,
  */
 u32 vid_console_color(struct video_priv *priv, unsigned int idx);
 
+/*
+ * Convert a UTF-8 byte into the corresponding character in the CP437
+ * code page. Returns 0 if that byte is part of a multi-byte sequence,
+ * the state of which is held in *esc. Repeatedly feed in more bytes until
+ * the return value is not 0 anymore.
+ */
+uint8_t convert_utf8_to_cp437(uint8_t c, uint32_t *esc);
+
 #endif
 
 #endif
-- 
2.14.5



More information about the U-Boot mailing list