[PATCH 3/6] lib/charset: utf8_get() should return error
Heinrich Schuchardt
xypron.glpk at gmx.de
Sat Feb 27 14:08:37 CET 2021
utf8_get() should return an error if hitting an illegal UTF-8 sequence and
not silently convert the input to a question mark.
Correct utf8_get() and its unit test.
console_read_unicode() will now ignore illegal UTF-8 sequences.
Signed-off-by: Heinrich Schuchardt <xypron.glpk at gmx.de>
---
lib/charset.c | 25 ++++++++++++++++---------
test/unicode_ut.c | 7 +++++++
2 files changed, 23 insertions(+), 9 deletions(-)
diff --git a/lib/charset.c b/lib/charset.c
index 1345c8f9f0..946d5ee23e 100644
--- a/lib/charset.c
+++ b/lib/charset.c
@@ -32,7 +32,7 @@ static struct capitalization_table capitalization_table[] =
*
* @read_u8: - stream reader
* @src: - string buffer passed to stream reader, optional
- * Return: - Unicode code point
+ * Return: - Unicode code point, or -1
*/
static int get_code(u8 (*read_u8)(void *data), void *data)
{
@@ -78,7 +78,7 @@ static int get_code(u8 (*read_u8)(void *data), void *data)
}
return ch;
error:
- return '?';
+ return -1;
}
/**
@@ -120,14 +120,21 @@ static u8 read_console(void *data)
int console_read_unicode(s32 *code)
{
- if (!tstc()) {
- /* No input available */
- return 1;
- }
+ for (;;) {
+ s32 c;
- /* Read Unicode code */
- *code = get_code(read_console, NULL);
- return 0;
+ if (!tstc()) {
+ /* No input available */
+ return 1;
+ }
+
+ /* Read Unicode code */
+ c = get_code(read_console, NULL);
+ if (c > 0) {
+ *code = c;
+ return 0;
+ }
+ }
}
s32 utf8_get(const char **src)
diff --git a/test/unicode_ut.c b/test/unicode_ut.c
index 2cc6b5feff..154361aea7 100644
--- a/test/unicode_ut.c
+++ b/test/unicode_ut.c
@@ -52,6 +52,7 @@ static const char d4[] = {0xf0, 0x90, 0x92, 0x8d, 0xf0, 0x90, 0x92, 0x96,
static const char j1[] = {0x6a, 0x31, 0xa1, 0x6c, 0x00};
static const char j2[] = {0x6a, 0x32, 0xc3, 0xc3, 0x6c, 0x00};
static const char j3[] = {0x6a, 0x33, 0xf0, 0x90, 0xf0, 0x00};
+static const char j4[] = {0xa1, 0x00};
static int unicode_test_u16_strlen(struct unit_test_state *uts)
{
@@ -165,6 +166,12 @@ static int unicode_test_utf8_get(struct unit_test_state *uts)
ut_asserteq(0x0001048d, code);
ut_asserteq_ptr(s, d4 + 4);
+ /* Check illegal character */
+ s = j4;
+ code = utf8_get((const char **)&s);
+ ut_asserteq(-1, code);
+ ut_asserteq_ptr(j4 + 1, s);
+
return 0;
}
UNICODE_TEST(unicode_test_utf8_get);
--
2.30.0
More information about the U-Boot
mailing list