From 997b57fc1a375811af84bc1d18a5e59957b2a937 Mon Sep 17 00:00:00 2001 From: Heinrich Schuchardt Date: Sun, 28 Feb 2021 18:17:28 +0100 Subject: [PATCH 1/6] efi_loader: console size of vidconsole If stdout is 'vidconsole', we correctly set the console size. If stdout is 'vidconsole,serial', the video console is ignored. We should always evaluate the size of vidconsole if it is the primary console. Signed-off-by: Heinrich Schuchardt --- lib/efi_loader/efi_console.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/efi_loader/efi_console.c b/lib/efi_loader/efi_console.c index edcfce7bec..c4003554c2 100644 --- a/lib/efi_loader/efi_console.c +++ b/lib/efi_loader/efi_console.c @@ -311,7 +311,7 @@ static void query_console_size(void) const char *stdout_name = env_get("stdout"); int rows = 25, cols = 80; - if (stdout_name && !strcmp(stdout_name, "vidconsole") && + if (stdout_name && !strncmp(stdout_name, "vidconsole", 10) && IS_ENABLED(CONFIG_DM_VIDEO)) { struct stdio_dev *stdout_dev = stdio_get_by_name("vidconsole"); From 70616a1ed8c7fe22aa19eb674915623bd236926f Mon Sep 17 00:00:00 2001 From: Heinrich Schuchardt Date: Sat, 27 Feb 2021 14:08:35 +0100 Subject: [PATCH 2/6] efi_loader: move codepage 437 table Move the Unicode to codepage 437 table to charset.c Signed-off-by: Heinrich Schuchardt --- include/charset.h | 5 +++++ lib/charset.c | 6 ++++++ lib/efi_loader/efi_unicode_collation.c | 2 +- 3 files changed, 12 insertions(+), 1 deletion(-) diff --git a/include/charset.h b/include/charset.h index cc650a2ce7..64ba91f791 100644 --- a/include/charset.h +++ b/include/charset.h @@ -13,6 +13,11 @@ #define MAX_UTF8_PER_UTF16 3 +/** + * codepage_437 - Unicode to codepage 437 translation table + */ +extern const u16 codepage_437[128]; + /** * console_read_unicode() - read Unicode code point from console * diff --git a/lib/charset.c b/lib/charset.c index 2177014ee1..814847d165 100644 --- a/lib/charset.c +++ b/lib/charset.c @@ -8,9 +8,15 @@ #include #include 
#include +#include #include #include +/** + * codepage_437 - Unicode to codepage 437 translation table + */ +const u16 codepage_437[128] = CP437; + static struct capitalization_table capitalization_table[] = #ifdef CONFIG_EFI_UNICODE_CAPITALIZATION UNICODE_CAPITALIZATION_TABLE; diff --git a/lib/efi_loader/efi_unicode_collation.c b/lib/efi_loader/efi_unicode_collation.c index f6c875bc33..bf5314c4ff 100644 --- a/lib/efi_loader/efi_unicode_collation.c +++ b/lib/efi_loader/efi_unicode_collation.c @@ -23,7 +23,7 @@ static const char illegal[] = "+,<=>:;\"/\\|?*[]\x7f"; static const u16 codepage[] = CP1250; #else /* Unicode code points for code page 437 characters 0x80 - 0xff */ -static const u16 codepage[] = CP437; +static const u16 *codepage = codepage_437; #endif /* GUID of the EFI_UNICODE_COLLATION_PROTOCOL2 */ From 73bb90cabcdffcd528d1002a12779779196bf200 Mon Sep 17 00:00:00 2001 From: Heinrich Schuchardt Date: Sat, 27 Feb 2021 14:08:36 +0100 Subject: [PATCH 3/6] efi_loader: carve out utf_to_cp() Carve out a function to translate a Unicode code point to an 8bit codepage. Provide a unit test for the new function. Signed-off-by: Heinrich Schuchardt --- include/charset.h | 11 ++++++++++ lib/charset.c | 28 +++++++++++++++++++++++++ lib/efi_loader/efi_unicode_collation.c | 19 +++-------------- test/unicode_ut.c | 29 ++++++++++++++++++++++++++ 4 files changed, 71 insertions(+), 16 deletions(-) diff --git a/include/charset.h b/include/charset.h index 64ba91f791..52e7d1474e 100644 --- a/include/charset.h +++ b/include/charset.h @@ -275,4 +275,15 @@ u16 *u16_strdup(const void *src); */ uint8_t *utf16_to_utf8(uint8_t *dest, const uint16_t *src, size_t size); +/** + * utf_to_cp() - translate Unicode code point to 8bit codepage + * + * Codepoints that do not exist in the codepage are rendered as question mark. 
+ * + * @c: pointer to Unicode code point to be translated + * @codepage: Unicode to codepage translation table + * Return: 0 on success, -ENOENT if codepoint cannot be translated + */ +int utf_to_cp(s32 *c, const u16 *codepage); + #endif /* __CHARSET_H_ */ diff --git a/lib/charset.c b/lib/charset.c index 814847d165..1345c8f9f0 100644 --- a/lib/charset.c +++ b/lib/charset.c @@ -10,6 +10,7 @@ #include #include #include +#include #include /** @@ -472,3 +473,30 @@ uint8_t *utf16_to_utf8(uint8_t *dest, const uint16_t *src, size_t size) return dest; } + +/** + * utf_to_cp() - translate Unicode code point to 8bit codepage + * + * Codepoints that do not exist in the codepage are rendered as question mark. + * + * @c: pointer to Unicode code point to be translated + * @codepage: Unicode to codepage translation table + * Return: 0 on success, -ENOENT if codepoint cannot be translated + */ +int utf_to_cp(s32 *c, const u16 *codepage) +{ + if (*c >= 0x80) { + int j; + + /* Look up codepage translation */ + for (j = 0; j < 0x80; ++j) { + if (*c == codepage[j]) { + *c = j + 0x80; + return 0; + } + } + *c = '?'; + return -ENOENT; + } + return 0; +} diff --git a/lib/efi_loader/efi_unicode_collation.c b/lib/efi_loader/efi_unicode_collation.c index bf5314c4ff..36be798f64 100644 --- a/lib/efi_loader/efi_unicode_collation.c +++ b/lib/efi_loader/efi_unicode_collation.c @@ -300,23 +300,10 @@ static bool EFIAPI efi_str_to_fat(struct efi_unicode_collation_protocol *this, break; } c = utf_to_upper(c); - if (c >= 0x80) { - int j; - - /* Look for codepage translation */ - for (j = 0; j < 0x80; ++j) { - if (c == codepage[j]) { - c = j + 0x80; - break; - } - } - if (j >= 0x80) { - c = '_'; - ret = true; - } - } else if (c && (c < 0x20 || strchr(illegal, c))) { - c = '_'; + if (utf_to_cp(&c, codepage) || + (c && (c < 0x20 || strchr(illegal, c)))) { ret = true; + c = '_'; } fat[i] = c; diff --git a/test/unicode_ut.c b/test/unicode_ut.c index 6130ef0b54..2cc6b5feff 100644 --- a/test/unicode_ut.c 
+++ b/test/unicode_ut.c @@ -595,6 +595,35 @@ static int unicode_test_u16_strsize(struct unit_test_state *uts) } UNICODE_TEST(unicode_test_u16_strsize); +static int unicode_test_utf_to_cp(struct unit_test_state *uts) +{ + int ret; + s32 c; + + c = '\n'; + ret = utf_to_cp(&c, codepage_437); + ut_asserteq(0, ret); + ut_asserteq('\n', c); + + c = 'a'; + ret = utf_to_cp(&c, codepage_437); + ut_asserteq(0, ret); + ut_asserteq('a', c); + + c = 0x03c4; /* Greek small letter tau */ + ret = utf_to_cp(&c, codepage_437); + ut_asserteq(0, ret); + ut_asserteq(0xe7, c); + + c = 0x03a4; /* Greek capital letter tau */ + ret = utf_to_cp(&c, codepage_437); + ut_asserteq(-ENOENT, ret); + ut_asserteq('?', c); + + return 0; +} +UNICODE_TEST(unicode_test_utf_to_cp); + #ifdef CONFIG_EFI_LOADER static int unicode_test_efi_create_indexed_name(struct unit_test_state *uts) { From ddbaff53da5b99563fa371db0b09544e139fdabb Mon Sep 17 00:00:00 2001 From: Heinrich Schuchardt Date: Sat, 27 Feb 2021 14:08:37 +0100 Subject: [PATCH 4/6] lib/charset: utf8_get() should return error utf8_get() should return an error if hitting an illegal UTF-8 sequence and not silently convert the input to a question mark. Correct utf8_get() and its unit test. console_read_unicode() now will ignore illegal UTF-8 sequences. 
Signed-off-by: Heinrich Schuchardt --- lib/charset.c | 25 ++++++++++++++++--------- test/unicode_ut.c | 7 +++++++ 2 files changed, 23 insertions(+), 9 deletions(-) diff --git a/lib/charset.c b/lib/charset.c index 1345c8f9f0..946d5ee23e 100644 --- a/lib/charset.c +++ b/lib/charset.c @@ -32,7 +32,7 @@ static struct capitalization_table capitalization_table[] = * * @read_u8: - stream reader * @src: - string buffer passed to stream reader, optional - * Return: - Unicode code point + * Return: - Unicode code point, or -1 */ static int get_code(u8 (*read_u8)(void *data), void *data) { @@ -78,7 +78,7 @@ static int get_code(u8 (*read_u8)(void *data), void *data) } return ch; error: - return '?'; + return -1; } /** @@ -120,14 +120,21 @@ static u8 read_console(void *data) int console_read_unicode(s32 *code) { - if (!tstc()) { - /* No input available */ - return 1; - } + for (;;) { + s32 c; - /* Read Unicode code */ - *code = get_code(read_console, NULL); - return 0; + if (!tstc()) { + /* No input available */ + return 1; + } + + /* Read Unicode code */ + c = get_code(read_console, NULL); + if (c > 0) { + *code = c; + return 0; + } + } } s32 utf8_get(const char **src) diff --git a/test/unicode_ut.c b/test/unicode_ut.c index 2cc6b5feff..154361aea7 100644 --- a/test/unicode_ut.c +++ b/test/unicode_ut.c @@ -52,6 +52,7 @@ static const char d4[] = {0xf0, 0x90, 0x92, 0x8d, 0xf0, 0x90, 0x92, 0x96, static const char j1[] = {0x6a, 0x31, 0xa1, 0x6c, 0x00}; static const char j2[] = {0x6a, 0x32, 0xc3, 0xc3, 0x6c, 0x00}; static const char j3[] = {0x6a, 0x33, 0xf0, 0x90, 0xf0, 0x00}; +static const char j4[] = {0xa1, 0x00}; static int unicode_test_u16_strlen(struct unit_test_state *uts) { @@ -165,6 +166,12 @@ static int unicode_test_utf8_get(struct unit_test_state *uts) ut_asserteq(0x0001048d, code); ut_asserteq_ptr(s, d4 + 4); + /* Check illegal character */ + s = j4; + code = utf8_get((const char **)&s); + ut_asserteq(-1, code); + ut_asserteq_ptr(j4 + 1, s); + return 0; } 
UNICODE_TEST(unicode_test_utf8_get); From e91789e2f6611c0d7f3510691c154e524e7cfa43 Mon Sep 17 00:00:00 2001 From: Heinrich Schuchardt Date: Sat, 27 Feb 2021 14:08:38 +0100 Subject: [PATCH 5/6] lib/charset: UTF-8 stream conversion Provide functions to convert a UTF-8 stream to code page 437 or UTF-32. Add unit tests. Signed-off-by: Heinrich Schuchardt --- include/charset.h | 18 +++++++++++ lib/charset.c | 55 +++++++++++++++++++++++++++------ test/unicode_ut.c | 78 +++++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 142 insertions(+), 9 deletions(-) diff --git a/include/charset.h b/include/charset.h index 52e7d1474e..a911160f19 100644 --- a/include/charset.h +++ b/include/charset.h @@ -286,4 +286,22 @@ uint8_t *utf16_to_utf8(uint8_t *dest, const uint16_t *src, size_t size); */ int utf_to_cp(s32 *c, const u16 *codepage); +/** + * utf8_to_cp437_stream() - convert UTF-8 stream to codepage 437 + * + * @c: next UTF-8 character to convert + * @buffer: buffer, at least 5 characters + * Return: next codepage 437 character or 0 + */ +int utf8_to_cp437_stream(u8 c, char *buffer); + +/** + * utf8_to_utf32_stream() - convert UTF-8 stream to UTF-32 + * + * @c: next UTF-8 character to convert + * @buffer: buffer, at least 5 characters + * Return: next UTF-32 code point or 0 + */ +int utf8_to_utf32_stream(u8 c, char *buffer); + #endif /* __CHARSET_H_ */ diff --git a/lib/charset.c b/lib/charset.c index 946d5ee23e..f44c58d9d8 100644 --- a/lib/charset.c +++ b/lib/charset.c @@ -481,15 +481,6 @@ uint8_t *utf16_to_utf8(uint8_t *dest, const uint16_t *src, size_t size) return dest; } -/** - * utf_to_cp() - translate Unicode code point to 8bit codepage - * - * Codepoints that do not exist in the codepage are rendered as question mark. 
- * - * @c: pointer to Unicode code point to be translated - * @codepage: Unicode to codepage translation table - * Return: 0 on success, -ENOENT if codepoint cannot be translated - */ int utf_to_cp(s32 *c, const u16 *codepage) { if (*c >= 0x80) { @@ -507,3 +498,49 @@ int utf_to_cp(s32 *c, const u16 *codepage) } return 0; } + +int utf8_to_cp437_stream(u8 c, char *buffer) +{ + char *end; + const char *pos; + s32 s; + int ret; + + for (;;) { + pos = buffer; + end = buffer + strlen(buffer); + *end++ = c; + *end = 0; + s = utf8_get(&pos); + if (s > 0) { + *buffer = 0; + ret = utf_to_cp(&s, codepage_437); + return s; + } + if (pos == end) + return 0; + *buffer = 0; + } +} + +int utf8_to_utf32_stream(u8 c, char *buffer) +{ + char *end; + const char *pos; + s32 s; + + for (;;) { + pos = buffer; + end = buffer + strlen(buffer); + *end++ = c; + *end = 0; + s = utf8_get(&pos); + if (s > 0) { + *buffer = 0; + return s; + } + if (pos == end) + return 0; + *buffer = 0; + } +} diff --git a/test/unicode_ut.c b/test/unicode_ut.c index 154361aea7..6f6aea5f60 100644 --- a/test/unicode_ut.c +++ b/test/unicode_ut.c @@ -47,6 +47,9 @@ static const char d3[] = {0xe6, 0xbd, 0x9c, 0xe6, 0xb0, 0xb4, 0xe8, 0x89, /* Three letters translating to two utf-16 word each */ static const char d4[] = {0xf0, 0x90, 0x92, 0x8d, 0xf0, 0x90, 0x92, 0x96, 0xf0, 0x90, 0x92, 0x87, 0x00}; +/* Letter not in code page 437 */ +static const char d5[] = {0xCE, 0x92, 0x20, 0x69, 0x73, 0x20, 0x6E, 0x6F, + 0x74, 0x20, 0x42, 0x00}; /* Illegal utf-8 strings */ static const char j1[] = {0x6a, 0x31, 0xa1, 0x6c, 0x00}; @@ -631,6 +634,81 @@ static int unicode_test_utf_to_cp(struct unit_test_state *uts) } UNICODE_TEST(unicode_test_utf_to_cp); +static void utf8_to_cp437_stream_helper(const char *in, char *out) +{ + char buffer[5]; + int ret; + + *buffer = 0; + for (; *in; ++in) { + ret = utf8_to_cp437_stream(*in, buffer); + if (ret) + *out++ = ret; + } + *out = 0; +} + +static int unicode_test_utf8_to_cp437_stream(struct 
unit_test_state *uts) +{ + char buf[16]; + + utf8_to_cp437_stream_helper(d1, buf); + ut_asserteq_str("U-Boot", buf); + utf8_to_cp437_stream_helper(d2, buf); + ut_asserteq_str("kafb\xa0tur", buf); + utf8_to_cp437_stream_helper(d5, buf); + ut_asserteq_str("? is not B", buf); + utf8_to_cp437_stream_helper(j2, buf); + ut_asserteq_str("j2l", buf); + + return 0; +} +UNICODE_TEST(unicode_test_utf8_to_cp437_stream); + +static void utf8_to_utf32_stream_helper(const char *in, s32 *out) +{ + char buffer[5]; + int ret; + + *buffer = 0; + for (; *in; ++in) { + ret = utf8_to_utf32_stream(*in, buffer); + if (ret) + *out++ = ret; + } + *out = 0; +} + +static int unicode_test_utf8_to_utf32_stream(struct unit_test_state *uts) +{ + s32 buf[16]; + + const u32 u1[] = {0x55, 0x2D, 0x42, 0x6F, 0x6F, 0x74, 0x0000}; + const u32 u2[] = {0x6B, 0x61, 0x66, 0x62, 0xE1, 0x74, 0x75, 0x72, 0x00}; + const u32 u3[] = {0x0392, 0x20, 0x69, 0x73, 0x20, 0x6E, 0x6F, 0x74, + 0x20, 0x42, 0x00}; + const u32 u4[] = {0x6A, 0x32, 0x6C, 0x00}; + + memset(buf, 0, sizeof(buf)); + utf8_to_utf32_stream_helper(d1, buf); + ut_asserteq_mem(u1, buf, sizeof(u1)); + + memset(buf, 0, sizeof(buf)); + utf8_to_utf32_stream_helper(d2, buf); + ut_asserteq_mem(u2, buf, sizeof(u2)); + + memset(buf, 0, sizeof(buf)); + utf8_to_utf32_stream_helper(d5, buf); + ut_asserteq_mem(u3, buf, sizeof(u3)); + + memset(buf, 0, sizeof(buf)); + utf8_to_utf32_stream_helper(j2, buf); + ut_asserteq_mem(u4, buf, sizeof(u4)); + + return 0; +} +UNICODE_TEST(unicode_test_utf8_to_utf32_stream); + #ifdef CONFIG_EFI_LOADER static int unicode_test_efi_create_indexed_name(struct unit_test_state *uts) { From 7d3eff3412886f277c724a9effcbe545c4cdd5b5 Mon Sep 17 00:00:00 2001 From: Heinrich Schuchardt Date: Tue, 2 Mar 2021 08:07:19 +0100 Subject: [PATCH 6/6] efi_loader: correct uboot_bin_env.its file format Up to now the EFI capsule Python tests were always skipped. 
The reason is that mkimage fails with: uboot_bin_env.its:13.21-23.5: Warning (unit_address_vs_reg): /images/u-boot-bin@100000: node has a unit name, but no reg property uboot_bin_env.its:24.21-34.5: Warning (unit_address_vs_reg): /images/u-boot-env@150000: node has a unit name, but no reg property If a unit in a device-tree has an address, a reg property must be provided. But adding a reg property is not the solution here. Since 2017 unit addresses are disallowed for FIT, cf. common/image-fit.c:1624. So remove the unit addresses in uboot_bin_env.its. Signed-off-by: Heinrich Schuchardt --- test/py/tests/test_efi_capsule/uboot_bin_env.its | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/test/py/tests/test_efi_capsule/uboot_bin_env.its b/test/py/tests/test_efi_capsule/uboot_bin_env.its index 31e2f8049f..fc65907481 100644 --- a/test/py/tests/test_efi_capsule/uboot_bin_env.its +++ b/test/py/tests/test_efi_capsule/uboot_bin_env.its @@ -10,7 +10,7 @@ #address-cells = <2>; images { - u-boot-bin@100000 { + u-boot-bin { description = "U-Boot binary on SPI Flash"; data = /incbin/("BINFILE1"); compression = "none"; @@ -21,7 +21,7 @@ algo = "sha1"; }; }; - u-boot-env@150000 { + u-boot-env { description = "U-Boot environment on SPI Flash"; data = /incbin/("BINFILE2"); compression = "none";