[U-Boot] [PATCH v2 13/17] efi_loader: capitalization table
Alexander Graf
agraf at suse.de
Sat Sep 1 09:43:45 UTC 2018
On 31.08.18 21:31, Heinrich Schuchardt wrote:
> This patch provides a define to initialize a table that maps lower to
> capital letters for Unicode code point 0x0000 - 0xffff.
>
> Signed-off-by: Heinrich Schuchardt <xypron.glpk at gmx.de>
> ---
> v2
> add shorter tables for code pages 437 and 1250
> ---
> MAINTAINERS | 1 +
> include/capitalization.h | 2028 ++++++++++++++++++++++++++++++++++++++
> 2 files changed, 2029 insertions(+)
> create mode 100644 include/capitalization.h
>
> diff --git a/MAINTAINERS b/MAINTAINERS
> index 46f826a0fe..8c9cd83347 100644
> --- a/MAINTAINERS
> +++ b/MAINTAINERS
> @@ -381,6 +381,7 @@ T: git git://github.com/agraf/u-boot.git
> F: doc/README.uefi
> F: doc/README.iscsi
> F: Documentation/efi.rst
> +F: include/capitalization.h
> F: include/efi*
> F: include/pe.h
> F: include/asm-generic/pe.h
> diff --git a/include/capitalization.h b/include/capitalization.h
> new file mode 100644
> index 0000000000..2c24e1bf47
> --- /dev/null
> +++ b/include/capitalization.h
> @@ -0,0 +1,2028 @@
> +/* SPDX-License-Identifier: Unicode-DFS-2016 */
> +/*
> + * Capitalization tables
> + */
> +
> +struct capitalization_table {
> + u16 upper;
> + u16 lower;
> +};
> +
> +/*
> + * Correspondence table for small and capital Unicode letters in the range of
> + * 0x0000 - 0xffff based on http://www.unicode.org/Public/UCA/11.0.0/allkeys.txt
> + */
> +#define UNICODE_CAPITALIZATION_TABLE { \
> + { 0x0531, /* ARMENIAN CAPITAL LETTER AYB */ \
> + 0x0561, /* ARMENIAN SMALL LETTER AYB */ }, \
[...]
> + { 0x2C7F, /* LATIN CAPITAL LETTER Z WITH SWASH TAIL */ \
> + 0x0240, /* LATIN SMALL LETTER Z WITH SWASH TAIL */ }, \
> + { 0x0000, /* END OF LIST CAPITAL LETTERS */ \
> + 0x0000, /* END OF LIST SMALL LETTERS */ }, \
> +}
> +
> +/*
> + * Correspondence table for small and capital letters of codepage 437.
> + */
> +#define CP437_CAPITALIZATION_TABLE { \
> + { 0x03a6, 0x03c6, }, \
I like how you added a comment to each entry saying exactly which
character it is. Please keep that habit in the trimmed-down tables too.
> + { 0x03a3, 0x03c3, }, \
> + { 0x0041, 0x0061, }, \
> + { 0x00c4, 0x00e4, }, \
> + { 0x00c5, 0x00e5, }, \
> + { 0x00c6, 0x00e6, }, \
> + { 0x0042, 0x0062, }, \
> + { 0x0043, 0x0063, }, \
> + { 0x00c7, 0x00e7, }, \
> + { 0x0044, 0x0064, }, \
> + { 0x0045, 0x0065, }, \
> + { 0x00c9, 0x00e9, }, \
> + { 0x0046, 0x0066, }, \
> + { 0x0047, 0x0067, }, \
> + { 0x0048, 0x0068, }, \
> + { 0x0049, 0x0069, }, \
> + { 0x004a, 0x006a, }, \
> + { 0x004b, 0x006b, }, \
> + { 0x004c, 0x006c, }, \
> + { 0x004d, 0x006d, }, \
> + { 0x004e, 0x006e, }, \
> + { 0x00d1, 0x00f1, }, \
> + { 0x004f, 0x006f, }, \
> + { 0x00d6, 0x00f6, }, \
> + { 0x0050, 0x0070, }, \
> + { 0x0051, 0x0071, }, \
Most of these are just Latin A to Z. Those are already handled by the
conversion done in code, no? So you can just omit them.
> + { 0x0052, 0x0072, }, \
> + { 0x0053, 0x0073, }, \
> + { 0x0054, 0x0074, }, \
> + { 0x0055, 0x0075, }, \
> + { 0x00dc, 0x00fc, }, \
> + { 0x0056, 0x0076, }, \
> + { 0x0057, 0x0077, }, \
> + { 0x0058, 0x0078, }, \
> + { 0x0059, 0x0079, }, \
> + { 0x005a, 0x007a, }, \
> + { 0x0000, 0x0000, }, \
... that would leave 11 entries for cp437 ...
> +}
> +
> +/*
> + * Correspondence table for small and capital letters of codepage 1250.
> + */
> +#define CP1250_CAPITALIZATION_TABLE { \
> + { 0x0041, 0x0061, }, \
Please sort the list by code point - or any other recognizable sorting
order ;).
> + { 0x00c1, 0x00e1, }, \
> + { 0x0102, 0x0103, }, \
> + { 0x00c2, 0x00e2, }, \
> + { 0x00c4, 0x00e4, }, \
> + { 0x0104, 0x0105, }, \
> + { 0x0042, 0x0062, }, \
> + { 0x0043, 0x0063, }, \
> + { 0x0106, 0x0107, }, \
> + { 0x010c, 0x010d, }, \
> + { 0x00c7, 0x00e7, }, \
> + { 0x0044, 0x0064, }, \
> + { 0x010e, 0x010f, }, \
> + { 0x0110, 0x0111, }, \
> + { 0x0045, 0x0065, }, \
> + { 0x00c9, 0x00e9, }, \
> + { 0x011a, 0x011b, }, \
> + { 0x00cb, 0x00eb, }, \
> + { 0x0118, 0x0119, }, \
> + { 0x0046, 0x0066, }, \
> + { 0x0047, 0x0067, }, \
> + { 0x0048, 0x0068, }, \
> + { 0x0049, 0x0069, }, \
> + { 0x00cd, 0x00ed, }, \
> + { 0x00ce, 0x00ee, }, \
> + { 0x004a, 0x006a, }, \
> + { 0x004b, 0x006b, }, \
> + { 0x004c, 0x006c, }, \
> + { 0x0139, 0x013a, }, \
> + { 0x013d, 0x013e, }, \
> + { 0x0141, 0x0142, }, \
> + { 0x004d, 0x006d, }, \
> + { 0x004e, 0x006e, }, \
> + { 0x0143, 0x0144, }, \
> + { 0x0147, 0x0148, }, \
> + { 0x004f, 0x006f, }, \
> + { 0x00d3, 0x00f3, }, \
> + { 0x00d4, 0x00f4, }, \
> + { 0x00d6, 0x00f6, }, \
> + { 0x0150, 0x0151, }, \
> + { 0x0050, 0x0070, }, \
> + { 0x0051, 0x0071, }, \
> + { 0x0052, 0x0072, }, \
> + { 0x0154, 0x0155, }, \
> + { 0x0158, 0x0159, }, \
> + { 0x0053, 0x0073, }, \
> + { 0x015a, 0x015b, }, \
> + { 0x0160, 0x0161, }, \
> + { 0x015e, 0x015f, }, \
> + { 0x0054, 0x0074, }, \
> + { 0x0164, 0x0165, }, \
> + { 0x0162, 0x0163, }, \
> + { 0x0055, 0x0075, }, \
> + { 0x00da, 0x00fa, }, \
> + { 0x00dc, 0x00fc, }, \
> + { 0x0170, 0x0171, }, \
> + { 0x016e, 0x016f, }, \
> + { 0x0056, 0x0076, }, \
> + { 0x0057, 0x0077, }, \
> + { 0x0058, 0x0078, }, \
> + { 0x0059, 0x0079, }, \
> + { 0x00dd, 0x00fd, }, \
> + { 0x005a, 0x007a, }, \
> + { 0x0179, 0x017a, }, \
> + { 0x017d, 0x017e, }, \
> + { 0x017b, 0x017c, }, \
... and 40 unique points for cp1250.
How about we just combine the two tables into one and call it "western"?
Alex
More information about the U-Boot mailing list