[U-Boot] [PATCH v2 13/17] efi_loader: capitalization table

Alexander Graf agraf at suse.de
Sat Sep 1 09:50:03 UTC 2018



On 01.09.18 11:43, Alexander Graf wrote:
> 
> 
> On 31.08.18 21:31, Heinrich Schuchardt wrote:
>> This patch provides a define to initialize a table that maps lowercase to
>> capital letters for Unicode code points 0x0000 - 0xffff.
>>
>> Signed-off-by: Heinrich Schuchardt <xypron.glpk at gmx.de>
>> ---
>> v2
>> 	add shorter tables for code pages 437 and 1250
>> ---
>>  MAINTAINERS              |    1 +
>>  include/capitalization.h | 2028 ++++++++++++++++++++++++++++++++++++++
>>  2 files changed, 2029 insertions(+)
>>  create mode 100644 include/capitalization.h
>>
>> diff --git a/MAINTAINERS b/MAINTAINERS
>> index 46f826a0fe..8c9cd83347 100644
>> --- a/MAINTAINERS
>> +++ b/MAINTAINERS
>> @@ -381,6 +381,7 @@ T:	git git://github.com/agraf/u-boot.git
>>  F:	doc/README.uefi
>>  F:	doc/README.iscsi
>>  F:	Documentation/efi.rst
>> +F:	include/capitalization.h
>>  F:	include/efi*
>>  F:	include/pe.h
>>  F:	include/asm-generic/pe.h
>> diff --git a/include/capitalization.h b/include/capitalization.h
>> new file mode 100644
>> index 0000000000..2c24e1bf47
>> --- /dev/null
>> +++ b/include/capitalization.h
>> @@ -0,0 +1,2028 @@
>> +/* SPDX-License-Identifier: Unicode-DFS-2016 */
>> +/*
>> + * Capitalization tables
>> + */
>> +
>> +struct capitalization_table {
>> +	u16 upper;
>> +	u16 lower;
>> +};
>> +
>> +/*
>> + * Correspondence table for small and capital Unicode letters in the range of
>> + * 0x0000 - 0xffff based on http://www.unicode.org/Public/UCA/11.0.0/allkeys.txt
>> + */
>> +#define UNICODE_CAPITALIZATION_TABLE { \
>> +	{ 0x0531, /* ARMENIAN CAPITAL LETTER AYB */ \
>> +	  0x0561, /* ARMENIAN SMALL LETTER AYB */ }, \
> 
> 
> [...]
> 
>> +	{ 0x2C7F, /* LATIN CAPITAL LETTER Z WITH SWASH TAIL */ \
>> +	  0x0240, /* LATIN SMALL LETTER Z WITH SWASH TAIL */ }, \
>> +	{ 0x0000, /* END OF LIST CAPITAL LETTERS */ \
>> +	  0x0000, /* END OF LIST SMALL LETTERS */ }, \
>> +}
>> +
>> +/*
>> + * Correspondence table for small and capital letters of codepage 437.
>> + */
>> +#define CP437_CAPITALIZATION_TABLE { \
>> +	{ 0x03a6, 0x03c6, }, \
> 
> I like how you added comments to each entry on what exactly the
> character is. Please keep that habit in the trimmed-down tables too.
> 
>> +	{ 0x03a3, 0x03c3, }, \
>> +	{ 0x0041, 0x0061, }, \
>> +	{ 0x00c4, 0x00e4, }, \
>> +	{ 0x00c5, 0x00e5, }, \
>> +	{ 0x00c6, 0x00e6, }, \
>> +	{ 0x0042, 0x0062, }, \
>> +	{ 0x0043, 0x0063, }, \
>> +	{ 0x00c7, 0x00e7, }, \
>> +	{ 0x0044, 0x0064, }, \
>> +	{ 0x0045, 0x0065, }, \
>> +	{ 0x00c9, 0x00e9, }, \
>> +	{ 0x0046, 0x0066, }, \
>> +	{ 0x0047, 0x0067, }, \
>> +	{ 0x0048, 0x0068, }, \
>> +	{ 0x0049, 0x0069, }, \
>> +	{ 0x004a, 0x006a, }, \
>> +	{ 0x004b, 0x006b, }, \
>> +	{ 0x004c, 0x006c, }, \
>> +	{ 0x004d, 0x006d, }, \
>> +	{ 0x004e, 0x006e, }, \
>> +	{ 0x00d1, 0x00f1, }, \
>> +	{ 0x004f, 0x006f, }, \
>> +	{ 0x00d6, 0x00f6, }, \
>> +	{ 0x0050, 0x0070, }, \
>> +	{ 0x0051, 0x0071, }, \
> 
> Most of these are just Latin A to Z. These are already covered in your
> conversion by code, no? So you can just omit them.
> 
>> +	{ 0x0052, 0x0072, }, \
>> +	{ 0x0053, 0x0073, }, \
>> +	{ 0x0054, 0x0074, }, \
>> +	{ 0x0055, 0x0075, }, \
>> +	{ 0x00dc, 0x00fc, }, \
>> +	{ 0x0056, 0x0076, }, \
>> +	{ 0x0057, 0x0077, }, \
>> +	{ 0x0058, 0x0078, }, \
>> +	{ 0x0059, 0x0079, }, \
>> +	{ 0x005a, 0x007a, }, \
>> +	{ 0x0000, 0x0000, }, \
> 
> ... that would leave 11 entries for cp437 ...
> 
>> +}
>> +
>> +/*
>> + * Correspondence table for small and capital letters of codepage 1250.
>> + */
>> +#define CP1250_CAPITALIZATION_TABLE { \
>> +	{ 0x0041, 0x0061, }, \
> 
> Please sort the list by code point - or any other recognizable sorting
> order ;).
> 
>> +	{ 0x00c1, 0x00e1, }, \
>> +	{ 0x0102, 0x0103, }, \
>> +	{ 0x00c2, 0x00e2, }, \
>> +	{ 0x00c4, 0x00e4, }, \
>> +	{ 0x0104, 0x0105, }, \
>> +	{ 0x0042, 0x0062, }, \
>> +	{ 0x0043, 0x0063, }, \
>> +	{ 0x0106, 0x0107, }, \
>> +	{ 0x010c, 0x010d, }, \
>> +	{ 0x00c7, 0x00e7, }, \
>> +	{ 0x0044, 0x0064, }, \
>> +	{ 0x010e, 0x010f, }, \
>> +	{ 0x0110, 0x0111, }, \
>> +	{ 0x0045, 0x0065, }, \
>> +	{ 0x00c9, 0x00e9, }, \
>> +	{ 0x011a, 0x011b, }, \
>> +	{ 0x00cb, 0x00eb, }, \
>> +	{ 0x0118, 0x0119, }, \
>> +	{ 0x0046, 0x0066, }, \
>> +	{ 0x0047, 0x0067, }, \
>> +	{ 0x0048, 0x0068, }, \
>> +	{ 0x0049, 0x0069, }, \
>> +	{ 0x00cd, 0x00ed, }, \
>> +	{ 0x00ce, 0x00ee, }, \
>> +	{ 0x004a, 0x006a, }, \
>> +	{ 0x004b, 0x006b, }, \
>> +	{ 0x004c, 0x006c, }, \
>> +	{ 0x0139, 0x013a, }, \
>> +	{ 0x013d, 0x013e, }, \
>> +	{ 0x0141, 0x0142, }, \
>> +	{ 0x004d, 0x006d, }, \
>> +	{ 0x004e, 0x006e, }, \
>> +	{ 0x0143, 0x0144, }, \
>> +	{ 0x0147, 0x0148, }, \
>> +	{ 0x004f, 0x006f, }, \
>> +	{ 0x00d3, 0x00f3, }, \
>> +	{ 0x00d4, 0x00f4, }, \
>> +	{ 0x00d6, 0x00f6, }, \
>> +	{ 0x0150, 0x0151, }, \
>> +	{ 0x0050, 0x0070, }, \
>> +	{ 0x0051, 0x0071, }, \
>> +	{ 0x0052, 0x0072, }, \
>> +	{ 0x0154, 0x0155, }, \
>> +	{ 0x0158, 0x0159, }, \
>> +	{ 0x0053, 0x0073, }, \
>> +	{ 0x015a, 0x015b, }, \
>> +	{ 0x0160, 0x0161, }, \
>> +	{ 0x015e, 0x015f, }, \
>> +	{ 0x0054, 0x0074, }, \
>> +	{ 0x0164, 0x0165, }, \
>> +	{ 0x0162, 0x0163, }, \
>> +	{ 0x0055, 0x0075, }, \
>> +	{ 0x00da, 0x00fa, }, \
>> +	{ 0x00dc, 0x00fc, }, \
>> +	{ 0x0170, 0x0171, }, \
>> +	{ 0x016e, 0x016f, }, \
>> +	{ 0x0056, 0x0076, }, \
>> +	{ 0x0057, 0x0077, }, \
>> +	{ 0x0058, 0x0078, }, \
>> +	{ 0x0059, 0x0079, }, \
>> +	{ 0x00dd, 0x00fd, }, \
>> +	{ 0x005a, 0x007a, }, \
>> +	{ 0x0179, 0x017a, }, \
>> +	{ 0x017d, 0x017e, }, \
>> +	{ 0x017b, 0x017c, }, \
> 
> ... and 40 unique points for cp1250.
> 
> How about we just combine the two tables into one and call it "western"?

Actually, thinking about it again, keeping the tables separate is
probably a good idea.


Alex


More information about the U-Boot mailing list