mirror of
https://github.com/u-boot/u-boot.git
synced 2025-05-03 10:17:21 +00:00
lib/charset: UTF-8 stream conversion
Provide functions to convert a UTF-8 stream to code page 437 or UTF-32. Add unit tests. Signed-off-by: Heinrich Schuchardt <xypron.glpk@gmx.de>
This commit is contained in:
parent
ddbaff53da
commit
e91789e2f6
3 changed files with 142 additions and 9 deletions
|
@ -286,4 +286,22 @@ uint8_t *utf16_to_utf8(uint8_t *dest, const uint16_t *src, size_t size);
|
||||||
*/
|
*/
|
||||||
int utf_to_cp(s32 *c, const u16 *codepage);
|
int utf_to_cp(s32 *c, const u16 *codepage);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* utf8_to_cp437_stream() - convert UTF-8 stream to codepage 437
|
||||||
|
*
|
||||||
|
* @c: next byte of the UTF-8 stream to convert
|
||||||
|
* @buffer: buffer of at least 5 characters, must hold an empty string before the first call
|
||||||
|
* Return: next codepage 437 character or 0
|
||||||
|
*/
|
||||||
|
int utf8_to_cp437_stream(u8 c, char *buffer);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* utf8_to_utf32_stream() - convert UTF-8 stream to UTF-32
|
||||||
|
*
|
||||||
|
* @c: next byte of the UTF-8 stream to convert
|
||||||
|
* @buffer: buffer of at least 5 characters, must hold an empty string before the first call
|
||||||
|
* Return: next UTF-32 code point or 0
|
||||||
|
*/
|
||||||
|
int utf8_to_utf32_stream(u8 c, char *buffer);
|
||||||
|
|
||||||
#endif /* __CHARSET_H_ */
|
#endif /* __CHARSET_H_ */
|
||||||
|
|
|
@ -481,15 +481,6 @@ uint8_t *utf16_to_utf8(uint8_t *dest, const uint16_t *src, size_t size)
|
||||||
return dest;
|
return dest;
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
|
||||||
* utf_to_cp() - translate Unicode code point to 8bit codepage
|
|
||||||
*
|
|
||||||
* Codepoints that do not exist in the codepage are rendered as question mark.
|
|
||||||
*
|
|
||||||
* @c: pointer to Unicode code point to be translated
|
|
||||||
* @codepage: Unicode to codepage translation table
|
|
||||||
* Return: 0 on success, -ENOENT if codepoint cannot be translated
|
|
||||||
*/
|
|
||||||
int utf_to_cp(s32 *c, const u16 *codepage)
|
int utf_to_cp(s32 *c, const u16 *codepage)
|
||||||
{
|
{
|
||||||
if (*c >= 0x80) {
|
if (*c >= 0x80) {
|
||||||
|
@ -507,3 +498,49 @@ int utf_to_cp(s32 *c, const u16 *codepage)
|
||||||
}
|
}
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/*
 * utf8_to_cp437_stream() - feed one byte of a UTF-8 stream, emit code page 437
 *
 * @c is appended to the NUL-terminated bytes already pending in @buffer and
 * utf8_get() is run over the result.
 *
 * @c:		next byte of the UTF-8 stream
 * @buffer:	pending bytes kept between calls; must hold a NUL-terminated
 *		string on entry
 * Return:	code page 437 character, '?' if utf_to_cp() cannot translate
 *		the decoded code point, or 0 if utf8_get() consumed the whole
 *		buffer without producing a code point
 */
int utf8_to_cp437_stream(u8 c, char *buffer)
{
	char *end;
	const char *pos;
	s32 s;

	for (;;) {
		/* Append the new byte to the pending bytes */
		pos = buffer;
		end = buffer + strlen(buffer);
		*end++ = c;
		*end = 0;
		s = utf8_get(&pos);
		if (s > 0) {
			/* A code point was decoded: start afresh on the next call */
			*buffer = 0;
			/*
			 * Check the result instead of relying on utf_to_cp()
			 * having replaced the untranslatable code point.
			 */
			if (utf_to_cp(&s, codepage_437))
				return '?';
			return s;
		}
		/* Whole buffer consumed without a result: keep it and return */
		if (pos == end)
			return 0;
		/*
		 * utf8_get() stopped before the end of the buffer. Drop the
		 * pending bytes and retry with @c alone.
		 */
		*buffer = 0;
	}
}
|
||||||
|
|
||||||
|
/*
 * utf8_to_utf32_stream() - feed one byte of a UTF-8 stream, emit a code point
 *
 * @c is appended to the NUL-terminated bytes pending in @buffer, then
 * utf8_get() is run over the buffer.
 *
 * @c:		next byte of the UTF-8 stream
 * @buffer:	pending bytes kept between calls; must hold a NUL-terminated
 *		string on entry
 * Return:	positive value delivered by utf8_get(), or 0 if utf8_get()
 *		consumed the whole buffer without producing one
 */
int utf8_to_utf32_stream(u8 c, char *buffer)
{
	while (1) {
		size_t len = strlen(buffer);
		const char *cursor = buffer;
		char *tail = &buffer[len + 1];
		s32 code;

		/* Store the new byte behind the pending ones */
		buffer[len] = c;
		buffer[len + 1] = '\0';

		code = utf8_get(&cursor);

		/* Nothing decoded and everything consumed: keep the bytes */
		if (code <= 0 && cursor == tail)
			return 0;

		/* In every other case the pending bytes are discarded */
		*buffer = '\0';
		if (code > 0)
			return code;
		/* Otherwise retry with @c as the only byte in the buffer */
	}
}
|
||||||
|
|
|
@ -47,6 +47,9 @@ static const char d3[] = {0xe6, 0xbd, 0x9c, 0xe6, 0xb0, 0xb4, 0xe8, 0x89,
|
||||||
/* Three letters translating to two utf-16 word each */
|
/* Three letters translating to two utf-16 word each */
|
||||||
static const char d4[] = {0xf0, 0x90, 0x92, 0x8d, 0xf0, 0x90, 0x92, 0x96,
|
static const char d4[] = {0xf0, 0x90, 0x92, 0x8d, 0xf0, 0x90, 0x92, 0x96,
|
||||||
0xf0, 0x90, 0x92, 0x87, 0x00};
|
0xf0, 0x90, 0x92, 0x87, 0x00};
|
||||||
|
/* "<U+0392> is not B": Greek capital letter beta is not in code page 437 */
static const char d5[] = {
	0xce, 0x92,
	0x20, 0x69, 0x73,
	0x20, 0x6e, 0x6f, 0x74,
	0x20, 0x42,
	0x00,
};
|
||||||
|
|
||||||
/* Illegal utf-8 strings */
|
/* Illegal utf-8 strings */
|
||||||
static const char j1[] = {0x6a, 0x31, 0xa1, 0x6c, 0x00};
|
static const char j1[] = {0x6a, 0x31, 0xa1, 0x6c, 0x00};
|
||||||
|
@ -631,6 +634,81 @@ static int unicode_test_utf_to_cp(struct unit_test_state *uts)
|
||||||
}
|
}
|
||||||
UNICODE_TEST(unicode_test_utf_to_cp);
|
UNICODE_TEST(unicode_test_utf_to_cp);
|
||||||
|
|
||||||
|
/*
 * Push the NUL-terminated string @in byte by byte through
 * utf8_to_cp437_stream() and collect every non-zero result in @out,
 * which is NUL-terminated at the end.
 */
static void utf8_to_cp437_stream_helper(const char *in, char *out)
{
	char pending[5];
	const char *src = in;
	char *dst = out;

	/* The stream converter expects an empty string to start with */
	pending[0] = '\0';
	while (*src) {
		int ch = utf8_to_cp437_stream(*src++, pending);

		/* 0 means that this byte produced no output character */
		if (!ch)
			continue;
		*dst++ = ch;
	}
	*dst = '\0';
}
|
||||||
|
|
||||||
|
/*
 * Unit test for utf8_to_cp437_stream().
 *
 * Each input string is converted via utf8_to_cp437_stream_helper() and the
 * result is compared with the expected code page 437 string. Returns 0 when
 * the end of the function is reached.
 */
static int unicode_test_utf8_to_cp437_stream(struct unit_test_state *uts)
|
||||||
|
{
|
||||||
|
char buf[16];
|
||||||
|
|
||||||
|
utf8_to_cp437_stream_helper(d1, buf);
|
||||||
|
ut_asserteq_str("U-Boot", buf);
|
||||||
|
utf8_to_cp437_stream_helper(d2, buf);
|
||||||
|
/* 0xa0 is expected where the UTF-32 stream test expects U+00E1 */
ut_asserteq_str("kafb\xa0tur", buf);
|
||||||
|
utf8_to_cp437_stream_helper(d5, buf);
|
||||||
|
/* The leading letter of d5 has no code page 437 equivalent: expect '?' */
ut_asserteq_str("? is not B", buf);
|
||||||
|
/* NOTE(review): j2 is defined outside this view, presumably an illegal UTF-8 string - confirm */
utf8_to_cp437_stream_helper(j2, buf);
|
||||||
|
ut_asserteq_str("j2l", buf);
|
||||||
|
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
UNICODE_TEST(unicode_test_utf8_to_cp437_stream);
|
||||||
|
|
||||||
|
/*
 * Push the NUL-terminated string @in byte by byte through
 * utf8_to_utf32_stream() and collect every non-zero result in @out,
 * which is terminated with a 0 element at the end.
 */
static void utf8_to_utf32_stream_helper(const char *in, s32 *out)
{
	char pending[5];
	const char *src = in;
	s32 *dst = out;

	/* The stream converter expects an empty string to start with */
	pending[0] = '\0';
	while (*src) {
		int code = utf8_to_utf32_stream(*src++, pending);

		/* 0 means that this byte produced no output code point */
		if (!code)
			continue;
		*dst++ = code;
	}
	*dst = 0;
}
|
||||||
|
|
||||||
|
/*
 * Unit test for utf8_to_utf32_stream().
 *
 * Each input string is converted via utf8_to_utf32_stream_helper() and the
 * result is compared with the expected array u1..u4. The comparison length is
 * the size of the expected array, so its terminating 0 element is checked too.
 * Returns 0 when the end of the function is reached.
 */
static int unicode_test_utf8_to_utf32_stream(struct unit_test_state *uts)
|
||||||
|
{
|
||||||
|
s32 buf[16];
|
||||||
|
|
||||||
|
/* Expected output for d1, d2, d5 and j2, in this order */
const u32 u1[] = {0x55, 0x2D, 0x42, 0x6F, 0x6F, 0x74, 0x0000};
|
||||||
|
const u32 u2[] = {0x6B, 0x61, 0x66, 0x62, 0xE1, 0x74, 0x75, 0x72, 0x00};
|
||||||
|
const u32 u3[] = {0x0392, 0x20, 0x69, 0x73, 0x20, 0x6E, 0x6F, 0x74,
|
||||||
|
0x20, 0x42, 0x00};
|
||||||
|
const u32 u4[] = {0x6A, 0x32, 0x6C, 0x00};
|
||||||
|
|
||||||
|
/* buf is zeroed before every conversion */
memset(buf, 0, sizeof(buf));
|
||||||
|
utf8_to_utf32_stream_helper(d1, buf);
|
||||||
|
ut_asserteq_mem(u1, buf, sizeof(u1));
|
||||||
|
|
||||||
|
memset(buf, 0, sizeof(buf));
|
||||||
|
utf8_to_utf32_stream_helper(d2, buf);
|
||||||
|
ut_asserteq_mem(u2, buf, sizeof(u2));
|
||||||
|
|
||||||
|
memset(buf, 0, sizeof(buf));
|
||||||
|
utf8_to_utf32_stream_helper(d5, buf);
|
||||||
|
ut_asserteq_mem(u3, buf, sizeof(u3));
|
||||||
|
|
||||||
|
memset(buf, 0, sizeof(buf));
|
||||||
|
/* NOTE(review): j2 is defined outside this view, presumably an illegal UTF-8 string - confirm */
utf8_to_utf32_stream_helper(j2, buf);
|
||||||
|
ut_asserteq_mem(u4, buf, sizeof(u4));
|
||||||
|
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
UNICODE_TEST(unicode_test_utf8_to_utf32_stream);
|
||||||
|
|
||||||
#ifdef CONFIG_EFI_LOADER
|
#ifdef CONFIG_EFI_LOADER
|
||||||
static int unicode_test_efi_create_indexed_name(struct unit_test_state *uts)
|
static int unicode_test_efi_create_indexed_name(struct unit_test_state *uts)
|
||||||
{
|
{
|
||||||
|
|
Loading…
Add table
Reference in a new issue