xref: /rk3399_rockchip-uboot/lib/charset.c (revision 407d40f45fa9ead49c6b5f6750e82f727edfe0f0)
1*407d40f4SRob Clark /*
2*407d40f4SRob Clark  *  charset conversion utils
3*407d40f4SRob Clark  *
4*407d40f4SRob Clark  *  Copyright (c) 2017 Rob Clark
5*407d40f4SRob Clark  *
6*407d40f4SRob Clark  *  SPDX-License-Identifier:     GPL-2.0+
7*407d40f4SRob Clark  */
8*407d40f4SRob Clark 
9*407d40f4SRob Clark #include <common.h>
10*407d40f4SRob Clark #include <charset.h>
11*407d40f4SRob Clark #include <malloc.h>
12*407d40f4SRob Clark 
13*407d40f4SRob Clark /*
14*407d40f4SRob Clark  * utf8/utf16 conversion mostly lifted from grub
15*407d40f4SRob Clark  */
16*407d40f4SRob Clark 
/*
 * Count the 16-bit code units in a zero-terminated UTF-16 string.
 *
 * in:      string to measure; must end with a zero code unit
 * returns: number of code units before the terminator (a surrogate
 *          pair therefore counts as two)
 */
size_t utf16_strlen(const uint16_t *in)
{
	const uint16_t *cursor = in;

	/* Walk forward until the terminating zero unit is reached. */
	while (*cursor)
		cursor++;

	return (size_t)(cursor - in);
}
23*407d40f4SRob Clark 
/*
 * Bounded length of a UTF-16 string.
 *
 * in:      string to measure
 * count:   maximum number of code units to examine
 * returns: number of code units before the first zero unit, or count
 *          if no zero unit occurs within the first count units
 */
size_t utf16_strnlen(const uint16_t *in, size_t count)
{
	size_t len = 0;

	/* The bound is tested first so in[count] is never read. */
	while (len < count && in[len])
		len++;

	return len;
}
30*407d40f4SRob Clark 
/*
 * Copy a zero-terminated UTF-16 string, terminator included.
 *
 * dest:    destination buffer; the caller must provide room for the
 *          whole string plus the terminating zero unit
 * src:     zero-terminated string to copy
 * returns: dest
 */
uint16_t *utf16_strcpy(uint16_t *dest, const uint16_t *src)
{
	size_t pos = 0;
	uint16_t unit;

	/* Copy one code unit at a time; stop once the zero unit is copied. */
	do {
		unit = src[pos];
		dest[pos] = unit;
		pos++;
	} while (unit != '\0');

	return dest;
}
40*407d40f4SRob Clark 
/*
 * Duplicate a zero-terminated UTF-16 string into memory obtained
 * from malloc().
 *
 * s:       string to duplicate; may be NULL
 * returns: the newly allocated copy, or NULL if s is NULL or the
 *          allocation fails
 */
uint16_t *utf16_strdup(const uint16_t *s)
{
	uint16_t *copy;

	if (!s)
		return NULL;

	/* One extra code unit for the terminator; each unit is 2 bytes. */
	copy = malloc((utf16_strlen(s) + 1) * sizeof(*copy));
	if (!copy)
		return NULL;

	/* utf16_strcpy() hands back its destination, i.e. copy. */
	return utf16_strcpy(copy, s);
}
49*407d40f4SRob Clark 
/*
 * Convert UTF-16 to UTF-8.
 *
 * dest:    output buffer. Nothing is bounds-checked here; the longest
 *          output produced is 3 bytes per input code unit, so a buffer
 *          of 3 * size bytes is always sufficient.
 * src:     input code units
 * size:    number of 16-bit code units to read from src
 * returns: pointer just past the last byte written to dest
 *
 * Exactly size code units are converted. A zero code unit does not stop
 * the conversion (it is emitted as a zero byte) and no terminator is
 * appended. Every unpaired surrogate is replaced by a single '?'.
 */
uint8_t *utf16_to_utf8(uint8_t *dest, const uint16_t *src, size_t size)
{
	/* Pending high surrogate waiting for its low half, 0 if none. */
	uint32_t code_high = 0;

	while (size--) {
		uint32_t code = *src++;

		if (code_high) {
			if (code >= 0xDC00 && code <= 0xDFFF) {
				/* Surrogate pair.  */
				code = ((code_high - 0xD800) << 10) + (code - 0xDC00) + 0x10000;

				*dest++ = (code >> 18) | 0xF0;
				*dest++ = ((code >> 12) & 0x3F) | 0x80;
				*dest++ = ((code >> 6) & 0x3F) | 0x80;
				*dest++ = (code & 0x3F) | 0x80;
			} else {
				/* Error: high surrogate without a low one.  */
				*dest++ = '?';
				/*
				 * The current unit may be valid on its own, so
				 * push it back. Both the pointer and the
				 * remaining count must be rewound, otherwise
				 * the last unit of the input is never converted.
				 */
				src--;
				size++;
			}

			code_high = 0;
		} else {
			if (code <= 0x007F) {
				*dest++ = code;
			} else if (code <= 0x07FF) {
				*dest++ = (code >> 6) | 0xC0;
				*dest++ = (code & 0x3F) | 0x80;
			} else if (code >= 0xD800 && code <= 0xDBFF) {
				/* High surrogate: wait for the next unit. */
				code_high = code;
				continue;
			} else if (code >= 0xDC00 && code <= 0xDFFF) {
				/* Error: low surrogate without a high one. */
				*dest++ = '?';
			} else {
				/*
				 * Remaining BMP code points. A single 16-bit
				 * unit can never reach 0x10000, so no 4-byte
				 * form is needed here.
				 */
				*dest++ = (code >> 12) | 0xE0;
				*dest++ = ((code >> 6) & 0x3F) | 0x80;
				*dest++ = (code & 0x3F) | 0x80;
			}
		}
	}

	/* Input ended on a high surrogate: report it like any other error. */
	if (code_high)
		*dest++ = '?';

	return dest;
}
102