1*407d40f4SRob Clark /* 2*407d40f4SRob Clark * charset conversion utils 3*407d40f4SRob Clark * 4*407d40f4SRob Clark * Copyright (c) 2017 Rob Clark 5*407d40f4SRob Clark * 6*407d40f4SRob Clark * SPDX-License-Identifier: GPL-2.0+ 7*407d40f4SRob Clark */ 8*407d40f4SRob Clark 9*407d40f4SRob Clark #include <common.h> 10*407d40f4SRob Clark #include <charset.h> 11*407d40f4SRob Clark #include <malloc.h> 12*407d40f4SRob Clark 13*407d40f4SRob Clark /* 14*407d40f4SRob Clark * utf8/utf16 conversion mostly lifted from grub 15*407d40f4SRob Clark */ 16*407d40f4SRob Clark utf16_strlen(const uint16_t * in)17*407d40f4SRob Clarksize_t utf16_strlen(const uint16_t *in) 18*407d40f4SRob Clark { 19*407d40f4SRob Clark size_t i; 20*407d40f4SRob Clark for (i = 0; in[i]; i++); 21*407d40f4SRob Clark return i; 22*407d40f4SRob Clark } 23*407d40f4SRob Clark utf16_strnlen(const uint16_t * in,size_t count)24*407d40f4SRob Clarksize_t utf16_strnlen(const uint16_t *in, size_t count) 25*407d40f4SRob Clark { 26*407d40f4SRob Clark size_t i; 27*407d40f4SRob Clark for (i = 0; count-- && in[i]; i++); 28*407d40f4SRob Clark return i; 29*407d40f4SRob Clark } 30*407d40f4SRob Clark utf16_strcpy(uint16_t * dest,const uint16_t * src)31*407d40f4SRob Clarkuint16_t *utf16_strcpy(uint16_t *dest, const uint16_t *src) 32*407d40f4SRob Clark { 33*407d40f4SRob Clark uint16_t *tmp = dest; 34*407d40f4SRob Clark 35*407d40f4SRob Clark while ((*dest++ = *src++) != '\0') 36*407d40f4SRob Clark /* nothing */; 37*407d40f4SRob Clark return tmp; 38*407d40f4SRob Clark 39*407d40f4SRob Clark } 40*407d40f4SRob Clark utf16_strdup(const uint16_t * s)41*407d40f4SRob Clarkuint16_t *utf16_strdup(const uint16_t *s) 42*407d40f4SRob Clark { 43*407d40f4SRob Clark uint16_t *new; 44*407d40f4SRob Clark if (!s || !(new = malloc((utf16_strlen(s) + 1) * 2))) 45*407d40f4SRob Clark return NULL; 46*407d40f4SRob Clark utf16_strcpy(new, s); 47*407d40f4SRob Clark return new; 48*407d40f4SRob Clark } 49*407d40f4SRob Clark 50*407d40f4SRob Clark /* Convert UTF-16 
/**
 * utf16_to_utf8() - Convert UTF-16 to UTF-8.
 * @dest:	output buffer for the UTF-8 bytes
 * @src:	UTF-16 input
 * @size:	number of 16-bit code units to read from @src
 *
 * Exactly @size code units are consumed; a 0x0000 unit does not stop the
 * conversion, it is emitted as a zero byte. No terminator is appended to the
 * output. Every surrogate that is not part of a valid high/low pair is
 * replaced by a single '?'.
 *
 * No bounds checking is done on @dest. The worst case is three output bytes
 * per input code unit (a surrogate pair yields four bytes for two units).
 *
 * Return: pointer to the byte following the last one written to @dest
 */
uint8_t *utf16_to_utf8(uint8_t *dest, const uint16_t *src, size_t size)
{
	while (size--) {
		uint32_t code = *src++;

		if (code >= 0xD800 && code <= 0xDBFF) {
			/*
			 * High surrogate: only valid when a low surrogate
			 * follows. Peek at the next unit instead of consuming
			 * it, so that a unit which turns out not to be a low
			 * surrogate is converted on its own in the next
			 * iteration and the unit count stays correct.
			 */
			if (size && *src >= 0xDC00 && *src <= 0xDFFF) {
				uint32_t low = *src++;

				size--;
				code = ((code - 0xD800) << 10) +
				       (low - 0xDC00) + 0x10000;

				*dest++ = (code >> 18) | 0xF0;
				*dest++ = ((code >> 12) & 0x3F) | 0x80;
				*dest++ = ((code >> 6) & 0x3F) | 0x80;
				*dest++ = (code & 0x3F) | 0x80;
			} else {
				/* Unpaired high surrogate (also at end of input) */
				*dest++ = '?';
			}
		} else if (code >= 0xDC00 && code <= 0xDFFF) {
			/* Low surrogate without a preceding high surrogate */
			*dest++ = '?';
		} else if (code <= 0x007F) {
			*dest++ = code;
		} else if (code <= 0x07FF) {
			*dest++ = (code >> 6) | 0xC0;
			*dest++ = (code & 0x3F) | 0x80;
		} else {
			/* Rest of the BMP; a 16-bit unit cannot exceed 0xFFFF */
			*dest++ = (code >> 12) | 0xE0;
			*dest++ = ((code >> 6) & 0x3F) | 0x80;
			*dest++ = (code & 0x3F) | 0x80;
		}
	}

	return dest;
}