xref: /optee_os/core/lib/libtomcrypt/src/pk/asn1/der/utf8/der_decode_utf8_string.c (revision 2a65ecaf7d6f855e24ce1a117fe1931f7378f82c)
1 /* LibTomCrypt, modular cryptographic library -- Tom St Denis */
2 /* SPDX-License-Identifier: Unlicense */
3 #include "tomcrypt_private.h"
4 
5 /**
6   @file der_decode_utf8_string.c
7   ASN.1 DER, decode a UTF8 STRING, Tom St Denis
8 */
9 
10 
11 #ifdef LTC_DER
12 
13 /**
14   Decode a UTF8 STRING and recover an array of unicode characters.
15   @param in      The DER encoded UTF8 STRING
16   @param inlen   The size of the DER UTF8 STRING
17   @param out     [out] The array of unicode characters (wchar_t*)
18   @param outlen  [in/out] The number of unicode characters in the array
19   @return CRYPT_OK if successful
20 */
der_decode_utf8_string(const unsigned char * in,unsigned long inlen,wchar_t * out,unsigned long * outlen)21 int der_decode_utf8_string(const unsigned char *in,  unsigned long inlen,
22                                        wchar_t *out, unsigned long *outlen)
23 {
24    wchar_t       tmp;
25    unsigned long x, y, z, len;
26    int err;
27 
28    LTC_ARGCHK(in     != NULL);
29    LTC_ARGCHK(out    != NULL);
30    LTC_ARGCHK(outlen != NULL);
31 
32    /* must have header at least */
33    if (inlen < 2) {
34       return CRYPT_INVALID_PACKET;
35    }
36 
37    /* check for 0x0C */
38    if ((in[0] & 0x1F) != 0x0C) {
39       return CRYPT_INVALID_PACKET;
40    }
41    x = 1;
42 
43    /* get the length of the data */
44    y = inlen - x;
45    if ((err = der_decode_asn1_length(in + x, &y, &len)) != CRYPT_OK) {
46       return err;
47    }
48    x += y;
49 
50    if (len > (inlen - x)) {
51       return CRYPT_INVALID_PACKET;
52    }
53 
54    /* proceed to recover unicode characters from utf8 data.
55       for reference see Section 3 of RFC 3629:
56 
57         https://tools.ietf.org/html/rfc3629#section-3
58     */
59    len += x;
60    for (y = 0; x < len; ) {
61       /* read first byte */
62       tmp = in[x++];
63 
64       /* a unicode character is recovered from a sequence of 1 to 4 utf8 bytes.
65          the form of those bytes must match a row in the following table:
66 
67            0xxxxxxx
68            110xxxxx 10xxxxxx
69            1110xxxx 10xxxxxx 10xxxxxx
70            11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
71 
72          the number of leading ones in the first byte (0,2,3,4) determines the
73          number of remaining bytes to read (0,1,2,3)
74        */
75 
76       /* determine z, the number of leading ones.
77          this is done by left-shifting tmp, which clears the ms-bits */
78       for (z = 0; (tmp & 0x80) && (z <= 4); z++, tmp = (tmp << 1) & 0xFF);
79 
80       /* z should be in {0,2,3,4} */
81       if (z == 1 || z > 4) {
82          return CRYPT_INVALID_PACKET;
83       }
84 
85       /* right-shift tmp to restore least-sig bits */
86       tmp >>= z;
87 
88       /* now update z so it equals the number of additional bytes to read */
89       if (z > 0) { --z; }
90 
91       if (x + z > len) {
92          return CRYPT_INVALID_PACKET;
93       }
94 
95       /* read remaining bytes */
96       while (z-- != 0) {
97          if ((in[x] & 0xC0) != 0x80) {
98             return CRYPT_INVALID_PACKET;
99          }
100          tmp = (tmp << 6) | ((wchar_t)in[x++] & 0x3F);
101       }
102 
103       if (y < *outlen) {
104          out[y] = tmp;
105       }
106       y++;
107    }
108    if (y > *outlen) {
109       err = CRYPT_BUFFER_OVERFLOW;
110    } else {
111       err = CRYPT_OK;
112    }
113    *outlen = y;
114 
115    return err;
116 }
117 
118 #endif
119