xref: /optee_os/core/lib/libtomcrypt/src/ciphers/aes/aesni.c (revision 2a65ecaf7d6f855e24ce1a117fe1931f7378f82c)
1 /* LibTomCrypt, modular cryptographic library -- Tom St Denis */
2 /* SPDX-License-Identifier: Unlicense */
3 
4 /* AES-NI implementation by Steffen Jaeckel */
5 /**
6   @file aesni.c
7   Implementation of AES via the AES-NI instruction on x86_64
8 */
9 
10 #include "tomcrypt_private.h"
11 
12 #if defined(LTC_HAS_AES_NI)
13 
14 const struct ltc_cipher_descriptor aesni_desc =
15 {
16     "aes",
17     6,
18     16, 32, 16, 10,
19     aesni_setup, aesni_ecb_encrypt, aesni_ecb_decrypt, aesni_test, aesni_done, aesni_keysize,
20     NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL
21 };
22 
23 #include <emmintrin.h>
24 #include <smmintrin.h>
25 #include <wmmintrin.h>
26 
/* 32-bit lane `c` of AESKEYGENASSIST applied to `t` with a zero round constant */
#define setup_mix(t, c)    _mm_extract_epi32(_mm_aeskeygenassist_si128((t), 0), (c))
/* unaligned 128-bit load from the address `k` */
#define temp_load(k)       _mm_loadu_si128((__m128i*)(k))
/* copy of `t` with its top 32-bit lane (index 3) replaced by `k` */
#define temp_update(t, k)  _mm_insert_epi32((t), (k), 3)
/* AESIMC (InvMixColumns) of the 128-bit value that `k` points to; `k` is dereferenced as a __m128i */
#define temp_invert(k)     _mm_aesimc_si128(*((__m128i*)(k)))
31 
32 
33 static const ulong32 rcon[] = {
34     0x01UL, 0x02UL, 0x04UL, 0x08UL, 0x10UL, 0x20UL, 0x40UL, 0x80UL, 0x1BUL, 0x36UL
35 };
36 
37  /**
38     Initialize the AES (Rijndael) block cipher
39     @param key The symmetric key you wish to pass
40     @param keylen The key length in bytes
41     @param num_rounds The number of rounds desired (0 for default)
42     @param skey The key in as scheduled by this function.
43     @return CRYPT_OK if successful
44  */
aesni_setup(const unsigned char * key,int keylen,int num_rounds,symmetric_key * skey)45 int aesni_setup(const unsigned char *key, int keylen, int num_rounds, symmetric_key *skey)
46 {
47    int i;
48    __m128i temp;
49    ulong32 *rk, *K;
50    ulong32 *rrk;
51    LTC_ARGCHK(key != NULL);
52    LTC_ARGCHK(skey != NULL);
53 
54    if (keylen != 16 && keylen != 24 && keylen != 32) {
55       return CRYPT_INVALID_KEYSIZE;
56    }
57 
58    if (num_rounds != 0 && num_rounds != (keylen / 4 + 6)) {
59       return CRYPT_INVALID_ROUNDS;
60    }
61 
62    skey->rijndael.Nr = keylen / 4 + 6;
63    K = LTC_ALIGN_BUF(skey->rijndael.K, 16);
64    skey->rijndael.eK = K;
65    K += 60;
66    skey->rijndael.dK = K;
67 
68    /* setup the forward key */
69    i = 0;
70    rk = skey->rijndael.eK;
71    LOAD32L(rk[0], key);
72    LOAD32L(rk[1], key + 4);
73    LOAD32L(rk[2], key + 8);
74    LOAD32L(rk[3], key + 12);
75    if (keylen == 16) {
76       temp = temp_load(key);
77       for (;;) {
78          rk[4] = rk[0] ^ setup_mix(temp, 3) ^ rcon[i];
79          rk[5] = rk[1] ^ rk[4];
80          rk[6] = rk[2] ^ rk[5];
81          rk[7] = rk[3] ^ rk[6];
82          if (++i == 10) {
83             break;
84          }
85          temp = temp_update(temp, rk[7]);
86          rk += 4;
87       }
88    } else if (keylen == 24) {
89       LOAD32L(rk[4], key + 16);
90       LOAD32L(rk[5], key + 20);
91       temp = temp_load(key + 8);
92       for (;;) {
93          rk[6] = rk[0] ^ setup_mix(temp, 3) ^ rcon[i];
94          rk[7] = rk[1] ^ rk[6];
95          rk[8] = rk[2] ^ rk[7];
96          rk[9] = rk[3] ^ rk[8];
97          if (++i == 8) {
98             break;
99          }
100          rk[10] = rk[4] ^ rk[9];
101          rk[11] = rk[5] ^ rk[10];
102          temp = temp_update(temp, rk[11]);
103          rk += 6;
104       }
105    } else if (keylen == 32) {
106       LOAD32L(rk[4], key + 16);
107       LOAD32L(rk[5], key + 20);
108       LOAD32L(rk[6], key + 24);
109       LOAD32L(rk[7], key + 28);
110       temp = temp_load(key + 16);
111       for (;;) {
112          rk[8] = rk[0] ^ setup_mix(temp, 3) ^ rcon[i];
113          rk[9] = rk[1] ^ rk[8];
114          rk[10] = rk[2] ^ rk[9];
115          rk[11] = rk[3] ^ rk[10];
116          if (++i == 7) {
117             break;
118          }
119          temp = temp_update(temp, rk[11]);
120          rk[12] = rk[4] ^ setup_mix(temp, 2);
121          rk[13] = rk[5] ^ rk[12];
122          rk[14] = rk[6] ^ rk[13];
123          rk[15] = rk[7] ^ rk[14];
124          temp = temp_update(temp, rk[15]);
125          rk += 8;
126       }
127    } else {
128       /* this can't happen */
129       /* coverity[dead_error_line] */
130       return CRYPT_ERROR;
131    }
132 
133    /* setup the inverse key now */
134    rk = skey->rijndael.dK;
135    rrk = skey->rijndael.eK + skey->rijndael.Nr * 4;
136 
137    /* apply the inverse MixColumn transform to all round keys but the first and the last: */
138    /* copy first */
139    *rk++ = *rrk++;
140    *rk++ = *rrk++;
141    *rk++ = *rrk++;
142    *rk = *rrk;
143    rk -= 3;
144    rrk -= 3;
145 
146    for (i = 1; i < skey->rijndael.Nr; i++) {
147       rrk -= 4;
148       rk += 4;
149       temp = temp_invert(rk);
150       *((__m128i*) rk) = temp_invert(rrk);
151    }
152 
153    /* copy last */
154    rrk -= 4;
155    rk += 4;
156    *rk++ = *rrk++;
157    *rk++ = *rrk++;
158    *rk++ = *rrk++;
159    *rk = *rrk;
160 
161    return CRYPT_OK;
162 }
163 
164 /**
165   Encrypts a block of text with AES
166   @param pt The input plaintext (16 bytes)
167   @param ct The output ciphertext (16 bytes)
168   @param skey The key as scheduled
169   @return CRYPT_OK if successful
170 */
171 #ifdef LTC_CLEAN_STACK
s_aesni_ecb_encrypt(const unsigned char * pt,unsigned char * ct,const symmetric_key * skey)172 static int s_aesni_ecb_encrypt(const unsigned char *pt, unsigned char *ct, const symmetric_key *skey)
173 #else
174 int aesni_ecb_encrypt(const unsigned char *pt, unsigned char *ct, const symmetric_key *skey)
175 #endif
176 {
177    int Nr, r;
178    const __m128i *skeys;
179    __m128i block;
180 
181    LTC_ARGCHK(pt != NULL);
182    LTC_ARGCHK(ct != NULL);
183    LTC_ARGCHK(skey != NULL);
184 
185    Nr = skey->rijndael.Nr;
186 
187    if (Nr < 2 || Nr > 16) return CRYPT_INVALID_ROUNDS;
188 
189    skeys = (__m128i*) skey->rijndael.eK;
190    block = _mm_loadu_si128((const __m128i*) (pt));
191 
192    block = _mm_xor_si128(block, skeys[0]);
193    for (r = 1; r < Nr - 1; r += 2) {
194       block = _mm_aesenc_si128(block, skeys[r]);
195       block = _mm_aesenc_si128(block, skeys[r + 1]);
196    }
197    block = _mm_aesenc_si128(block, skeys[Nr - 1]);
198    block = _mm_aesenclast_si128(block, skeys[Nr]);
199 
200    _mm_storeu_si128((__m128i*) ct, block);
201 
202    return CRYPT_OK;
203 }
204 
#ifdef LTC_CLEAN_STACK
/*
 * Public encrypt entry point used when LTC_CLEAN_STACK is defined:
 * runs s_aesni_ecb_encrypt(), then calls burn_stack() before handing
 * back the status of the encryption.
 */
int aesni_ecb_encrypt(const unsigned char *pt, unsigned char *ct, const symmetric_key *skey)
{
   int status;

   status = s_aesni_ecb_encrypt(pt, ct, skey);
   burn_stack(sizeof(unsigned long)*8 + sizeof(unsigned long*) + sizeof(int)*2);

   return status;
}
#endif
213 
214 
215 /**
216   Decrypts a block of text with AES
217   @param ct The input ciphertext (16 bytes)
218   @param pt The output plaintext (16 bytes)
219   @param skey The key as scheduled
220   @return CRYPT_OK if successful
221 */
222 #ifdef LTC_CLEAN_STACK
s_aesni_ecb_decrypt(const unsigned char * ct,unsigned char * pt,const symmetric_key * skey)223 static int s_aesni_ecb_decrypt(const unsigned char *ct, unsigned char *pt, const symmetric_key *skey)
224 #else
225 int aesni_ecb_decrypt(const unsigned char *ct, unsigned char *pt, const symmetric_key *skey)
226 #endif
227 {
228    int Nr, r;
229    const __m128i *skeys;
230    __m128i block;
231 
232    LTC_ARGCHK(pt != NULL);
233    LTC_ARGCHK(ct != NULL);
234    LTC_ARGCHK(skey != NULL);
235 
236    Nr = skey->rijndael.Nr;
237 
238    if (Nr < 2 || Nr > 16) return CRYPT_INVALID_ROUNDS;
239 
240    skeys = (__m128i*) skey->rijndael.dK;
241    block = _mm_loadu_si128((const __m128i*) (ct));
242 
243    block = _mm_xor_si128(block, skeys[0]);
244    for (r = 1; r < Nr - 1; r += 2) {
245       block = _mm_aesdec_si128(block, skeys[r]);
246       block = _mm_aesdec_si128(block, skeys[r + 1]);
247    }
248    block = _mm_aesdec_si128(block, skeys[Nr - 1]);
249    block = _mm_aesdeclast_si128(block, skeys[Nr]);
250 
251    _mm_storeu_si128((__m128i*) pt, block);
252 
253    return CRYPT_OK;
254 }
255 
256 
#ifdef LTC_CLEAN_STACK
/*
 * Public decrypt entry point used when LTC_CLEAN_STACK is defined:
 * runs s_aesni_ecb_decrypt(), then calls burn_stack() before handing
 * back the status of the decryption.
 */
int aesni_ecb_decrypt(const unsigned char *ct, unsigned char *pt, const symmetric_key *skey)
{
   int status;

   status = s_aesni_ecb_decrypt(ct, pt, skey);
   burn_stack(sizeof(unsigned long)*8 + sizeof(unsigned long*) + sizeof(int)*2);

   return status;
}
#endif
265 
266 /**
267   Performs a self-test of the AES block cipher
268   @return CRYPT_OK if functional, CRYPT_NOP if self-test has been disabled
269 */
aesni_test(void)270 int aesni_test(void)
271 {
272  #ifndef LTC_TEST
273     return CRYPT_NOP;
274  #else
275  int err;
276  static const struct {
277      int keylen;
278      unsigned char key[32], pt[16], ct[16];
279  } tests[] = {
280     { 16,
281       { 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
282         0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f },
283       { 0x00, 0x11, 0x22, 0x33, 0x44, 0x55, 0x66, 0x77,
284         0x88, 0x99, 0xaa, 0xbb, 0xcc, 0xdd, 0xee, 0xff },
285       { 0x69, 0xc4, 0xe0, 0xd8, 0x6a, 0x7b, 0x04, 0x30,
286         0xd8, 0xcd, 0xb7, 0x80, 0x70, 0xb4, 0xc5, 0x5a }
287     }, {
288       24,
289       { 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
290         0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f,
291         0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17 },
292       { 0x00, 0x11, 0x22, 0x33, 0x44, 0x55, 0x66, 0x77,
293         0x88, 0x99, 0xaa, 0xbb, 0xcc, 0xdd, 0xee, 0xff },
294       { 0xdd, 0xa9, 0x7c, 0xa4, 0x86, 0x4c, 0xdf, 0xe0,
295         0x6e, 0xaf, 0x70, 0xa0, 0xec, 0x0d, 0x71, 0x91 }
296     }, {
297       32,
298       { 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
299         0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f,
300         0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17,
301         0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f },
302       { 0x00, 0x11, 0x22, 0x33, 0x44, 0x55, 0x66, 0x77,
303         0x88, 0x99, 0xaa, 0xbb, 0xcc, 0xdd, 0xee, 0xff },
304       { 0x8e, 0xa2, 0xb7, 0xca, 0x51, 0x67, 0x45, 0xbf,
305         0xea, 0xfc, 0x49, 0x90, 0x4b, 0x49, 0x60, 0x89 }
306     }
307  };
308 
309   symmetric_key key;
310   unsigned char tmp[2][16];
311   int i, y;
312 
313   for (i = 0; i < (int)(sizeof(tests)/sizeof(tests[0])); i++) {
314     zeromem(&key, sizeof(key));
315     if ((err = aesni_setup(tests[i].key, tests[i].keylen, 0, &key)) != CRYPT_OK) {
316        return err;
317     }
318 
319     aesni_ecb_encrypt(tests[i].pt, tmp[0], &key);
320     aesni_ecb_decrypt(tmp[0], tmp[1], &key);
321     if (compare_testvector(tmp[0], 16, tests[i].ct, 16, "AES-NI Encrypt", i) ||
322           compare_testvector(tmp[1], 16, tests[i].pt, 16, "AES-NI Decrypt", i)) {
323         return CRYPT_FAIL_TESTVECTOR;
324     }
325 
326     /* now see if we can encrypt all zero bytes 1000 times, decrypt and come back where we started */
327     for (y = 0; y < 16; y++) tmp[0][y] = 0;
328     for (y = 0; y < 1000; y++) aesni_ecb_encrypt(tmp[0], tmp[0], &key);
329     for (y = 0; y < 1000; y++) aesni_ecb_decrypt(tmp[0], tmp[0], &key);
330     for (y = 0; y < 16; y++) if (tmp[0][y] != 0) return CRYPT_FAIL_TESTVECTOR;
331   }
332   return CRYPT_OK;
333  #endif
334 }
335 
336 
337 /** Terminate the context
338    @param skey    The scheduled key
339 */
aesni_done(symmetric_key * skey)340 void aesni_done(symmetric_key *skey)
341 {
342   LTC_UNUSED_PARAM(skey);
343 }
344 
345 
346 /**
347   Gets suitable key size
348   @param keysize [in/out] The length of the recommended key (in bytes).  This function will store the suitable size back in this variable.
349   @return CRYPT_OK if the input key size is acceptable.
350 */
aesni_keysize(int * keysize)351 int aesni_keysize(int *keysize)
352 {
353    LTC_ARGCHK(keysize != NULL);
354 
355    if (*keysize < 16) {
356       return CRYPT_INVALID_KEYSIZE;
357    }
358    if (*keysize < 24) {
359       *keysize = 16;
360       return CRYPT_OK;
361    }
362    if (*keysize < 32) {
363       *keysize = 24;
364       return CRYPT_OK;
365    }
366    *keysize = 32;
367    return CRYPT_OK;
368 }
369 
370 #endif
371 
372