1 /* LibTomCrypt, modular cryptographic library -- Tom St Denis */
2 /* SPDX-License-Identifier: Unlicense */
3
4 /* AES-NI implementation by Steffen Jaeckel */
5 /**
6 @file aesni.c
7 Implementation of AES via the AES-NI instruction on x86_64
8 */
9
10 #include "tomcrypt_private.h"
11
12 #if defined(LTC_HAS_AES_NI)
13
14 const struct ltc_cipher_descriptor aesni_desc =
15 {
16 "aes",
17 6,
18 16, 32, 16, 10,
19 aesni_setup, aesni_ecb_encrypt, aesni_ecb_decrypt, aesni_test, aesni_done, aesni_keysize,
20 NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL
21 };
22
23 #include <emmintrin.h>
24 #include <smmintrin.h>
25 #include <wmmintrin.h>
26
/*
 * Key-schedule helpers built on the SSE4.1 / AES-NI intrinsics.
 * They remain macros because the lane index and immediate operands of
 * _mm_extract_epi32(), _mm_insert_epi32() and _mm_aeskeygenassist_si128()
 * have to be compile-time constants.
 */

/* Run AESKEYGENASSIST with round constant 0 on vector `t` and return 32-bit lane `c` of the result */
#define setup_mix(t, c)    _mm_extract_epi32(_mm_aeskeygenassist_si128((t), 0), c)
/* Unaligned 128-bit load from `k`; `k` is only read, so the cast keeps it const-qualified */
#define temp_load(k)       _mm_loadu_si128((const __m128i*)(k))
/* Return `t` with its 32-bit lane 3 replaced by `k` */
#define temp_update(t, k)  _mm_insert_epi32((t), k, 3)
/* Apply AESIMC to the 128-bit value stored at `k`; `k` is dereferenced as an __m128i and is only read */
#define temp_invert(k)     _mm_aesimc_si128(*((const __m128i*)(k)))
31
32
33 static const ulong32 rcon[] = {
34 0x01UL, 0x02UL, 0x04UL, 0x08UL, 0x10UL, 0x20UL, 0x40UL, 0x80UL, 0x1BUL, 0x36UL
35 };
36
37 /**
38 Initialize the AES (Rijndael) block cipher
39 @param key The symmetric key you wish to pass
40 @param keylen The key length in bytes
41 @param num_rounds The number of rounds desired (0 for default)
42 @param skey The key in as scheduled by this function.
43 @return CRYPT_OK if successful
44 */
aesni_setup(const unsigned char * key,int keylen,int num_rounds,symmetric_key * skey)45 int aesni_setup(const unsigned char *key, int keylen, int num_rounds, symmetric_key *skey)
46 {
47 int i;
48 __m128i temp;
49 ulong32 *rk, *K;
50 ulong32 *rrk;
51 LTC_ARGCHK(key != NULL);
52 LTC_ARGCHK(skey != NULL);
53
54 if (keylen != 16 && keylen != 24 && keylen != 32) {
55 return CRYPT_INVALID_KEYSIZE;
56 }
57
58 if (num_rounds != 0 && num_rounds != (keylen / 4 + 6)) {
59 return CRYPT_INVALID_ROUNDS;
60 }
61
62 skey->rijndael.Nr = keylen / 4 + 6;
63 K = LTC_ALIGN_BUF(skey->rijndael.K, 16);
64 skey->rijndael.eK = K;
65 K += 60;
66 skey->rijndael.dK = K;
67
68 /* setup the forward key */
69 i = 0;
70 rk = skey->rijndael.eK;
71 LOAD32L(rk[0], key);
72 LOAD32L(rk[1], key + 4);
73 LOAD32L(rk[2], key + 8);
74 LOAD32L(rk[3], key + 12);
75 if (keylen == 16) {
76 temp = temp_load(key);
77 for (;;) {
78 rk[4] = rk[0] ^ setup_mix(temp, 3) ^ rcon[i];
79 rk[5] = rk[1] ^ rk[4];
80 rk[6] = rk[2] ^ rk[5];
81 rk[7] = rk[3] ^ rk[6];
82 if (++i == 10) {
83 break;
84 }
85 temp = temp_update(temp, rk[7]);
86 rk += 4;
87 }
88 } else if (keylen == 24) {
89 LOAD32L(rk[4], key + 16);
90 LOAD32L(rk[5], key + 20);
91 temp = temp_load(key + 8);
92 for (;;) {
93 rk[6] = rk[0] ^ setup_mix(temp, 3) ^ rcon[i];
94 rk[7] = rk[1] ^ rk[6];
95 rk[8] = rk[2] ^ rk[7];
96 rk[9] = rk[3] ^ rk[8];
97 if (++i == 8) {
98 break;
99 }
100 rk[10] = rk[4] ^ rk[9];
101 rk[11] = rk[5] ^ rk[10];
102 temp = temp_update(temp, rk[11]);
103 rk += 6;
104 }
105 } else if (keylen == 32) {
106 LOAD32L(rk[4], key + 16);
107 LOAD32L(rk[5], key + 20);
108 LOAD32L(rk[6], key + 24);
109 LOAD32L(rk[7], key + 28);
110 temp = temp_load(key + 16);
111 for (;;) {
112 rk[8] = rk[0] ^ setup_mix(temp, 3) ^ rcon[i];
113 rk[9] = rk[1] ^ rk[8];
114 rk[10] = rk[2] ^ rk[9];
115 rk[11] = rk[3] ^ rk[10];
116 if (++i == 7) {
117 break;
118 }
119 temp = temp_update(temp, rk[11]);
120 rk[12] = rk[4] ^ setup_mix(temp, 2);
121 rk[13] = rk[5] ^ rk[12];
122 rk[14] = rk[6] ^ rk[13];
123 rk[15] = rk[7] ^ rk[14];
124 temp = temp_update(temp, rk[15]);
125 rk += 8;
126 }
127 } else {
128 /* this can't happen */
129 /* coverity[dead_error_line] */
130 return CRYPT_ERROR;
131 }
132
133 /* setup the inverse key now */
134 rk = skey->rijndael.dK;
135 rrk = skey->rijndael.eK + skey->rijndael.Nr * 4;
136
137 /* apply the inverse MixColumn transform to all round keys but the first and the last: */
138 /* copy first */
139 *rk++ = *rrk++;
140 *rk++ = *rrk++;
141 *rk++ = *rrk++;
142 *rk = *rrk;
143 rk -= 3;
144 rrk -= 3;
145
146 for (i = 1; i < skey->rijndael.Nr; i++) {
147 rrk -= 4;
148 rk += 4;
149 temp = temp_invert(rk);
150 *((__m128i*) rk) = temp_invert(rrk);
151 }
152
153 /* copy last */
154 rrk -= 4;
155 rk += 4;
156 *rk++ = *rrk++;
157 *rk++ = *rrk++;
158 *rk++ = *rrk++;
159 *rk = *rrk;
160
161 return CRYPT_OK;
162 }
163
164 /**
165 Encrypts a block of text with AES
166 @param pt The input plaintext (16 bytes)
167 @param ct The output ciphertext (16 bytes)
168 @param skey The key as scheduled
169 @return CRYPT_OK if successful
170 */
171 #ifdef LTC_CLEAN_STACK
s_aesni_ecb_encrypt(const unsigned char * pt,unsigned char * ct,const symmetric_key * skey)172 static int s_aesni_ecb_encrypt(const unsigned char *pt, unsigned char *ct, const symmetric_key *skey)
173 #else
174 int aesni_ecb_encrypt(const unsigned char *pt, unsigned char *ct, const symmetric_key *skey)
175 #endif
176 {
177 int Nr, r;
178 const __m128i *skeys;
179 __m128i block;
180
181 LTC_ARGCHK(pt != NULL);
182 LTC_ARGCHK(ct != NULL);
183 LTC_ARGCHK(skey != NULL);
184
185 Nr = skey->rijndael.Nr;
186
187 if (Nr < 2 || Nr > 16) return CRYPT_INVALID_ROUNDS;
188
189 skeys = (__m128i*) skey->rijndael.eK;
190 block = _mm_loadu_si128((const __m128i*) (pt));
191
192 block = _mm_xor_si128(block, skeys[0]);
193 for (r = 1; r < Nr - 1; r += 2) {
194 block = _mm_aesenc_si128(block, skeys[r]);
195 block = _mm_aesenc_si128(block, skeys[r + 1]);
196 }
197 block = _mm_aesenc_si128(block, skeys[Nr - 1]);
198 block = _mm_aesenclast_si128(block, skeys[Nr]);
199
200 _mm_storeu_si128((__m128i*) ct, block);
201
202 return CRYPT_OK;
203 }
204
#ifdef LTC_CLEAN_STACK
/* Public entry point for LTC_CLEAN_STACK builds: runs the static worker, then calls burn_stack() before returning its result */
int aesni_ecb_encrypt(const unsigned char *pt, unsigned char *ct, const symmetric_key *skey)
{
    int status;

    status = s_aesni_ecb_encrypt(pt, ct, skey);
    burn_stack(sizeof(unsigned long)*8 + sizeof(unsigned long*) + sizeof(int)*2);

    return status;
}
#endif
213
214
215 /**
216 Decrypts a block of text with AES
217 @param ct The input ciphertext (16 bytes)
218 @param pt The output plaintext (16 bytes)
219 @param skey The key as scheduled
220 @return CRYPT_OK if successful
221 */
222 #ifdef LTC_CLEAN_STACK
s_aesni_ecb_decrypt(const unsigned char * ct,unsigned char * pt,const symmetric_key * skey)223 static int s_aesni_ecb_decrypt(const unsigned char *ct, unsigned char *pt, const symmetric_key *skey)
224 #else
225 int aesni_ecb_decrypt(const unsigned char *ct, unsigned char *pt, const symmetric_key *skey)
226 #endif
227 {
228 int Nr, r;
229 const __m128i *skeys;
230 __m128i block;
231
232 LTC_ARGCHK(pt != NULL);
233 LTC_ARGCHK(ct != NULL);
234 LTC_ARGCHK(skey != NULL);
235
236 Nr = skey->rijndael.Nr;
237
238 if (Nr < 2 || Nr > 16) return CRYPT_INVALID_ROUNDS;
239
240 skeys = (__m128i*) skey->rijndael.dK;
241 block = _mm_loadu_si128((const __m128i*) (ct));
242
243 block = _mm_xor_si128(block, skeys[0]);
244 for (r = 1; r < Nr - 1; r += 2) {
245 block = _mm_aesdec_si128(block, skeys[r]);
246 block = _mm_aesdec_si128(block, skeys[r + 1]);
247 }
248 block = _mm_aesdec_si128(block, skeys[Nr - 1]);
249 block = _mm_aesdeclast_si128(block, skeys[Nr]);
250
251 _mm_storeu_si128((__m128i*) pt, block);
252
253 return CRYPT_OK;
254 }
255
256
#ifdef LTC_CLEAN_STACK
/* Public entry point for LTC_CLEAN_STACK builds: runs the static worker, then calls burn_stack() before returning its result */
int aesni_ecb_decrypt(const unsigned char *ct, unsigned char *pt, const symmetric_key *skey)
{
    int status;

    status = s_aesni_ecb_decrypt(ct, pt, skey);
    burn_stack(sizeof(unsigned long)*8 + sizeof(unsigned long*) + sizeof(int)*2);

    return status;
}
#endif
265
266 /**
267 Performs a self-test of the AES block cipher
268 @return CRYPT_OK if functional, CRYPT_NOP if self-test has been disabled
269 */
aesni_test(void)270 int aesni_test(void)
271 {
272 #ifndef LTC_TEST
273 return CRYPT_NOP;
274 #else
275 int err;
276 static const struct {
277 int keylen;
278 unsigned char key[32], pt[16], ct[16];
279 } tests[] = {
280 { 16,
281 { 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
282 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f },
283 { 0x00, 0x11, 0x22, 0x33, 0x44, 0x55, 0x66, 0x77,
284 0x88, 0x99, 0xaa, 0xbb, 0xcc, 0xdd, 0xee, 0xff },
285 { 0x69, 0xc4, 0xe0, 0xd8, 0x6a, 0x7b, 0x04, 0x30,
286 0xd8, 0xcd, 0xb7, 0x80, 0x70, 0xb4, 0xc5, 0x5a }
287 }, {
288 24,
289 { 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
290 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f,
291 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17 },
292 { 0x00, 0x11, 0x22, 0x33, 0x44, 0x55, 0x66, 0x77,
293 0x88, 0x99, 0xaa, 0xbb, 0xcc, 0xdd, 0xee, 0xff },
294 { 0xdd, 0xa9, 0x7c, 0xa4, 0x86, 0x4c, 0xdf, 0xe0,
295 0x6e, 0xaf, 0x70, 0xa0, 0xec, 0x0d, 0x71, 0x91 }
296 }, {
297 32,
298 { 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
299 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f,
300 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17,
301 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f },
302 { 0x00, 0x11, 0x22, 0x33, 0x44, 0x55, 0x66, 0x77,
303 0x88, 0x99, 0xaa, 0xbb, 0xcc, 0xdd, 0xee, 0xff },
304 { 0x8e, 0xa2, 0xb7, 0xca, 0x51, 0x67, 0x45, 0xbf,
305 0xea, 0xfc, 0x49, 0x90, 0x4b, 0x49, 0x60, 0x89 }
306 }
307 };
308
309 symmetric_key key;
310 unsigned char tmp[2][16];
311 int i, y;
312
313 for (i = 0; i < (int)(sizeof(tests)/sizeof(tests[0])); i++) {
314 zeromem(&key, sizeof(key));
315 if ((err = aesni_setup(tests[i].key, tests[i].keylen, 0, &key)) != CRYPT_OK) {
316 return err;
317 }
318
319 aesni_ecb_encrypt(tests[i].pt, tmp[0], &key);
320 aesni_ecb_decrypt(tmp[0], tmp[1], &key);
321 if (compare_testvector(tmp[0], 16, tests[i].ct, 16, "AES-NI Encrypt", i) ||
322 compare_testvector(tmp[1], 16, tests[i].pt, 16, "AES-NI Decrypt", i)) {
323 return CRYPT_FAIL_TESTVECTOR;
324 }
325
326 /* now see if we can encrypt all zero bytes 1000 times, decrypt and come back where we started */
327 for (y = 0; y < 16; y++) tmp[0][y] = 0;
328 for (y = 0; y < 1000; y++) aesni_ecb_encrypt(tmp[0], tmp[0], &key);
329 for (y = 0; y < 1000; y++) aesni_ecb_decrypt(tmp[0], tmp[0], &key);
330 for (y = 0; y < 16; y++) if (tmp[0][y] != 0) return CRYPT_FAIL_TESTVECTOR;
331 }
332 return CRYPT_OK;
333 #endif
334 }
335
336
337 /** Terminate the context
338 @param skey The scheduled key
339 */
aesni_done(symmetric_key * skey)340 void aesni_done(symmetric_key *skey)
341 {
342 LTC_UNUSED_PARAM(skey);
343 }
344
345
346 /**
347 Gets suitable key size
348 @param keysize [in/out] The length of the recommended key (in bytes). This function will store the suitable size back in this variable.
349 @return CRYPT_OK if the input key size is acceptable.
350 */
aesni_keysize(int * keysize)351 int aesni_keysize(int *keysize)
352 {
353 LTC_ARGCHK(keysize != NULL);
354
355 if (*keysize < 16) {
356 return CRYPT_INVALID_KEYSIZE;
357 }
358 if (*keysize < 24) {
359 *keysize = 16;
360 return CRYPT_OK;
361 }
362 if (*keysize < 32) {
363 *keysize = 24;
364 return CRYPT_OK;
365 }
366 *keysize = 32;
367 return CRYPT_OK;
368 }
369
370 #endif
371
372