1 /* 2 * Core bignum functions 3 * 4 * Copyright The Mbed TLS Contributors 5 * SPDX-License-Identifier: Apache-2.0 OR GPL-2.0-or-later 6 */ 7 8 #include "common.h" 9 10 #if defined(MBEDTLS_BIGNUM_C) 11 12 #include <string.h> 13 14 #include "mbedtls/error.h" 15 #include "mbedtls/platform_util.h" 16 #include "constant_time_internal.h" 17 18 #include "mbedtls/platform.h" 19 20 #include "bignum_core.h" 21 #include "bn_mul.h" 22 #include "constant_time_internal.h" 23 24 size_t mbedtls_mpi_core_clz(mbedtls_mpi_uint a) 25 { 26 #if defined(__has_builtin) 27 #if (MBEDTLS_MPI_UINT_MAX == UINT_MAX) && __has_builtin(__builtin_clz) 28 #define core_clz __builtin_clz 29 #elif (MBEDTLS_MPI_UINT_MAX == ULONG_MAX) && __has_builtin(__builtin_clzl) 30 #define core_clz __builtin_clzl 31 #elif (MBEDTLS_MPI_UINT_MAX == ULLONG_MAX) && __has_builtin(__builtin_clzll) 32 #define core_clz __builtin_clzll 33 #endif 34 #endif 35 #if defined(core_clz) 36 return (size_t) core_clz(a); 37 #else 38 size_t j; 39 mbedtls_mpi_uint mask = (mbedtls_mpi_uint) 1 << (biL - 1); 40 41 for (j = 0; j < biL; j++) { 42 if (a & mask) { 43 break; 44 } 45 46 mask >>= 1; 47 } 48 49 return j; 50 #endif 51 } 52 53 size_t mbedtls_mpi_core_bitlen(const mbedtls_mpi_uint *A, size_t A_limbs) 54 { 55 int i; 56 size_t j; 57 58 for (i = ((int) A_limbs) - 1; i >= 0; i--) { 59 if (A[i] != 0) { 60 j = biL - mbedtls_mpi_core_clz(A[i]); 61 return (i * biL) + j; 62 } 63 } 64 65 return 0; 66 } 67 68 static mbedtls_mpi_uint mpi_bigendian_to_host(mbedtls_mpi_uint a) 69 { 70 if (MBEDTLS_IS_BIG_ENDIAN) { 71 /* Nothing to do on bigendian systems. */ 72 return a; 73 } else { 74 #if defined(MBEDTLS_HAVE_INT32) 75 return (mbedtls_mpi_uint) MBEDTLS_BSWAP32(a); 76 #elif defined(MBEDTLS_HAVE_INT64) 77 return (mbedtls_mpi_uint) MBEDTLS_BSWAP64(a); 78 #endif 79 } 80 } 81 82 void mbedtls_mpi_core_bigendian_to_host(mbedtls_mpi_uint *A, 83 size_t A_limbs) 84 { 85 mbedtls_mpi_uint *cur_limb_left; 86 mbedtls_mpi_uint *cur_limb_right; 87 if (A_limbs == 0) { 88 return; 89 } 90 91 /* 92 * Traverse limbs and 93 * - adapt byte-order in each limb 94 * - swap the limbs themselves. 95 * For that, simultaneously traverse the limbs from left to right 96 * and from right to left, as long as the left index is not bigger 97 * than the right index (it's not a problem if limbs is odd and the 98 * indices coincide in the last iteration). 99 */ 100 for (cur_limb_left = A, cur_limb_right = A + (A_limbs - 1); 101 cur_limb_left <= cur_limb_right; 102 cur_limb_left++, cur_limb_right--) { 103 mbedtls_mpi_uint tmp; 104 /* Note that if cur_limb_left == cur_limb_right, 105 * this code effectively swaps the bytes only once. */ 106 tmp = mpi_bigendian_to_host(*cur_limb_left); 107 *cur_limb_left = mpi_bigendian_to_host(*cur_limb_right); 108 *cur_limb_right = tmp; 109 } 110 } 111 112 /* Whether min <= A, in constant time. 113 * A_limbs must be at least 1. */ 114 mbedtls_ct_condition_t mbedtls_mpi_core_uint_le_mpi(mbedtls_mpi_uint min, 115 const mbedtls_mpi_uint *A, 116 size_t A_limbs) 117 { 118 /* min <= least significant limb? */ 119 mbedtls_ct_condition_t min_le_lsl = mbedtls_ct_uint_ge(A[0], min); 120 121 /* limbs other than the least significant one are all zero? */ 122 mbedtls_ct_condition_t msll_mask = MBEDTLS_CT_FALSE; 123 for (size_t i = 1; i < A_limbs; i++) { 124 msll_mask = mbedtls_ct_bool_or(msll_mask, mbedtls_ct_bool(A[i])); 125 } 126 127 /* min <= A iff the lowest limb of A is >= min or the other limbs 128 * are not all zero. */ 129 return mbedtls_ct_bool_or(msll_mask, min_le_lsl); 130 } 131 132 mbedtls_ct_condition_t mbedtls_mpi_core_lt_ct(const mbedtls_mpi_uint *A, 133 const mbedtls_mpi_uint *B, 134 size_t limbs) 135 { 136 mbedtls_ct_condition_t ret = MBEDTLS_CT_FALSE, cond = MBEDTLS_CT_FALSE, done = MBEDTLS_CT_FALSE; 137 138 for (size_t i = limbs; i > 0; i--) { 139 /* 140 * If B[i - 1] < A[i - 1] then A < B is false and the result must 141 * remain 0. 142 * 143 * Again even if we can make a decision, we just mark the result and 144 * the fact that we are done and continue looping. 145 */ 146 cond = mbedtls_ct_uint_lt(B[i - 1], A[i - 1]); 147 done = mbedtls_ct_bool_or(done, cond); 148 149 /* 150 * If A[i - 1] < B[i - 1] then A < B is true. 151 * 152 * Again even if we can make a decision, we just mark the result and 153 * the fact that we are done and continue looping. 154 */ 155 cond = mbedtls_ct_uint_lt(A[i - 1], B[i - 1]); 156 ret = mbedtls_ct_bool_or(ret, mbedtls_ct_bool_and(cond, mbedtls_ct_bool_not(done))); 157 done = mbedtls_ct_bool_or(done, cond); 158 } 159 160 /* 161 * If all the limbs were equal, then the numbers are equal, A < B is false 162 * and leaving the result 0 is correct. 163 */ 164 165 return ret; 166 } 167 168 void mbedtls_mpi_core_cond_assign(mbedtls_mpi_uint *X, 169 const mbedtls_mpi_uint *A, 170 size_t limbs, 171 mbedtls_ct_condition_t assign) 172 { 173 if (X == A) { 174 return; 175 } 176 177 /* This function is very performance-sensitive for RSA. For this reason 178 * we have the loop below, instead of calling mbedtls_ct_memcpy_if 179 * (this is more optimal since here we don't have to handle the case where 180 * we copy awkwardly sized data). 181 */ 182 for (size_t i = 0; i < limbs; i++) { 183 X[i] = mbedtls_ct_mpi_uint_if(assign, A[i], X[i]); 184 } 185 } 186 187 void mbedtls_mpi_core_cond_swap(mbedtls_mpi_uint *X, 188 mbedtls_mpi_uint *Y, 189 size_t limbs, 190 mbedtls_ct_condition_t swap) 191 { 192 if (X == Y) { 193 return; 194 } 195 196 for (size_t i = 0; i < limbs; i++) { 197 mbedtls_mpi_uint tmp = X[i]; 198 X[i] = mbedtls_ct_mpi_uint_if(swap, Y[i], X[i]); 199 Y[i] = mbedtls_ct_mpi_uint_if(swap, tmp, Y[i]); 200 } 201 } 202 203 int mbedtls_mpi_core_read_le(mbedtls_mpi_uint *X, 204 size_t X_limbs, 205 const unsigned char *input, 206 size_t input_length) 207 { 208 const size_t limbs = CHARS_TO_LIMBS(input_length); 209 210 if (X_limbs < limbs) { 211 return MBEDTLS_ERR_MPI_BUFFER_TOO_SMALL; 212 } 213 214 if (X != NULL) { 215 memset(X, 0, X_limbs * ciL); 216 217 for (size_t i = 0; i < input_length; i++) { 218 size_t offset = ((i % ciL) << 3); 219 X[i / ciL] |= ((mbedtls_mpi_uint) input[i]) << offset; 220 } 221 } 222 223 return 0; 224 } 225 226 int mbedtls_mpi_core_read_be(mbedtls_mpi_uint *X, 227 size_t X_limbs, 228 const unsigned char *input, 229 size_t input_length) 230 { 231 const size_t limbs = CHARS_TO_LIMBS(input_length); 232 233 if (X_limbs < limbs) { 234 return MBEDTLS_ERR_MPI_BUFFER_TOO_SMALL; 235 } 236 237 /* If X_limbs is 0, input_length must also be 0 (from previous test). 238 * Nothing to do. */ 239 if (X_limbs == 0) { 240 return 0; 241 } 242 243 memset(X, 0, X_limbs * ciL); 244 245 /* memcpy() with (NULL, 0) is undefined behaviour */ 246 if (input_length != 0) { 247 size_t overhead = (X_limbs * ciL) - input_length; 248 unsigned char *Xp = (unsigned char *) X; 249 memcpy(Xp + overhead, input, input_length); 250 } 251 252 mbedtls_mpi_core_bigendian_to_host(X, X_limbs); 253 254 return 0; 255 } 256 257 int mbedtls_mpi_core_write_le(const mbedtls_mpi_uint *A, 258 size_t A_limbs, 259 unsigned char *output, 260 size_t output_length) 261 { 262 size_t stored_bytes = A_limbs * ciL; 263 size_t bytes_to_copy; 264 265 if (stored_bytes < output_length) { 266 bytes_to_copy = stored_bytes; 267 } else { 268 bytes_to_copy = output_length; 269 270 /* The output buffer is smaller than the allocated size of A. 271 * However A may fit if its leading bytes are zero. */ 272 for (size_t i = bytes_to_copy; i < stored_bytes; i++) { 273 if (GET_BYTE(A, i) != 0) { 274 return MBEDTLS_ERR_MPI_BUFFER_TOO_SMALL; 275 } 276 } 277 } 278 279 for (size_t i = 0; i < bytes_to_copy; i++) { 280 output[i] = GET_BYTE(A, i); 281 } 282 283 if (stored_bytes < output_length) { 284 /* Write trailing 0 bytes */ 285 memset(output + stored_bytes, 0, output_length - stored_bytes); 286 } 287 288 return 0; 289 } 290 291 int mbedtls_mpi_core_write_be(const mbedtls_mpi_uint *X, 292 size_t X_limbs, 293 unsigned char *output, 294 size_t output_length) 295 { 296 size_t stored_bytes; 297 size_t bytes_to_copy; 298 unsigned char *p; 299 300 stored_bytes = X_limbs * ciL; 301 302 if (stored_bytes < output_length) { 303 /* There is enough space in the output buffer. Write initial 304 * null bytes and record the position at which to start 305 * writing the significant bytes. In this case, the execution 306 * trace of this function does not depend on the value of the 307 * number. */ 308 bytes_to_copy = stored_bytes; 309 p = output + output_length - stored_bytes; 310 memset(output, 0, output_length - stored_bytes); 311 } else { 312 /* The output buffer is smaller than the allocated size of X. 313 * However X may fit if its leading bytes are zero. */ 314 bytes_to_copy = output_length; 315 p = output; 316 for (size_t i = bytes_to_copy; i < stored_bytes; i++) { 317 if (GET_BYTE(X, i) != 0) { 318 return MBEDTLS_ERR_MPI_BUFFER_TOO_SMALL; 319 } 320 } 321 } 322 323 for (size_t i = 0; i < bytes_to_copy; i++) { 324 p[bytes_to_copy - i - 1] = GET_BYTE(X, i); 325 } 326 327 return 0; 328 } 329 330 void mbedtls_mpi_core_shift_r(mbedtls_mpi_uint *X, size_t limbs, 331 size_t count) 332 { 333 size_t i, v0, v1; 334 mbedtls_mpi_uint r0 = 0, r1; 335 336 v0 = count / biL; 337 v1 = count & (biL - 1); 338 339 if (v0 > limbs || (v0 == limbs && v1 > 0)) { 340 memset(X, 0, limbs * ciL); 341 return; 342 } 343 344 /* 345 * shift by count / limb_size 346 */ 347 if (v0 > 0) { 348 for (i = 0; i < limbs - v0; i++) { 349 X[i] = X[i + v0]; 350 } 351 352 for (; i < limbs; i++) { 353 X[i] = 0; 354 } 355 } 356 357 /* 358 * shift by count % limb_size 359 */ 360 if (v1 > 0) { 361 for (i = limbs; i > 0; i--) { 362 r1 = X[i - 1] << (biL - v1); 363 X[i - 1] >>= v1; 364 X[i - 1] |= r0; 365 r0 = r1; 366 } 367 } 368 } 369 370 void mbedtls_mpi_core_shift_l(mbedtls_mpi_uint *X, size_t limbs, 371 size_t count) 372 { 373 size_t i, v0, v1; 374 mbedtls_mpi_uint r0 = 0, r1; 375 376 v0 = count / (biL); 377 v1 = count & (biL - 1); 378 379 /* 380 * shift by count / limb_size 381 */ 382 if (v0 > 0) { 383 for (i = limbs; i > v0; i--) { 384 X[i - 1] = X[i - v0 - 1]; 385 } 386 387 for (; i > 0; i--) { 388 X[i - 1] = 0; 389 } 390 } 391 392 /* 393 * shift by count % limb_size 394 */ 395 if (v1 > 0) { 396 for (i = v0; i < limbs; i++) { 397 r1 = X[i] >> (biL - v1); 398 X[i] <<= v1; 399 X[i] |= r0; 400 r0 = r1; 401 } 402 } 403 } 404 405 mbedtls_mpi_uint mbedtls_mpi_core_add(mbedtls_mpi_uint *X, 406 const mbedtls_mpi_uint *A, 407 const mbedtls_mpi_uint *B, 408 size_t limbs) 409 { 410 mbedtls_mpi_uint c = 0; 411 412 for (size_t i = 0; i < limbs; i++) { 413 mbedtls_mpi_uint t = c + A[i]; 414 c = (t < A[i]); 415 t += B[i]; 416 c += (t < B[i]); 417 X[i] = t; 418 } 419 420 return c; 421 } 422 423 mbedtls_mpi_uint mbedtls_mpi_core_add_if(mbedtls_mpi_uint *X, 424 const mbedtls_mpi_uint *A, 425 size_t limbs, 426 unsigned cond) 427 { 428 mbedtls_mpi_uint c = 0; 429 430 mbedtls_ct_condition_t do_add = mbedtls_ct_bool(cond); 431 432 for (size_t i = 0; i < limbs; i++) { 433 mbedtls_mpi_uint add = mbedtls_ct_mpi_uint_if_else_0(do_add, A[i]); 434 mbedtls_mpi_uint t = c + X[i]; 435 c = (t < X[i]); 436 t += add; 437 c += (t < add); 438 X[i] = t; 439 } 440 441 return c; 442 } 443 444 mbedtls_mpi_uint mbedtls_mpi_core_sub(mbedtls_mpi_uint *X, 445 const mbedtls_mpi_uint *A, 446 const mbedtls_mpi_uint *B, 447 size_t limbs) 448 { 449 mbedtls_mpi_uint c = 0; 450 451 for (size_t i = 0; i < limbs; i++) { 452 mbedtls_mpi_uint z = (A[i] < c); 453 mbedtls_mpi_uint t = A[i] - c; 454 c = (t < B[i]) + z; 455 X[i] = t - B[i]; 456 } 457 458 return c; 459 } 460 461 mbedtls_mpi_uint mbedtls_mpi_core_mla(mbedtls_mpi_uint *d, size_t d_len, 462 const mbedtls_mpi_uint *s, size_t s_len, 463 mbedtls_mpi_uint b) 464 { 465 mbedtls_mpi_uint c = 0; /* carry */ 466 /* 467 * It is a documented precondition of this function that d_len >= s_len. 468 * If that's not the case, we swap these round: this turns what would be 469 * a buffer overflow into an incorrect result. 470 */ 471 if (d_len < s_len) { 472 s_len = d_len; 473 } 474 size_t excess_len = d_len - s_len; 475 size_t steps_x8 = s_len / 8; 476 size_t steps_x1 = s_len & 7; 477 478 while (steps_x8--) { 479 MULADDC_X8_INIT 480 MULADDC_X8_CORE 481 MULADDC_X8_STOP 482 } 483 484 while (steps_x1--) { 485 MULADDC_X1_INIT 486 MULADDC_X1_CORE 487 MULADDC_X1_STOP 488 } 489 490 while (excess_len--) { 491 *d += c; 492 c = (*d < c); 493 d++; 494 } 495 496 return c; 497 } 498 499 void mbedtls_mpi_core_mul(mbedtls_mpi_uint *X, 500 const mbedtls_mpi_uint *A, size_t A_limbs, 501 const mbedtls_mpi_uint *B, size_t B_limbs) 502 { 503 memset(X, 0, (A_limbs + B_limbs) * ciL); 504 505 for (size_t i = 0; i < B_limbs; i++) { 506 (void) mbedtls_mpi_core_mla(X + i, A_limbs + 1, A, A_limbs, B[i]); 507 } 508 } 509 510 /* 511 * Fast Montgomery initialization (thanks to Tom St Denis). 512 */ 513 mbedtls_mpi_uint mbedtls_mpi_core_montmul_init(const mbedtls_mpi_uint *N) 514 { 515 mbedtls_mpi_uint x = N[0]; 516 517 x += ((N[0] + 2) & 4) << 1; 518 519 for (unsigned int i = biL; i >= 8; i /= 2) { 520 x *= (2 - (N[0] * x)); 521 } 522 523 return ~x + 1; 524 } 525 526 void mbedtls_mpi_core_montmul(mbedtls_mpi_uint *X, 527 const mbedtls_mpi_uint *A, 528 const mbedtls_mpi_uint *B, 529 size_t B_limbs, 530 const mbedtls_mpi_uint *N, 531 size_t AN_limbs, 532 mbedtls_mpi_uint mm, 533 mbedtls_mpi_uint *T) 534 { 535 memset(T, 0, (2 * AN_limbs + 1) * ciL); 536 537 for (size_t i = 0; i < AN_limbs; i++) { 538 /* T = (T + u0*B + u1*N) / 2^biL */ 539 mbedtls_mpi_uint u0 = A[i]; 540 mbedtls_mpi_uint u1 = (T[0] + u0 * B[0]) * mm; 541 542 (void) mbedtls_mpi_core_mla(T, AN_limbs + 2, B, B_limbs, u0); 543 (void) mbedtls_mpi_core_mla(T, AN_limbs + 2, N, AN_limbs, u1); 544 545 T++; 546 } 547 548 /* 549 * The result we want is (T >= N) ? T - N : T. 550 * 551 * For better constant-time properties in this function, we always do the 552 * subtraction, with the result in X. 553 * 554 * We also look to see if there was any carry in the final additions in the 555 * loop above. 556 */ 557 558 mbedtls_mpi_uint carry = T[AN_limbs]; 559 mbedtls_mpi_uint borrow = mbedtls_mpi_core_sub(X, T, N, AN_limbs); 560 561 /* 562 * Using R as the Montgomery radix (auxiliary modulus) i.e. 2^(biL*AN_limbs): 563 * 564 * T can be in one of 3 ranges: 565 * 566 * 1) T < N : (carry, borrow) = (0, 1): we want T 567 * 2) N <= T < R : (carry, borrow) = (0, 0): we want X 568 * 3) T >= R : (carry, borrow) = (1, 1): we want X 569 * 570 * and (carry, borrow) = (1, 0) can't happen. 571 * 572 * So the correct return value is already in X if (carry ^ borrow) = 0, 573 * but is in (the lower AN_limbs limbs of) T if (carry ^ borrow) = 1. 574 */ 575 mbedtls_ct_memcpy_if(mbedtls_ct_bool(carry ^ borrow), 576 (unsigned char *) X, 577 (unsigned char *) T, 578 NULL, 579 AN_limbs * sizeof(mbedtls_mpi_uint)); 580 } 581 582 int mbedtls_mpi_core_get_mont_r2_unsafe(mbedtls_mpi *X, 583 const mbedtls_mpi *N) 584 { 585 int ret = MBEDTLS_ERR_ERROR_CORRUPTION_DETECTED; 586 587 MBEDTLS_MPI_CHK(mbedtls_mpi_lset(X, 1)); 588 MBEDTLS_MPI_CHK(mbedtls_mpi_shift_l(X, N->n * 2 * biL)); 589 MBEDTLS_MPI_CHK(mbedtls_mpi_mod_mpi(X, X, N)); 590 MBEDTLS_MPI_CHK(mbedtls_mpi_shrink(X, N->n)); 591 592 cleanup: 593 return ret; 594 } 595 596 MBEDTLS_STATIC_TESTABLE 597 void mbedtls_mpi_core_ct_uint_table_lookup(mbedtls_mpi_uint *dest, 598 const mbedtls_mpi_uint *table, 599 size_t limbs, 600 size_t count, 601 size_t index) 602 { 603 for (size_t i = 0; i < count; i++, table += limbs) { 604 mbedtls_ct_condition_t assign = mbedtls_ct_uint_eq(i, index); 605 mbedtls_mpi_core_cond_assign(dest, table, limbs, assign); 606 } 607 } 608 609 /* Fill X with n_bytes random bytes. 610 * X must already have room for those bytes. 611 * The ordering of the bytes returned from the RNG is suitable for 612 * deterministic ECDSA (see RFC 6979 §3.3 and the specification of 613 * mbedtls_mpi_core_random()). 614 */ 615 int mbedtls_mpi_core_fill_random( 616 mbedtls_mpi_uint *X, size_t X_limbs, 617 size_t n_bytes, 618 int (*f_rng)(void *, unsigned char *, size_t), void *p_rng) 619 { 620 int ret = MBEDTLS_ERR_ERROR_CORRUPTION_DETECTED; 621 const size_t limbs = CHARS_TO_LIMBS(n_bytes); 622 const size_t overhead = (limbs * ciL) - n_bytes; 623 624 if (X_limbs < limbs) { 625 return MBEDTLS_ERR_MPI_BAD_INPUT_DATA; 626 } 627 628 memset(X, 0, overhead); 629 memset((unsigned char *) X + limbs * ciL, 0, (X_limbs - limbs) * ciL); 630 MBEDTLS_MPI_CHK(f_rng(p_rng, (unsigned char *) X + overhead, n_bytes)); 631 mbedtls_mpi_core_bigendian_to_host(X, limbs); 632 633 cleanup: 634 return ret; 635 } 636 637 int mbedtls_mpi_core_random(mbedtls_mpi_uint *X, 638 mbedtls_mpi_uint min, 639 const mbedtls_mpi_uint *N, 640 size_t limbs, 641 int (*f_rng)(void *, unsigned char *, size_t), 642 void *p_rng) 643 { 644 mbedtls_ct_condition_t ge_lower = MBEDTLS_CT_TRUE, lt_upper = MBEDTLS_CT_FALSE; 645 size_t n_bits = mbedtls_mpi_core_bitlen(N, limbs); 646 size_t n_bytes = (n_bits + 7) / 8; 647 int ret = MBEDTLS_ERR_ERROR_CORRUPTION_DETECTED; 648 649 /* 650 * When min == 0, each try has at worst a probability 1/2 of failing 651 * (the msb has a probability 1/2 of being 0, and then the result will 652 * be < N), so after 30 tries failure probability is a most 2**(-30). 653 * 654 * When N is just below a power of 2, as is the case when generating 655 * a random scalar on most elliptic curves, 1 try is enough with 656 * overwhelming probability. When N is just above a power of 2, 657 * as when generating a random scalar on secp224k1, each try has 658 * a probability of failing that is almost 1/2. 659 * 660 * The probabilities are almost the same if min is nonzero but negligible 661 * compared to N. This is always the case when N is crypto-sized, but 662 * it's convenient to support small N for testing purposes. When N 663 * is small, use a higher repeat count, otherwise the probability of 664 * failure is macroscopic. 665 */ 666 int count = (n_bytes > 4 ? 30 : 250); 667 668 /* 669 * Match the procedure given in RFC 6979 §3.3 (deterministic ECDSA) 670 * when f_rng is a suitably parametrized instance of HMAC_DRBG: 671 * - use the same byte ordering; 672 * - keep the leftmost n_bits bits of the generated octet string; 673 * - try until result is in the desired range. 674 * This also avoids any bias, which is especially important for ECDSA. 675 */ 676 do { 677 MBEDTLS_MPI_CHK(mbedtls_mpi_core_fill_random(X, limbs, 678 n_bytes, 679 f_rng, p_rng)); 680 mbedtls_mpi_core_shift_r(X, limbs, 8 * n_bytes - n_bits); 681 682 if (--count == 0) { 683 ret = MBEDTLS_ERR_MPI_NOT_ACCEPTABLE; 684 goto cleanup; 685 } 686 687 ge_lower = mbedtls_mpi_core_uint_le_mpi(min, X, limbs); 688 lt_upper = mbedtls_mpi_core_lt_ct(X, N, limbs); 689 } while (mbedtls_ct_bool_and(ge_lower, lt_upper) == MBEDTLS_CT_FALSE); 690 691 cleanup: 692 return ret; 693 } 694 695 static size_t exp_mod_get_window_size(size_t Ebits) 696 { 697 #if MBEDTLS_MPI_WINDOW_SIZE >= 6 698 return (Ebits > 671) ? 6 : (Ebits > 239) ? 5 : (Ebits > 79) ? 4 : 1; 699 #elif MBEDTLS_MPI_WINDOW_SIZE == 5 700 return (Ebits > 239) ? 5 : (Ebits > 79) ? 4 : 1; 701 #elif MBEDTLS_MPI_WINDOW_SIZE > 1 702 return (Ebits > 79) ? MBEDTLS_MPI_WINDOW_SIZE : 1; 703 #else 704 (void) Ebits; 705 return 1; 706 #endif 707 } 708 709 size_t mbedtls_mpi_core_exp_mod_working_limbs(size_t AN_limbs, size_t E_limbs) 710 { 711 const size_t wsize = exp_mod_get_window_size(E_limbs * biL); 712 const size_t welem = ((size_t) 1) << wsize; 713 714 /* How big does each part of the working memory pool need to be? */ 715 const size_t table_limbs = welem * AN_limbs; 716 const size_t select_limbs = AN_limbs; 717 const size_t temp_limbs = 2 * AN_limbs + 1; 718 719 return table_limbs + select_limbs + temp_limbs; 720 } 721 722 static void exp_mod_precompute_window(const mbedtls_mpi_uint *A, 723 const mbedtls_mpi_uint *N, 724 size_t AN_limbs, 725 mbedtls_mpi_uint mm, 726 const mbedtls_mpi_uint *RR, 727 size_t welem, 728 mbedtls_mpi_uint *Wtable, 729 mbedtls_mpi_uint *temp) 730 { 731 /* W[0] = 1 (in Montgomery presentation) */ 732 memset(Wtable, 0, AN_limbs * ciL); 733 Wtable[0] = 1; 734 mbedtls_mpi_core_montmul(Wtable, Wtable, RR, AN_limbs, N, AN_limbs, mm, temp); 735 736 /* W[1] = A (already in Montgomery presentation) */ 737 mbedtls_mpi_uint *W1 = Wtable + AN_limbs; 738 memcpy(W1, A, AN_limbs * ciL); 739 740 /* W[i+1] = W[i] * W[1], i >= 2 */ 741 mbedtls_mpi_uint *Wprev = W1; 742 for (size_t i = 2; i < welem; i++) { 743 mbedtls_mpi_uint *Wcur = Wprev + AN_limbs; 744 mbedtls_mpi_core_montmul(Wcur, Wprev, W1, AN_limbs, N, AN_limbs, mm, temp); 745 Wprev = Wcur; 746 } 747 } 748 749 #if defined(MBEDTLS_TEST_HOOKS) && !defined(MBEDTLS_THREADING_C) 750 // Set to a default that is neither MBEDTLS_MPI_IS_PUBLIC nor MBEDTLS_MPI_IS_SECRET 751 int mbedtls_mpi_optionally_safe_codepath = MBEDTLS_MPI_IS_PUBLIC + MBEDTLS_MPI_IS_SECRET + 1; 752 #endif 753 754 /* 755 * This function calculates the indices of the exponent where the exponentiation algorithm should 756 * start processing. 757 * 758 * Warning! If the parameter E_public has MBEDTLS_MPI_IS_PUBLIC as its value, 759 * this function is not constant time with respect to the exponent (parameter E). 760 */ 761 static inline void exp_mod_calc_first_bit_optionally_safe(const mbedtls_mpi_uint *E, 762 size_t E_limbs, 763 int E_public, 764 size_t *E_limb_index, 765 size_t *E_bit_index) 766 { 767 if (E_public == MBEDTLS_MPI_IS_PUBLIC) { 768 /* 769 * Skip leading zero bits. 770 */ 771 size_t E_bits = mbedtls_mpi_core_bitlen(E, E_limbs); 772 if (E_bits == 0) { 773 /* 774 * If E is 0 mbedtls_mpi_core_bitlen() returns 0. Even if that is the case, we will want 775 * to represent it as a single 0 bit and as such the bitlength will be 1. 776 */ 777 E_bits = 1; 778 } 779 780 *E_limb_index = E_bits / biL; 781 *E_bit_index = E_bits % biL; 782 783 #if defined(MBEDTLS_TEST_HOOKS) && !defined(MBEDTLS_THREADING_C) 784 mbedtls_mpi_optionally_safe_codepath = MBEDTLS_MPI_IS_PUBLIC; 785 #endif 786 } else { 787 /* 788 * Here we need to be constant time with respect to E and can't do anything better than 789 * start at the first allocated bit. 790 */ 791 *E_limb_index = E_limbs; 792 *E_bit_index = 0; 793 #if defined(MBEDTLS_TEST_HOOKS) && !defined(MBEDTLS_THREADING_C) 794 // Only mark the codepath safe if there wasn't an unsafe codepath before 795 if (mbedtls_mpi_optionally_safe_codepath != MBEDTLS_MPI_IS_PUBLIC) { 796 mbedtls_mpi_optionally_safe_codepath = MBEDTLS_MPI_IS_SECRET; 797 } 798 #endif 799 } 800 } 801 802 /* 803 * Warning! If the parameter window_public has MBEDTLS_MPI_IS_PUBLIC as its value, this function is 804 * not constant time with respect to the window parameter and consequently the exponent of the 805 * exponentiation (parameter E of mbedtls_mpi_core_exp_mod_optionally_safe). 806 */ 807 static inline void exp_mod_table_lookup_optionally_safe(mbedtls_mpi_uint *Wselect, 808 mbedtls_mpi_uint *Wtable, 809 size_t AN_limbs, size_t welem, 810 mbedtls_mpi_uint window, 811 int window_public) 812 { 813 if (window_public == MBEDTLS_MPI_IS_PUBLIC) { 814 memcpy(Wselect, Wtable + window * AN_limbs, AN_limbs * ciL); 815 #if defined(MBEDTLS_TEST_HOOKS) && !defined(MBEDTLS_THREADING_C) 816 mbedtls_mpi_optionally_safe_codepath = MBEDTLS_MPI_IS_PUBLIC; 817 #endif 818 } else { 819 /* Select Wtable[window] without leaking window through 820 * memory access patterns. */ 821 mbedtls_mpi_core_ct_uint_table_lookup(Wselect, Wtable, 822 AN_limbs, welem, window); 823 #if defined(MBEDTLS_TEST_HOOKS) && !defined(MBEDTLS_THREADING_C) 824 // Only mark the codepath safe if there wasn't an unsafe codepath before 825 if (mbedtls_mpi_optionally_safe_codepath != MBEDTLS_MPI_IS_PUBLIC) { 826 mbedtls_mpi_optionally_safe_codepath = MBEDTLS_MPI_IS_SECRET; 827 } 828 #endif 829 } 830 } 831 832 /* Exponentiation: X := A^E mod N. 833 * 834 * Warning! If the parameter E_public has MBEDTLS_MPI_IS_PUBLIC as its value, 835 * this function is not constant time with respect to the exponent (parameter E). 836 * 837 * A must already be in Montgomery form. 838 * 839 * As in other bignum functions, assume that AN_limbs and E_limbs are nonzero. 840 * 841 * RR must contain 2^{2*biL} mod N. 842 * 843 * The algorithm is a variant of Left-to-right k-ary exponentiation: HAC 14.82 844 * (The difference is that the body in our loop processes a single bit instead 845 * of a full window.) 846 */ 847 static void mbedtls_mpi_core_exp_mod_optionally_safe(mbedtls_mpi_uint *X, 848 const mbedtls_mpi_uint *A, 849 const mbedtls_mpi_uint *N, 850 size_t AN_limbs, 851 const mbedtls_mpi_uint *E, 852 size_t E_limbs, 853 int E_public, 854 const mbedtls_mpi_uint *RR, 855 mbedtls_mpi_uint *T) 856 { 857 /* We'll process the bits of E from most significant 858 * (limb_index=E_limbs-1, E_bit_index=biL-1) to least significant 859 * (limb_index=0, E_bit_index=0). */ 860 size_t E_limb_index; 861 size_t E_bit_index; 862 exp_mod_calc_first_bit_optionally_safe(E, E_limbs, E_public, 863 &E_limb_index, &E_bit_index); 864 865 const size_t wsize = exp_mod_get_window_size(E_limb_index * biL); 866 const size_t welem = ((size_t) 1) << wsize; 867 868 /* This is how we will use the temporary storage T, which must have space 869 * for table_limbs, select_limbs and (2 * AN_limbs + 1) for montmul. */ 870 const size_t table_limbs = welem * AN_limbs; 871 const size_t select_limbs = AN_limbs; 872 873 /* Pointers to specific parts of the temporary working memory pool */ 874 mbedtls_mpi_uint *const Wtable = T; 875 mbedtls_mpi_uint *const Wselect = Wtable + table_limbs; 876 mbedtls_mpi_uint *const temp = Wselect + select_limbs; 877 878 /* 879 * Window precomputation 880 */ 881 882 const mbedtls_mpi_uint mm = mbedtls_mpi_core_montmul_init(N); 883 884 /* Set Wtable[i] = A^i (in Montgomery representation) */ 885 exp_mod_precompute_window(A, N, AN_limbs, 886 mm, RR, 887 welem, Wtable, temp); 888 889 /* 890 * Fixed window exponentiation 891 */ 892 893 /* X = 1 (in Montgomery presentation) initially */ 894 memcpy(X, Wtable, AN_limbs * ciL); 895 896 /* At any given time, window contains window_bits bits from E. 897 * window_bits can go up to wsize. */ 898 size_t window_bits = 0; 899 mbedtls_mpi_uint window = 0; 900 901 do { 902 /* Square */ 903 mbedtls_mpi_core_montmul(X, X, X, AN_limbs, N, AN_limbs, mm, temp); 904 905 /* Move to the next bit of the exponent */ 906 if (E_bit_index == 0) { 907 --E_limb_index; 908 E_bit_index = biL - 1; 909 } else { 910 --E_bit_index; 911 } 912 /* Insert next exponent bit into window */ 913 ++window_bits; 914 window <<= 1; 915 window |= (E[E_limb_index] >> E_bit_index) & 1; 916 917 /* Clear window if it's full. Also clear the window at the end, 918 * when we've finished processing the exponent. */ 919 if (window_bits == wsize || 920 (E_bit_index == 0 && E_limb_index == 0)) { 921 922 exp_mod_table_lookup_optionally_safe(Wselect, Wtable, AN_limbs, welem, 923 window, E_public); 924 /* Multiply X by the selected element. */ 925 mbedtls_mpi_core_montmul(X, X, Wselect, AN_limbs, N, AN_limbs, mm, 926 temp); 927 window = 0; 928 window_bits = 0; 929 } 930 } while (!(E_bit_index == 0 && E_limb_index == 0)); 931 } 932 933 void mbedtls_mpi_core_exp_mod(mbedtls_mpi_uint *X, 934 const mbedtls_mpi_uint *A, 935 const mbedtls_mpi_uint *N, size_t AN_limbs, 936 const mbedtls_mpi_uint *E, size_t E_limbs, 937 const mbedtls_mpi_uint *RR, 938 mbedtls_mpi_uint *T) 939 { 940 mbedtls_mpi_core_exp_mod_optionally_safe(X, 941 A, 942 N, 943 AN_limbs, 944 E, 945 E_limbs, 946 MBEDTLS_MPI_IS_SECRET, 947 RR, 948 T); 949 } 950 951 void mbedtls_mpi_core_exp_mod_unsafe(mbedtls_mpi_uint *X, 952 const mbedtls_mpi_uint *A, 953 const mbedtls_mpi_uint *N, size_t AN_limbs, 954 const mbedtls_mpi_uint *E, size_t E_limbs, 955 const mbedtls_mpi_uint *RR, 956 mbedtls_mpi_uint *T) 957 { 958 mbedtls_mpi_core_exp_mod_optionally_safe(X, 959 A, 960 N, 961 AN_limbs, 962 E, 963 E_limbs, 964 MBEDTLS_MPI_IS_PUBLIC, 965 RR, 966 T); 967 } 968 969 mbedtls_mpi_uint mbedtls_mpi_core_sub_int(mbedtls_mpi_uint *X, 970 const mbedtls_mpi_uint *A, 971 mbedtls_mpi_uint c, /* doubles as carry */ 972 size_t limbs) 973 { 974 for (size_t i = 0; i < limbs; i++) { 975 mbedtls_mpi_uint s = A[i]; 976 mbedtls_mpi_uint t = s - c; 977 c = (t > s); 978 X[i] = t; 979 } 980 981 return c; 982 } 983 984 mbedtls_ct_condition_t mbedtls_mpi_core_check_zero_ct(const mbedtls_mpi_uint *A, 985 size_t limbs) 986 { 987 volatile const mbedtls_mpi_uint *force_read_A = A; 988 mbedtls_mpi_uint bits = 0; 989 990 for (size_t i = 0; i < limbs; i++) { 991 bits |= force_read_A[i]; 992 } 993 994 return mbedtls_ct_bool(bits); 995 } 996 997 void mbedtls_mpi_core_to_mont_rep(mbedtls_mpi_uint *X, 998 const mbedtls_mpi_uint *A, 999 const mbedtls_mpi_uint *N, 1000 size_t AN_limbs, 1001 mbedtls_mpi_uint mm, 1002 const mbedtls_mpi_uint *rr, 1003 mbedtls_mpi_uint *T) 1004 { 1005 mbedtls_mpi_core_montmul(X, A, rr, AN_limbs, N, AN_limbs, mm, T); 1006 } 1007 1008 void mbedtls_mpi_core_from_mont_rep(mbedtls_mpi_uint *X, 1009 const mbedtls_mpi_uint *A, 1010 const mbedtls_mpi_uint *N, 1011 size_t AN_limbs, 1012 mbedtls_mpi_uint mm, 1013 mbedtls_mpi_uint *T) 1014 { 1015 const mbedtls_mpi_uint Rinv = 1; /* 1/R in Mont. rep => 1 */ 1016 1017 mbedtls_mpi_core_montmul(X, A, &Rinv, 1, N, AN_limbs, mm, T); 1018 } 1019 1020 #endif /* MBEDTLS_BIGNUM_C */ 1021