1 /* 2 * Core bignum functions 3 * 4 * Copyright The Mbed TLS Contributors 5 * SPDX-License-Identifier: Apache-2.0 6 * 7 * Licensed under the Apache License, Version 2.0 (the "License"); you may 8 * not use this file except in compliance with the License. 9 * You may obtain a copy of the License at 10 * 11 * http://www.apache.org/licenses/LICENSE-2.0 12 * 13 * Unless required by applicable law or agreed to in writing, software 14 * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT 15 * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 16 * See the License for the specific language governing permissions and 17 * limitations under the License. 18 */ 19 20 #include "common.h" 21 22 #if defined(MBEDTLS_BIGNUM_C) 23 24 #include <string.h> 25 26 #include "mbedtls/error.h" 27 #include "mbedtls/platform_util.h" 28 #include "constant_time_internal.h" 29 30 #include "mbedtls/platform.h" 31 32 #include "bignum_core.h" 33 #include "bn_mul.h" 34 #include "constant_time_internal.h" 35 36 size_t mbedtls_mpi_core_clz(mbedtls_mpi_uint a) 37 { 38 size_t j; 39 mbedtls_mpi_uint mask = (mbedtls_mpi_uint) 1 << (biL - 1); 40 41 for (j = 0; j < biL; j++) { 42 if (a & mask) { 43 break; 44 } 45 46 mask >>= 1; 47 } 48 49 return j; 50 } 51 52 size_t mbedtls_mpi_core_bitlen(const mbedtls_mpi_uint *A, size_t A_limbs) 53 { 54 size_t i, j; 55 56 if (A_limbs == 0) { 57 return 0; 58 } 59 60 for (i = A_limbs - 1; i > 0; i--) { 61 if (A[i] != 0) { 62 break; 63 } 64 } 65 66 j = biL - mbedtls_mpi_core_clz(A[i]); 67 68 return (i * biL) + j; 69 } 70 71 /* Convert a big-endian byte array aligned to the size of mbedtls_mpi_uint 72 * into the storage form used by mbedtls_mpi. */ 73 static mbedtls_mpi_uint mpi_bigendian_to_host_c(mbedtls_mpi_uint a) 74 { 75 uint8_t i; 76 unsigned char *a_ptr; 77 mbedtls_mpi_uint tmp = 0; 78 79 for (i = 0, a_ptr = (unsigned char *) &a; i < ciL; i++, a_ptr++) { 80 tmp <<= CHAR_BIT; 81 tmp |= (mbedtls_mpi_uint) *a_ptr; 82 } 83 84 return tmp; 85 } 86 87 static mbedtls_mpi_uint mpi_bigendian_to_host(mbedtls_mpi_uint a) 88 { 89 if (MBEDTLS_IS_BIG_ENDIAN) { 90 /* Nothing to do on bigendian systems. */ 91 return a; 92 } else { 93 switch (sizeof(mbedtls_mpi_uint)) { 94 case 4: 95 return (mbedtls_mpi_uint) MBEDTLS_BSWAP32((uint32_t) a); 96 case 8: 97 return (mbedtls_mpi_uint) MBEDTLS_BSWAP64((uint64_t) a); 98 } 99 100 /* Fall back to C-based reordering if we don't know the byte order 101 * or we couldn't use a compiler-specific builtin. */ 102 return mpi_bigendian_to_host_c(a); 103 } 104 } 105 106 void mbedtls_mpi_core_bigendian_to_host(mbedtls_mpi_uint *A, 107 size_t A_limbs) 108 { 109 mbedtls_mpi_uint *cur_limb_left; 110 mbedtls_mpi_uint *cur_limb_right; 111 if (A_limbs == 0) { 112 return; 113 } 114 115 /* 116 * Traverse limbs and 117 * - adapt byte-order in each limb 118 * - swap the limbs themselves. 119 * For that, simultaneously traverse the limbs from left to right 120 * and from right to left, as long as the left index is not bigger 121 * than the right index (it's not a problem if limbs is odd and the 122 * indices coincide in the last iteration). 123 */ 124 for (cur_limb_left = A, cur_limb_right = A + (A_limbs - 1); 125 cur_limb_left <= cur_limb_right; 126 cur_limb_left++, cur_limb_right--) { 127 mbedtls_mpi_uint tmp; 128 /* Note that if cur_limb_left == cur_limb_right, 129 * this code effectively swaps the bytes only once. */ 130 tmp = mpi_bigendian_to_host(*cur_limb_left); 131 *cur_limb_left = mpi_bigendian_to_host(*cur_limb_right); 132 *cur_limb_right = tmp; 133 } 134 } 135 136 /* Whether min <= A, in constant time. 137 * A_limbs must be at least 1. */ 138 unsigned mbedtls_mpi_core_uint_le_mpi(mbedtls_mpi_uint min, 139 const mbedtls_mpi_uint *A, 140 size_t A_limbs) 141 { 142 /* min <= least significant limb? */ 143 unsigned min_le_lsl = 1 ^ mbedtls_ct_mpi_uint_lt(A[0], min); 144 145 /* limbs other than the least significant one are all zero? */ 146 mbedtls_mpi_uint msll_mask = 0; 147 for (size_t i = 1; i < A_limbs; i++) { 148 msll_mask |= A[i]; 149 } 150 /* The most significant limbs of A are not all zero iff msll_mask != 0. */ 151 unsigned msll_nonzero = mbedtls_ct_mpi_uint_mask(msll_mask) & 1; 152 153 /* min <= A iff the lowest limb of A is >= min or the other limbs 154 * are not all zero. */ 155 return min_le_lsl | msll_nonzero; 156 } 157 158 void mbedtls_mpi_core_cond_assign(mbedtls_mpi_uint *X, 159 const mbedtls_mpi_uint *A, 160 size_t limbs, 161 unsigned char assign) 162 { 163 if (X == A) { 164 return; 165 } 166 167 mbedtls_ct_mpi_uint_cond_assign(limbs, X, A, assign); 168 } 169 170 void mbedtls_mpi_core_cond_swap(mbedtls_mpi_uint *X, 171 mbedtls_mpi_uint *Y, 172 size_t limbs, 173 unsigned char swap) 174 { 175 if (X == Y) { 176 return; 177 } 178 179 /* all-bits 1 if swap is 1, all-bits 0 if swap is 0 */ 180 mbedtls_mpi_uint limb_mask = mbedtls_ct_mpi_uint_mask(swap); 181 182 for (size_t i = 0; i < limbs; i++) { 183 mbedtls_mpi_uint tmp = X[i]; 184 X[i] = (X[i] & ~limb_mask) | (Y[i] & limb_mask); 185 Y[i] = (Y[i] & ~limb_mask) | (tmp & limb_mask); 186 } 187 } 188 189 int mbedtls_mpi_core_read_le(mbedtls_mpi_uint *X, 190 size_t X_limbs, 191 const unsigned char *input, 192 size_t input_length) 193 { 194 const size_t limbs = CHARS_TO_LIMBS(input_length); 195 196 if (X_limbs < limbs) { 197 return MBEDTLS_ERR_MPI_BUFFER_TOO_SMALL; 198 } 199 200 if (X != NULL) { 201 memset(X, 0, X_limbs * ciL); 202 203 for (size_t i = 0; i < input_length; i++) { 204 size_t offset = ((i % ciL) << 3); 205 X[i / ciL] |= ((mbedtls_mpi_uint) input[i]) << offset; 206 } 207 } 208 209 return 0; 210 } 211 212 int mbedtls_mpi_core_read_be(mbedtls_mpi_uint *X, 213 size_t X_limbs, 214 const unsigned char *input, 215 size_t input_length) 216 { 217 const size_t limbs = CHARS_TO_LIMBS(input_length); 218 219 if (X_limbs < limbs) { 220 return MBEDTLS_ERR_MPI_BUFFER_TOO_SMALL; 221 } 222 223 /* If X_limbs is 0, input_length must also be 0 (from previous test). 224 * Nothing to do. */ 225 if (X_limbs == 0) { 226 return 0; 227 } 228 229 memset(X, 0, X_limbs * ciL); 230 231 /* memcpy() with (NULL, 0) is undefined behaviour */ 232 if (input_length != 0) { 233 size_t overhead = (X_limbs * ciL) - input_length; 234 unsigned char *Xp = (unsigned char *) X; 235 memcpy(Xp + overhead, input, input_length); 236 } 237 238 mbedtls_mpi_core_bigendian_to_host(X, X_limbs); 239 240 return 0; 241 } 242 243 int mbedtls_mpi_core_write_le(const mbedtls_mpi_uint *A, 244 size_t A_limbs, 245 unsigned char *output, 246 size_t output_length) 247 { 248 size_t stored_bytes = A_limbs * ciL; 249 size_t bytes_to_copy; 250 251 if (stored_bytes < output_length) { 252 bytes_to_copy = stored_bytes; 253 } else { 254 bytes_to_copy = output_length; 255 256 /* The output buffer is smaller than the allocated size of A. 257 * However A may fit if its leading bytes are zero. */ 258 for (size_t i = bytes_to_copy; i < stored_bytes; i++) { 259 if (GET_BYTE(A, i) != 0) { 260 return MBEDTLS_ERR_MPI_BUFFER_TOO_SMALL; 261 } 262 } 263 } 264 265 for (size_t i = 0; i < bytes_to_copy; i++) { 266 output[i] = GET_BYTE(A, i); 267 } 268 269 if (stored_bytes < output_length) { 270 /* Write trailing 0 bytes */ 271 memset(output + stored_bytes, 0, output_length - stored_bytes); 272 } 273 274 return 0; 275 } 276 277 int mbedtls_mpi_core_write_be(const mbedtls_mpi_uint *X, 278 size_t X_limbs, 279 unsigned char *output, 280 size_t output_length) 281 { 282 size_t stored_bytes; 283 size_t bytes_to_copy; 284 unsigned char *p; 285 286 stored_bytes = X_limbs * ciL; 287 288 if (stored_bytes < output_length) { 289 /* There is enough space in the output buffer. Write initial 290 * null bytes and record the position at which to start 291 * writing the significant bytes. In this case, the execution 292 * trace of this function does not depend on the value of the 293 * number. */ 294 bytes_to_copy = stored_bytes; 295 p = output + output_length - stored_bytes; 296 memset(output, 0, output_length - stored_bytes); 297 } else { 298 /* The output buffer is smaller than the allocated size of X. 299 * However X may fit if its leading bytes are zero. */ 300 bytes_to_copy = output_length; 301 p = output; 302 for (size_t i = bytes_to_copy; i < stored_bytes; i++) { 303 if (GET_BYTE(X, i) != 0) { 304 return MBEDTLS_ERR_MPI_BUFFER_TOO_SMALL; 305 } 306 } 307 } 308 309 for (size_t i = 0; i < bytes_to_copy; i++) { 310 p[bytes_to_copy - i - 1] = GET_BYTE(X, i); 311 } 312 313 return 0; 314 } 315 316 void mbedtls_mpi_core_shift_r(mbedtls_mpi_uint *X, size_t limbs, 317 size_t count) 318 { 319 size_t i, v0, v1; 320 mbedtls_mpi_uint r0 = 0, r1; 321 322 v0 = count / biL; 323 v1 = count & (biL - 1); 324 325 if (v0 > limbs || (v0 == limbs && v1 > 0)) { 326 memset(X, 0, limbs * ciL); 327 return; 328 } 329 330 /* 331 * shift by count / limb_size 332 */ 333 if (v0 > 0) { 334 for (i = 0; i < limbs - v0; i++) { 335 X[i] = X[i + v0]; 336 } 337 338 for (; i < limbs; i++) { 339 X[i] = 0; 340 } 341 } 342 343 /* 344 * shift by count % limb_size 345 */ 346 if (v1 > 0) { 347 for (i = limbs; i > 0; i--) { 348 r1 = X[i - 1] << (biL - v1); 349 X[i - 1] >>= v1; 350 X[i - 1] |= r0; 351 r0 = r1; 352 } 353 } 354 } 355 356 mbedtls_mpi_uint mbedtls_mpi_core_add(mbedtls_mpi_uint *X, 357 const mbedtls_mpi_uint *A, 358 const mbedtls_mpi_uint *B, 359 size_t limbs) 360 { 361 mbedtls_mpi_uint c = 0; 362 363 for (size_t i = 0; i < limbs; i++) { 364 mbedtls_mpi_uint t = c + A[i]; 365 c = (t < A[i]); 366 t += B[i]; 367 c += (t < B[i]); 368 X[i] = t; 369 } 370 371 return c; 372 } 373 374 mbedtls_mpi_uint mbedtls_mpi_core_add_if(mbedtls_mpi_uint *X, 375 const mbedtls_mpi_uint *A, 376 size_t limbs, 377 unsigned cond) 378 { 379 mbedtls_mpi_uint c = 0; 380 381 /* all-bits 0 if cond is 0, all-bits 1 if cond is non-0 */ 382 const mbedtls_mpi_uint mask = mbedtls_ct_mpi_uint_mask(cond); 383 384 for (size_t i = 0; i < limbs; i++) { 385 mbedtls_mpi_uint add = mask & A[i]; 386 mbedtls_mpi_uint t = c + X[i]; 387 c = (t < X[i]); 388 t += add; 389 c += (t < add); 390 X[i] = t; 391 } 392 393 return c; 394 } 395 396 mbedtls_mpi_uint mbedtls_mpi_core_sub(mbedtls_mpi_uint *X, 397 const mbedtls_mpi_uint *A, 398 const mbedtls_mpi_uint *B, 399 size_t limbs) 400 { 401 mbedtls_mpi_uint c = 0; 402 403 for (size_t i = 0; i < limbs; i++) { 404 mbedtls_mpi_uint z = (A[i] < c); 405 mbedtls_mpi_uint t = A[i] - c; 406 c = (t < B[i]) + z; 407 X[i] = t - B[i]; 408 } 409 410 return c; 411 } 412 413 mbedtls_mpi_uint mbedtls_mpi_core_mla(mbedtls_mpi_uint *d, size_t d_len, 414 const mbedtls_mpi_uint *s, size_t s_len, 415 mbedtls_mpi_uint b) 416 { 417 mbedtls_mpi_uint c = 0; /* carry */ 418 /* 419 * It is a documented precondition of this function that d_len >= s_len. 420 * If that's not the case, we swap these round: this turns what would be 421 * a buffer overflow into an incorrect result. 422 */ 423 if (d_len < s_len) { 424 s_len = d_len; 425 } 426 size_t excess_len = d_len - s_len; 427 size_t steps_x8 = s_len / 8; 428 size_t steps_x1 = s_len & 7; 429 430 while (steps_x8--) { 431 MULADDC_X8_INIT 432 MULADDC_X8_CORE 433 MULADDC_X8_STOP 434 } 435 436 while (steps_x1--) { 437 MULADDC_X1_INIT 438 MULADDC_X1_CORE 439 MULADDC_X1_STOP 440 } 441 442 while (excess_len--) { 443 *d += c; 444 c = (*d < c); 445 d++; 446 } 447 448 return c; 449 } 450 451 /* 452 * Fast Montgomery initialization (thanks to Tom St Denis). 453 */ 454 mbedtls_mpi_uint mbedtls_mpi_core_montmul_init(const mbedtls_mpi_uint *N) 455 { 456 mbedtls_mpi_uint x = N[0]; 457 458 x += ((N[0] + 2) & 4) << 1; 459 460 for (unsigned int i = biL; i >= 8; i /= 2) { 461 x *= (2 - (N[0] * x)); 462 } 463 464 return ~x + 1; 465 } 466 467 void mbedtls_mpi_core_montmul(mbedtls_mpi_uint *X, 468 const mbedtls_mpi_uint *A, 469 const mbedtls_mpi_uint *B, 470 size_t B_limbs, 471 const mbedtls_mpi_uint *N, 472 size_t AN_limbs, 473 mbedtls_mpi_uint mm, 474 mbedtls_mpi_uint *T) 475 { 476 memset(T, 0, (2 * AN_limbs + 1) * ciL); 477 478 for (size_t i = 0; i < AN_limbs; i++) { 479 /* T = (T + u0*B + u1*N) / 2^biL */ 480 mbedtls_mpi_uint u0 = A[i]; 481 mbedtls_mpi_uint u1 = (T[0] + u0 * B[0]) * mm; 482 483 (void) mbedtls_mpi_core_mla(T, AN_limbs + 2, B, B_limbs, u0); 484 (void) mbedtls_mpi_core_mla(T, AN_limbs + 2, N, AN_limbs, u1); 485 486 T++; 487 } 488 489 /* 490 * The result we want is (T >= N) ? T - N : T. 491 * 492 * For better constant-time properties in this function, we always do the 493 * subtraction, with the result in X. 494 * 495 * We also look to see if there was any carry in the final additions in the 496 * loop above. 497 */ 498 499 mbedtls_mpi_uint carry = T[AN_limbs]; 500 mbedtls_mpi_uint borrow = mbedtls_mpi_core_sub(X, T, N, AN_limbs); 501 502 /* 503 * Using R as the Montgomery radix (auxiliary modulus) i.e. 2^(biL*AN_limbs): 504 * 505 * T can be in one of 3 ranges: 506 * 507 * 1) T < N : (carry, borrow) = (0, 1): we want T 508 * 2) N <= T < R : (carry, borrow) = (0, 0): we want X 509 * 3) T >= R : (carry, borrow) = (1, 1): we want X 510 * 511 * and (carry, borrow) = (1, 0) can't happen. 512 * 513 * So the correct return value is already in X if (carry ^ borrow) = 0, 514 * but is in (the lower AN_limbs limbs of) T if (carry ^ borrow) = 1. 515 */ 516 mbedtls_ct_mpi_uint_cond_assign(AN_limbs, X, T, (unsigned char) (carry ^ borrow)); 517 } 518 519 int mbedtls_mpi_core_get_mont_r2_unsafe(mbedtls_mpi *X, 520 const mbedtls_mpi *N) 521 { 522 int ret = MBEDTLS_ERR_ERROR_CORRUPTION_DETECTED; 523 524 MBEDTLS_MPI_CHK(mbedtls_mpi_lset(X, 1)); 525 MBEDTLS_MPI_CHK(mbedtls_mpi_shift_l(X, N->n * 2 * biL)); 526 MBEDTLS_MPI_CHK(mbedtls_mpi_mod_mpi(X, X, N)); 527 MBEDTLS_MPI_CHK(mbedtls_mpi_shrink(X, N->n)); 528 529 cleanup: 530 return ret; 531 } 532 533 MBEDTLS_STATIC_TESTABLE 534 void mbedtls_mpi_core_ct_uint_table_lookup(mbedtls_mpi_uint *dest, 535 const mbedtls_mpi_uint *table, 536 size_t limbs, 537 size_t count, 538 size_t index) 539 { 540 for (size_t i = 0; i < count; i++, table += limbs) { 541 unsigned char assign = mbedtls_ct_size_bool_eq(i, index); 542 mbedtls_mpi_core_cond_assign(dest, table, limbs, assign); 543 } 544 } 545 546 /* Fill X with n_bytes random bytes. 547 * X must already have room for those bytes. 548 * The ordering of the bytes returned from the RNG is suitable for 549 * deterministic ECDSA (see RFC 6979 §3.3 and the specification of 550 * mbedtls_mpi_core_random()). 551 */ 552 int mbedtls_mpi_core_fill_random( 553 mbedtls_mpi_uint *X, size_t X_limbs, 554 size_t n_bytes, 555 int (*f_rng)(void *, unsigned char *, size_t), void *p_rng) 556 { 557 int ret = MBEDTLS_ERR_ERROR_CORRUPTION_DETECTED; 558 const size_t limbs = CHARS_TO_LIMBS(n_bytes); 559 const size_t overhead = (limbs * ciL) - n_bytes; 560 561 if (X_limbs < limbs) { 562 return MBEDTLS_ERR_MPI_BAD_INPUT_DATA; 563 } 564 565 memset(X, 0, overhead); 566 memset((unsigned char *) X + limbs * ciL, 0, (X_limbs - limbs) * ciL); 567 MBEDTLS_MPI_CHK(f_rng(p_rng, (unsigned char *) X + overhead, n_bytes)); 568 mbedtls_mpi_core_bigendian_to_host(X, limbs); 569 570 cleanup: 571 return ret; 572 } 573 574 int mbedtls_mpi_core_random(mbedtls_mpi_uint *X, 575 mbedtls_mpi_uint min, 576 const mbedtls_mpi_uint *N, 577 size_t limbs, 578 int (*f_rng)(void *, unsigned char *, size_t), 579 void *p_rng) 580 { 581 unsigned ge_lower = 1, lt_upper = 0; 582 size_t n_bits = mbedtls_mpi_core_bitlen(N, limbs); 583 size_t n_bytes = (n_bits + 7) / 8; 584 int ret = MBEDTLS_ERR_ERROR_CORRUPTION_DETECTED; 585 586 /* 587 * When min == 0, each try has at worst a probability 1/2 of failing 588 * (the msb has a probability 1/2 of being 0, and then the result will 589 * be < N), so after 30 tries failure probability is a most 2**(-30). 590 * 591 * When N is just below a power of 2, as is the case when generating 592 * a random scalar on most elliptic curves, 1 try is enough with 593 * overwhelming probability. When N is just above a power of 2, 594 * as when generating a random scalar on secp224k1, each try has 595 * a probability of failing that is almost 1/2. 596 * 597 * The probabilities are almost the same if min is nonzero but negligible 598 * compared to N. This is always the case when N is crypto-sized, but 599 * it's convenient to support small N for testing purposes. When N 600 * is small, use a higher repeat count, otherwise the probability of 601 * failure is macroscopic. 602 */ 603 int count = (n_bytes > 4 ? 30 : 250); 604 605 /* 606 * Match the procedure given in RFC 6979 §3.3 (deterministic ECDSA) 607 * when f_rng is a suitably parametrized instance of HMAC_DRBG: 608 * - use the same byte ordering; 609 * - keep the leftmost n_bits bits of the generated octet string; 610 * - try until result is in the desired range. 611 * This also avoids any bias, which is especially important for ECDSA. 612 */ 613 do { 614 MBEDTLS_MPI_CHK(mbedtls_mpi_core_fill_random(X, limbs, 615 n_bytes, 616 f_rng, p_rng)); 617 mbedtls_mpi_core_shift_r(X, limbs, 8 * n_bytes - n_bits); 618 619 if (--count == 0) { 620 ret = MBEDTLS_ERR_MPI_NOT_ACCEPTABLE; 621 goto cleanup; 622 } 623 624 ge_lower = mbedtls_mpi_core_uint_le_mpi(min, X, limbs); 625 lt_upper = mbedtls_mpi_core_lt_ct(X, N, limbs); 626 } while (ge_lower == 0 || lt_upper == 0); 627 628 cleanup: 629 return ret; 630 } 631 632 /* BEGIN MERGE SLOT 1 */ 633 634 static size_t exp_mod_get_window_size(size_t Ebits) 635 { 636 size_t wsize = (Ebits > 671) ? 6 : (Ebits > 239) ? 5 : 637 (Ebits > 79) ? 4 : 1; 638 639 #if (MBEDTLS_MPI_WINDOW_SIZE < 6) 640 if (wsize > MBEDTLS_MPI_WINDOW_SIZE) { 641 wsize = MBEDTLS_MPI_WINDOW_SIZE; 642 } 643 #endif 644 645 return wsize; 646 } 647 648 size_t mbedtls_mpi_core_exp_mod_working_limbs(size_t AN_limbs, size_t E_limbs) 649 { 650 const size_t wsize = exp_mod_get_window_size(E_limbs * biL); 651 const size_t welem = ((size_t) 1) << wsize; 652 653 /* How big does each part of the working memory pool need to be? */ 654 const size_t table_limbs = welem * AN_limbs; 655 const size_t select_limbs = AN_limbs; 656 const size_t temp_limbs = 2 * AN_limbs + 1; 657 658 return table_limbs + select_limbs + temp_limbs; 659 } 660 661 static void exp_mod_precompute_window(const mbedtls_mpi_uint *A, 662 const mbedtls_mpi_uint *N, 663 size_t AN_limbs, 664 mbedtls_mpi_uint mm, 665 const mbedtls_mpi_uint *RR, 666 size_t welem, 667 mbedtls_mpi_uint *Wtable, 668 mbedtls_mpi_uint *temp) 669 { 670 /* W[0] = 1 (in Montgomery presentation) */ 671 memset(Wtable, 0, AN_limbs * ciL); 672 Wtable[0] = 1; 673 mbedtls_mpi_core_montmul(Wtable, Wtable, RR, AN_limbs, N, AN_limbs, mm, temp); 674 675 /* W[1] = A (already in Montgomery presentation) */ 676 mbedtls_mpi_uint *W1 = Wtable + AN_limbs; 677 memcpy(W1, A, AN_limbs * ciL); 678 679 /* W[i+1] = W[i] * W[1], i >= 2 */ 680 mbedtls_mpi_uint *Wprev = W1; 681 for (size_t i = 2; i < welem; i++) { 682 mbedtls_mpi_uint *Wcur = Wprev + AN_limbs; 683 mbedtls_mpi_core_montmul(Wcur, Wprev, W1, AN_limbs, N, AN_limbs, mm, temp); 684 Wprev = Wcur; 685 } 686 } 687 688 /* Exponentiation: X := A^E mod N. 689 * 690 * A must already be in Montgomery form. 691 * 692 * As in other bignum functions, assume that AN_limbs and E_limbs are nonzero. 693 * 694 * RR must contain 2^{2*biL} mod N. 695 * 696 * The algorithm is a variant of Left-to-right k-ary exponentiation: HAC 14.82 697 * (The difference is that the body in our loop processes a single bit instead 698 * of a full window.) 699 */ 700 void mbedtls_mpi_core_exp_mod(mbedtls_mpi_uint *X, 701 const mbedtls_mpi_uint *A, 702 const mbedtls_mpi_uint *N, 703 size_t AN_limbs, 704 const mbedtls_mpi_uint *E, 705 size_t E_limbs, 706 const mbedtls_mpi_uint *RR, 707 mbedtls_mpi_uint *T) 708 { 709 const size_t wsize = exp_mod_get_window_size(E_limbs * biL); 710 const size_t welem = ((size_t) 1) << wsize; 711 712 /* This is how we will use the temporary storage T, which must have space 713 * for table_limbs, select_limbs and (2 * AN_limbs + 1) for montmul. */ 714 const size_t table_limbs = welem * AN_limbs; 715 const size_t select_limbs = AN_limbs; 716 717 /* Pointers to specific parts of the temporary working memory pool */ 718 mbedtls_mpi_uint *const Wtable = T; 719 mbedtls_mpi_uint *const Wselect = Wtable + table_limbs; 720 mbedtls_mpi_uint *const temp = Wselect + select_limbs; 721 722 /* 723 * Window precomputation 724 */ 725 726 const mbedtls_mpi_uint mm = mbedtls_mpi_core_montmul_init(N); 727 728 /* Set Wtable[i] = A^(2^i) (in Montgomery representation) */ 729 exp_mod_precompute_window(A, N, AN_limbs, 730 mm, RR, 731 welem, Wtable, temp); 732 733 /* 734 * Fixed window exponentiation 735 */ 736 737 /* X = 1 (in Montgomery presentation) initially */ 738 memcpy(X, Wtable, AN_limbs * ciL); 739 740 /* We'll process the bits of E from most significant 741 * (limb_index=E_limbs-1, E_bit_index=biL-1) to least significant 742 * (limb_index=0, E_bit_index=0). */ 743 size_t E_limb_index = E_limbs; 744 size_t E_bit_index = 0; 745 /* At any given time, window contains window_bits bits from E. 746 * window_bits can go up to wsize. */ 747 size_t window_bits = 0; 748 mbedtls_mpi_uint window = 0; 749 750 do { 751 /* Square */ 752 mbedtls_mpi_core_montmul(X, X, X, AN_limbs, N, AN_limbs, mm, temp); 753 754 /* Move to the next bit of the exponent */ 755 if (E_bit_index == 0) { 756 --E_limb_index; 757 E_bit_index = biL - 1; 758 } else { 759 --E_bit_index; 760 } 761 /* Insert next exponent bit into window */ 762 ++window_bits; 763 window <<= 1; 764 window |= (E[E_limb_index] >> E_bit_index) & 1; 765 766 /* Clear window if it's full. Also clear the window at the end, 767 * when we've finished processing the exponent. */ 768 if (window_bits == wsize || 769 (E_bit_index == 0 && E_limb_index == 0)) { 770 /* Select Wtable[window] without leaking window through 771 * memory access patterns. */ 772 mbedtls_mpi_core_ct_uint_table_lookup(Wselect, Wtable, 773 AN_limbs, welem, window); 774 /* Multiply X by the selected element. */ 775 mbedtls_mpi_core_montmul(X, X, Wselect, AN_limbs, N, AN_limbs, mm, 776 temp); 777 window = 0; 778 window_bits = 0; 779 } 780 } while (!(E_bit_index == 0 && E_limb_index == 0)); 781 } 782 783 /* END MERGE SLOT 1 */ 784 785 /* BEGIN MERGE SLOT 2 */ 786 787 /* END MERGE SLOT 2 */ 788 789 /* BEGIN MERGE SLOT 3 */ 790 791 mbedtls_mpi_uint mbedtls_mpi_core_sub_int(mbedtls_mpi_uint *X, 792 const mbedtls_mpi_uint *A, 793 mbedtls_mpi_uint c, /* doubles as carry */ 794 size_t limbs) 795 { 796 for (size_t i = 0; i < limbs; i++) { 797 mbedtls_mpi_uint s = A[i]; 798 mbedtls_mpi_uint t = s - c; 799 c = (t > s); 800 X[i] = t; 801 } 802 803 return c; 804 } 805 806 mbedtls_mpi_uint mbedtls_mpi_core_check_zero_ct(const mbedtls_mpi_uint *A, 807 size_t limbs) 808 { 809 mbedtls_mpi_uint bits = 0; 810 811 for (size_t i = 0; i < limbs; i++) { 812 bits |= A[i]; 813 } 814 815 return bits; 816 } 817 818 void mbedtls_mpi_core_to_mont_rep(mbedtls_mpi_uint *X, 819 const mbedtls_mpi_uint *A, 820 const mbedtls_mpi_uint *N, 821 size_t AN_limbs, 822 mbedtls_mpi_uint mm, 823 const mbedtls_mpi_uint *rr, 824 mbedtls_mpi_uint *T) 825 { 826 mbedtls_mpi_core_montmul(X, A, rr, AN_limbs, N, AN_limbs, mm, T); 827 } 828 829 void mbedtls_mpi_core_from_mont_rep(mbedtls_mpi_uint *X, 830 const mbedtls_mpi_uint *A, 831 const mbedtls_mpi_uint *N, 832 size_t AN_limbs, 833 mbedtls_mpi_uint mm, 834 mbedtls_mpi_uint *T) 835 { 836 const mbedtls_mpi_uint Rinv = 1; /* 1/R in Mont. rep => 1 */ 837 838 mbedtls_mpi_core_montmul(X, A, &Rinv, 1, N, AN_limbs, mm, T); 839 } 840 841 /* END MERGE SLOT 3 */ 842 843 /* BEGIN MERGE SLOT 4 */ 844 845 /* END MERGE SLOT 4 */ 846 847 /* BEGIN MERGE SLOT 5 */ 848 849 /* END MERGE SLOT 5 */ 850 851 /* BEGIN MERGE SLOT 6 */ 852 853 /* END MERGE SLOT 6 */ 854 855 /* BEGIN MERGE SLOT 7 */ 856 857 /* END MERGE SLOT 7 */ 858 859 /* BEGIN MERGE SLOT 8 */ 860 861 /* END MERGE SLOT 8 */ 862 863 /* BEGIN MERGE SLOT 9 */ 864 865 /* END MERGE SLOT 9 */ 866 867 /* BEGIN MERGE SLOT 10 */ 868 869 /* END MERGE SLOT 10 */ 870 871 #endif /* MBEDTLS_BIGNUM_C */ 872