1 /** 2 * \file poly1305.c 3 * 4 * \brief Poly1305 authentication algorithm. 5 * 6 * Copyright The Mbed TLS Contributors 7 * SPDX-License-Identifier: Apache-2.0 OR GPL-2.0-or-later 8 */ 9 #include "common.h" 10 11 #if defined(MBEDTLS_POLY1305_C) 12 13 #include "mbedtls/poly1305.h" 14 #include "mbedtls/platform_util.h" 15 #include "mbedtls/error.h" 16 17 #include <string.h> 18 19 #include "mbedtls/platform.h" 20 21 #if !defined(MBEDTLS_POLY1305_ALT) 22 23 #define POLY1305_BLOCK_SIZE_BYTES (16U) 24 25 /* 26 * Our implementation is tuned for 32-bit platforms with a 64-bit multiplier. 27 * However we provided an alternative for platforms without such a multiplier. 28 */ 29 #if defined(MBEDTLS_NO_64BIT_MULTIPLICATION) 30 static uint64_t mul64(uint32_t a, uint32_t b) 31 { 32 /* a = al + 2**16 ah, b = bl + 2**16 bh */ 33 const uint16_t al = (uint16_t) a; 34 const uint16_t bl = (uint16_t) b; 35 const uint16_t ah = a >> 16; 36 const uint16_t bh = b >> 16; 37 38 /* ab = al*bl + 2**16 (ah*bl + bl*bh) + 2**32 ah*bh */ 39 const uint32_t lo = (uint32_t) al * bl; 40 const uint64_t me = (uint64_t) ((uint32_t) ah * bl) + (uint32_t) al * bh; 41 const uint32_t hi = (uint32_t) ah * bh; 42 43 return lo + (me << 16) + ((uint64_t) hi << 32); 44 } 45 #else 46 static inline uint64_t mul64(uint32_t a, uint32_t b) 47 { 48 return (uint64_t) a * b; 49 } 50 #endif 51 52 53 /** 54 * \brief Process blocks with Poly1305. 55 * 56 * \param ctx The Poly1305 context. 57 * \param nblocks Number of blocks to process. Note that this 58 * function only processes full blocks. 59 * \param input Buffer containing the input block(s). 60 * \param needs_padding Set to 0 if the padding bit has already been 61 * applied to the input data before calling this 62 * function. Otherwise, set this parameter to 1. 63 */ 64 static void poly1305_process(mbedtls_poly1305_context *ctx, 65 size_t nblocks, 66 const unsigned char *input, 67 uint32_t needs_padding) 68 { 69 uint64_t d0, d1, d2, d3; 70 uint32_t acc0, acc1, acc2, acc3, acc4; 71 uint32_t r0, r1, r2, r3; 72 uint32_t rs1, rs2, rs3; 73 size_t offset = 0U; 74 size_t i; 75 76 r0 = ctx->r[0]; 77 r1 = ctx->r[1]; 78 r2 = ctx->r[2]; 79 r3 = ctx->r[3]; 80 81 rs1 = r1 + (r1 >> 2U); 82 rs2 = r2 + (r2 >> 2U); 83 rs3 = r3 + (r3 >> 2U); 84 85 acc0 = ctx->acc[0]; 86 acc1 = ctx->acc[1]; 87 acc2 = ctx->acc[2]; 88 acc3 = ctx->acc[3]; 89 acc4 = ctx->acc[4]; 90 91 /* Process full blocks */ 92 for (i = 0U; i < nblocks; i++) { 93 /* The input block is treated as a 128-bit little-endian integer */ 94 d0 = MBEDTLS_GET_UINT32_LE(input, offset + 0); 95 d1 = MBEDTLS_GET_UINT32_LE(input, offset + 4); 96 d2 = MBEDTLS_GET_UINT32_LE(input, offset + 8); 97 d3 = MBEDTLS_GET_UINT32_LE(input, offset + 12); 98 99 /* Compute: acc += (padded) block as a 130-bit integer */ 100 d0 += (uint64_t) acc0; 101 d1 += (uint64_t) acc1 + (d0 >> 32U); 102 d2 += (uint64_t) acc2 + (d1 >> 32U); 103 d3 += (uint64_t) acc3 + (d2 >> 32U); 104 acc0 = (uint32_t) d0; 105 acc1 = (uint32_t) d1; 106 acc2 = (uint32_t) d2; 107 acc3 = (uint32_t) d3; 108 acc4 += (uint32_t) (d3 >> 32U) + needs_padding; 109 110 /* Compute: acc *= r */ 111 d0 = mul64(acc0, r0) + 112 mul64(acc1, rs3) + 113 mul64(acc2, rs2) + 114 mul64(acc3, rs1); 115 d1 = mul64(acc0, r1) + 116 mul64(acc1, r0) + 117 mul64(acc2, rs3) + 118 mul64(acc3, rs2) + 119 mul64(acc4, rs1); 120 d2 = mul64(acc0, r2) + 121 mul64(acc1, r1) + 122 mul64(acc2, r0) + 123 mul64(acc3, rs3) + 124 mul64(acc4, rs2); 125 d3 = mul64(acc0, r3) + 126 mul64(acc1, r2) + 127 mul64(acc2, r1) + 128 mul64(acc3, r0) + 129 mul64(acc4, rs3); 130 acc4 *= r0; 131 132 /* Compute: acc %= (2^130 - 5) (partial remainder) */ 133 d1 += (d0 >> 32); 134 d2 += (d1 >> 32); 135 d3 += (d2 >> 32); 136 acc0 = (uint32_t) d0; 137 acc1 = (uint32_t) d1; 138 acc2 = (uint32_t) d2; 139 acc3 = (uint32_t) d3; 140 acc4 = (uint32_t) (d3 >> 32) + acc4; 141 142 d0 = (uint64_t) acc0 + (acc4 >> 2) + (acc4 & 0xFFFFFFFCU); 143 acc4 &= 3U; 144 acc0 = (uint32_t) d0; 145 d0 = (uint64_t) acc1 + (d0 >> 32U); 146 acc1 = (uint32_t) d0; 147 d0 = (uint64_t) acc2 + (d0 >> 32U); 148 acc2 = (uint32_t) d0; 149 d0 = (uint64_t) acc3 + (d0 >> 32U); 150 acc3 = (uint32_t) d0; 151 d0 = (uint64_t) acc4 + (d0 >> 32U); 152 acc4 = (uint32_t) d0; 153 154 offset += POLY1305_BLOCK_SIZE_BYTES; 155 } 156 157 ctx->acc[0] = acc0; 158 ctx->acc[1] = acc1; 159 ctx->acc[2] = acc2; 160 ctx->acc[3] = acc3; 161 ctx->acc[4] = acc4; 162 } 163 164 /** 165 * \brief Compute the Poly1305 MAC 166 * 167 * \param ctx The Poly1305 context. 168 * \param mac The buffer to where the MAC is written. Must be 169 * big enough to contain the 16-byte MAC. 170 */ 171 static void poly1305_compute_mac(const mbedtls_poly1305_context *ctx, 172 unsigned char mac[16]) 173 { 174 uint64_t d; 175 uint32_t g0, g1, g2, g3, g4; 176 uint32_t acc0, acc1, acc2, acc3, acc4; 177 uint32_t mask; 178 uint32_t mask_inv; 179 180 acc0 = ctx->acc[0]; 181 acc1 = ctx->acc[1]; 182 acc2 = ctx->acc[2]; 183 acc3 = ctx->acc[3]; 184 acc4 = ctx->acc[4]; 185 186 /* Before adding 's' we ensure that the accumulator is mod 2^130 - 5. 187 * We do this by calculating acc - (2^130 - 5), then checking if 188 * the 131st bit is set. If it is, then reduce: acc -= (2^130 - 5) 189 */ 190 191 /* Calculate acc + -(2^130 - 5) */ 192 d = ((uint64_t) acc0 + 5U); 193 g0 = (uint32_t) d; 194 d = ((uint64_t) acc1 + (d >> 32)); 195 g1 = (uint32_t) d; 196 d = ((uint64_t) acc2 + (d >> 32)); 197 g2 = (uint32_t) d; 198 d = ((uint64_t) acc3 + (d >> 32)); 199 g3 = (uint32_t) d; 200 g4 = acc4 + (uint32_t) (d >> 32U); 201 202 /* mask == 0xFFFFFFFF if 131st bit is set, otherwise mask == 0 */ 203 mask = (uint32_t) 0U - (g4 >> 2U); 204 mask_inv = ~mask; 205 206 /* If 131st bit is set then acc=g, otherwise, acc is unmodified */ 207 acc0 = (acc0 & mask_inv) | (g0 & mask); 208 acc1 = (acc1 & mask_inv) | (g1 & mask); 209 acc2 = (acc2 & mask_inv) | (g2 & mask); 210 acc3 = (acc3 & mask_inv) | (g3 & mask); 211 212 /* Add 's' */ 213 d = (uint64_t) acc0 + ctx->s[0]; 214 acc0 = (uint32_t) d; 215 d = (uint64_t) acc1 + ctx->s[1] + (d >> 32U); 216 acc1 = (uint32_t) d; 217 d = (uint64_t) acc2 + ctx->s[2] + (d >> 32U); 218 acc2 = (uint32_t) d; 219 acc3 += ctx->s[3] + (uint32_t) (d >> 32U); 220 221 /* Compute MAC (128 least significant bits of the accumulator) */ 222 MBEDTLS_PUT_UINT32_LE(acc0, mac, 0); 223 MBEDTLS_PUT_UINT32_LE(acc1, mac, 4); 224 MBEDTLS_PUT_UINT32_LE(acc2, mac, 8); 225 MBEDTLS_PUT_UINT32_LE(acc3, mac, 12); 226 } 227 228 void mbedtls_poly1305_init(mbedtls_poly1305_context *ctx) 229 { 230 mbedtls_platform_zeroize(ctx, sizeof(mbedtls_poly1305_context)); 231 } 232 233 void mbedtls_poly1305_free(mbedtls_poly1305_context *ctx) 234 { 235 if (ctx == NULL) { 236 return; 237 } 238 239 mbedtls_platform_zeroize(ctx, sizeof(mbedtls_poly1305_context)); 240 } 241 242 int mbedtls_poly1305_starts(mbedtls_poly1305_context *ctx, 243 const unsigned char key[32]) 244 { 245 /* r &= 0x0ffffffc0ffffffc0ffffffc0fffffff */ 246 ctx->r[0] = MBEDTLS_GET_UINT32_LE(key, 0) & 0x0FFFFFFFU; 247 ctx->r[1] = MBEDTLS_GET_UINT32_LE(key, 4) & 0x0FFFFFFCU; 248 ctx->r[2] = MBEDTLS_GET_UINT32_LE(key, 8) & 0x0FFFFFFCU; 249 ctx->r[3] = MBEDTLS_GET_UINT32_LE(key, 12) & 0x0FFFFFFCU; 250 251 ctx->s[0] = MBEDTLS_GET_UINT32_LE(key, 16); 252 ctx->s[1] = MBEDTLS_GET_UINT32_LE(key, 20); 253 ctx->s[2] = MBEDTLS_GET_UINT32_LE(key, 24); 254 ctx->s[3] = MBEDTLS_GET_UINT32_LE(key, 28); 255 256 /* Initial accumulator state */ 257 ctx->acc[0] = 0U; 258 ctx->acc[1] = 0U; 259 ctx->acc[2] = 0U; 260 ctx->acc[3] = 0U; 261 ctx->acc[4] = 0U; 262 263 /* Queue initially empty */ 264 mbedtls_platform_zeroize(ctx->queue, sizeof(ctx->queue)); 265 ctx->queue_len = 0U; 266 267 return 0; 268 } 269 270 int mbedtls_poly1305_update(mbedtls_poly1305_context *ctx, 271 const unsigned char *input, 272 size_t ilen) 273 { 274 size_t offset = 0U; 275 size_t remaining = ilen; 276 size_t queue_free_len; 277 size_t nblocks; 278 279 if ((remaining > 0U) && (ctx->queue_len > 0U)) { 280 queue_free_len = (POLY1305_BLOCK_SIZE_BYTES - ctx->queue_len); 281 282 if (ilen < queue_free_len) { 283 /* Not enough data to complete the block. 284 * Store this data with the other leftovers. 285 */ 286 memcpy(&ctx->queue[ctx->queue_len], 287 input, 288 ilen); 289 290 ctx->queue_len += ilen; 291 292 remaining = 0U; 293 } else { 294 /* Enough data to produce a complete block */ 295 memcpy(&ctx->queue[ctx->queue_len], 296 input, 297 queue_free_len); 298 299 ctx->queue_len = 0U; 300 301 poly1305_process(ctx, 1U, ctx->queue, 1U); /* add padding bit */ 302 303 offset += queue_free_len; 304 remaining -= queue_free_len; 305 } 306 } 307 308 if (remaining >= POLY1305_BLOCK_SIZE_BYTES) { 309 nblocks = remaining / POLY1305_BLOCK_SIZE_BYTES; 310 311 poly1305_process(ctx, nblocks, &input[offset], 1U); 312 313 offset += nblocks * POLY1305_BLOCK_SIZE_BYTES; 314 remaining %= POLY1305_BLOCK_SIZE_BYTES; 315 } 316 317 if (remaining > 0U) { 318 /* Store partial block */ 319 ctx->queue_len = remaining; 320 memcpy(ctx->queue, &input[offset], remaining); 321 } 322 323 return 0; 324 } 325 326 int mbedtls_poly1305_finish(mbedtls_poly1305_context *ctx, 327 unsigned char mac[16]) 328 { 329 /* Process any leftover data */ 330 if (ctx->queue_len > 0U) { 331 /* Add padding bit */ 332 ctx->queue[ctx->queue_len] = 1U; 333 ctx->queue_len++; 334 335 /* Pad with zeroes */ 336 memset(&ctx->queue[ctx->queue_len], 337 0, 338 POLY1305_BLOCK_SIZE_BYTES - ctx->queue_len); 339 340 poly1305_process(ctx, 1U, /* Process 1 block */ 341 ctx->queue, 0U); /* Already padded above */ 342 } 343 344 poly1305_compute_mac(ctx, mac); 345 346 return 0; 347 } 348 349 int mbedtls_poly1305_mac(const unsigned char key[32], 350 const unsigned char *input, 351 size_t ilen, 352 unsigned char mac[16]) 353 { 354 mbedtls_poly1305_context ctx; 355 int ret = MBEDTLS_ERR_ERROR_CORRUPTION_DETECTED; 356 357 mbedtls_poly1305_init(&ctx); 358 359 ret = mbedtls_poly1305_starts(&ctx, key); 360 if (ret != 0) { 361 goto cleanup; 362 } 363 364 ret = mbedtls_poly1305_update(&ctx, input, ilen); 365 if (ret != 0) { 366 goto cleanup; 367 } 368 369 ret = mbedtls_poly1305_finish(&ctx, mac); 370 371 cleanup: 372 mbedtls_poly1305_free(&ctx); 373 return ret; 374 } 375 376 #endif /* MBEDTLS_POLY1305_ALT */ 377 378 #if defined(MBEDTLS_SELF_TEST) 379 380 static const unsigned char test_keys[2][32] = 381 { 382 { 383 0x85, 0xd6, 0xbe, 0x78, 0x57, 0x55, 0x6d, 0x33, 384 0x7f, 0x44, 0x52, 0xfe, 0x42, 0xd5, 0x06, 0xa8, 385 0x01, 0x03, 0x80, 0x8a, 0xfb, 0x0d, 0xb2, 0xfd, 386 0x4a, 0xbf, 0xf6, 0xaf, 0x41, 0x49, 0xf5, 0x1b 387 }, 388 { 389 0x1c, 0x92, 0x40, 0xa5, 0xeb, 0x55, 0xd3, 0x8a, 390 0xf3, 0x33, 0x88, 0x86, 0x04, 0xf6, 0xb5, 0xf0, 391 0x47, 0x39, 0x17, 0xc1, 0x40, 0x2b, 0x80, 0x09, 392 0x9d, 0xca, 0x5c, 0xbc, 0x20, 0x70, 0x75, 0xc0 393 } 394 }; 395 396 static const unsigned char test_data[2][127] = 397 { 398 { 399 0x43, 0x72, 0x79, 0x70, 0x74, 0x6f, 0x67, 0x72, 400 0x61, 0x70, 0x68, 0x69, 0x63, 0x20, 0x46, 0x6f, 401 0x72, 0x75, 0x6d, 0x20, 0x52, 0x65, 0x73, 0x65, 402 0x61, 0x72, 0x63, 0x68, 0x20, 0x47, 0x72, 0x6f, 403 0x75, 0x70 404 }, 405 { 406 0x27, 0x54, 0x77, 0x61, 0x73, 0x20, 0x62, 0x72, 407 0x69, 0x6c, 0x6c, 0x69, 0x67, 0x2c, 0x20, 0x61, 408 0x6e, 0x64, 0x20, 0x74, 0x68, 0x65, 0x20, 0x73, 409 0x6c, 0x69, 0x74, 0x68, 0x79, 0x20, 0x74, 0x6f, 410 0x76, 0x65, 0x73, 0x0a, 0x44, 0x69, 0x64, 0x20, 411 0x67, 0x79, 0x72, 0x65, 0x20, 0x61, 0x6e, 0x64, 412 0x20, 0x67, 0x69, 0x6d, 0x62, 0x6c, 0x65, 0x20, 413 0x69, 0x6e, 0x20, 0x74, 0x68, 0x65, 0x20, 0x77, 414 0x61, 0x62, 0x65, 0x3a, 0x0a, 0x41, 0x6c, 0x6c, 415 0x20, 0x6d, 0x69, 0x6d, 0x73, 0x79, 0x20, 0x77, 416 0x65, 0x72, 0x65, 0x20, 0x74, 0x68, 0x65, 0x20, 417 0x62, 0x6f, 0x72, 0x6f, 0x67, 0x6f, 0x76, 0x65, 418 0x73, 0x2c, 0x0a, 0x41, 0x6e, 0x64, 0x20, 0x74, 419 0x68, 0x65, 0x20, 0x6d, 0x6f, 0x6d, 0x65, 0x20, 420 0x72, 0x61, 0x74, 0x68, 0x73, 0x20, 0x6f, 0x75, 421 0x74, 0x67, 0x72, 0x61, 0x62, 0x65, 0x2e 422 } 423 }; 424 425 static const size_t test_data_len[2] = 426 { 427 34U, 428 127U 429 }; 430 431 static const unsigned char test_mac[2][16] = 432 { 433 { 434 0xa8, 0x06, 0x1d, 0xc1, 0x30, 0x51, 0x36, 0xc6, 435 0xc2, 0x2b, 0x8b, 0xaf, 0x0c, 0x01, 0x27, 0xa9 436 }, 437 { 438 0x45, 0x41, 0x66, 0x9a, 0x7e, 0xaa, 0xee, 0x61, 439 0xe7, 0x08, 0xdc, 0x7c, 0xbc, 0xc5, 0xeb, 0x62 440 } 441 }; 442 443 /* Make sure no other definition is already present. */ 444 #undef ASSERT 445 446 #define ASSERT(cond, args) \ 447 do \ 448 { \ 449 if (!(cond)) \ 450 { \ 451 if (verbose != 0) \ 452 mbedtls_printf args; \ 453 \ 454 return -1; \ 455 } \ 456 } \ 457 while (0) 458 459 int mbedtls_poly1305_self_test(int verbose) 460 { 461 unsigned char mac[16]; 462 unsigned i; 463 int ret = MBEDTLS_ERR_ERROR_CORRUPTION_DETECTED; 464 465 for (i = 0U; i < 2U; i++) { 466 if (verbose != 0) { 467 mbedtls_printf(" Poly1305 test %u ", i); 468 } 469 470 ret = mbedtls_poly1305_mac(test_keys[i], 471 test_data[i], 472 test_data_len[i], 473 mac); 474 ASSERT(0 == ret, ("error code: %i\n", ret)); 475 476 ASSERT(0 == memcmp(mac, test_mac[i], 16U), ("failed (mac)\n")); 477 478 if (verbose != 0) { 479 mbedtls_printf("passed\n"); 480 } 481 } 482 483 if (verbose != 0) { 484 mbedtls_printf("\n"); 485 } 486 487 return 0; 488 } 489 490 #endif /* MBEDTLS_SELF_TEST */ 491 492 #endif /* MBEDTLS_POLY1305_C */ 493