xref: /optee_os/lib/libmbedtls/mbedtls/library/poly1305.c (revision 11fa71b9ddb429088f325cfda430183003ccd1db)
1 /*  SPDX-License-Identifier: Apache-2.0 */
2 /**
3  * \file poly1305.c
4  *
5  * \brief Poly1305 authentication algorithm.
6  *
7  *  Copyright (C) 2006-2016, ARM Limited, All Rights Reserved
8  *
9  *  Licensed under the Apache License, Version 2.0 (the "License"); you may
10  *  not use this file except in compliance with the License.
11  *  You may obtain a copy of the License at
12  *
13  *  http://www.apache.org/licenses/LICENSE-2.0
14  *
15  *  Unless required by applicable law or agreed to in writing, software
16  *  distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
17  *  WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
18  *  See the License for the specific language governing permissions and
19  *  limitations under the License.
20  *
21  *  This file is part of mbed TLS (https://tls.mbed.org)
22  */
23 #if !defined(MBEDTLS_CONFIG_FILE)
24 #include "mbedtls/config.h"
25 #else
26 #include MBEDTLS_CONFIG_FILE
27 #endif
28 
29 #if defined(MBEDTLS_POLY1305_C)
30 
31 #include "mbedtls/poly1305.h"
32 #include "mbedtls/platform_util.h"
33 #include "mbedtls/error.h"
34 
35 #include <string.h>
36 
37 #if defined(MBEDTLS_SELF_TEST)
38 #if defined(MBEDTLS_PLATFORM_C)
39 #include "mbedtls/platform.h"
40 #else
41 #include <stdio.h>
42 #define mbedtls_printf printf
43 #endif /* MBEDTLS_PLATFORM_C */
44 #endif /* MBEDTLS_SELF_TEST */
45 
46 #if !defined(MBEDTLS_POLY1305_ALT)
47 
48 #if ( defined(__ARMCC_VERSION) || defined(_MSC_VER) ) && \
49     !defined(inline) && !defined(__cplusplus)
50 #define inline __inline
51 #endif
52 
/*
 * Parameter validation helpers. Both forward the condition unchanged to the
 * library-wide MBEDTLS_INTERNAL_VALIDATE* macros; the returning variant
 * supplies MBEDTLS_ERR_POLY1305_BAD_INPUT_DATA as the error code.
 */
#define POLY1305_VALIDATE_RET( cond ) \
    MBEDTLS_INTERNAL_VALIDATE_RET( cond, MBEDTLS_ERR_POLY1305_BAD_INPUT_DATA )
#define POLY1305_VALIDATE( cond ) \
    MBEDTLS_INTERNAL_VALIDATE( cond )

/* Size, in bytes, of one Poly1305 input block. */
#define POLY1305_BLOCK_SIZE_BYTES ( 16U )
60 
/*
 * Read four consecutive bytes of `data`, starting at index `offset`, as a
 * little-endian 32-bit word. Both arguments are evaluated more than once,
 * so they must be free of side effects.
 */
#define BYTES_TO_U32_LE( data, offset )                     \
    ( ( (uint32_t) (data)[( offset ) + 0]       )           \
    | ( (uint32_t) (data)[( offset ) + 1] <<  8 )           \
    | ( (uint32_t) (data)[( offset ) + 2] << 16 )           \
    | ( (uint32_t) (data)[( offset ) + 3] << 24 ) )
67 
/*
 * Our implementation is tuned for 32-bit platforms with a 64-bit multiplier.
 * However, we provide an alternative for platforms without such a multiplier.
 */
72 #if defined(MBEDTLS_NO_64BIT_MULTIPLICATION)
/*
 * 32x32 -> 64 bit multiplication built only from 16x16 -> 32 bit products,
 * for targets where a native 64-bit multiply is unavailable or undesirable.
 */
static uint64_t mul64( uint32_t a, uint32_t b )
{
    /* Split the operands: a = a_lo + 2**16 a_hi, b = b_lo + 2**16 b_hi */
    const uint16_t a_lo = (uint16_t) a;
    const uint16_t a_hi = (uint16_t) ( a >> 16 );
    const uint16_t b_lo = (uint16_t) b;
    const uint16_t b_hi = (uint16_t) ( b >> 16 );

    /* a*b = a_lo*b_lo + 2**16 (a_hi*b_lo + a_lo*b_hi) + 2**32 a_hi*b_hi */
    const uint32_t low_prod  = (uint32_t) a_lo * b_lo;
    const uint32_t high_prod = (uint32_t) a_hi * b_hi;

    /* The two cross products can sum past 32 bits, so add them in 64 bits */
    uint64_t cross = (uint32_t) a_hi * b_lo;
    cross += (uint32_t) a_lo * b_hi;

    return( ( (uint64_t) high_prod << 32 ) + ( cross << 16 ) + low_prod );
}
88 #else
/* 32x32 -> 64 bit multiplication using the native 64-bit multiplier. */
static inline uint64_t mul64( uint32_t a, uint32_t b )
{
    /* Widen one operand first so the product is computed in 64 bits */
    const uint64_t wide_a = a;

    return( wide_a * b );
}
93 #endif
94 
95 
96 /**
97  * \brief                   Process blocks with Poly1305.
98  *
99  * \param ctx               The Poly1305 context.
100  * \param nblocks           Number of blocks to process. Note that this
101  *                          function only processes full blocks.
102  * \param input             Buffer containing the input block(s).
103  * \param needs_padding     Set to 0 if the padding bit has already been
104  *                          applied to the input data before calling this
105  *                          function.  Otherwise, set this parameter to 1.
106  */
107 static void poly1305_process( mbedtls_poly1305_context *ctx,
108                               size_t nblocks,
109                               const unsigned char *input,
110                               uint32_t needs_padding )
111 {
112     uint64_t d0, d1, d2, d3;
113     uint32_t acc0, acc1, acc2, acc3, acc4;
114     uint32_t r0, r1, r2, r3;
115     uint32_t rs1, rs2, rs3;
116     size_t offset  = 0U;
117     size_t i;
118 
119     r0 = ctx->r[0];
120     r1 = ctx->r[1];
121     r2 = ctx->r[2];
122     r3 = ctx->r[3];
123 
124     rs1 = r1 + ( r1 >> 2U );
125     rs2 = r2 + ( r2 >> 2U );
126     rs3 = r3 + ( r3 >> 2U );
127 
128     acc0 = ctx->acc[0];
129     acc1 = ctx->acc[1];
130     acc2 = ctx->acc[2];
131     acc3 = ctx->acc[3];
132     acc4 = ctx->acc[4];
133 
134     /* Process full blocks */
135     for( i = 0U; i < nblocks; i++ )
136     {
137         /* The input block is treated as a 128-bit little-endian integer */
138         d0   = BYTES_TO_U32_LE( input, offset + 0  );
139         d1   = BYTES_TO_U32_LE( input, offset + 4  );
140         d2   = BYTES_TO_U32_LE( input, offset + 8  );
141         d3   = BYTES_TO_U32_LE( input, offset + 12 );
142 
143         /* Compute: acc += (padded) block as a 130-bit integer */
144         d0  += (uint64_t) acc0;
145         d1  += (uint64_t) acc1 + ( d0 >> 32U );
146         d2  += (uint64_t) acc2 + ( d1 >> 32U );
147         d3  += (uint64_t) acc3 + ( d2 >> 32U );
148         acc0 = (uint32_t) d0;
149         acc1 = (uint32_t) d1;
150         acc2 = (uint32_t) d2;
151         acc3 = (uint32_t) d3;
152         acc4 += (uint32_t) ( d3 >> 32U ) + needs_padding;
153 
154         /* Compute: acc *= r */
155         d0 = mul64( acc0, r0  ) +
156              mul64( acc1, rs3 ) +
157              mul64( acc2, rs2 ) +
158              mul64( acc3, rs1 );
159         d1 = mul64( acc0, r1  ) +
160              mul64( acc1, r0  ) +
161              mul64( acc2, rs3 ) +
162              mul64( acc3, rs2 ) +
163              mul64( acc4, rs1 );
164         d2 = mul64( acc0, r2  ) +
165              mul64( acc1, r1  ) +
166              mul64( acc2, r0  ) +
167              mul64( acc3, rs3 ) +
168              mul64( acc4, rs2 );
169         d3 = mul64( acc0, r3  ) +
170              mul64( acc1, r2  ) +
171              mul64( acc2, r1  ) +
172              mul64( acc3, r0  ) +
173              mul64( acc4, rs3 );
174         acc4 *= r0;
175 
176         /* Compute: acc %= (2^130 - 5) (partial remainder) */
177         d1 += ( d0 >> 32 );
178         d2 += ( d1 >> 32 );
179         d3 += ( d2 >> 32 );
180         acc0 = (uint32_t) d0;
181         acc1 = (uint32_t) d1;
182         acc2 = (uint32_t) d2;
183         acc3 = (uint32_t) d3;
184         acc4 = (uint32_t) ( d3 >> 32 ) + acc4;
185 
186         d0 = (uint64_t) acc0 + ( acc4 >> 2 ) + ( acc4 & 0xFFFFFFFCU );
187         acc4 &= 3U;
188         acc0 = (uint32_t) d0;
189         d0 = (uint64_t) acc1 + ( d0 >> 32U );
190         acc1 = (uint32_t) d0;
191         d0 = (uint64_t) acc2 + ( d0 >> 32U );
192         acc2 = (uint32_t) d0;
193         d0 = (uint64_t) acc3 + ( d0 >> 32U );
194         acc3 = (uint32_t) d0;
195         d0 = (uint64_t) acc4 + ( d0 >> 32U );
196         acc4 = (uint32_t) d0;
197 
198         offset    += POLY1305_BLOCK_SIZE_BYTES;
199     }
200 
201     ctx->acc[0] = acc0;
202     ctx->acc[1] = acc1;
203     ctx->acc[2] = acc2;
204     ctx->acc[3] = acc3;
205     ctx->acc[4] = acc4;
206 }
207 
208 /**
209  * \brief                   Compute the Poly1305 MAC
210  *
211  * \param ctx               The Poly1305 context.
212  * \param mac               The buffer to where the MAC is written. Must be
213  *                          big enough to contain the 16-byte MAC.
214  */
215 static void poly1305_compute_mac( const mbedtls_poly1305_context *ctx,
216                                   unsigned char mac[16] )
217 {
218     uint64_t d;
219     uint32_t g0, g1, g2, g3, g4;
220     uint32_t acc0, acc1, acc2, acc3, acc4;
221     uint32_t mask;
222     uint32_t mask_inv;
223 
224     acc0 = ctx->acc[0];
225     acc1 = ctx->acc[1];
226     acc2 = ctx->acc[2];
227     acc3 = ctx->acc[3];
228     acc4 = ctx->acc[4];
229 
230     /* Before adding 's' we ensure that the accumulator is mod 2^130 - 5.
231      * We do this by calculating acc - (2^130 - 5), then checking if
232      * the 131st bit is set. If it is, then reduce: acc -= (2^130 - 5)
233      */
234 
235     /* Calculate acc + -(2^130 - 5) */
236     d  = ( (uint64_t) acc0 + 5U );
237     g0 = (uint32_t) d;
238     d  = ( (uint64_t) acc1 + ( d >> 32 ) );
239     g1 = (uint32_t) d;
240     d  = ( (uint64_t) acc2 + ( d >> 32 ) );
241     g2 = (uint32_t) d;
242     d  = ( (uint64_t) acc3 + ( d >> 32 ) );
243     g3 = (uint32_t) d;
244     g4 = acc4 + (uint32_t) ( d >> 32U );
245 
246     /* mask == 0xFFFFFFFF if 131st bit is set, otherwise mask == 0 */
247     mask = (uint32_t) 0U - ( g4 >> 2U );
248     mask_inv = ~mask;
249 
250     /* If 131st bit is set then acc=g, otherwise, acc is unmodified */
251     acc0 = ( acc0 & mask_inv ) | ( g0 & mask );
252     acc1 = ( acc1 & mask_inv ) | ( g1 & mask );
253     acc2 = ( acc2 & mask_inv ) | ( g2 & mask );
254     acc3 = ( acc3 & mask_inv ) | ( g3 & mask );
255 
256     /* Add 's' */
257     d = (uint64_t) acc0 + ctx->s[0];
258     acc0 = (uint32_t) d;
259     d = (uint64_t) acc1 + ctx->s[1] + ( d >> 32U );
260     acc1 = (uint32_t) d;
261     d = (uint64_t) acc2 + ctx->s[2] + ( d >> 32U );
262     acc2 = (uint32_t) d;
263     acc3 += ctx->s[3] + (uint32_t) ( d >> 32U );
264 
265     /* Compute MAC (128 least significant bits of the accumulator) */
266     mac[ 0] = (unsigned char)( acc0       );
267     mac[ 1] = (unsigned char)( acc0 >>  8 );
268     mac[ 2] = (unsigned char)( acc0 >> 16 );
269     mac[ 3] = (unsigned char)( acc0 >> 24 );
270     mac[ 4] = (unsigned char)( acc1       );
271     mac[ 5] = (unsigned char)( acc1 >>  8 );
272     mac[ 6] = (unsigned char)( acc1 >> 16 );
273     mac[ 7] = (unsigned char)( acc1 >> 24 );
274     mac[ 8] = (unsigned char)( acc2       );
275     mac[ 9] = (unsigned char)( acc2 >>  8 );
276     mac[10] = (unsigned char)( acc2 >> 16 );
277     mac[11] = (unsigned char)( acc2 >> 24 );
278     mac[12] = (unsigned char)( acc3       );
279     mac[13] = (unsigned char)( acc3 >>  8 );
280     mac[14] = (unsigned char)( acc3 >> 16 );
281     mac[15] = (unsigned char)( acc3 >> 24 );
282 }
283 
284 void mbedtls_poly1305_init( mbedtls_poly1305_context *ctx )
285 {
286     POLY1305_VALIDATE( ctx != NULL );
287 
288     mbedtls_platform_zeroize( ctx, sizeof( mbedtls_poly1305_context ) );
289 }
290 
291 void mbedtls_poly1305_free( mbedtls_poly1305_context *ctx )
292 {
293     if( ctx == NULL )
294         return;
295 
296     mbedtls_platform_zeroize( ctx, sizeof( mbedtls_poly1305_context ) );
297 }
298 
299 int mbedtls_poly1305_starts( mbedtls_poly1305_context *ctx,
300                              const unsigned char key[32] )
301 {
302     POLY1305_VALIDATE_RET( ctx != NULL );
303     POLY1305_VALIDATE_RET( key != NULL );
304 
305     /* r &= 0x0ffffffc0ffffffc0ffffffc0fffffff */
306     ctx->r[0] = BYTES_TO_U32_LE( key, 0 )  & 0x0FFFFFFFU;
307     ctx->r[1] = BYTES_TO_U32_LE( key, 4 )  & 0x0FFFFFFCU;
308     ctx->r[2] = BYTES_TO_U32_LE( key, 8 )  & 0x0FFFFFFCU;
309     ctx->r[3] = BYTES_TO_U32_LE( key, 12 ) & 0x0FFFFFFCU;
310 
311     ctx->s[0] = BYTES_TO_U32_LE( key, 16 );
312     ctx->s[1] = BYTES_TO_U32_LE( key, 20 );
313     ctx->s[2] = BYTES_TO_U32_LE( key, 24 );
314     ctx->s[3] = BYTES_TO_U32_LE( key, 28 );
315 
316     /* Initial accumulator state */
317     ctx->acc[0] = 0U;
318     ctx->acc[1] = 0U;
319     ctx->acc[2] = 0U;
320     ctx->acc[3] = 0U;
321     ctx->acc[4] = 0U;
322 
323     /* Queue initially empty */
324     mbedtls_platform_zeroize( ctx->queue, sizeof( ctx->queue ) );
325     ctx->queue_len = 0U;
326 
327     return( 0 );
328 }
329 
330 int mbedtls_poly1305_update( mbedtls_poly1305_context *ctx,
331                              const unsigned char *input,
332                              size_t ilen )
333 {
334     size_t offset    = 0U;
335     size_t remaining = ilen;
336     size_t queue_free_len;
337     size_t nblocks;
338     POLY1305_VALIDATE_RET( ctx != NULL );
339     POLY1305_VALIDATE_RET( ilen == 0 || input != NULL );
340 
341     if( ( remaining > 0U ) && ( ctx->queue_len > 0U ) )
342     {
343         queue_free_len = ( POLY1305_BLOCK_SIZE_BYTES - ctx->queue_len );
344 
345         if( ilen < queue_free_len )
346         {
347             /* Not enough data to complete the block.
348              * Store this data with the other leftovers.
349              */
350             memcpy( &ctx->queue[ctx->queue_len],
351                     input,
352                     ilen );
353 
354             ctx->queue_len += ilen;
355 
356             remaining = 0U;
357         }
358         else
359         {
360             /* Enough data to produce a complete block */
361             memcpy( &ctx->queue[ctx->queue_len],
362                     input,
363                     queue_free_len );
364 
365             ctx->queue_len = 0U;
366 
367             poly1305_process( ctx, 1U, ctx->queue, 1U ); /* add padding bit */
368 
369             offset    += queue_free_len;
370             remaining -= queue_free_len;
371         }
372     }
373 
374     if( remaining >= POLY1305_BLOCK_SIZE_BYTES )
375     {
376         nblocks = remaining / POLY1305_BLOCK_SIZE_BYTES;
377 
378         poly1305_process( ctx, nblocks, &input[offset], 1U );
379 
380         offset += nblocks * POLY1305_BLOCK_SIZE_BYTES;
381         remaining %= POLY1305_BLOCK_SIZE_BYTES;
382     }
383 
384     if( remaining > 0U )
385     {
386         /* Store partial block */
387         ctx->queue_len = remaining;
388         memcpy( ctx->queue, &input[offset], remaining );
389     }
390 
391     return( 0 );
392 }
393 
394 int mbedtls_poly1305_finish( mbedtls_poly1305_context *ctx,
395                              unsigned char mac[16] )
396 {
397     POLY1305_VALIDATE_RET( ctx != NULL );
398     POLY1305_VALIDATE_RET( mac != NULL );
399 
400     /* Process any leftover data */
401     if( ctx->queue_len > 0U )
402     {
403         /* Add padding bit */
404         ctx->queue[ctx->queue_len] = 1U;
405         ctx->queue_len++;
406 
407         /* Pad with zeroes */
408         memset( &ctx->queue[ctx->queue_len],
409                 0,
410                 POLY1305_BLOCK_SIZE_BYTES - ctx->queue_len );
411 
412         poly1305_process( ctx, 1U,          /* Process 1 block */
413                           ctx->queue, 0U ); /* Already padded above */
414     }
415 
416     poly1305_compute_mac( ctx, mac );
417 
418     return( 0 );
419 }
420 
421 int mbedtls_poly1305_mac( const unsigned char key[32],
422                           const unsigned char *input,
423                           size_t ilen,
424                           unsigned char mac[16] )
425 {
426     mbedtls_poly1305_context ctx;
427     int ret = MBEDTLS_ERR_ERROR_CORRUPTION_DETECTED;
428     POLY1305_VALIDATE_RET( key != NULL );
429     POLY1305_VALIDATE_RET( mac != NULL );
430     POLY1305_VALIDATE_RET( ilen == 0 || input != NULL );
431 
432     mbedtls_poly1305_init( &ctx );
433 
434     ret = mbedtls_poly1305_starts( &ctx, key );
435     if( ret != 0 )
436         goto cleanup;
437 
438     ret = mbedtls_poly1305_update( &ctx, input, ilen );
439     if( ret != 0 )
440         goto cleanup;
441 
442     ret = mbedtls_poly1305_finish( &ctx, mac );
443 
444 cleanup:
445     mbedtls_poly1305_free( &ctx );
446     return( ret );
447 }
448 
449 #endif /* MBEDTLS_POLY1305_ALT */
450 
451 #if defined(MBEDTLS_SELF_TEST)
452 
/*
 * Self-test keys, one 32-byte key per test case. Within each key the first
 * 16 bytes are loaded as 'r' and the last 16 bytes as 's'.
 * NOTE(review): the values appear to be the Poly1305 example keys from
 * RFC 7539 (section 2.5.2 and appendix A.3) -- confirm before citing.
 */
static const unsigned char test_keys[2][32] =
{
    [0] =
    {
        /* r */
        0x85, 0xd6, 0xbe, 0x78, 0x57, 0x55, 0x6d, 0x33,
        0x7f, 0x44, 0x52, 0xfe, 0x42, 0xd5, 0x06, 0xa8,
        /* s */
        0x01, 0x03, 0x80, 0x8a, 0xfb, 0x0d, 0xb2, 0xfd,
        0x4a, 0xbf, 0xf6, 0xaf, 0x41, 0x49, 0xf5, 0x1b
    },
    [1] =
    {
        /* r */
        0x1c, 0x92, 0x40, 0xa5, 0xeb, 0x55, 0xd3, 0x8a,
        0xf3, 0x33, 0x88, 0x86, 0x04, 0xf6, 0xb5, 0xf0,
        /* s */
        0x47, 0x39, 0x17, 0xc1, 0x40, 0x2b, 0x80, 0x09,
        0x9d, 0xca, 0x5c, 0xbc, 0x20, 0x70, 0x75, 0xc0
    }
};
468 
/*
 * Self-test messages. Each row is 127 bytes wide; shorter messages are
 * zero-filled by the initializer and their real length is given by
 * test_data_len[].
 *
 *   [0] ASCII "Cryptographic Forum Research Group" (34 bytes)
 *   [1] ASCII, four lines separated by 0x0a (127 bytes):
 *         'Twas brillig, and the slithy toves
 *         Did gyre and gimble in the wabe:
 *         All mimsy were the borogoves,
 *         And the mome raths outgrabe.
 */
static const unsigned char test_data[2][127] =
{
    [0] =
    {
        0x43, 0x72, 0x79, 0x70, 0x74, 0x6f, 0x67, 0x72,
        0x61, 0x70, 0x68, 0x69, 0x63, 0x20, 0x46, 0x6f,
        0x72, 0x75, 0x6d, 0x20, 0x52, 0x65, 0x73, 0x65,
        0x61, 0x72, 0x63, 0x68, 0x20, 0x47, 0x72, 0x6f,
        0x75, 0x70
    },
    [1] =
    {
        0x27, 0x54, 0x77, 0x61, 0x73, 0x20, 0x62, 0x72,
        0x69, 0x6c, 0x6c, 0x69, 0x67, 0x2c, 0x20, 0x61,
        0x6e, 0x64, 0x20, 0x74, 0x68, 0x65, 0x20, 0x73,
        0x6c, 0x69, 0x74, 0x68, 0x79, 0x20, 0x74, 0x6f,
        0x76, 0x65, 0x73, 0x0a, 0x44, 0x69, 0x64, 0x20,
        0x67, 0x79, 0x72, 0x65, 0x20, 0x61, 0x6e, 0x64,
        0x20, 0x67, 0x69, 0x6d, 0x62, 0x6c, 0x65, 0x20,
        0x69, 0x6e, 0x20, 0x74, 0x68, 0x65, 0x20, 0x77,
        0x61, 0x62, 0x65, 0x3a, 0x0a, 0x41, 0x6c, 0x6c,
        0x20, 0x6d, 0x69, 0x6d, 0x73, 0x79, 0x20, 0x77,
        0x65, 0x72, 0x65, 0x20, 0x74, 0x68, 0x65, 0x20,
        0x62, 0x6f, 0x72, 0x6f, 0x67, 0x6f, 0x76, 0x65,
        0x73, 0x2c, 0x0a, 0x41, 0x6e, 0x64, 0x20, 0x74,
        0x68, 0x65, 0x20, 0x6d, 0x6f, 0x6d, 0x65, 0x20,
        0x72, 0x61, 0x74, 0x68, 0x73, 0x20, 0x6f, 0x75,
        0x74, 0x67, 0x72, 0x61, 0x62, 0x65, 0x2e
    }
};
497 
/* Number of meaningful bytes in each test_data[] row. */
static const size_t test_data_len[2] =
{
    [0] = 34U,
    [1] = 127U
};
503 
/* Expected 16-byte MAC for each (test_keys[i], test_data[i]) pair. */
static const unsigned char test_mac[2][16] =
{
    [0] =
    {
        0xa8, 0x06, 0x1d, 0xc1, 0x30, 0x51, 0x36, 0xc6,
        0xc2, 0x2b, 0x8b, 0xaf, 0x0c, 0x01, 0x27, 0xa9
    },
    [1] =
    {
        0x45, 0x41, 0x66, 0x9a, 0x7e, 0xaa, 0xee, 0x61,
        0xe7, 0x08, 0xdc, 0x7c, 0xbc, 0xc5, 0xeb, 0x62
    }
};
515 
/*
 * Self-test check. If `cond` is false, the parenthesized printf argument
 * list `args` is printed when the enclosing function's `verbose` variable
 * is non-zero, and the enclosing function then returns -1.
 */
#define ASSERT( cond, args )                    \
    do {                                        \
        if( ! ( cond ) )                        \
        {                                       \
            if( verbose != 0 )                  \
            {                                   \
                mbedtls_printf args;            \
            }                                   \
            return( -1 );                       \
        }                                       \
    } while( 0 )
528 
529 int mbedtls_poly1305_self_test( int verbose )
530 {
531     unsigned char mac[16];
532     unsigned i;
533     int ret = MBEDTLS_ERR_ERROR_CORRUPTION_DETECTED;
534 
535     for( i = 0U; i < 2U; i++ )
536     {
537         if( verbose != 0 )
538             mbedtls_printf( "  Poly1305 test %u ", i );
539 
540         ret = mbedtls_poly1305_mac( test_keys[i],
541                                     test_data[i],
542                                     test_data_len[i],
543                                     mac );
544         ASSERT( 0 == ret, ( "error code: %i\n", ret ) );
545 
546         ASSERT( 0 == memcmp( mac, test_mac[i], 16U ), ( "failed (mac)\n" ) );
547 
548         if( verbose != 0 )
549             mbedtls_printf( "passed\n" );
550     }
551 
552     if( verbose != 0 )
553         mbedtls_printf( "\n" );
554 
555     return( 0 );
556 }
557 
558 #endif /* MBEDTLS_SELF_TEST */
559 
560 #endif /* MBEDTLS_POLY1305_C */
561