1*4882a593Smuzhiyun // SPDX-License-Identifier: GPL-2.0-or-later
2*4882a593Smuzhiyun /*
3*4882a593Smuzhiyun * The "hash function" used as the core of the ChaCha stream cipher (RFC7539)
4*4882a593Smuzhiyun *
5*4882a593Smuzhiyun * Copyright (C) 2015 Martin Willi
6*4882a593Smuzhiyun */
7*4882a593Smuzhiyun
8*4882a593Smuzhiyun #include <linux/bug.h>
9*4882a593Smuzhiyun #include <linux/kernel.h>
10*4882a593Smuzhiyun #include <linux/export.h>
11*4882a593Smuzhiyun #include <linux/bitops.h>
12*4882a593Smuzhiyun #include <linux/string.h>
13*4882a593Smuzhiyun #include <asm/unaligned.h>
14*4882a593Smuzhiyun #include <crypto/chacha.h>
15*4882a593Smuzhiyun
/*
 * One ChaCha quarter round on the four state words x[a], x[b], x[c], x[d]:
 * four add/xor/rotate steps using the rotation amounts 16, 12, 8 and 7.
 */
static inline void chacha_quarter_round(u32 *x, int a, int b, int c, int d)
{
	x[a] += x[b];
	x[d] = rol32(x[d] ^ x[a], 16);

	x[c] += x[d];
	x[b] = rol32(x[b] ^ x[c], 12);

	x[a] += x[b];
	x[d] = rol32(x[d] ^ x[a], 8);

	x[c] += x[d];
	x[b] = rol32(x[b] ^ x[c], 7);
}

/*
 * Apply the ChaCha permutation in place to the 16-word state @x.
 *
 * Each loop iteration performs one double round: a quarter round on each of
 * the four columns of the 4x4 word matrix, followed by a quarter round on each
 * of the four diagonals.  The four quarter rounds within a group operate on
 * disjoint words, so they are independent of one another.
 *
 * Only 20 and 12 rounds are expected; any other @nrounds triggers a one-time
 * warning, but the loop below still runs with the value that was passed in.
 */
static void chacha_permute(u32 *x, int nrounds)
{
	int remaining;

	/* whitelist the allowed round counts */
	WARN_ON_ONCE(nrounds != 20 && nrounds != 12);

	/* Two rounds (one column round + one diagonal round) per iteration. */
	for (remaining = nrounds; remaining > 0; remaining -= 2) {
		/* column round */
		chacha_quarter_round(x, 0, 4,  8, 12);
		chacha_quarter_round(x, 1, 5,  9, 13);
		chacha_quarter_round(x, 2, 6, 10, 14);
		chacha_quarter_round(x, 3, 7, 11, 15);

		/* diagonal round */
		chacha_quarter_round(x, 0, 5, 10, 15);
		chacha_quarter_round(x, 1, 6, 11, 12);
		chacha_quarter_round(x, 2, 7,  8, 13);
		chacha_quarter_round(x, 3, 4,  9, 14);
	}
}
65*4882a593Smuzhiyun
66*4882a593Smuzhiyun /**
67*4882a593Smuzhiyun * chacha_block - generate one keystream block and increment block counter
68*4882a593Smuzhiyun * @state: input state matrix (16 32-bit words)
69*4882a593Smuzhiyun * @stream: output keystream block (64 bytes)
70*4882a593Smuzhiyun * @nrounds: number of rounds (20 or 12; 20 is recommended)
71*4882a593Smuzhiyun *
72*4882a593Smuzhiyun * This is the ChaCha core, a function from 64-byte strings to 64-byte strings.
73*4882a593Smuzhiyun * The caller has already converted the endianness of the input. This function
74*4882a593Smuzhiyun * also handles incrementing the block counter in the input matrix.
75*4882a593Smuzhiyun */
chacha_block_generic(u32 * state,u8 * stream,int nrounds)76*4882a593Smuzhiyun void chacha_block_generic(u32 *state, u8 *stream, int nrounds)
77*4882a593Smuzhiyun {
78*4882a593Smuzhiyun u32 x[16];
79*4882a593Smuzhiyun int i;
80*4882a593Smuzhiyun
81*4882a593Smuzhiyun memcpy(x, state, 64);
82*4882a593Smuzhiyun
83*4882a593Smuzhiyun chacha_permute(x, nrounds);
84*4882a593Smuzhiyun
85*4882a593Smuzhiyun for (i = 0; i < ARRAY_SIZE(x); i++)
86*4882a593Smuzhiyun put_unaligned_le32(x[i] + state[i], &stream[i * sizeof(u32)]);
87*4882a593Smuzhiyun
88*4882a593Smuzhiyun state[12]++;
89*4882a593Smuzhiyun }
90*4882a593Smuzhiyun EXPORT_SYMBOL(chacha_block_generic);
91*4882a593Smuzhiyun
92*4882a593Smuzhiyun /**
93*4882a593Smuzhiyun * hchacha_block_generic - abbreviated ChaCha core, for XChaCha
94*4882a593Smuzhiyun * @state: input state matrix (16 32-bit words)
95*4882a593Smuzhiyun * @out: output (8 32-bit words)
96*4882a593Smuzhiyun * @nrounds: number of rounds (20 or 12; 20 is recommended)
97*4882a593Smuzhiyun *
98*4882a593Smuzhiyun * HChaCha is the ChaCha equivalent of HSalsa and is an intermediate step
99*4882a593Smuzhiyun * towards XChaCha (see https://cr.yp.to/snuffle/xsalsa-20081128.pdf). HChaCha
100*4882a593Smuzhiyun * skips the final addition of the initial state, and outputs only certain words
101*4882a593Smuzhiyun * of the state. It should not be used for streaming directly.
102*4882a593Smuzhiyun */
hchacha_block_generic(const u32 * state,u32 * stream,int nrounds)103*4882a593Smuzhiyun void hchacha_block_generic(const u32 *state, u32 *stream, int nrounds)
104*4882a593Smuzhiyun {
105*4882a593Smuzhiyun u32 x[16];
106*4882a593Smuzhiyun
107*4882a593Smuzhiyun memcpy(x, state, 64);
108*4882a593Smuzhiyun
109*4882a593Smuzhiyun chacha_permute(x, nrounds);
110*4882a593Smuzhiyun
111*4882a593Smuzhiyun memcpy(&stream[0], &x[0], 16);
112*4882a593Smuzhiyun memcpy(&stream[4], &x[12], 16);
113*4882a593Smuzhiyun }
114*4882a593Smuzhiyun EXPORT_SYMBOL(hchacha_block_generic);
115