1221b1638SMasahiro Yamada /* crc32.c -- compute the CRC-32 of a data stream
2a194255dSDaniel Boulby * Copyright (C) 1995-2022 Mark Adler
3221b1638SMasahiro Yamada * For conditions of distribution and use, see copyright notice in zlib.h
4221b1638SMasahiro Yamada *
5a194255dSDaniel Boulby * This interleaved implementation of a CRC makes use of pipelined multiple
6a194255dSDaniel Boulby * arithmetic-logic units, commonly found in modern CPU cores. It is due to
7a194255dSDaniel Boulby * Kadatch and Jenkins (2010). See doc/crc-doc.1.0.pdf in this distribution.
8221b1638SMasahiro Yamada */
9221b1638SMasahiro Yamada
10221b1638SMasahiro Yamada /* @(#) $Id$ */
11221b1638SMasahiro Yamada
12221b1638SMasahiro Yamada /*
13221b1638SMasahiro Yamada Note on the use of DYNAMIC_CRC_TABLE: there is no mutex or semaphore
14221b1638SMasahiro Yamada protection on the static variables used to control the first-use generation
15221b1638SMasahiro Yamada of the crc tables. Therefore, if you #define DYNAMIC_CRC_TABLE, you should
16221b1638SMasahiro Yamada first call get_crc_table() to initialize the tables before allowing more than
17221b1638SMasahiro Yamada one thread to use crc32().
18221b1638SMasahiro Yamada
19a194255dSDaniel Boulby MAKECRCH can be #defined to write out crc32.h. A main() routine is also
20a194255dSDaniel Boulby produced, so that this one source file can be compiled to an executable.
21221b1638SMasahiro Yamada */
22221b1638SMasahiro Yamada
23221b1638SMasahiro Yamada #ifdef MAKECRCH
24221b1638SMasahiro Yamada # include <stdio.h>
25221b1638SMasahiro Yamada # ifndef DYNAMIC_CRC_TABLE
26221b1638SMasahiro Yamada # define DYNAMIC_CRC_TABLE
27221b1638SMasahiro Yamada # endif /* !DYNAMIC_CRC_TABLE */
28221b1638SMasahiro Yamada #endif /* MAKECRCH */
29221b1638SMasahiro Yamada
30a194255dSDaniel Boulby #include "zutil.h" /* for Z_U4, Z_U8, z_crc_t, and FAR definitions */
31221b1638SMasahiro Yamada
32a194255dSDaniel Boulby /*
33a194255dSDaniel Boulby A CRC of a message is computed on N braids of words in the message, where
34a194255dSDaniel Boulby each word consists of W bytes (4 or 8). If N is 3, for example, then three
35a194255dSDaniel Boulby running sparse CRCs are calculated respectively on each braid, at these
36a194255dSDaniel Boulby indices in the array of words: 0, 3, 6, ..., 1, 4, 7, ..., and 2, 5, 8, ...
37a194255dSDaniel Boulby This is done starting at a word boundary, and continues until as many blocks
38a194255dSDaniel Boulby of N * W bytes as are available have been processed. The results are combined
39a194255dSDaniel Boulby into a single CRC at the end. For this code, N must be in the range 1..6 and
40a194255dSDaniel Boulby W must be 4 or 8. The upper limit on N can be increased if desired by adding
41a194255dSDaniel Boulby more #if blocks, extending the patterns apparent in the code. In addition,
42a194255dSDaniel Boulby crc32.h would need to be regenerated, if the maximum N value is increased.
43a194255dSDaniel Boulby
44a194255dSDaniel Boulby N and W are chosen empirically by benchmarking the execution time on a given
45a194255dSDaniel Boulby processor. The choices for N and W below were based on testing on Intel Kaby
46a194255dSDaniel Boulby Lake i7, AMD Ryzen 7, ARM Cortex-A57, Sparc64-VII, PowerPC POWER9, and MIPS64
47a194255dSDaniel Boulby Octeon II processors. The Intel, AMD, and ARM processors were all fastest
48a194255dSDaniel Boulby with N=5, W=8. The Sparc, PowerPC, and MIPS64 were all fastest at N=5, W=4.
49a194255dSDaniel Boulby They were all tested with either gcc or clang, all using the -O3 optimization
50a194255dSDaniel Boulby level. Your mileage may vary.
51a194255dSDaniel Boulby */
52a194255dSDaniel Boulby
53a194255dSDaniel Boulby /* Define N */
54a194255dSDaniel Boulby #ifdef Z_TESTN
55a194255dSDaniel Boulby # define N Z_TESTN
56221b1638SMasahiro Yamada #else
57a194255dSDaniel Boulby # define N 5
58a194255dSDaniel Boulby #endif
59a194255dSDaniel Boulby #if N < 1 || N > 6
60a194255dSDaniel Boulby # error N must be in 1..6
61a194255dSDaniel Boulby #endif
62221b1638SMasahiro Yamada
63a194255dSDaniel Boulby /*
64a194255dSDaniel Boulby z_crc_t must be at least 32 bits. z_word_t must be at least as long as
65a194255dSDaniel Boulby z_crc_t. It is assumed here that z_word_t is either 32 bits or 64 bits, and
66a194255dSDaniel Boulby that bytes are eight bits.
67a194255dSDaniel Boulby */
68221b1638SMasahiro Yamada
69a194255dSDaniel Boulby /*
70a194255dSDaniel Boulby Define W and the associated z_word_t type. If W is not defined, then a
71a194255dSDaniel Boulby braided calculation is not used, and the associated tables and code are not
72a194255dSDaniel Boulby compiled.
73a194255dSDaniel Boulby */
74a194255dSDaniel Boulby #ifdef Z_TESTW
75a194255dSDaniel Boulby # if Z_TESTW-1 != -1
76a194255dSDaniel Boulby # define W Z_TESTW
77a194255dSDaniel Boulby # endif
78a194255dSDaniel Boulby #else
79a194255dSDaniel Boulby # ifdef MAKECRCH
80a194255dSDaniel Boulby # define W 8 /* required for MAKECRCH */
81a194255dSDaniel Boulby # else
82a194255dSDaniel Boulby # if defined(__x86_64__) || defined(__aarch64__)
83a194255dSDaniel Boulby # define W 8
84a194255dSDaniel Boulby # else
85a194255dSDaniel Boulby # define W 4
86a194255dSDaniel Boulby # endif
87a194255dSDaniel Boulby # endif
88a194255dSDaniel Boulby #endif
89a194255dSDaniel Boulby #ifdef W
90a194255dSDaniel Boulby # if W == 8 && defined(Z_U8)
91a194255dSDaniel Boulby typedef Z_U8 z_word_t;
92a194255dSDaniel Boulby # elif defined(Z_U4)
93a194255dSDaniel Boulby # undef W
94a194255dSDaniel Boulby # define W 4
95a194255dSDaniel Boulby typedef Z_U4 z_word_t;
96a194255dSDaniel Boulby # else
97a194255dSDaniel Boulby # undef W
98a194255dSDaniel Boulby # endif
99a194255dSDaniel Boulby #endif
100a194255dSDaniel Boulby
101a194255dSDaniel Boulby /* If available, use the ARM processor CRC32 instruction. */
102a194255dSDaniel Boulby #if defined(__aarch64__) && defined(__ARM_FEATURE_CRC32) && W == 8
103a194255dSDaniel Boulby # define ARMCRC32
104a194255dSDaniel Boulby #endif
105a194255dSDaniel Boulby
106a194255dSDaniel Boulby #if defined(W) && (!defined(ARMCRC32) || defined(DYNAMIC_CRC_TABLE))
107a194255dSDaniel Boulby /*
108a194255dSDaniel Boulby Swap the bytes in a z_word_t to convert between little and big endian. Any
109a194255dSDaniel Boulby self-respecting compiler will optimize this to a single machine byte-swap
110a194255dSDaniel Boulby instruction, if one is available. This assumes that word_t is either 32 bits
111a194255dSDaniel Boulby or 64 bits.
112a194255dSDaniel Boulby */
byte_swap(z_word_t word)113*fd39217aSManish Pandey local z_word_t byte_swap(z_word_t word) {
114a194255dSDaniel Boulby # if W == 8
115a194255dSDaniel Boulby return
116a194255dSDaniel Boulby (word & 0xff00000000000000) >> 56 |
117a194255dSDaniel Boulby (word & 0xff000000000000) >> 40 |
118a194255dSDaniel Boulby (word & 0xff0000000000) >> 24 |
119a194255dSDaniel Boulby (word & 0xff00000000) >> 8 |
120a194255dSDaniel Boulby (word & 0xff000000) << 8 |
121a194255dSDaniel Boulby (word & 0xff0000) << 24 |
122a194255dSDaniel Boulby (word & 0xff00) << 40 |
123a194255dSDaniel Boulby (word & 0xff) << 56;
124a194255dSDaniel Boulby # else /* W == 4 */
125a194255dSDaniel Boulby return
126a194255dSDaniel Boulby (word & 0xff000000) >> 24 |
127a194255dSDaniel Boulby (word & 0xff0000) >> 8 |
128a194255dSDaniel Boulby (word & 0xff00) << 8 |
129a194255dSDaniel Boulby (word & 0xff) << 24;
130a194255dSDaniel Boulby # endif
131a194255dSDaniel Boulby }
132a194255dSDaniel Boulby #endif
133a194255dSDaniel Boulby
134*fd39217aSManish Pandey #ifdef DYNAMIC_CRC_TABLE
135*fd39217aSManish Pandey /* =========================================================================
136*fd39217aSManish Pandey * Table of powers of x for combining CRC-32s, filled in by make_crc_table()
137*fd39217aSManish Pandey * below.
138*fd39217aSManish Pandey */
139*fd39217aSManish Pandey local z_crc_t FAR x2n_table[32];
140*fd39217aSManish Pandey #else
141*fd39217aSManish Pandey /* =========================================================================
142*fd39217aSManish Pandey * Tables for byte-wise and braided CRC-32 calculations, and a table of powers
143*fd39217aSManish Pandey * of x for combining CRC-32s, all made by make_crc_table().
144*fd39217aSManish Pandey */
145*fd39217aSManish Pandey # include "crc32.h"
146*fd39217aSManish Pandey #endif
147*fd39217aSManish Pandey
148a194255dSDaniel Boulby /* CRC polynomial. */
149a194255dSDaniel Boulby #define POLY 0xedb88320 /* p(x) reflected, with x^32 implied */
150221b1638SMasahiro Yamada
151*fd39217aSManish Pandey /*
152*fd39217aSManish Pandey Return a(x) multiplied by b(x) modulo p(x), where p(x) is the CRC polynomial,
153*fd39217aSManish Pandey reflected. For speed, this requires that a not be zero.
154*fd39217aSManish Pandey */
multmodp(z_crc_t a,z_crc_t b)155*fd39217aSManish Pandey local z_crc_t multmodp(z_crc_t a, z_crc_t b) {
156*fd39217aSManish Pandey z_crc_t m, p;
157221b1638SMasahiro Yamada
158*fd39217aSManish Pandey m = (z_crc_t)1 << 31;
159*fd39217aSManish Pandey p = 0;
160*fd39217aSManish Pandey for (;;) {
161*fd39217aSManish Pandey if (a & m) {
162*fd39217aSManish Pandey p ^= b;
163*fd39217aSManish Pandey if ((a & (m - 1)) == 0)
164*fd39217aSManish Pandey break;
165*fd39217aSManish Pandey }
166*fd39217aSManish Pandey m >>= 1;
167*fd39217aSManish Pandey b = b & 1 ? (b >> 1) ^ POLY : b >> 1;
168*fd39217aSManish Pandey }
169*fd39217aSManish Pandey return p;
170*fd39217aSManish Pandey }
171*fd39217aSManish Pandey
172*fd39217aSManish Pandey /*
173*fd39217aSManish Pandey Return x^(n * 2^k) modulo p(x). Requires that x2n_table[] has been
174*fd39217aSManish Pandey initialized.
175*fd39217aSManish Pandey */
x2nmodp(z_off64_t n,unsigned k)176*fd39217aSManish Pandey local z_crc_t x2nmodp(z_off64_t n, unsigned k) {
177*fd39217aSManish Pandey z_crc_t p;
178*fd39217aSManish Pandey
179*fd39217aSManish Pandey p = (z_crc_t)1 << 31; /* x^0 == 1 */
180*fd39217aSManish Pandey while (n) {
181*fd39217aSManish Pandey if (n & 1)
182*fd39217aSManish Pandey p = multmodp(x2n_table[k & 31], p);
183*fd39217aSManish Pandey n >>= 1;
184*fd39217aSManish Pandey k++;
185*fd39217aSManish Pandey }
186*fd39217aSManish Pandey return p;
187*fd39217aSManish Pandey }
188*fd39217aSManish Pandey
189*fd39217aSManish Pandey #ifdef DYNAMIC_CRC_TABLE
190*fd39217aSManish Pandey /* =========================================================================
191*fd39217aSManish Pandey * Build the tables for byte-wise and braided CRC-32 calculations, and a table
192*fd39217aSManish Pandey * of powers of x for combining CRC-32s.
193*fd39217aSManish Pandey */
194a194255dSDaniel Boulby local z_crc_t FAR crc_table[256];
195a194255dSDaniel Boulby #ifdef W
196a194255dSDaniel Boulby local z_word_t FAR crc_big_table[256];
197a194255dSDaniel Boulby local z_crc_t FAR crc_braid_table[W][256];
198a194255dSDaniel Boulby local z_word_t FAR crc_braid_big_table[W][256];
199*fd39217aSManish Pandey local void braid(z_crc_t [][256], z_word_t [][256], int, int);
200a194255dSDaniel Boulby #endif
201221b1638SMasahiro Yamada #ifdef MAKECRCH
202*fd39217aSManish Pandey local void write_table(FILE *, const z_crc_t FAR *, int);
203*fd39217aSManish Pandey local void write_table32hi(FILE *, const z_word_t FAR *, int);
204*fd39217aSManish Pandey local void write_table64(FILE *, const z_word_t FAR *, int);
205221b1638SMasahiro Yamada #endif /* MAKECRCH */
206a194255dSDaniel Boulby
207a194255dSDaniel Boulby /*
208a194255dSDaniel Boulby Define a once() function depending on the availability of atomics. If this is
209a194255dSDaniel Boulby compiled with DYNAMIC_CRC_TABLE defined, and if CRCs will be computed in
210a194255dSDaniel Boulby multiple threads, and if atomics are not available, then get_crc_table() must
211a194255dSDaniel Boulby be called to initialize the tables and must return before any threads are
212a194255dSDaniel Boulby allowed to compute or combine CRCs.
213a194255dSDaniel Boulby */
214a194255dSDaniel Boulby
215a194255dSDaniel Boulby /* Definition of once functionality. */
216a194255dSDaniel Boulby typedef struct once_s once_t;
217a194255dSDaniel Boulby
218a194255dSDaniel Boulby /* Check for the availability of atomics. */
219a194255dSDaniel Boulby #if defined(__STDC__) && __STDC_VERSION__ >= 201112L && \
220a194255dSDaniel Boulby !defined(__STDC_NO_ATOMICS__)
221a194255dSDaniel Boulby
222a194255dSDaniel Boulby #include <stdatomic.h>
223a194255dSDaniel Boulby
224a194255dSDaniel Boulby /* Structure for once(), which must be initialized with ONCE_INIT. */
225a194255dSDaniel Boulby struct once_s {
226a194255dSDaniel Boulby atomic_flag begun;
227a194255dSDaniel Boulby atomic_int done;
228a194255dSDaniel Boulby };
229a194255dSDaniel Boulby #define ONCE_INIT {ATOMIC_FLAG_INIT, 0}
230a194255dSDaniel Boulby
231a194255dSDaniel Boulby /*
232a194255dSDaniel Boulby Run the provided init() function exactly once, even if multiple threads
233a194255dSDaniel Boulby invoke once() at the same time. The state must be a once_t initialized with
234a194255dSDaniel Boulby ONCE_INIT.
235a194255dSDaniel Boulby */
once(once_t * state,void (* init)(void))236*fd39217aSManish Pandey local void once(once_t *state, void (*init)(void)) {
237a194255dSDaniel Boulby if (!atomic_load(&state->done)) {
238a194255dSDaniel Boulby if (atomic_flag_test_and_set(&state->begun))
239a194255dSDaniel Boulby while (!atomic_load(&state->done))
240a194255dSDaniel Boulby ;
241a194255dSDaniel Boulby else {
242a194255dSDaniel Boulby init();
243a194255dSDaniel Boulby atomic_store(&state->done, 1);
244a194255dSDaniel Boulby }
245a194255dSDaniel Boulby }
246a194255dSDaniel Boulby }
247a194255dSDaniel Boulby
248a194255dSDaniel Boulby #else /* no atomics */
249a194255dSDaniel Boulby
250a194255dSDaniel Boulby /* Structure for once(), which must be initialized with ONCE_INIT. */
251a194255dSDaniel Boulby struct once_s {
252a194255dSDaniel Boulby volatile int begun;
253a194255dSDaniel Boulby volatile int done;
254a194255dSDaniel Boulby };
255a194255dSDaniel Boulby #define ONCE_INIT {0, 0}
256a194255dSDaniel Boulby
257a194255dSDaniel Boulby /* Test and set. Alas, not atomic, but tries to minimize the period of
258a194255dSDaniel Boulby vulnerability. */
test_and_set(int volatile * flag)259*fd39217aSManish Pandey local int test_and_set(int volatile *flag) {
260a194255dSDaniel Boulby int was;
261a194255dSDaniel Boulby
262a194255dSDaniel Boulby was = *flag;
263a194255dSDaniel Boulby *flag = 1;
264a194255dSDaniel Boulby return was;
265a194255dSDaniel Boulby }
266a194255dSDaniel Boulby
267a194255dSDaniel Boulby /* Run the provided init() function once. This is not thread-safe. */
once(once_t * state,void (* init)(void))268*fd39217aSManish Pandey local void once(once_t *state, void (*init)(void)) {
269a194255dSDaniel Boulby if (!state->done) {
270a194255dSDaniel Boulby if (test_and_set(&state->begun))
271a194255dSDaniel Boulby while (!state->done)
272a194255dSDaniel Boulby ;
273a194255dSDaniel Boulby else {
274a194255dSDaniel Boulby init();
275a194255dSDaniel Boulby state->done = 1;
276a194255dSDaniel Boulby }
277a194255dSDaniel Boulby }
278a194255dSDaniel Boulby }
279a194255dSDaniel Boulby
280a194255dSDaniel Boulby #endif
281a194255dSDaniel Boulby
282a194255dSDaniel Boulby /* State for once(). */
283a194255dSDaniel Boulby local once_t made = ONCE_INIT;
284a194255dSDaniel Boulby
285221b1638SMasahiro Yamada /*
286221b1638SMasahiro Yamada Generate tables for a byte-wise 32-bit CRC calculation on the polynomial:
287221b1638SMasahiro Yamada x^32+x^26+x^23+x^22+x^16+x^12+x^11+x^10+x^8+x^7+x^5+x^4+x^2+x+1.
288221b1638SMasahiro Yamada
289221b1638SMasahiro Yamada Polynomials over GF(2) are represented in binary, one bit per coefficient,
290221b1638SMasahiro Yamada with the lowest powers in the most significant bit. Then adding polynomials
291221b1638SMasahiro Yamada is just exclusive-or, and multiplying a polynomial by x is a right shift by
292221b1638SMasahiro Yamada one. If we call the above polynomial p, and represent a byte as the
293221b1638SMasahiro Yamada polynomial q, also with the lowest power in the most significant bit (so the
294a194255dSDaniel Boulby byte 0xb1 is the polynomial x^7+x^3+x^2+1), then the CRC is (q*x^32) mod p,
295221b1638SMasahiro Yamada where a mod b means the remainder after dividing a by b.
296221b1638SMasahiro Yamada
297221b1638SMasahiro Yamada This calculation is done using the shift-register method of multiplying and
298221b1638SMasahiro Yamada taking the remainder. The register is initialized to zero, and for each
299221b1638SMasahiro Yamada incoming bit, x^32 is added mod p to the register if the bit is a one (where
300a194255dSDaniel Boulby x^32 mod p is p+x^32 = x^26+...+1), and the register is multiplied mod p by x
301a194255dSDaniel Boulby (which is shifting right by one and adding x^32 mod p if the bit shifted out
302a194255dSDaniel Boulby is a one). We start with the highest power (least significant bit) of q and
303a194255dSDaniel Boulby repeat for all eight bits of q.
304221b1638SMasahiro Yamada
305a194255dSDaniel Boulby The table is simply the CRC of all possible eight bit values. This is all the
306a194255dSDaniel Boulby information needed to generate CRCs on data a byte at a time for all
307a194255dSDaniel Boulby combinations of CRC register values and incoming bytes.
308221b1638SMasahiro Yamada */
309a194255dSDaniel Boulby
make_crc_table(void)310*fd39217aSManish Pandey local void make_crc_table(void) {
311a194255dSDaniel Boulby unsigned i, j, n;
312a194255dSDaniel Boulby z_crc_t p;
313221b1638SMasahiro Yamada
314a194255dSDaniel Boulby /* initialize the CRC of bytes tables */
315a194255dSDaniel Boulby for (i = 0; i < 256; i++) {
316a194255dSDaniel Boulby p = i;
317a194255dSDaniel Boulby for (j = 0; j < 8; j++)
318a194255dSDaniel Boulby p = p & 1 ? (p >> 1) ^ POLY : p >> 1;
319a194255dSDaniel Boulby crc_table[i] = p;
320a194255dSDaniel Boulby #ifdef W
321a194255dSDaniel Boulby crc_big_table[i] = byte_swap(p);
322a194255dSDaniel Boulby #endif
323221b1638SMasahiro Yamada }
324221b1638SMasahiro Yamada
325a194255dSDaniel Boulby /* initialize the x^2^n mod p(x) table */
326a194255dSDaniel Boulby p = (z_crc_t)1 << 30; /* x^1 */
327a194255dSDaniel Boulby x2n_table[0] = p;
328a194255dSDaniel Boulby for (n = 1; n < 32; n++)
329a194255dSDaniel Boulby x2n_table[n] = p = multmodp(p, p);
330221b1638SMasahiro Yamada
331a194255dSDaniel Boulby #ifdef W
332a194255dSDaniel Boulby /* initialize the braiding tables -- needs x2n_table[] */
333a194255dSDaniel Boulby braid(crc_braid_table, crc_braid_big_table, N, W);
334a194255dSDaniel Boulby #endif
335221b1638SMasahiro Yamada
336221b1638SMasahiro Yamada #ifdef MAKECRCH
337221b1638SMasahiro Yamada {
338a194255dSDaniel Boulby /*
339a194255dSDaniel Boulby The crc32.h header file contains tables for both 32-bit and 64-bit
340a194255dSDaniel Boulby z_word_t's, and so requires a 64-bit type be available. In that case,
341a194255dSDaniel Boulby z_word_t must be defined to be 64-bits. This code then also generates
342a194255dSDaniel Boulby and writes out the tables for the case that z_word_t is 32 bits.
343a194255dSDaniel Boulby */
344a194255dSDaniel Boulby #if !defined(W) || W != 8
345a194255dSDaniel Boulby # error Need a 64-bit integer type in order to generate crc32.h.
346a194255dSDaniel Boulby #endif
347221b1638SMasahiro Yamada FILE *out;
348a194255dSDaniel Boulby int k, n;
349a194255dSDaniel Boulby z_crc_t ltl[8][256];
350a194255dSDaniel Boulby z_word_t big[8][256];
351221b1638SMasahiro Yamada
352221b1638SMasahiro Yamada out = fopen("crc32.h", "w");
353221b1638SMasahiro Yamada if (out == NULL) return;
354a194255dSDaniel Boulby
355a194255dSDaniel Boulby /* write out little-endian CRC table to crc32.h */
356a194255dSDaniel Boulby fprintf(out,
357a194255dSDaniel Boulby "/* crc32.h -- tables for rapid CRC calculation\n"
358a194255dSDaniel Boulby " * Generated automatically by crc32.c\n */\n"
359a194255dSDaniel Boulby "\n"
360a194255dSDaniel Boulby "local const z_crc_t FAR crc_table[] = {\n"
361a194255dSDaniel Boulby " ");
362a194255dSDaniel Boulby write_table(out, crc_table, 256);
363a194255dSDaniel Boulby fprintf(out,
364a194255dSDaniel Boulby "};\n");
365a194255dSDaniel Boulby
366a194255dSDaniel Boulby /* write out big-endian CRC table for 64-bit z_word_t to crc32.h */
367a194255dSDaniel Boulby fprintf(out,
368a194255dSDaniel Boulby "\n"
369a194255dSDaniel Boulby "#ifdef W\n"
370a194255dSDaniel Boulby "\n"
371a194255dSDaniel Boulby "#if W == 8\n"
372a194255dSDaniel Boulby "\n"
373a194255dSDaniel Boulby "local const z_word_t FAR crc_big_table[] = {\n"
374a194255dSDaniel Boulby " ");
375a194255dSDaniel Boulby write_table64(out, crc_big_table, 256);
376a194255dSDaniel Boulby fprintf(out,
377a194255dSDaniel Boulby "};\n");
378a194255dSDaniel Boulby
379a194255dSDaniel Boulby /* write out big-endian CRC table for 32-bit z_word_t to crc32.h */
380a194255dSDaniel Boulby fprintf(out,
381a194255dSDaniel Boulby "\n"
382a194255dSDaniel Boulby "#else /* W == 4 */\n"
383a194255dSDaniel Boulby "\n"
384a194255dSDaniel Boulby "local const z_word_t FAR crc_big_table[] = {\n"
385a194255dSDaniel Boulby " ");
386a194255dSDaniel Boulby write_table32hi(out, crc_big_table, 256);
387a194255dSDaniel Boulby fprintf(out,
388a194255dSDaniel Boulby "};\n"
389a194255dSDaniel Boulby "\n"
390a194255dSDaniel Boulby "#endif\n");
391a194255dSDaniel Boulby
392a194255dSDaniel Boulby /* write out braid tables for each value of N */
393a194255dSDaniel Boulby for (n = 1; n <= 6; n++) {
394a194255dSDaniel Boulby fprintf(out,
395a194255dSDaniel Boulby "\n"
396a194255dSDaniel Boulby "#if N == %d\n", n);
397a194255dSDaniel Boulby
398a194255dSDaniel Boulby /* compute braid tables for this N and 64-bit word_t */
399a194255dSDaniel Boulby braid(ltl, big, n, 8);
400a194255dSDaniel Boulby
401a194255dSDaniel Boulby /* write out braid tables for 64-bit z_word_t to crc32.h */
402a194255dSDaniel Boulby fprintf(out,
403a194255dSDaniel Boulby "\n"
404a194255dSDaniel Boulby "#if W == 8\n"
405a194255dSDaniel Boulby "\n"
406a194255dSDaniel Boulby "local const z_crc_t FAR crc_braid_table[][256] = {\n");
407a194255dSDaniel Boulby for (k = 0; k < 8; k++) {
408a194255dSDaniel Boulby fprintf(out, " {");
409a194255dSDaniel Boulby write_table(out, ltl[k], 256);
410a194255dSDaniel Boulby fprintf(out, "}%s", k < 7 ? ",\n" : "");
411221b1638SMasahiro Yamada }
412a194255dSDaniel Boulby fprintf(out,
413a194255dSDaniel Boulby "};\n"
414a194255dSDaniel Boulby "\n"
415a194255dSDaniel Boulby "local const z_word_t FAR crc_braid_big_table[][256] = {\n");
416a194255dSDaniel Boulby for (k = 0; k < 8; k++) {
417a194255dSDaniel Boulby fprintf(out, " {");
418a194255dSDaniel Boulby write_table64(out, big[k], 256);
419a194255dSDaniel Boulby fprintf(out, "}%s", k < 7 ? ",\n" : "");
420a194255dSDaniel Boulby }
421a194255dSDaniel Boulby fprintf(out,
422a194255dSDaniel Boulby "};\n");
423a194255dSDaniel Boulby
424a194255dSDaniel Boulby /* compute braid tables for this N and 32-bit word_t */
425a194255dSDaniel Boulby braid(ltl, big, n, 4);
426a194255dSDaniel Boulby
427a194255dSDaniel Boulby /* write out braid tables for 32-bit z_word_t to crc32.h */
428a194255dSDaniel Boulby fprintf(out,
429a194255dSDaniel Boulby "\n"
430a194255dSDaniel Boulby "#else /* W == 4 */\n"
431a194255dSDaniel Boulby "\n"
432a194255dSDaniel Boulby "local const z_crc_t FAR crc_braid_table[][256] = {\n");
433a194255dSDaniel Boulby for (k = 0; k < 4; k++) {
434a194255dSDaniel Boulby fprintf(out, " {");
435a194255dSDaniel Boulby write_table(out, ltl[k], 256);
436a194255dSDaniel Boulby fprintf(out, "}%s", k < 3 ? ",\n" : "");
437a194255dSDaniel Boulby }
438a194255dSDaniel Boulby fprintf(out,
439a194255dSDaniel Boulby "};\n"
440a194255dSDaniel Boulby "\n"
441a194255dSDaniel Boulby "local const z_word_t FAR crc_braid_big_table[][256] = {\n");
442a194255dSDaniel Boulby for (k = 0; k < 4; k++) {
443a194255dSDaniel Boulby fprintf(out, " {");
444a194255dSDaniel Boulby write_table32hi(out, big[k], 256);
445a194255dSDaniel Boulby fprintf(out, "}%s", k < 3 ? ",\n" : "");
446a194255dSDaniel Boulby }
447a194255dSDaniel Boulby fprintf(out,
448a194255dSDaniel Boulby "};\n"
449a194255dSDaniel Boulby "\n"
450a194255dSDaniel Boulby "#endif\n"
451a194255dSDaniel Boulby "\n"
452a194255dSDaniel Boulby "#endif\n");
453a194255dSDaniel Boulby }
454a194255dSDaniel Boulby fprintf(out,
455a194255dSDaniel Boulby "\n"
456a194255dSDaniel Boulby "#endif\n");
457a194255dSDaniel Boulby
458a194255dSDaniel Boulby /* write out zeros operator table to crc32.h */
459a194255dSDaniel Boulby fprintf(out,
460a194255dSDaniel Boulby "\n"
461a194255dSDaniel Boulby "local const z_crc_t FAR x2n_table[] = {\n"
462a194255dSDaniel Boulby " ");
463a194255dSDaniel Boulby write_table(out, x2n_table, 32);
464a194255dSDaniel Boulby fprintf(out,
465a194255dSDaniel Boulby "};\n");
466221b1638SMasahiro Yamada fclose(out);
467221b1638SMasahiro Yamada }
468221b1638SMasahiro Yamada #endif /* MAKECRCH */
469221b1638SMasahiro Yamada }
470221b1638SMasahiro Yamada
471221b1638SMasahiro Yamada #ifdef MAKECRCH
472a194255dSDaniel Boulby
473a194255dSDaniel Boulby /*
474a194255dSDaniel Boulby Write the 32-bit values in table[0..k-1] to out, five per line in
475a194255dSDaniel Boulby hexadecimal separated by commas.
476a194255dSDaniel Boulby */
write_table(FILE * out,const z_crc_t FAR * table,int k)477*fd39217aSManish Pandey local void write_table(FILE *out, const z_crc_t FAR *table, int k) {
478221b1638SMasahiro Yamada int n;
479221b1638SMasahiro Yamada
480a194255dSDaniel Boulby for (n = 0; n < k; n++)
481a194255dSDaniel Boulby fprintf(out, "%s0x%08lx%s", n == 0 || n % 5 ? "" : " ",
482221b1638SMasahiro Yamada (unsigned long)(table[n]),
483a194255dSDaniel Boulby n == k - 1 ? "" : (n % 5 == 4 ? ",\n" : ", "));
484221b1638SMasahiro Yamada }
485a194255dSDaniel Boulby
486a194255dSDaniel Boulby /*
487a194255dSDaniel Boulby Write the high 32-bits of each value in table[0..k-1] to out, five per line
488a194255dSDaniel Boulby in hexadecimal separated by commas.
489a194255dSDaniel Boulby */
write_table32hi(FILE * out,const z_word_t FAR * table,int k)490*fd39217aSManish Pandey local void write_table32hi(FILE *out, const z_word_t FAR *table, int k) {
491a194255dSDaniel Boulby int n;
492a194255dSDaniel Boulby
493a194255dSDaniel Boulby for (n = 0; n < k; n++)
494a194255dSDaniel Boulby fprintf(out, "%s0x%08lx%s", n == 0 || n % 5 ? "" : " ",
495a194255dSDaniel Boulby (unsigned long)(table[n] >> 32),
496a194255dSDaniel Boulby n == k - 1 ? "" : (n % 5 == 4 ? ",\n" : ", "));
497a194255dSDaniel Boulby }
498a194255dSDaniel Boulby
499a194255dSDaniel Boulby /*
500a194255dSDaniel Boulby Write the 64-bit values in table[0..k-1] to out, three per line in
501a194255dSDaniel Boulby hexadecimal separated by commas. This assumes that if there is a 64-bit
502a194255dSDaniel Boulby type, then there is also a long long integer type, and it is at least 64
503a194255dSDaniel Boulby bits. If not, then the type cast and format string can be adjusted
504a194255dSDaniel Boulby accordingly.
505a194255dSDaniel Boulby */
write_table64(FILE * out,const z_word_t FAR * table,int k)506*fd39217aSManish Pandey local void write_table64(FILE *out, const z_word_t FAR *table, int k) {
507a194255dSDaniel Boulby int n;
508a194255dSDaniel Boulby
509a194255dSDaniel Boulby for (n = 0; n < k; n++)
510a194255dSDaniel Boulby fprintf(out, "%s0x%016llx%s", n == 0 || n % 3 ? "" : " ",
511a194255dSDaniel Boulby (unsigned long long)(table[n]),
512a194255dSDaniel Boulby n == k - 1 ? "" : (n % 3 == 2 ? ",\n" : ", "));
513a194255dSDaniel Boulby }
514a194255dSDaniel Boulby
515a194255dSDaniel Boulby /* Actually do the deed. */
main(void)516*fd39217aSManish Pandey int main(void) {
517a194255dSDaniel Boulby make_crc_table();
518a194255dSDaniel Boulby return 0;
519a194255dSDaniel Boulby }
520a194255dSDaniel Boulby
521221b1638SMasahiro Yamada #endif /* MAKECRCH */
522221b1638SMasahiro Yamada
523a194255dSDaniel Boulby #ifdef W
524a194255dSDaniel Boulby /*
525a194255dSDaniel Boulby Generate the little and big-endian braid tables for the given n and z_word_t
526a194255dSDaniel Boulby size w. Each array must have room for w blocks of 256 elements.
527a194255dSDaniel Boulby */
braid(z_crc_t ltl[][256],z_word_t big[][256],int n,int w)528*fd39217aSManish Pandey local void braid(z_crc_t ltl[][256], z_word_t big[][256], int n, int w) {
529a194255dSDaniel Boulby int k;
530a194255dSDaniel Boulby z_crc_t i, p, q;
531a194255dSDaniel Boulby for (k = 0; k < w; k++) {
532a194255dSDaniel Boulby p = x2nmodp((n * w + 3 - k) << 3, 0);
533a194255dSDaniel Boulby ltl[k][0] = 0;
534a194255dSDaniel Boulby big[w - 1 - k][0] = 0;
535a194255dSDaniel Boulby for (i = 1; i < 256; i++) {
536a194255dSDaniel Boulby ltl[k][i] = q = multmodp(i << 24, p);
537a194255dSDaniel Boulby big[w - 1 - k][i] = byte_swap(q);
538a194255dSDaniel Boulby }
539a194255dSDaniel Boulby }
540a194255dSDaniel Boulby }
541a194255dSDaniel Boulby #endif
542a194255dSDaniel Boulby
543221b1638SMasahiro Yamada #endif /* DYNAMIC_CRC_TABLE */
544221b1638SMasahiro Yamada
545221b1638SMasahiro Yamada /* =========================================================================
546a194255dSDaniel Boulby * This function can be used by asm versions of crc32(), and to force the
547a194255dSDaniel Boulby * generation of the CRC tables in a threaded application.
548221b1638SMasahiro Yamada */
get_crc_table(void)549*fd39217aSManish Pandey const z_crc_t FAR * ZEXPORT get_crc_table(void) {
550221b1638SMasahiro Yamada #ifdef DYNAMIC_CRC_TABLE
551a194255dSDaniel Boulby once(&made, make_crc_table);
552221b1638SMasahiro Yamada #endif /* DYNAMIC_CRC_TABLE */
553221b1638SMasahiro Yamada return (const z_crc_t FAR *)crc_table;
554221b1638SMasahiro Yamada }
555221b1638SMasahiro Yamada
556a194255dSDaniel Boulby /* =========================================================================
557a194255dSDaniel Boulby * Use ARM machine instructions if available. This will compute the CRC about
558a194255dSDaniel Boulby * ten times faster than the braided calculation. This code does not check for
559a194255dSDaniel Boulby * the presence of the CRC instruction at run time. __ARM_FEATURE_CRC32 will
560a194255dSDaniel Boulby * only be defined if the compilation specifies an ARM processor architecture
561a194255dSDaniel Boulby * that has the instructions. For example, compiling with -march=armv8.1-a or
562a194255dSDaniel Boulby * -march=armv8-a+crc, or -march=native if the compile machine has the crc32
563a194255dSDaniel Boulby * instructions.
564a194255dSDaniel Boulby */
565a194255dSDaniel Boulby #ifdef ARMCRC32
566a194255dSDaniel Boulby
567a194255dSDaniel Boulby /*
568a194255dSDaniel Boulby Constants empirically determined to maximize speed. These values are from
569a194255dSDaniel Boulby measurements on a Cortex-A57. Your mileage may vary.
570a194255dSDaniel Boulby */
571a194255dSDaniel Boulby #define Z_BATCH 3990 /* number of words in a batch */
572a194255dSDaniel Boulby #define Z_BATCH_ZEROS 0xa10d3d0c /* computed from Z_BATCH = 3990 */
573a194255dSDaniel Boulby #define Z_BATCH_MIN 800 /* fewest words in a final batch */
574a194255dSDaniel Boulby
crc32_z(unsigned long crc,const unsigned char FAR * buf,z_size_t len)575*fd39217aSManish Pandey unsigned long ZEXPORT crc32_z(unsigned long crc, const unsigned char FAR *buf,
576*fd39217aSManish Pandey z_size_t len) {
577a194255dSDaniel Boulby z_crc_t val;
578a194255dSDaniel Boulby z_word_t crc1, crc2;
579a194255dSDaniel Boulby const z_word_t *word;
580a194255dSDaniel Boulby z_word_t val0, val1, val2;
581a194255dSDaniel Boulby z_size_t last, last2, i;
582a194255dSDaniel Boulby z_size_t num;
583a194255dSDaniel Boulby
584a194255dSDaniel Boulby /* Return initial CRC, if requested. */
585a194255dSDaniel Boulby if (buf == Z_NULL) return 0;
586a194255dSDaniel Boulby
587a194255dSDaniel Boulby #ifdef DYNAMIC_CRC_TABLE
588a194255dSDaniel Boulby once(&made, make_crc_table);
589a194255dSDaniel Boulby #endif /* DYNAMIC_CRC_TABLE */
590a194255dSDaniel Boulby
591a194255dSDaniel Boulby /* Pre-condition the CRC */
592a194255dSDaniel Boulby crc = (~crc) & 0xffffffff;
593a194255dSDaniel Boulby
594a194255dSDaniel Boulby /* Compute the CRC up to a word boundary. */
595a194255dSDaniel Boulby while (len && ((z_size_t)buf & 7) != 0) {
596a194255dSDaniel Boulby len--;
597a194255dSDaniel Boulby val = *buf++;
598a194255dSDaniel Boulby __asm__ volatile("crc32b %w0, %w0, %w1" : "+r"(crc) : "r"(val));
599a194255dSDaniel Boulby }
600a194255dSDaniel Boulby
601a194255dSDaniel Boulby /* Prepare to compute the CRC on full 64-bit words word[0..num-1]. */
602a194255dSDaniel Boulby word = (z_word_t const *)buf;
603a194255dSDaniel Boulby num = len >> 3;
604a194255dSDaniel Boulby len &= 7;
605a194255dSDaniel Boulby
606a194255dSDaniel Boulby /* Do three interleaved CRCs to realize the throughput of one crc32x
607a194255dSDaniel Boulby instruction per cycle. Each CRC is calculated on Z_BATCH words. The
608a194255dSDaniel Boulby three CRCs are combined into a single CRC after each set of batches. */
609a194255dSDaniel Boulby while (num >= 3 * Z_BATCH) {
610a194255dSDaniel Boulby crc1 = 0;
611a194255dSDaniel Boulby crc2 = 0;
612a194255dSDaniel Boulby for (i = 0; i < Z_BATCH; i++) {
613a194255dSDaniel Boulby val0 = word[i];
614a194255dSDaniel Boulby val1 = word[i + Z_BATCH];
615a194255dSDaniel Boulby val2 = word[i + 2 * Z_BATCH];
616a194255dSDaniel Boulby __asm__ volatile("crc32x %w0, %w0, %x1" : "+r"(crc) : "r"(val0));
617a194255dSDaniel Boulby __asm__ volatile("crc32x %w0, %w0, %x1" : "+r"(crc1) : "r"(val1));
618a194255dSDaniel Boulby __asm__ volatile("crc32x %w0, %w0, %x1" : "+r"(crc2) : "r"(val2));
619a194255dSDaniel Boulby }
620a194255dSDaniel Boulby word += 3 * Z_BATCH;
621a194255dSDaniel Boulby num -= 3 * Z_BATCH;
622a194255dSDaniel Boulby crc = multmodp(Z_BATCH_ZEROS, crc) ^ crc1;
623a194255dSDaniel Boulby crc = multmodp(Z_BATCH_ZEROS, crc) ^ crc2;
624a194255dSDaniel Boulby }
625a194255dSDaniel Boulby
626a194255dSDaniel Boulby /* Do one last smaller batch with the remaining words, if there are enough
627a194255dSDaniel Boulby to pay for the combination of CRCs. */
628a194255dSDaniel Boulby last = num / 3;
629a194255dSDaniel Boulby if (last >= Z_BATCH_MIN) {
630a194255dSDaniel Boulby last2 = last << 1;
631a194255dSDaniel Boulby crc1 = 0;
632a194255dSDaniel Boulby crc2 = 0;
633a194255dSDaniel Boulby for (i = 0; i < last; i++) {
634a194255dSDaniel Boulby val0 = word[i];
635a194255dSDaniel Boulby val1 = word[i + last];
636a194255dSDaniel Boulby val2 = word[i + last2];
637a194255dSDaniel Boulby __asm__ volatile("crc32x %w0, %w0, %x1" : "+r"(crc) : "r"(val0));
638a194255dSDaniel Boulby __asm__ volatile("crc32x %w0, %w0, %x1" : "+r"(crc1) : "r"(val1));
639a194255dSDaniel Boulby __asm__ volatile("crc32x %w0, %w0, %x1" : "+r"(crc2) : "r"(val2));
640a194255dSDaniel Boulby }
641a194255dSDaniel Boulby word += 3 * last;
642a194255dSDaniel Boulby num -= 3 * last;
643a194255dSDaniel Boulby val = x2nmodp(last, 6);
644a194255dSDaniel Boulby crc = multmodp(val, crc) ^ crc1;
645a194255dSDaniel Boulby crc = multmodp(val, crc) ^ crc2;
646a194255dSDaniel Boulby }
647a194255dSDaniel Boulby
648a194255dSDaniel Boulby /* Compute the CRC on any remaining words. */
649a194255dSDaniel Boulby for (i = 0; i < num; i++) {
650a194255dSDaniel Boulby val0 = word[i];
651a194255dSDaniel Boulby __asm__ volatile("crc32x %w0, %w0, %x1" : "+r"(crc) : "r"(val0));
652a194255dSDaniel Boulby }
653a194255dSDaniel Boulby word += num;
654a194255dSDaniel Boulby
655a194255dSDaniel Boulby /* Complete the CRC on any remaining bytes. */
656a194255dSDaniel Boulby buf = (const unsigned char FAR *)word;
657a194255dSDaniel Boulby while (len) {
658a194255dSDaniel Boulby len--;
659a194255dSDaniel Boulby val = *buf++;
660a194255dSDaniel Boulby __asm__ volatile("crc32b %w0, %w0, %w1" : "+r"(crc) : "r"(val));
661a194255dSDaniel Boulby }
662a194255dSDaniel Boulby
663a194255dSDaniel Boulby /* Return the CRC, post-conditioned. */
664a194255dSDaniel Boulby return crc ^ 0xffffffff;
665a194255dSDaniel Boulby }
666a194255dSDaniel Boulby
667a194255dSDaniel Boulby #else
668a194255dSDaniel Boulby
669a194255dSDaniel Boulby #ifdef W
670a194255dSDaniel Boulby
671a194255dSDaniel Boulby /*
672a194255dSDaniel Boulby Return the CRC of the W bytes in the word_t data, taking the
673a194255dSDaniel Boulby least-significant byte of the word as the first byte of data, without any pre
674a194255dSDaniel Boulby or post conditioning. This is used to combine the CRCs of each braid.
675a194255dSDaniel Boulby */
crc_word(z_word_t data)676*fd39217aSManish Pandey local z_crc_t crc_word(z_word_t data) {
677a194255dSDaniel Boulby int k;
678a194255dSDaniel Boulby for (k = 0; k < W; k++)
679a194255dSDaniel Boulby data = (data >> 8) ^ crc_table[data & 0xff];
680a194255dSDaniel Boulby return (z_crc_t)data;
681a194255dSDaniel Boulby }
682a194255dSDaniel Boulby
crc_word_big(z_word_t data)683*fd39217aSManish Pandey local z_word_t crc_word_big(z_word_t data) {
684a194255dSDaniel Boulby int k;
685a194255dSDaniel Boulby for (k = 0; k < W; k++)
686a194255dSDaniel Boulby data = (data << 8) ^
687a194255dSDaniel Boulby crc_big_table[(data >> ((W - 1) << 3)) & 0xff];
688a194255dSDaniel Boulby return data;
689a194255dSDaniel Boulby }
690a194255dSDaniel Boulby
691a194255dSDaniel Boulby #endif
692221b1638SMasahiro Yamada
693221b1638SMasahiro Yamada /* ========================================================================= */
crc32_z(unsigned long crc,const unsigned char FAR * buf,z_size_t len)694*fd39217aSManish Pandey unsigned long ZEXPORT crc32_z(unsigned long crc, const unsigned char FAR *buf,
695*fd39217aSManish Pandey z_size_t len) {
696a194255dSDaniel Boulby /* Return initial CRC, if requested. */
697a194255dSDaniel Boulby if (buf == Z_NULL) return 0;
698221b1638SMasahiro Yamada
699221b1638SMasahiro Yamada #ifdef DYNAMIC_CRC_TABLE
700a194255dSDaniel Boulby once(&made, make_crc_table);
701221b1638SMasahiro Yamada #endif /* DYNAMIC_CRC_TABLE */
702221b1638SMasahiro Yamada
703a194255dSDaniel Boulby /* Pre-condition the CRC */
704a194255dSDaniel Boulby crc = (~crc) & 0xffffffff;
705221b1638SMasahiro Yamada
706a194255dSDaniel Boulby #ifdef W
707a194255dSDaniel Boulby
708a194255dSDaniel Boulby /* If provided enough bytes, do a braided CRC calculation. */
709a194255dSDaniel Boulby if (len >= N * W + W - 1) {
710a194255dSDaniel Boulby z_size_t blks;
711a194255dSDaniel Boulby z_word_t const *words;
712a194255dSDaniel Boulby unsigned endian;
713a194255dSDaniel Boulby int k;
714a194255dSDaniel Boulby
715a194255dSDaniel Boulby /* Compute the CRC up to a z_word_t boundary. */
716a194255dSDaniel Boulby while (len && ((z_size_t)buf & (W - 1)) != 0) {
717a194255dSDaniel Boulby len--;
718a194255dSDaniel Boulby crc = (crc >> 8) ^ crc_table[(crc ^ *buf++) & 0xff];
719a194255dSDaniel Boulby }
720a194255dSDaniel Boulby
721a194255dSDaniel Boulby /* Compute the CRC on as many N z_word_t blocks as are available. */
722a194255dSDaniel Boulby blks = len / (N * W);
723a194255dSDaniel Boulby len -= blks * N * W;
724a194255dSDaniel Boulby words = (z_word_t const *)buf;
725a194255dSDaniel Boulby
726a194255dSDaniel Boulby /* Do endian check at execution time instead of compile time, since ARM
727*fd39217aSManish Pandey processors can change the endianness at execution time. If the
728*fd39217aSManish Pandey compiler knows what the endianness will be, it can optimize out the
729a194255dSDaniel Boulby check and the unused branch. */
730221b1638SMasahiro Yamada endian = 1;
731a194255dSDaniel Boulby if (*(unsigned char *)&endian) {
732a194255dSDaniel Boulby /* Little endian. */
733a194255dSDaniel Boulby
734a194255dSDaniel Boulby z_crc_t crc0;
735a194255dSDaniel Boulby z_word_t word0;
736a194255dSDaniel Boulby #if N > 1
737a194255dSDaniel Boulby z_crc_t crc1;
738a194255dSDaniel Boulby z_word_t word1;
739a194255dSDaniel Boulby #if N > 2
740a194255dSDaniel Boulby z_crc_t crc2;
741a194255dSDaniel Boulby z_word_t word2;
742a194255dSDaniel Boulby #if N > 3
743a194255dSDaniel Boulby z_crc_t crc3;
744a194255dSDaniel Boulby z_word_t word3;
745a194255dSDaniel Boulby #if N > 4
746a194255dSDaniel Boulby z_crc_t crc4;
747a194255dSDaniel Boulby z_word_t word4;
748a194255dSDaniel Boulby #if N > 5
749a194255dSDaniel Boulby z_crc_t crc5;
750a194255dSDaniel Boulby z_word_t word5;
751a194255dSDaniel Boulby #endif
752a194255dSDaniel Boulby #endif
753a194255dSDaniel Boulby #endif
754a194255dSDaniel Boulby #endif
755a194255dSDaniel Boulby #endif
756a194255dSDaniel Boulby
757a194255dSDaniel Boulby /* Initialize the CRC for each braid. */
758a194255dSDaniel Boulby crc0 = crc;
759a194255dSDaniel Boulby #if N > 1
760a194255dSDaniel Boulby crc1 = 0;
761a194255dSDaniel Boulby #if N > 2
762a194255dSDaniel Boulby crc2 = 0;
763a194255dSDaniel Boulby #if N > 3
764a194255dSDaniel Boulby crc3 = 0;
765a194255dSDaniel Boulby #if N > 4
766a194255dSDaniel Boulby crc4 = 0;
767a194255dSDaniel Boulby #if N > 5
768a194255dSDaniel Boulby crc5 = 0;
769a194255dSDaniel Boulby #endif
770a194255dSDaniel Boulby #endif
771a194255dSDaniel Boulby #endif
772a194255dSDaniel Boulby #endif
773a194255dSDaniel Boulby #endif
774a194255dSDaniel Boulby
775a194255dSDaniel Boulby /*
776a194255dSDaniel Boulby Process the first blks-1 blocks, computing the CRCs on each braid
777a194255dSDaniel Boulby independently.
778a194255dSDaniel Boulby */
779a194255dSDaniel Boulby while (--blks) {
780a194255dSDaniel Boulby /* Load the word for each braid into registers. */
781a194255dSDaniel Boulby word0 = crc0 ^ words[0];
782a194255dSDaniel Boulby #if N > 1
783a194255dSDaniel Boulby word1 = crc1 ^ words[1];
784a194255dSDaniel Boulby #if N > 2
785a194255dSDaniel Boulby word2 = crc2 ^ words[2];
786a194255dSDaniel Boulby #if N > 3
787a194255dSDaniel Boulby word3 = crc3 ^ words[3];
788a194255dSDaniel Boulby #if N > 4
789a194255dSDaniel Boulby word4 = crc4 ^ words[4];
790a194255dSDaniel Boulby #if N > 5
791a194255dSDaniel Boulby word5 = crc5 ^ words[5];
792a194255dSDaniel Boulby #endif
793a194255dSDaniel Boulby #endif
794a194255dSDaniel Boulby #endif
795a194255dSDaniel Boulby #endif
796a194255dSDaniel Boulby #endif
797a194255dSDaniel Boulby words += N;
798a194255dSDaniel Boulby
799a194255dSDaniel Boulby /* Compute and update the CRC for each word. The loop should
800a194255dSDaniel Boulby get unrolled. */
801a194255dSDaniel Boulby crc0 = crc_braid_table[0][word0 & 0xff];
802a194255dSDaniel Boulby #if N > 1
803a194255dSDaniel Boulby crc1 = crc_braid_table[0][word1 & 0xff];
804a194255dSDaniel Boulby #if N > 2
805a194255dSDaniel Boulby crc2 = crc_braid_table[0][word2 & 0xff];
806a194255dSDaniel Boulby #if N > 3
807a194255dSDaniel Boulby crc3 = crc_braid_table[0][word3 & 0xff];
808a194255dSDaniel Boulby #if N > 4
809a194255dSDaniel Boulby crc4 = crc_braid_table[0][word4 & 0xff];
810a194255dSDaniel Boulby #if N > 5
811a194255dSDaniel Boulby crc5 = crc_braid_table[0][word5 & 0xff];
812a194255dSDaniel Boulby #endif
813a194255dSDaniel Boulby #endif
814a194255dSDaniel Boulby #endif
815a194255dSDaniel Boulby #endif
816a194255dSDaniel Boulby #endif
817a194255dSDaniel Boulby for (k = 1; k < W; k++) {
818a194255dSDaniel Boulby crc0 ^= crc_braid_table[k][(word0 >> (k << 3)) & 0xff];
819a194255dSDaniel Boulby #if N > 1
820a194255dSDaniel Boulby crc1 ^= crc_braid_table[k][(word1 >> (k << 3)) & 0xff];
821a194255dSDaniel Boulby #if N > 2
822a194255dSDaniel Boulby crc2 ^= crc_braid_table[k][(word2 >> (k << 3)) & 0xff];
823a194255dSDaniel Boulby #if N > 3
824a194255dSDaniel Boulby crc3 ^= crc_braid_table[k][(word3 >> (k << 3)) & 0xff];
825a194255dSDaniel Boulby #if N > 4
826a194255dSDaniel Boulby crc4 ^= crc_braid_table[k][(word4 >> (k << 3)) & 0xff];
827a194255dSDaniel Boulby #if N > 5
828a194255dSDaniel Boulby crc5 ^= crc_braid_table[k][(word5 >> (k << 3)) & 0xff];
829a194255dSDaniel Boulby #endif
830a194255dSDaniel Boulby #endif
831a194255dSDaniel Boulby #endif
832a194255dSDaniel Boulby #endif
833a194255dSDaniel Boulby #endif
834221b1638SMasahiro Yamada }
835a194255dSDaniel Boulby }
836a194255dSDaniel Boulby
837a194255dSDaniel Boulby /*
838a194255dSDaniel Boulby Process the last block, combining the CRCs of the N braids at the
839a194255dSDaniel Boulby same time.
840a194255dSDaniel Boulby */
841a194255dSDaniel Boulby crc = crc_word(crc0 ^ words[0]);
842a194255dSDaniel Boulby #if N > 1
843a194255dSDaniel Boulby crc = crc_word(crc1 ^ words[1] ^ crc);
844a194255dSDaniel Boulby #if N > 2
845a194255dSDaniel Boulby crc = crc_word(crc2 ^ words[2] ^ crc);
846a194255dSDaniel Boulby #if N > 3
847a194255dSDaniel Boulby crc = crc_word(crc3 ^ words[3] ^ crc);
848a194255dSDaniel Boulby #if N > 4
849a194255dSDaniel Boulby crc = crc_word(crc4 ^ words[4] ^ crc);
850a194255dSDaniel Boulby #if N > 5
851a194255dSDaniel Boulby crc = crc_word(crc5 ^ words[5] ^ crc);
852a194255dSDaniel Boulby #endif
853a194255dSDaniel Boulby #endif
854a194255dSDaniel Boulby #endif
855a194255dSDaniel Boulby #endif
856a194255dSDaniel Boulby #endif
857a194255dSDaniel Boulby words += N;
858a194255dSDaniel Boulby }
859a194255dSDaniel Boulby else {
860a194255dSDaniel Boulby /* Big endian. */
861a194255dSDaniel Boulby
862a194255dSDaniel Boulby z_word_t crc0, word0, comb;
863a194255dSDaniel Boulby #if N > 1
864a194255dSDaniel Boulby z_word_t crc1, word1;
865a194255dSDaniel Boulby #if N > 2
866a194255dSDaniel Boulby z_word_t crc2, word2;
867a194255dSDaniel Boulby #if N > 3
868a194255dSDaniel Boulby z_word_t crc3, word3;
869a194255dSDaniel Boulby #if N > 4
870a194255dSDaniel Boulby z_word_t crc4, word4;
871a194255dSDaniel Boulby #if N > 5
872a194255dSDaniel Boulby z_word_t crc5, word5;
873a194255dSDaniel Boulby #endif
874a194255dSDaniel Boulby #endif
875a194255dSDaniel Boulby #endif
876a194255dSDaniel Boulby #endif
877a194255dSDaniel Boulby #endif
878a194255dSDaniel Boulby
879a194255dSDaniel Boulby /* Initialize the CRC for each braid. */
880a194255dSDaniel Boulby crc0 = byte_swap(crc);
881a194255dSDaniel Boulby #if N > 1
882a194255dSDaniel Boulby crc1 = 0;
883a194255dSDaniel Boulby #if N > 2
884a194255dSDaniel Boulby crc2 = 0;
885a194255dSDaniel Boulby #if N > 3
886a194255dSDaniel Boulby crc3 = 0;
887a194255dSDaniel Boulby #if N > 4
888a194255dSDaniel Boulby crc4 = 0;
889a194255dSDaniel Boulby #if N > 5
890a194255dSDaniel Boulby crc5 = 0;
891a194255dSDaniel Boulby #endif
892a194255dSDaniel Boulby #endif
893a194255dSDaniel Boulby #endif
894a194255dSDaniel Boulby #endif
895a194255dSDaniel Boulby #endif
896a194255dSDaniel Boulby
897a194255dSDaniel Boulby /*
898a194255dSDaniel Boulby Process the first blks-1 blocks, computing the CRCs on each braid
899a194255dSDaniel Boulby independently.
900a194255dSDaniel Boulby */
901a194255dSDaniel Boulby while (--blks) {
902a194255dSDaniel Boulby /* Load the word for each braid into registers. */
903a194255dSDaniel Boulby word0 = crc0 ^ words[0];
904a194255dSDaniel Boulby #if N > 1
905a194255dSDaniel Boulby word1 = crc1 ^ words[1];
906a194255dSDaniel Boulby #if N > 2
907a194255dSDaniel Boulby word2 = crc2 ^ words[2];
908a194255dSDaniel Boulby #if N > 3
909a194255dSDaniel Boulby word3 = crc3 ^ words[3];
910a194255dSDaniel Boulby #if N > 4
911a194255dSDaniel Boulby word4 = crc4 ^ words[4];
912a194255dSDaniel Boulby #if N > 5
913a194255dSDaniel Boulby word5 = crc5 ^ words[5];
914a194255dSDaniel Boulby #endif
915a194255dSDaniel Boulby #endif
916a194255dSDaniel Boulby #endif
917a194255dSDaniel Boulby #endif
918a194255dSDaniel Boulby #endif
919a194255dSDaniel Boulby words += N;
920a194255dSDaniel Boulby
921a194255dSDaniel Boulby /* Compute and update the CRC for each word. The loop should
922a194255dSDaniel Boulby get unrolled. */
923a194255dSDaniel Boulby crc0 = crc_braid_big_table[0][word0 & 0xff];
924a194255dSDaniel Boulby #if N > 1
925a194255dSDaniel Boulby crc1 = crc_braid_big_table[0][word1 & 0xff];
926a194255dSDaniel Boulby #if N > 2
927a194255dSDaniel Boulby crc2 = crc_braid_big_table[0][word2 & 0xff];
928a194255dSDaniel Boulby #if N > 3
929a194255dSDaniel Boulby crc3 = crc_braid_big_table[0][word3 & 0xff];
930a194255dSDaniel Boulby #if N > 4
931a194255dSDaniel Boulby crc4 = crc_braid_big_table[0][word4 & 0xff];
932a194255dSDaniel Boulby #if N > 5
933a194255dSDaniel Boulby crc5 = crc_braid_big_table[0][word5 & 0xff];
934a194255dSDaniel Boulby #endif
935a194255dSDaniel Boulby #endif
936a194255dSDaniel Boulby #endif
937a194255dSDaniel Boulby #endif
938a194255dSDaniel Boulby #endif
939a194255dSDaniel Boulby for (k = 1; k < W; k++) {
940a194255dSDaniel Boulby crc0 ^= crc_braid_big_table[k][(word0 >> (k << 3)) & 0xff];
941a194255dSDaniel Boulby #if N > 1
942a194255dSDaniel Boulby crc1 ^= crc_braid_big_table[k][(word1 >> (k << 3)) & 0xff];
943a194255dSDaniel Boulby #if N > 2
944a194255dSDaniel Boulby crc2 ^= crc_braid_big_table[k][(word2 >> (k << 3)) & 0xff];
945a194255dSDaniel Boulby #if N > 3
946a194255dSDaniel Boulby crc3 ^= crc_braid_big_table[k][(word3 >> (k << 3)) & 0xff];
947a194255dSDaniel Boulby #if N > 4
948a194255dSDaniel Boulby crc4 ^= crc_braid_big_table[k][(word4 >> (k << 3)) & 0xff];
949a194255dSDaniel Boulby #if N > 5
950a194255dSDaniel Boulby crc5 ^= crc_braid_big_table[k][(word5 >> (k << 3)) & 0xff];
951a194255dSDaniel Boulby #endif
952a194255dSDaniel Boulby #endif
953a194255dSDaniel Boulby #endif
954a194255dSDaniel Boulby #endif
955a194255dSDaniel Boulby #endif
956a194255dSDaniel Boulby }
957a194255dSDaniel Boulby }
958a194255dSDaniel Boulby
959a194255dSDaniel Boulby /*
960a194255dSDaniel Boulby Process the last block, combining the CRCs of the N braids at the
961a194255dSDaniel Boulby same time.
962a194255dSDaniel Boulby */
963a194255dSDaniel Boulby comb = crc_word_big(crc0 ^ words[0]);
964a194255dSDaniel Boulby #if N > 1
965a194255dSDaniel Boulby comb = crc_word_big(crc1 ^ words[1] ^ comb);
966a194255dSDaniel Boulby #if N > 2
967a194255dSDaniel Boulby comb = crc_word_big(crc2 ^ words[2] ^ comb);
968a194255dSDaniel Boulby #if N > 3
969a194255dSDaniel Boulby comb = crc_word_big(crc3 ^ words[3] ^ comb);
970a194255dSDaniel Boulby #if N > 4
971a194255dSDaniel Boulby comb = crc_word_big(crc4 ^ words[4] ^ comb);
972a194255dSDaniel Boulby #if N > 5
973a194255dSDaniel Boulby comb = crc_word_big(crc5 ^ words[5] ^ comb);
974a194255dSDaniel Boulby #endif
975a194255dSDaniel Boulby #endif
976a194255dSDaniel Boulby #endif
977a194255dSDaniel Boulby #endif
978a194255dSDaniel Boulby #endif
979a194255dSDaniel Boulby words += N;
980a194255dSDaniel Boulby crc = byte_swap(comb);
981a194255dSDaniel Boulby }
982a194255dSDaniel Boulby
983a194255dSDaniel Boulby /*
984a194255dSDaniel Boulby Update the pointer to the remaining bytes to process.
985a194255dSDaniel Boulby */
986a194255dSDaniel Boulby buf = (unsigned char const *)words;
987a194255dSDaniel Boulby }
988a194255dSDaniel Boulby
989a194255dSDaniel Boulby #endif /* W */
990a194255dSDaniel Boulby
991a194255dSDaniel Boulby /* Complete the computation of the CRC on any remaining bytes. */
992221b1638SMasahiro Yamada while (len >= 8) {
993221b1638SMasahiro Yamada len -= 8;
994a194255dSDaniel Boulby crc = (crc >> 8) ^ crc_table[(crc ^ *buf++) & 0xff];
995a194255dSDaniel Boulby crc = (crc >> 8) ^ crc_table[(crc ^ *buf++) & 0xff];
996a194255dSDaniel Boulby crc = (crc >> 8) ^ crc_table[(crc ^ *buf++) & 0xff];
997a194255dSDaniel Boulby crc = (crc >> 8) ^ crc_table[(crc ^ *buf++) & 0xff];
998a194255dSDaniel Boulby crc = (crc >> 8) ^ crc_table[(crc ^ *buf++) & 0xff];
999a194255dSDaniel Boulby crc = (crc >> 8) ^ crc_table[(crc ^ *buf++) & 0xff];
1000a194255dSDaniel Boulby crc = (crc >> 8) ^ crc_table[(crc ^ *buf++) & 0xff];
1001a194255dSDaniel Boulby crc = (crc >> 8) ^ crc_table[(crc ^ *buf++) & 0xff];
1002221b1638SMasahiro Yamada }
1003a194255dSDaniel Boulby while (len) {
1004a194255dSDaniel Boulby len--;
1005a194255dSDaniel Boulby crc = (crc >> 8) ^ crc_table[(crc ^ *buf++) & 0xff];
1006221b1638SMasahiro Yamada }
1007221b1638SMasahiro Yamada
1008a194255dSDaniel Boulby /* Return the CRC, post-conditioned. */
1009a194255dSDaniel Boulby return crc ^ 0xffffffff;
1010a194255dSDaniel Boulby }
1011a194255dSDaniel Boulby
1012a194255dSDaniel Boulby #endif
1013a194255dSDaniel Boulby
1014221b1638SMasahiro Yamada /* ========================================================================= */
crc32(unsigned long crc,const unsigned char FAR * buf,uInt len)1015*fd39217aSManish Pandey unsigned long ZEXPORT crc32(unsigned long crc, const unsigned char FAR *buf,
1016*fd39217aSManish Pandey uInt len) {
1017221b1638SMasahiro Yamada return crc32_z(crc, buf, len);
1018221b1638SMasahiro Yamada }
1019221b1638SMasahiro Yamada
1020221b1638SMasahiro Yamada /* ========================================================================= */
crc32_combine64(uLong crc1,uLong crc2,z_off64_t len2)1021*fd39217aSManish Pandey uLong ZEXPORT crc32_combine64(uLong crc1, uLong crc2, z_off64_t len2) {
1022a194255dSDaniel Boulby #ifdef DYNAMIC_CRC_TABLE
1023a194255dSDaniel Boulby once(&made, make_crc_table);
1024a194255dSDaniel Boulby #endif /* DYNAMIC_CRC_TABLE */
1025a194255dSDaniel Boulby return multmodp(x2nmodp(len2, 3), crc1) ^ (crc2 & 0xffffffff);
1026221b1638SMasahiro Yamada }
1027221b1638SMasahiro Yamada
1028221b1638SMasahiro Yamada /* ========================================================================= */
crc32_combine(uLong crc1,uLong crc2,z_off_t len2)1029*fd39217aSManish Pandey uLong ZEXPORT crc32_combine(uLong crc1, uLong crc2, z_off_t len2) {
1030a194255dSDaniel Boulby return crc32_combine64(crc1, crc2, (z_off64_t)len2);
1031221b1638SMasahiro Yamada }
1032221b1638SMasahiro Yamada
1033a194255dSDaniel Boulby /* ========================================================================= */
crc32_combine_gen64(z_off64_t len2)1034*fd39217aSManish Pandey uLong ZEXPORT crc32_combine_gen64(z_off64_t len2) {
1035a194255dSDaniel Boulby #ifdef DYNAMIC_CRC_TABLE
1036a194255dSDaniel Boulby once(&made, make_crc_table);
1037a194255dSDaniel Boulby #endif /* DYNAMIC_CRC_TABLE */
1038a194255dSDaniel Boulby return x2nmodp(len2, 3);
1039a194255dSDaniel Boulby }
1040a194255dSDaniel Boulby
1041a194255dSDaniel Boulby /* ========================================================================= */
crc32_combine_gen(z_off_t len2)1042*fd39217aSManish Pandey uLong ZEXPORT crc32_combine_gen(z_off_t len2) {
1043a194255dSDaniel Boulby return crc32_combine_gen64((z_off64_t)len2);
1044a194255dSDaniel Boulby }
1045a194255dSDaniel Boulby
1046a194255dSDaniel Boulby /* ========================================================================= */
crc32_combine_op(uLong crc1,uLong crc2,uLong op)1047*fd39217aSManish Pandey uLong ZEXPORT crc32_combine_op(uLong crc1, uLong crc2, uLong op) {
1048a194255dSDaniel Boulby return multmodp(op, crc1) ^ (crc2 & 0xffffffff);
1049221b1638SMasahiro Yamada }
1050