1*f399d4a2SKyungmin Park /* 2*f399d4a2SKyungmin Park * Oct 15, 2000 Matt Domsch <Matt_Domsch@dell.com> 3*f399d4a2SKyungmin Park * Nicer crc32 functions/docs submitted by linux@horizon.com. Thanks! 4*f399d4a2SKyungmin Park * Code was from the public domain, copyright abandoned. Code was 5*f399d4a2SKyungmin Park * subsequently included in the kernel, thus was re-licensed under the 6*f399d4a2SKyungmin Park * GNU GPL v2. 7*f399d4a2SKyungmin Park * 8*f399d4a2SKyungmin Park * Oct 12, 2000 Matt Domsch <Matt_Domsch@dell.com> 9*f399d4a2SKyungmin Park * Same crc32 function was used in 5 other places in the kernel. 10*f399d4a2SKyungmin Park * I made one version, and deleted the others. 11*f399d4a2SKyungmin Park * There are various incantations of crc32(). Some use a seed of 0 or ~0. 12*f399d4a2SKyungmin Park * Some xor at the end with ~0. The generic crc32() function takes 13*f399d4a2SKyungmin Park * seed as an argument, and doesn't xor at the end. Then individual 14*f399d4a2SKyungmin Park * users can do whatever they need. 15*f399d4a2SKyungmin Park * drivers/net/smc9194.c uses seed ~0, doesn't xor with ~0. 16*f399d4a2SKyungmin Park * fs/jffs2 uses seed 0, doesn't xor with ~0. 17*f399d4a2SKyungmin Park * fs/partitions/efi.c uses seed ~0, xor's with ~0. 18*f399d4a2SKyungmin Park * 19*f399d4a2SKyungmin Park * This source code is licensed under the GNU General Public License, 20*f399d4a2SKyungmin Park * Version 2. See the file COPYING for more details. 21*f399d4a2SKyungmin Park */ 22*f399d4a2SKyungmin Park 23*f399d4a2SKyungmin Park #ifdef UBI_LINUX 24*f399d4a2SKyungmin Park #include <linux/crc32.h> 25*f399d4a2SKyungmin Park #include <linux/kernel.h> 26*f399d4a2SKyungmin Park #include <linux/module.h> 27*f399d4a2SKyungmin Park #include <linux/compiler.h> 28*f399d4a2SKyungmin Park #endif 29*f399d4a2SKyungmin Park #include <linux/types.h> 30*f399d4a2SKyungmin Park 31*f399d4a2SKyungmin Park #include <asm/byteorder.h> 32*f399d4a2SKyungmin Park 33*f399d4a2SKyungmin Park #ifdef UBI_LINUX 34*f399d4a2SKyungmin Park #include <linux/slab.h> 35*f399d4a2SKyungmin Park #include <linux/init.h> 36*f399d4a2SKyungmin Park #include <asm/atomic.h> 37*f399d4a2SKyungmin Park #endif 38*f399d4a2SKyungmin Park #include "crc32defs.h" 39*f399d4a2SKyungmin Park #define CRC_LE_BITS 8 40*f399d4a2SKyungmin Park 41*f399d4a2SKyungmin Park # define __force 42*f399d4a2SKyungmin Park #ifndef __constant_cpu_to_le32 43*f399d4a2SKyungmin Park #define __constant_cpu_to_le32(x) ((__force __le32)(__u32)(x)) 44*f399d4a2SKyungmin Park #endif 45*f399d4a2SKyungmin Park #ifndef __constant_le32_to_cpu 46*f399d4a2SKyungmin Park #define __constant_le32_to_cpu(x) ((__force __u32)(__le32)(x)) 47*f399d4a2SKyungmin Park #endif 48*f399d4a2SKyungmin Park 49*f399d4a2SKyungmin Park #if CRC_LE_BITS == 8 50*f399d4a2SKyungmin Park #define tole(x) __constant_cpu_to_le32(x) 51*f399d4a2SKyungmin Park #define tobe(x) __constant_cpu_to_be32(x) 52*f399d4a2SKyungmin Park #else 53*f399d4a2SKyungmin Park #define tole(x) (x) 54*f399d4a2SKyungmin Park #define tobe(x) (x) 55*f399d4a2SKyungmin Park #endif 56*f399d4a2SKyungmin Park #include "crc32table.h" 57*f399d4a2SKyungmin Park #ifdef UBI_LINUX 58*f399d4a2SKyungmin Park MODULE_AUTHOR("Matt Domsch <Matt_Domsch@dell.com>"); 59*f399d4a2SKyungmin Park MODULE_DESCRIPTION("Ethernet CRC32 calculations"); 60*f399d4a2SKyungmin Park MODULE_LICENSE("GPL"); 61*f399d4a2SKyungmin Park #endif 62*f399d4a2SKyungmin Park /** 63*f399d4a2SKyungmin Park * crc32_le() - Calculate bitwise little-endian Ethernet AUTODIN II CRC32 64*f399d4a2SKyungmin Park * @crc: seed value for computation. ~0 for Ethernet, sometimes 0 for 65*f399d4a2SKyungmin Park * other uses, or the previous crc32 value if computing incrementally. 66*f399d4a2SKyungmin Park * @p: pointer to buffer over which CRC is run 67*f399d4a2SKyungmin Park * @len: length of buffer @p 68*f399d4a2SKyungmin Park */ 69*f399d4a2SKyungmin Park u32 crc32_le(u32 crc, unsigned char const *p, size_t len); 70*f399d4a2SKyungmin Park 71*f399d4a2SKyungmin Park #if CRC_LE_BITS == 1 72*f399d4a2SKyungmin Park /* 73*f399d4a2SKyungmin Park * In fact, the table-based code will work in this case, but it can be 74*f399d4a2SKyungmin Park * simplified by inlining the table in ?: form. 75*f399d4a2SKyungmin Park */ 76*f399d4a2SKyungmin Park 77*f399d4a2SKyungmin Park u32 crc32_le(u32 crc, unsigned char const *p, size_t len) 78*f399d4a2SKyungmin Park { 79*f399d4a2SKyungmin Park int i; 80*f399d4a2SKyungmin Park while (len--) { 81*f399d4a2SKyungmin Park crc ^= *p++; 82*f399d4a2SKyungmin Park for (i = 0; i < 8; i++) 83*f399d4a2SKyungmin Park crc = (crc >> 1) ^ ((crc & 1) ? CRCPOLY_LE : 0); 84*f399d4a2SKyungmin Park } 85*f399d4a2SKyungmin Park return crc; 86*f399d4a2SKyungmin Park } 87*f399d4a2SKyungmin Park #else /* Table-based approach */ 88*f399d4a2SKyungmin Park 89*f399d4a2SKyungmin Park u32 crc32_le(u32 crc, unsigned char const *p, size_t len) 90*f399d4a2SKyungmin Park { 91*f399d4a2SKyungmin Park # if CRC_LE_BITS == 8 92*f399d4a2SKyungmin Park const u32 *b =(u32 *)p; 93*f399d4a2SKyungmin Park const u32 *tab = crc32table_le; 94*f399d4a2SKyungmin Park 95*f399d4a2SKyungmin Park # ifdef __LITTLE_ENDIAN 96*f399d4a2SKyungmin Park # define DO_CRC(x) crc = tab[ (crc ^ (x)) & 255 ] ^ (crc>>8) 97*f399d4a2SKyungmin Park # else 98*f399d4a2SKyungmin Park # define DO_CRC(x) crc = tab[ ((crc >> 24) ^ (x)) & 255] ^ (crc<<8) 99*f399d4a2SKyungmin Park # endif 100*f399d4a2SKyungmin Park //printf("Crc32_le crc=%x\n",crc); 101*f399d4a2SKyungmin Park crc = __cpu_to_le32(crc); 102*f399d4a2SKyungmin Park /* Align it */ 103*f399d4a2SKyungmin Park if((((long)b)&3 && len)){ 104*f399d4a2SKyungmin Park do { 105*f399d4a2SKyungmin Park u8 *p = (u8 *)b; 106*f399d4a2SKyungmin Park DO_CRC(*p++); 107*f399d4a2SKyungmin Park b = (void *)p; 108*f399d4a2SKyungmin Park } while ((--len) && ((long)b)&3 ); 109*f399d4a2SKyungmin Park } 110*f399d4a2SKyungmin Park if((len >= 4)){ 111*f399d4a2SKyungmin Park /* load data 32 bits wide, xor data 32 bits wide. */ 112*f399d4a2SKyungmin Park size_t save_len = len & 3; 113*f399d4a2SKyungmin Park len = len >> 2; 114*f399d4a2SKyungmin Park --b; /* use pre increment below(*++b) for speed */ 115*f399d4a2SKyungmin Park do { 116*f399d4a2SKyungmin Park crc ^= *++b; 117*f399d4a2SKyungmin Park DO_CRC(0); 118*f399d4a2SKyungmin Park DO_CRC(0); 119*f399d4a2SKyungmin Park DO_CRC(0); 120*f399d4a2SKyungmin Park DO_CRC(0); 121*f399d4a2SKyungmin Park } while (--len); 122*f399d4a2SKyungmin Park b++; /* point to next byte(s) */ 123*f399d4a2SKyungmin Park len = save_len; 124*f399d4a2SKyungmin Park } 125*f399d4a2SKyungmin Park /* And the last few bytes */ 126*f399d4a2SKyungmin Park if(len){ 127*f399d4a2SKyungmin Park do { 128*f399d4a2SKyungmin Park u8 *p = (u8 *)b; 129*f399d4a2SKyungmin Park DO_CRC(*p++); 130*f399d4a2SKyungmin Park b = (void *)p; 131*f399d4a2SKyungmin Park } while (--len); 132*f399d4a2SKyungmin Park } 133*f399d4a2SKyungmin Park 134*f399d4a2SKyungmin Park return __le32_to_cpu(crc); 135*f399d4a2SKyungmin Park #undef ENDIAN_SHIFT 136*f399d4a2SKyungmin Park #undef DO_CRC 137*f399d4a2SKyungmin Park 138*f399d4a2SKyungmin Park # elif CRC_LE_BITS == 4 139*f399d4a2SKyungmin Park while (len--) { 140*f399d4a2SKyungmin Park crc ^= *p++; 141*f399d4a2SKyungmin Park crc = (crc >> 4) ^ crc32table_le[crc & 15]; 142*f399d4a2SKyungmin Park crc = (crc >> 4) ^ crc32table_le[crc & 15]; 143*f399d4a2SKyungmin Park } 144*f399d4a2SKyungmin Park return crc; 145*f399d4a2SKyungmin Park # elif CRC_LE_BITS == 2 146*f399d4a2SKyungmin Park while (len--) { 147*f399d4a2SKyungmin Park crc ^= *p++; 148*f399d4a2SKyungmin Park crc = (crc >> 2) ^ crc32table_le[crc & 3]; 149*f399d4a2SKyungmin Park crc = (crc >> 2) ^ crc32table_le[crc & 3]; 150*f399d4a2SKyungmin Park crc = (crc >> 2) ^ crc32table_le[crc & 3]; 151*f399d4a2SKyungmin Park crc = (crc >> 2) ^ crc32table_le[crc & 3]; 152*f399d4a2SKyungmin Park } 153*f399d4a2SKyungmin Park return crc; 154*f399d4a2SKyungmin Park # endif 155*f399d4a2SKyungmin Park } 156*f399d4a2SKyungmin Park #endif 157*f399d4a2SKyungmin Park #ifdef UBI_LINUX 158*f399d4a2SKyungmin Park /** 159*f399d4a2SKyungmin Park * crc32_be() - Calculate bitwise big-endian Ethernet AUTODIN II CRC32 160*f399d4a2SKyungmin Park * @crc: seed value for computation. ~0 for Ethernet, sometimes 0 for 161*f399d4a2SKyungmin Park * other uses, or the previous crc32 value if computing incrementally. 162*f399d4a2SKyungmin Park * @p: pointer to buffer over which CRC is run 163*f399d4a2SKyungmin Park * @len: length of buffer @p 164*f399d4a2SKyungmin Park */ 165*f399d4a2SKyungmin Park u32 __attribute_pure__ crc32_be(u32 crc, unsigned char const *p, size_t len); 166*f399d4a2SKyungmin Park 167*f399d4a2SKyungmin Park #if CRC_BE_BITS == 1 168*f399d4a2SKyungmin Park /* 169*f399d4a2SKyungmin Park * In fact, the table-based code will work in this case, but it can be 170*f399d4a2SKyungmin Park * simplified by inlining the table in ?: form. 171*f399d4a2SKyungmin Park */ 172*f399d4a2SKyungmin Park 173*f399d4a2SKyungmin Park u32 __attribute_pure__ crc32_be(u32 crc, unsigned char const *p, size_t len) 174*f399d4a2SKyungmin Park { 175*f399d4a2SKyungmin Park int i; 176*f399d4a2SKyungmin Park while (len--) { 177*f399d4a2SKyungmin Park crc ^= *p++ << 24; 178*f399d4a2SKyungmin Park for (i = 0; i < 8; i++) 179*f399d4a2SKyungmin Park crc = 180*f399d4a2SKyungmin Park (crc << 1) ^ ((crc & 0x80000000) ? CRCPOLY_BE : 181*f399d4a2SKyungmin Park 0); 182*f399d4a2SKyungmin Park } 183*f399d4a2SKyungmin Park return crc; 184*f399d4a2SKyungmin Park } 185*f399d4a2SKyungmin Park 186*f399d4a2SKyungmin Park #else /* Table-based approach */ 187*f399d4a2SKyungmin Park u32 __attribute_pure__ crc32_be(u32 crc, unsigned char const *p, size_t len) 188*f399d4a2SKyungmin Park { 189*f399d4a2SKyungmin Park # if CRC_BE_BITS == 8 190*f399d4a2SKyungmin Park const u32 *b =(u32 *)p; 191*f399d4a2SKyungmin Park const u32 *tab = crc32table_be; 192*f399d4a2SKyungmin Park 193*f399d4a2SKyungmin Park # ifdef __LITTLE_ENDIAN 194*f399d4a2SKyungmin Park # define DO_CRC(x) crc = tab[ (crc ^ (x)) & 255 ] ^ (crc>>8) 195*f399d4a2SKyungmin Park # else 196*f399d4a2SKyungmin Park # define DO_CRC(x) crc = tab[ ((crc >> 24) ^ (x)) & 255] ^ (crc<<8) 197*f399d4a2SKyungmin Park # endif 198*f399d4a2SKyungmin Park 199*f399d4a2SKyungmin Park crc = __cpu_to_be32(crc); 200*f399d4a2SKyungmin Park /* Align it */ 201*f399d4a2SKyungmin Park if(unlikely(((long)b)&3 && len)){ 202*f399d4a2SKyungmin Park do { 203*f399d4a2SKyungmin Park u8 *p = (u8 *)b; 204*f399d4a2SKyungmin Park DO_CRC(*p++); 205*f399d4a2SKyungmin Park b = (u32 *)p; 206*f399d4a2SKyungmin Park } while ((--len) && ((long)b)&3 ); 207*f399d4a2SKyungmin Park } 208*f399d4a2SKyungmin Park if(likely(len >= 4)){ 209*f399d4a2SKyungmin Park /* load data 32 bits wide, xor data 32 bits wide. */ 210*f399d4a2SKyungmin Park size_t save_len = len & 3; 211*f399d4a2SKyungmin Park len = len >> 2; 212*f399d4a2SKyungmin Park --b; /* use pre increment below(*++b) for speed */ 213*f399d4a2SKyungmin Park do { 214*f399d4a2SKyungmin Park crc ^= *++b; 215*f399d4a2SKyungmin Park DO_CRC(0); 216*f399d4a2SKyungmin Park DO_CRC(0); 217*f399d4a2SKyungmin Park DO_CRC(0); 218*f399d4a2SKyungmin Park DO_CRC(0); 219*f399d4a2SKyungmin Park } while (--len); 220*f399d4a2SKyungmin Park b++; /* point to next byte(s) */ 221*f399d4a2SKyungmin Park len = save_len; 222*f399d4a2SKyungmin Park } 223*f399d4a2SKyungmin Park /* And the last few bytes */ 224*f399d4a2SKyungmin Park if(len){ 225*f399d4a2SKyungmin Park do { 226*f399d4a2SKyungmin Park u8 *p = (u8 *)b; 227*f399d4a2SKyungmin Park DO_CRC(*p++); 228*f399d4a2SKyungmin Park b = (void *)p; 229*f399d4a2SKyungmin Park } while (--len); 230*f399d4a2SKyungmin Park } 231*f399d4a2SKyungmin Park return __be32_to_cpu(crc); 232*f399d4a2SKyungmin Park #undef ENDIAN_SHIFT 233*f399d4a2SKyungmin Park #undef DO_CRC 234*f399d4a2SKyungmin Park 235*f399d4a2SKyungmin Park # elif CRC_BE_BITS == 4 236*f399d4a2SKyungmin Park while (len--) { 237*f399d4a2SKyungmin Park crc ^= *p++ << 24; 238*f399d4a2SKyungmin Park crc = (crc << 4) ^ crc32table_be[crc >> 28]; 239*f399d4a2SKyungmin Park crc = (crc << 4) ^ crc32table_be[crc >> 28]; 240*f399d4a2SKyungmin Park } 241*f399d4a2SKyungmin Park return crc; 242*f399d4a2SKyungmin Park # elif CRC_BE_BITS == 2 243*f399d4a2SKyungmin Park while (len--) { 244*f399d4a2SKyungmin Park crc ^= *p++ << 24; 245*f399d4a2SKyungmin Park crc = (crc << 2) ^ crc32table_be[crc >> 30]; 246*f399d4a2SKyungmin Park crc = (crc << 2) ^ crc32table_be[crc >> 30]; 247*f399d4a2SKyungmin Park crc = (crc << 2) ^ crc32table_be[crc >> 30]; 248*f399d4a2SKyungmin Park crc = (crc << 2) ^ crc32table_be[crc >> 30]; 249*f399d4a2SKyungmin Park } 250*f399d4a2SKyungmin Park return crc; 251*f399d4a2SKyungmin Park # endif 252*f399d4a2SKyungmin Park } 253*f399d4a2SKyungmin Park #endif 254*f399d4a2SKyungmin Park 255*f399d4a2SKyungmin Park EXPORT_SYMBOL(crc32_le); 256*f399d4a2SKyungmin Park EXPORT_SYMBOL(crc32_be); 257*f399d4a2SKyungmin Park #endif 258*f399d4a2SKyungmin Park /* 259*f399d4a2SKyungmin Park * A brief CRC tutorial. 260*f399d4a2SKyungmin Park * 261*f399d4a2SKyungmin Park * A CRC is a long-division remainder. You add the CRC to the message, 262*f399d4a2SKyungmin Park * and the whole thing (message+CRC) is a multiple of the given 263*f399d4a2SKyungmin Park * CRC polynomial. To check the CRC, you can either check that the 264*f399d4a2SKyungmin Park * CRC matches the recomputed value, *or* you can check that the 265*f399d4a2SKyungmin Park * remainder computed on the message+CRC is 0. This latter approach 266*f399d4a2SKyungmin Park * is used by a lot of hardware implementations, and is why so many 267*f399d4a2SKyungmin Park * protocols put the end-of-frame flag after the CRC. 268*f399d4a2SKyungmin Park * 269*f399d4a2SKyungmin Park * It's actually the same long division you learned in school, except that 270*f399d4a2SKyungmin Park * - We're working in binary, so the digits are only 0 and 1, and 271*f399d4a2SKyungmin Park * - When dividing polynomials, there are no carries. Rather than add and 272*f399d4a2SKyungmin Park * subtract, we just xor. Thus, we tend to get a bit sloppy about 273*f399d4a2SKyungmin Park * the difference between adding and subtracting. 274*f399d4a2SKyungmin Park * 275*f399d4a2SKyungmin Park * A 32-bit CRC polynomial is actually 33 bits long. But since it's 276*f399d4a2SKyungmin Park * 33 bits long, bit 32 is always going to be set, so usually the CRC 277*f399d4a2SKyungmin Park * is written in hex with the most significant bit omitted. (If you're 278*f399d4a2SKyungmin Park * familiar with the IEEE 754 floating-point format, it's the same idea.) 279*f399d4a2SKyungmin Park * 280*f399d4a2SKyungmin Park * Note that a CRC is computed over a string of *bits*, so you have 281*f399d4a2SKyungmin Park * to decide on the endianness of the bits within each byte. To get 282*f399d4a2SKyungmin Park * the best error-detecting properties, this should correspond to the 283*f399d4a2SKyungmin Park * order they're actually sent. For example, standard RS-232 serial is 284*f399d4a2SKyungmin Park * little-endian; the most significant bit (sometimes used for parity) 285*f399d4a2SKyungmin Park * is sent last. And when appending a CRC word to a message, you should 286*f399d4a2SKyungmin Park * do it in the right order, matching the endianness. 287*f399d4a2SKyungmin Park * 288*f399d4a2SKyungmin Park * Just like with ordinary division, the remainder is always smaller than 289*f399d4a2SKyungmin Park * the divisor (the CRC polynomial) you're dividing by. Each step of the 290*f399d4a2SKyungmin Park * division, you take one more digit (bit) of the dividend and append it 291*f399d4a2SKyungmin Park * to the current remainder. Then you figure out the appropriate multiple 292*f399d4a2SKyungmin Park * of the divisor to subtract to being the remainder back into range. 293*f399d4a2SKyungmin Park * In binary, it's easy - it has to be either 0 or 1, and to make the 294*f399d4a2SKyungmin Park * XOR cancel, it's just a copy of bit 32 of the remainder. 295*f399d4a2SKyungmin Park * 296*f399d4a2SKyungmin Park * When computing a CRC, we don't care about the quotient, so we can 297*f399d4a2SKyungmin Park * throw the quotient bit away, but subtract the appropriate multiple of 298*f399d4a2SKyungmin Park * the polynomial from the remainder and we're back to where we started, 299*f399d4a2SKyungmin Park * ready to process the next bit. 300*f399d4a2SKyungmin Park * 301*f399d4a2SKyungmin Park * A big-endian CRC written this way would be coded like: 302*f399d4a2SKyungmin Park * for (i = 0; i < input_bits; i++) { 303*f399d4a2SKyungmin Park * multiple = remainder & 0x80000000 ? CRCPOLY : 0; 304*f399d4a2SKyungmin Park * remainder = (remainder << 1 | next_input_bit()) ^ multiple; 305*f399d4a2SKyungmin Park * } 306*f399d4a2SKyungmin Park * Notice how, to get at bit 32 of the shifted remainder, we look 307*f399d4a2SKyungmin Park * at bit 31 of the remainder *before* shifting it. 308*f399d4a2SKyungmin Park * 309*f399d4a2SKyungmin Park * But also notice how the next_input_bit() bits we're shifting into 310*f399d4a2SKyungmin Park * the remainder don't actually affect any decision-making until 311*f399d4a2SKyungmin Park * 32 bits later. Thus, the first 32 cycles of this are pretty boring. 312*f399d4a2SKyungmin Park * Also, to add the CRC to a message, we need a 32-bit-long hole for it at 313*f399d4a2SKyungmin Park * the end, so we have to add 32 extra cycles shifting in zeros at the 314*f399d4a2SKyungmin Park * end of every message, 315*f399d4a2SKyungmin Park * 316*f399d4a2SKyungmin Park * So the standard trick is to rearrage merging in the next_input_bit() 317*f399d4a2SKyungmin Park * until the moment it's needed. Then the first 32 cycles can be precomputed, 318*f399d4a2SKyungmin Park * and merging in the final 32 zero bits to make room for the CRC can be 319*f399d4a2SKyungmin Park * skipped entirely. 320*f399d4a2SKyungmin Park * This changes the code to: 321*f399d4a2SKyungmin Park * for (i = 0; i < input_bits; i++) { 322*f399d4a2SKyungmin Park * remainder ^= next_input_bit() << 31; 323*f399d4a2SKyungmin Park * multiple = (remainder & 0x80000000) ? CRCPOLY : 0; 324*f399d4a2SKyungmin Park * remainder = (remainder << 1) ^ multiple; 325*f399d4a2SKyungmin Park * } 326*f399d4a2SKyungmin Park * With this optimization, the little-endian code is simpler: 327*f399d4a2SKyungmin Park * for (i = 0; i < input_bits; i++) { 328*f399d4a2SKyungmin Park * remainder ^= next_input_bit(); 329*f399d4a2SKyungmin Park * multiple = (remainder & 1) ? CRCPOLY : 0; 330*f399d4a2SKyungmin Park * remainder = (remainder >> 1) ^ multiple; 331*f399d4a2SKyungmin Park * } 332*f399d4a2SKyungmin Park * 333*f399d4a2SKyungmin Park * Note that the other details of endianness have been hidden in CRCPOLY 334*f399d4a2SKyungmin Park * (which must be bit-reversed) and next_input_bit(). 335*f399d4a2SKyungmin Park * 336*f399d4a2SKyungmin Park * However, as long as next_input_bit is returning the bits in a sensible 337*f399d4a2SKyungmin Park * order, we can actually do the merging 8 or more bits at a time rather 338*f399d4a2SKyungmin Park * than one bit at a time: 339*f399d4a2SKyungmin Park * for (i = 0; i < input_bytes; i++) { 340*f399d4a2SKyungmin Park * remainder ^= next_input_byte() << 24; 341*f399d4a2SKyungmin Park * for (j = 0; j < 8; j++) { 342*f399d4a2SKyungmin Park * multiple = (remainder & 0x80000000) ? CRCPOLY : 0; 343*f399d4a2SKyungmin Park * remainder = (remainder << 1) ^ multiple; 344*f399d4a2SKyungmin Park * } 345*f399d4a2SKyungmin Park * } 346*f399d4a2SKyungmin Park * Or in little-endian: 347*f399d4a2SKyungmin Park * for (i = 0; i < input_bytes; i++) { 348*f399d4a2SKyungmin Park * remainder ^= next_input_byte(); 349*f399d4a2SKyungmin Park * for (j = 0; j < 8; j++) { 350*f399d4a2SKyungmin Park * multiple = (remainder & 1) ? CRCPOLY : 0; 351*f399d4a2SKyungmin Park * remainder = (remainder << 1) ^ multiple; 352*f399d4a2SKyungmin Park * } 353*f399d4a2SKyungmin Park * } 354*f399d4a2SKyungmin Park * If the input is a multiple of 32 bits, you can even XOR in a 32-bit 355*f399d4a2SKyungmin Park * word at a time and increase the inner loop count to 32. 356*f399d4a2SKyungmin Park * 357*f399d4a2SKyungmin Park * You can also mix and match the two loop styles, for example doing the 358*f399d4a2SKyungmin Park * bulk of a message byte-at-a-time and adding bit-at-a-time processing 359*f399d4a2SKyungmin Park * for any fractional bytes at the end. 360*f399d4a2SKyungmin Park * 361*f399d4a2SKyungmin Park * The only remaining optimization is to the byte-at-a-time table method. 362*f399d4a2SKyungmin Park * Here, rather than just shifting one bit of the remainder to decide 363*f399d4a2SKyungmin Park * in the correct multiple to subtract, we can shift a byte at a time. 364*f399d4a2SKyungmin Park * This produces a 40-bit (rather than a 33-bit) intermediate remainder, 365*f399d4a2SKyungmin Park * but again the multiple of the polynomial to subtract depends only on 366*f399d4a2SKyungmin Park * the high bits, the high 8 bits in this case. 367*f399d4a2SKyungmin Park * 368*f399d4a2SKyungmin Park * The multile we need in that case is the low 32 bits of a 40-bit 369*f399d4a2SKyungmin Park * value whose high 8 bits are given, and which is a multiple of the 370*f399d4a2SKyungmin Park * generator polynomial. This is simply the CRC-32 of the given 371*f399d4a2SKyungmin Park * one-byte message. 372*f399d4a2SKyungmin Park * 373*f399d4a2SKyungmin Park * Two more details: normally, appending zero bits to a message which 374*f399d4a2SKyungmin Park * is already a multiple of a polynomial produces a larger multiple of that 375*f399d4a2SKyungmin Park * polynomial. To enable a CRC to detect this condition, it's common to 376*f399d4a2SKyungmin Park * invert the CRC before appending it. This makes the remainder of the 377*f399d4a2SKyungmin Park * message+crc come out not as zero, but some fixed non-zero value. 378*f399d4a2SKyungmin Park * 379*f399d4a2SKyungmin Park * The same problem applies to zero bits prepended to the message, and 380*f399d4a2SKyungmin Park * a similar solution is used. Instead of starting with a remainder of 381*f399d4a2SKyungmin Park * 0, an initial remainder of all ones is used. As long as you start 382*f399d4a2SKyungmin Park * the same way on decoding, it doesn't make a difference. 383*f399d4a2SKyungmin Park */ 384*f399d4a2SKyungmin Park 385*f399d4a2SKyungmin Park #ifdef UNITTEST 386*f399d4a2SKyungmin Park 387*f399d4a2SKyungmin Park #include <stdlib.h> 388*f399d4a2SKyungmin Park #include <stdio.h> 389*f399d4a2SKyungmin Park 390*f399d4a2SKyungmin Park #ifdef UBI_LINUX /*Not used at present */ 391*f399d4a2SKyungmin Park static void 392*f399d4a2SKyungmin Park buf_dump(char const *prefix, unsigned char const *buf, size_t len) 393*f399d4a2SKyungmin Park { 394*f399d4a2SKyungmin Park fputs(prefix, stdout); 395*f399d4a2SKyungmin Park while (len--) 396*f399d4a2SKyungmin Park printf(" %02x", *buf++); 397*f399d4a2SKyungmin Park putchar('\n'); 398*f399d4a2SKyungmin Park 399*f399d4a2SKyungmin Park } 400*f399d4a2SKyungmin Park #endif 401*f399d4a2SKyungmin Park 402*f399d4a2SKyungmin Park static void bytereverse(unsigned char *buf, size_t len) 403*f399d4a2SKyungmin Park { 404*f399d4a2SKyungmin Park while (len--) { 405*f399d4a2SKyungmin Park unsigned char x = bitrev8(*buf); 406*f399d4a2SKyungmin Park *buf++ = x; 407*f399d4a2SKyungmin Park } 408*f399d4a2SKyungmin Park } 409*f399d4a2SKyungmin Park 410*f399d4a2SKyungmin Park static void random_garbage(unsigned char *buf, size_t len) 411*f399d4a2SKyungmin Park { 412*f399d4a2SKyungmin Park while (len--) 413*f399d4a2SKyungmin Park *buf++ = (unsigned char) random(); 414*f399d4a2SKyungmin Park } 415*f399d4a2SKyungmin Park 416*f399d4a2SKyungmin Park #ifdef UBI_LINUX /* Not used at present */ 417*f399d4a2SKyungmin Park static void store_le(u32 x, unsigned char *buf) 418*f399d4a2SKyungmin Park { 419*f399d4a2SKyungmin Park buf[0] = (unsigned char) x; 420*f399d4a2SKyungmin Park buf[1] = (unsigned char) (x >> 8); 421*f399d4a2SKyungmin Park buf[2] = (unsigned char) (x >> 16); 422*f399d4a2SKyungmin Park buf[3] = (unsigned char) (x >> 24); 423*f399d4a2SKyungmin Park } 424*f399d4a2SKyungmin Park #endif 425*f399d4a2SKyungmin Park 426*f399d4a2SKyungmin Park static void store_be(u32 x, unsigned char *buf) 427*f399d4a2SKyungmin Park { 428*f399d4a2SKyungmin Park buf[0] = (unsigned char) (x >> 24); 429*f399d4a2SKyungmin Park buf[1] = (unsigned char) (x >> 16); 430*f399d4a2SKyungmin Park buf[2] = (unsigned char) (x >> 8); 431*f399d4a2SKyungmin Park buf[3] = (unsigned char) x; 432*f399d4a2SKyungmin Park } 433*f399d4a2SKyungmin Park 434*f399d4a2SKyungmin Park /* 435*f399d4a2SKyungmin Park * This checks that CRC(buf + CRC(buf)) = 0, and that 436*f399d4a2SKyungmin Park * CRC commutes with bit-reversal. This has the side effect 437*f399d4a2SKyungmin Park * of bytewise bit-reversing the input buffer, and returns 438*f399d4a2SKyungmin Park * the CRC of the reversed buffer. 439*f399d4a2SKyungmin Park */ 440*f399d4a2SKyungmin Park static u32 test_step(u32 init, unsigned char *buf, size_t len) 441*f399d4a2SKyungmin Park { 442*f399d4a2SKyungmin Park u32 crc1, crc2; 443*f399d4a2SKyungmin Park size_t i; 444*f399d4a2SKyungmin Park 445*f399d4a2SKyungmin Park crc1 = crc32_be(init, buf, len); 446*f399d4a2SKyungmin Park store_be(crc1, buf + len); 447*f399d4a2SKyungmin Park crc2 = crc32_be(init, buf, len + 4); 448*f399d4a2SKyungmin Park if (crc2) 449*f399d4a2SKyungmin Park printf("\nCRC cancellation fail: 0x%08x should be 0\n", 450*f399d4a2SKyungmin Park crc2); 451*f399d4a2SKyungmin Park 452*f399d4a2SKyungmin Park for (i = 0; i <= len + 4; i++) { 453*f399d4a2SKyungmin Park crc2 = crc32_be(init, buf, i); 454*f399d4a2SKyungmin Park crc2 = crc32_be(crc2, buf + i, len + 4 - i); 455*f399d4a2SKyungmin Park if (crc2) 456*f399d4a2SKyungmin Park printf("\nCRC split fail: 0x%08x\n", crc2); 457*f399d4a2SKyungmin Park } 458*f399d4a2SKyungmin Park 459*f399d4a2SKyungmin Park /* Now swap it around for the other test */ 460*f399d4a2SKyungmin Park 461*f399d4a2SKyungmin Park bytereverse(buf, len + 4); 462*f399d4a2SKyungmin Park init = bitrev32(init); 463*f399d4a2SKyungmin Park crc2 = bitrev32(crc1); 464*f399d4a2SKyungmin Park if (crc1 != bitrev32(crc2)) 465*f399d4a2SKyungmin Park printf("\nBit reversal fail: 0x%08x -> 0x%08x -> 0x%08x\n", 466*f399d4a2SKyungmin Park crc1, crc2, bitrev32(crc2)); 467*f399d4a2SKyungmin Park crc1 = crc32_le(init, buf, len); 468*f399d4a2SKyungmin Park if (crc1 != crc2) 469*f399d4a2SKyungmin Park printf("\nCRC endianness fail: 0x%08x != 0x%08x\n", crc1, 470*f399d4a2SKyungmin Park crc2); 471*f399d4a2SKyungmin Park crc2 = crc32_le(init, buf, len + 4); 472*f399d4a2SKyungmin Park if (crc2) 473*f399d4a2SKyungmin Park printf("\nCRC cancellation fail: 0x%08x should be 0\n", 474*f399d4a2SKyungmin Park crc2); 475*f399d4a2SKyungmin Park 476*f399d4a2SKyungmin Park for (i = 0; i <= len + 4; i++) { 477*f399d4a2SKyungmin Park crc2 = crc32_le(init, buf, i); 478*f399d4a2SKyungmin Park crc2 = crc32_le(crc2, buf + i, len + 4 - i); 479*f399d4a2SKyungmin Park if (crc2) 480*f399d4a2SKyungmin Park printf("\nCRC split fail: 0x%08x\n", crc2); 481*f399d4a2SKyungmin Park } 482*f399d4a2SKyungmin Park 483*f399d4a2SKyungmin Park return crc1; 484*f399d4a2SKyungmin Park } 485*f399d4a2SKyungmin Park 486*f399d4a2SKyungmin Park #define SIZE 64 487*f399d4a2SKyungmin Park #define INIT1 0 488*f399d4a2SKyungmin Park #define INIT2 0 489*f399d4a2SKyungmin Park 490*f399d4a2SKyungmin Park int main(void) 491*f399d4a2SKyungmin Park { 492*f399d4a2SKyungmin Park unsigned char buf1[SIZE + 4]; 493*f399d4a2SKyungmin Park unsigned char buf2[SIZE + 4]; 494*f399d4a2SKyungmin Park unsigned char buf3[SIZE + 4]; 495*f399d4a2SKyungmin Park int i, j; 496*f399d4a2SKyungmin Park u32 crc1, crc2, crc3; 497*f399d4a2SKyungmin Park 498*f399d4a2SKyungmin Park for (i = 0; i <= SIZE; i++) { 499*f399d4a2SKyungmin Park printf("\rTesting length %d...", i); 500*f399d4a2SKyungmin Park fflush(stdout); 501*f399d4a2SKyungmin Park random_garbage(buf1, i); 502*f399d4a2SKyungmin Park random_garbage(buf2, i); 503*f399d4a2SKyungmin Park for (j = 0; j < i; j++) 504*f399d4a2SKyungmin Park buf3[j] = buf1[j] ^ buf2[j]; 505*f399d4a2SKyungmin Park 506*f399d4a2SKyungmin Park crc1 = test_step(INIT1, buf1, i); 507*f399d4a2SKyungmin Park crc2 = test_step(INIT2, buf2, i); 508*f399d4a2SKyungmin Park /* Now check that CRC(buf1 ^ buf2) = CRC(buf1) ^ CRC(buf2) */ 509*f399d4a2SKyungmin Park crc3 = test_step(INIT1 ^ INIT2, buf3, i); 510*f399d4a2SKyungmin Park if (crc3 != (crc1 ^ crc2)) 511*f399d4a2SKyungmin Park printf("CRC XOR fail: 0x%08x != 0x%08x ^ 0x%08x\n", 512*f399d4a2SKyungmin Park crc3, crc1, crc2); 513*f399d4a2SKyungmin Park } 514*f399d4a2SKyungmin Park printf("\nAll test complete. No failures expected.\n"); 515*f399d4a2SKyungmin Park return 0; 516*f399d4a2SKyungmin Park } 517*f399d4a2SKyungmin Park 518*f399d4a2SKyungmin Park #endif /* UNITTEST */ 519