1*4882a593Smuzhiyun // SPDX-License-Identifier: GPL-2.0-or-later
2*4882a593Smuzhiyun /*
3*4882a593Smuzhiyun * This file contains an ECC algorithm that detects and corrects 1 bit
4*4882a593Smuzhiyun * errors in a 256 byte block of data.
5*4882a593Smuzhiyun *
6*4882a593Smuzhiyun * Copyright © 2008 Koninklijke Philips Electronics NV.
7*4882a593Smuzhiyun * Author: Frans Meulenbroeks
8*4882a593Smuzhiyun *
9*4882a593Smuzhiyun * Completely replaces the previous ECC implementation which was written by:
10*4882a593Smuzhiyun * Steven J. Hill (sjhill@realitydiluted.com)
11*4882a593Smuzhiyun * Thomas Gleixner (tglx@linutronix.de)
12*4882a593Smuzhiyun *
13*4882a593Smuzhiyun * Information on how this algorithm works and how it was developed
14*4882a593Smuzhiyun * can be found in Documentation/driver-api/mtd/nand_ecc.rst
15*4882a593Smuzhiyun */
16*4882a593Smuzhiyun
17*4882a593Smuzhiyun #include <linux/types.h>
18*4882a593Smuzhiyun #include <linux/kernel.h>
19*4882a593Smuzhiyun #include <linux/module.h>
20*4882a593Smuzhiyun #include <linux/mtd/mtd.h>
21*4882a593Smuzhiyun #include <linux/mtd/rawnand.h>
22*4882a593Smuzhiyun #include <linux/mtd/nand_ecc.h>
23*4882a593Smuzhiyun #include <asm/byteorder.h>
24*4882a593Smuzhiyun
/*
 * invparity is a 256 byte lookup table holding the *inverted* (odd)
 * parity of each possible byte value: if the number of set bits in
 * the index byte is even, the array element is 1, and when the number
 * of set bits is odd the array element is 0.
 * Indexed by a raw byte (or a parity-masked byte) in
 * __nand_calculate_ecc() to build the ECC code bytes.
 */
static const char invparity[256] = {
	1, 0, 0, 1, 0, 1, 1, 0, 0, 1, 1, 0, 1, 0, 0, 1,
	0, 1, 1, 0, 1, 0, 0, 1, 1, 0, 0, 1, 0, 1, 1, 0,
	0, 1, 1, 0, 1, 0, 0, 1, 1, 0, 0, 1, 0, 1, 1, 0,
	1, 0, 0, 1, 0, 1, 1, 0, 0, 1, 1, 0, 1, 0, 0, 1,
	0, 1, 1, 0, 1, 0, 0, 1, 1, 0, 0, 1, 0, 1, 1, 0,
	1, 0, 0, 1, 0, 1, 1, 0, 0, 1, 1, 0, 1, 0, 0, 1,
	1, 0, 0, 1, 0, 1, 1, 0, 0, 1, 1, 0, 1, 0, 0, 1,
	0, 1, 1, 0, 1, 0, 0, 1, 1, 0, 0, 1, 0, 1, 1, 0,
	0, 1, 1, 0, 1, 0, 0, 1, 1, 0, 0, 1, 0, 1, 1, 0,
	1, 0, 0, 1, 0, 1, 1, 0, 0, 1, 1, 0, 1, 0, 0, 1,
	1, 0, 0, 1, 0, 1, 1, 0, 0, 1, 1, 0, 1, 0, 0, 1,
	0, 1, 1, 0, 1, 0, 0, 1, 1, 0, 0, 1, 0, 1, 1, 0,
	1, 0, 0, 1, 0, 1, 1, 0, 0, 1, 1, 0, 1, 0, 0, 1,
	0, 1, 1, 0, 1, 0, 0, 1, 1, 0, 0, 1, 0, 1, 1, 0,
	0, 1, 1, 0, 1, 0, 0, 1, 1, 0, 0, 1, 0, 1, 1, 0,
	1, 0, 0, 1, 0, 1, 1, 0, 0, 1, 1, 0, 1, 0, 0, 1
};
49*4882a593Smuzhiyun
/*
 * bitsperbyte is a 256 byte popcount table: entry [b] is the number of
 * set bits in byte value b.
 * This is only used for testing and repairing parity
 * (a precalculated table slightly improves performance over computing
 * the popcount on the fly).
 */
static const char bitsperbyte[256] = {
	0, 1, 1, 2, 1, 2, 2, 3, 1, 2, 2, 3, 2, 3, 3, 4,
	1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
	1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
	2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
	1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
	2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
	2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
	3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
	1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
	2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
	2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
	3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
	2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
	3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
	3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
	4, 5, 5, 6, 5, 6, 6, 7, 5, 6, 6, 7, 6, 7, 7, 8,
};
73*4882a593Smuzhiyun
/*
 * addressbits is a lookup table that extracts, from the xor-ed ECC
 * syndrome bytes, the interleaved address bits that identify the
 * faulty bit location.
 * This is only used for repairing parity;
 * see the comments in __nand_correct_data() for more details.
 */
static const char addressbits[256] = {
	0x00, 0x00, 0x01, 0x01, 0x00, 0x00, 0x01, 0x01,
	0x02, 0x02, 0x03, 0x03, 0x02, 0x02, 0x03, 0x03,
	0x00, 0x00, 0x01, 0x01, 0x00, 0x00, 0x01, 0x01,
	0x02, 0x02, 0x03, 0x03, 0x02, 0x02, 0x03, 0x03,
	0x04, 0x04, 0x05, 0x05, 0x04, 0x04, 0x05, 0x05,
	0x06, 0x06, 0x07, 0x07, 0x06, 0x06, 0x07, 0x07,
	0x04, 0x04, 0x05, 0x05, 0x04, 0x04, 0x05, 0x05,
	0x06, 0x06, 0x07, 0x07, 0x06, 0x06, 0x07, 0x07,
	0x00, 0x00, 0x01, 0x01, 0x00, 0x00, 0x01, 0x01,
	0x02, 0x02, 0x03, 0x03, 0x02, 0x02, 0x03, 0x03,
	0x00, 0x00, 0x01, 0x01, 0x00, 0x00, 0x01, 0x01,
	0x02, 0x02, 0x03, 0x03, 0x02, 0x02, 0x03, 0x03,
	0x04, 0x04, 0x05, 0x05, 0x04, 0x04, 0x05, 0x05,
	0x06, 0x06, 0x07, 0x07, 0x06, 0x06, 0x07, 0x07,
	0x04, 0x04, 0x05, 0x05, 0x04, 0x04, 0x05, 0x05,
	0x06, 0x06, 0x07, 0x07, 0x06, 0x06, 0x07, 0x07,
	0x08, 0x08, 0x09, 0x09, 0x08, 0x08, 0x09, 0x09,
	0x0a, 0x0a, 0x0b, 0x0b, 0x0a, 0x0a, 0x0b, 0x0b,
	0x08, 0x08, 0x09, 0x09, 0x08, 0x08, 0x09, 0x09,
	0x0a, 0x0a, 0x0b, 0x0b, 0x0a, 0x0a, 0x0b, 0x0b,
	0x0c, 0x0c, 0x0d, 0x0d, 0x0c, 0x0c, 0x0d, 0x0d,
	0x0e, 0x0e, 0x0f, 0x0f, 0x0e, 0x0e, 0x0f, 0x0f,
	0x0c, 0x0c, 0x0d, 0x0d, 0x0c, 0x0c, 0x0d, 0x0d,
	0x0e, 0x0e, 0x0f, 0x0f, 0x0e, 0x0e, 0x0f, 0x0f,
	0x08, 0x08, 0x09, 0x09, 0x08, 0x08, 0x09, 0x09,
	0x0a, 0x0a, 0x0b, 0x0b, 0x0a, 0x0a, 0x0b, 0x0b,
	0x08, 0x08, 0x09, 0x09, 0x08, 0x08, 0x09, 0x09,
	0x0a, 0x0a, 0x0b, 0x0b, 0x0a, 0x0a, 0x0b, 0x0b,
	0x0c, 0x0c, 0x0d, 0x0d, 0x0c, 0x0c, 0x0d, 0x0d,
	0x0e, 0x0e, 0x0f, 0x0f, 0x0e, 0x0e, 0x0f, 0x0f,
	0x0c, 0x0c, 0x0d, 0x0d, 0x0c, 0x0c, 0x0d, 0x0d,
	0x0e, 0x0e, 0x0f, 0x0f, 0x0e, 0x0e, 0x0f, 0x0f
};
114*4882a593Smuzhiyun
/**
 * __nand_calculate_ecc - [NAND Interface] Calculate 3-byte ECC for 256/512-byte
 *			  block
 * @buf: input buffer with raw data (assumed 32-bit aligned; see loop comment)
 * @eccsize: data bytes per ECC step (256 or 512)
 * @code: output buffer with the 3 ECC bytes
 * @sm_order: Smart Media byte ordering (swaps code[0] and code[1])
 */
void __nand_calculate_ecc(const unsigned char *buf, unsigned int eccsize,
			  unsigned char *code, bool sm_order)
{
	int i;
	const uint32_t *bp = (uint32_t *)buf;
	/* 256 or 512 bytes/ecc: eccsize_mult is 1 or 2 */
	const uint32_t eccsize_mult = eccsize >> 8;
	uint32_t cur;		/* current value in buffer */
	/* rp0..rp15..rp17 are the various accumulated parities (per byte) */
	uint32_t rp0, rp1, rp2, rp3, rp4, rp5, rp6, rp7;
	uint32_t rp8, rp9, rp10, rp11, rp12, rp13, rp14, rp15, rp16;
	uint32_t rp17;		/* only valid when eccsize_mult == 2 */
	uint32_t par;		/* the cumulative parity for all data */
	uint32_t tmppar;	/* the cumulative parity for this iteration;
				   for rp12, rp14 and rp16 at the end of the
				   loop */

	par = 0;
	rp4 = 0;
	rp6 = 0;
	rp8 = 0;
	rp10 = 0;
	rp12 = 0;
	rp14 = 0;
	rp16 = 0;

	/*
	 * The loop is unrolled a number of times;
	 * This avoids if statements to decide on which rp value to update
	 * Also we process the data by longwords.
	 * Note: passing unaligned data might give a performance penalty.
	 * It is assumed that the buffers are aligned.
	 * tmppar is the cumulative sum of this iteration.
	 * needed for calculating rp12, rp14, rp16 and par
	 * also used as a performance improvement for rp6, rp8 and rp10
	 */
	for (i = 0; i < eccsize_mult << 2; i++) {
		cur = *bp++;
		tmppar = cur;
		rp4 ^= cur;
		cur = *bp++;
		tmppar ^= cur;
		rp6 ^= tmppar;
		cur = *bp++;
		tmppar ^= cur;
		rp4 ^= cur;
		cur = *bp++;
		tmppar ^= cur;
		rp8 ^= tmppar;

		cur = *bp++;
		tmppar ^= cur;
		rp4 ^= cur;
		rp6 ^= cur;
		cur = *bp++;
		tmppar ^= cur;
		rp6 ^= cur;
		cur = *bp++;
		tmppar ^= cur;
		rp4 ^= cur;
		cur = *bp++;
		tmppar ^= cur;
		rp10 ^= tmppar;

		cur = *bp++;
		tmppar ^= cur;
		rp4 ^= cur;
		rp6 ^= cur;
		rp8 ^= cur;
		cur = *bp++;
		tmppar ^= cur;
		rp6 ^= cur;
		rp8 ^= cur;
		cur = *bp++;
		tmppar ^= cur;
		rp4 ^= cur;
		rp8 ^= cur;
		cur = *bp++;
		tmppar ^= cur;
		rp8 ^= cur;

		cur = *bp++;
		tmppar ^= cur;
		rp4 ^= cur;
		rp6 ^= cur;
		cur = *bp++;
		tmppar ^= cur;
		rp6 ^= cur;
		cur = *bp++;
		tmppar ^= cur;
		rp4 ^= cur;
		cur = *bp++;
		tmppar ^= cur;

		/* fold this 64-byte chunk's parity into the block totals */
		par ^= tmppar;
		if ((i & 0x1) == 0)
			rp12 ^= tmppar;
		if ((i & 0x2) == 0)
			rp14 ^= tmppar;
		if (eccsize_mult == 2 && (i & 0x4) == 0)
			rp16 ^= tmppar;
	}

	/*
	 * handle the fact that we use longword operations
	 * we'll bring rp4..rp14..rp16 back to single byte entities by
	 * shifting and xoring first fold the upper and lower 16 bits,
	 * then the upper and lower 8 bits.
	 */
	rp4 ^= (rp4 >> 16);
	rp4 ^= (rp4 >> 8);
	rp4 &= 0xff;
	rp6 ^= (rp6 >> 16);
	rp6 ^= (rp6 >> 8);
	rp6 &= 0xff;
	rp8 ^= (rp8 >> 16);
	rp8 ^= (rp8 >> 8);
	rp8 &= 0xff;
	rp10 ^= (rp10 >> 16);
	rp10 ^= (rp10 >> 8);
	rp10 &= 0xff;
	rp12 ^= (rp12 >> 16);
	rp12 ^= (rp12 >> 8);
	rp12 &= 0xff;
	rp14 ^= (rp14 >> 16);
	rp14 ^= (rp14 >> 8);
	rp14 &= 0xff;
	if (eccsize_mult == 2) {
		rp16 ^= (rp16 >> 16);
		rp16 ^= (rp16 >> 8);
		rp16 &= 0xff;
	}

	/*
	 * we also need to calculate the row parity for rp0..rp3
	 * This is present in par, because par is now
	 * rp3 rp3 rp2 rp2 in little endian and
	 * rp2 rp2 rp3 rp3 in big endian
	 * as well as
	 * rp1 rp0 rp1 rp0 in little endian and
	 * rp0 rp1 rp0 rp1 in big endian
	 * First calculate rp2 and rp3
	 */
#ifdef __BIG_ENDIAN
	rp2 = (par >> 16);
	rp2 ^= (rp2 >> 8);
	rp2 &= 0xff;
	rp3 = par & 0xffff;
	rp3 ^= (rp3 >> 8);
	rp3 &= 0xff;
#else
	rp3 = (par >> 16);
	rp3 ^= (rp3 >> 8);
	rp3 &= 0xff;
	rp2 = par & 0xffff;
	rp2 ^= (rp2 >> 8);
	rp2 &= 0xff;
#endif

	/* reduce par to 16 bits then calculate rp1 and rp0 */
	par ^= (par >> 16);
#ifdef __BIG_ENDIAN
	rp0 = (par >> 8) & 0xff;
	rp1 = (par & 0xff);
#else
	rp1 = (par >> 8) & 0xff;
	rp0 = (par & 0xff);
#endif

	/* finally reduce par to 8 bits */
	par ^= (par >> 8);
	par &= 0xff;

	/*
	 * and calculate rp5..rp15..rp17
	 * note that par = rp4 ^ rp5 and due to the commutative property
	 * of the ^ operator we can say:
	 * rp5 = (par ^ rp4);
	 * The & 0xff seems superfluous, but benchmarking learned that
	 * leaving it out gives slightly worse results. No idea why, probably
	 * it has to do with the way the pipeline in pentium is organized.
	 */
	rp5 = (par ^ rp4) & 0xff;
	rp7 = (par ^ rp6) & 0xff;
	rp9 = (par ^ rp8) & 0xff;
	rp11 = (par ^ rp10) & 0xff;
	rp13 = (par ^ rp12) & 0xff;
	rp15 = (par ^ rp14) & 0xff;
	if (eccsize_mult == 2)
		rp17 = (par ^ rp16) & 0xff;

	/*
	 * Finally calculate the ECC bits.
	 * Again here it might seem that there are performance optimisations
	 * possible, but benchmarks showed that on the system this is developed
	 * the code below is the fastest
	 */
	if (sm_order) {
		code[0] = (invparity[rp7] << 7) | (invparity[rp6] << 6) |
			  (invparity[rp5] << 5) | (invparity[rp4] << 4) |
			  (invparity[rp3] << 3) | (invparity[rp2] << 2) |
			  (invparity[rp1] << 1) | (invparity[rp0]);
		code[1] = (invparity[rp15] << 7) | (invparity[rp14] << 6) |
			  (invparity[rp13] << 5) | (invparity[rp12] << 4) |
			  (invparity[rp11] << 3) | (invparity[rp10] << 2) |
			  (invparity[rp9] << 1) | (invparity[rp8]);
	} else {
		code[1] = (invparity[rp7] << 7) | (invparity[rp6] << 6) |
			  (invparity[rp5] << 5) | (invparity[rp4] << 4) |
			  (invparity[rp3] << 3) | (invparity[rp2] << 2) |
			  (invparity[rp1] << 1) | (invparity[rp0]);
		code[0] = (invparity[rp15] << 7) | (invparity[rp14] << 6) |
			  (invparity[rp13] << 5) | (invparity[rp12] << 4) |
			  (invparity[rp11] << 3) | (invparity[rp10] << 2) |
			  (invparity[rp9] << 1) | (invparity[rp8]);
	}

	/* for a 256-byte step the two lowest bits of code[2] are fixed at 1 */
	if (eccsize_mult == 1)
		code[2] =
		    (invparity[par & 0xf0] << 7) |
		    (invparity[par & 0x0f] << 6) |
		    (invparity[par & 0xcc] << 5) |
		    (invparity[par & 0x33] << 4) |
		    (invparity[par & 0xaa] << 3) |
		    (invparity[par & 0x55] << 2) |
		    3;
	else
		code[2] =
		    (invparity[par & 0xf0] << 7) |
		    (invparity[par & 0x0f] << 6) |
		    (invparity[par & 0xcc] << 5) |
		    (invparity[par & 0x33] << 4) |
		    (invparity[par & 0xaa] << 3) |
		    (invparity[par & 0x55] << 2) |
		    (invparity[rp17] << 1) |
		    (invparity[rp16] << 0);
}
EXPORT_SYMBOL(__nand_calculate_ecc);
361*4882a593Smuzhiyun
362*4882a593Smuzhiyun /**
363*4882a593Smuzhiyun * nand_calculate_ecc - [NAND Interface] Calculate 3-byte ECC for 256/512-byte
364*4882a593Smuzhiyun * block
365*4882a593Smuzhiyun * @chip: NAND chip object
366*4882a593Smuzhiyun * @buf: input buffer with raw data
367*4882a593Smuzhiyun * @code: output buffer with ECC
368*4882a593Smuzhiyun */
nand_calculate_ecc(struct nand_chip * chip,const unsigned char * buf,unsigned char * code)369*4882a593Smuzhiyun int nand_calculate_ecc(struct nand_chip *chip, const unsigned char *buf,
370*4882a593Smuzhiyun unsigned char *code)
371*4882a593Smuzhiyun {
372*4882a593Smuzhiyun bool sm_order = chip->ecc.options & NAND_ECC_SOFT_HAMMING_SM_ORDER;
373*4882a593Smuzhiyun
374*4882a593Smuzhiyun __nand_calculate_ecc(buf, chip->ecc.size, code, sm_order);
375*4882a593Smuzhiyun
376*4882a593Smuzhiyun return 0;
377*4882a593Smuzhiyun }
378*4882a593Smuzhiyun EXPORT_SYMBOL(nand_calculate_ecc);
379*4882a593Smuzhiyun
/**
 * __nand_correct_data - [NAND Interface] Detect and correct bit error(s)
 * @buf: raw data read from the chip
 * @read_ecc: ECC from the chip
 * @calc_ecc: the ECC calculated from raw data
 * @eccsize: data bytes per ECC step (256 or 512)
 * @sm_order: Smart Media byte order
 *
 * Detect and correct a 1 bit error for an eccsize byte block.
 *
 * Return: 0 if no error, 1 if a single-bit error was corrected (in @buf)
 * or was confined to the ECC bytes themselves, -EBADMSG if uncorrectable.
 */
int __nand_correct_data(unsigned char *buf,
			unsigned char *read_ecc, unsigned char *calc_ecc,
			unsigned int eccsize, bool sm_order)
{
	unsigned char b0, b1, b2, bit_addr;
	unsigned int byte_addr;
	/* 256 or 512 bytes/ecc: eccsize_mult is 1 or 2 */
	const uint32_t eccsize_mult = eccsize >> 8;

	/*
	 * b0 to b2 indicate which bit is faulty (if any)
	 * we might need the xor result more than once,
	 * so keep them in a local var
	 */
	if (sm_order) {
		b0 = read_ecc[0] ^ calc_ecc[0];
		b1 = read_ecc[1] ^ calc_ecc[1];
	} else {
		/* non-SM layout stores the two line-parity bytes swapped */
		b0 = read_ecc[1] ^ calc_ecc[1];
		b1 = read_ecc[0] ^ calc_ecc[0];
	}

	b2 = read_ecc[2] ^ calc_ecc[2];

	/* check if there are any bitfaults */

	/* repeated if statements are slightly more efficient than switch ... */
	/* ordered in order of likelihood */

	if ((b0 | b1 | b2) == 0)
		return 0;	/* no error */

	/*
	 * A single data-bit error flips exactly one bit of each
	 * parity/parity' pair, so each adjacent bit pair of the syndrome
	 * must be 01 or 10: (b ^ (b >> 1)) masked with 0x55 equals 0x55.
	 * For 256-byte steps the two lowest bits of b2 are always written
	 * as 1 by the generator, hence the 0x54 mask in that case.
	 */
	if ((((b0 ^ (b0 >> 1)) & 0x55) == 0x55) &&
	    (((b1 ^ (b1 >> 1)) & 0x55) == 0x55) &&
	    ((eccsize_mult == 1 && ((b2 ^ (b2 >> 1)) & 0x54) == 0x54) ||
	     (eccsize_mult == 2 && ((b2 ^ (b2 >> 1)) & 0x55) == 0x55))) {
		/* single bit error */
		/*
		 * rp17/rp15/13/11/9/7/5/3/1 indicate which byte is the faulty
		 * byte, cp 5/3/1 indicate the faulty bit.
		 * A lookup table (called addressbits) is used to filter
		 * the bits from the byte they are in.
		 * A marginal optimisation is possible by having three
		 * different lookup tables.
		 * One as we have now (for b0), one for b2
		 * (that would avoid the >> 1), and one for b1 (with all values
		 * << 4). However it was felt that introducing two more tables
		 * hardly justify the gain.
		 *
		 * The b2 shift is there to get rid of the lowest two bits.
		 * We could also do addressbits[b2] >> 1 but for the
		 * performance it does not make any difference
		 */
		if (eccsize_mult == 1)
			byte_addr = (addressbits[b1] << 4) + addressbits[b0];
		else
			byte_addr = (addressbits[b2 & 0x3] << 8) +
				    (addressbits[b1] << 4) + addressbits[b0];
		bit_addr = addressbits[b2 >> 2];
		/* flip the bit */
		buf[byte_addr] ^= (1 << bit_addr);
		return 1;

	}
	/* count nr of bits; use table lookup, faster than calculating it */
	if ((bitsperbyte[b0] + bitsperbyte[b1] + bitsperbyte[b2]) == 1)
		return 1;	/* error in ECC data; no action needed */

	pr_err("%s: uncorrectable ECC error\n", __func__);
	return -EBADMSG;
}
EXPORT_SYMBOL(__nand_correct_data);
462*4882a593Smuzhiyun
463*4882a593Smuzhiyun /**
464*4882a593Smuzhiyun * nand_correct_data - [NAND Interface] Detect and correct bit error(s)
465*4882a593Smuzhiyun * @chip: NAND chip object
466*4882a593Smuzhiyun * @buf: raw data read from the chip
467*4882a593Smuzhiyun * @read_ecc: ECC from the chip
468*4882a593Smuzhiyun * @calc_ecc: the ECC calculated from raw data
469*4882a593Smuzhiyun *
470*4882a593Smuzhiyun * Detect and correct a 1 bit error for 256/512 byte block
471*4882a593Smuzhiyun */
nand_correct_data(struct nand_chip * chip,unsigned char * buf,unsigned char * read_ecc,unsigned char * calc_ecc)472*4882a593Smuzhiyun int nand_correct_data(struct nand_chip *chip, unsigned char *buf,
473*4882a593Smuzhiyun unsigned char *read_ecc, unsigned char *calc_ecc)
474*4882a593Smuzhiyun {
475*4882a593Smuzhiyun bool sm_order = chip->ecc.options & NAND_ECC_SOFT_HAMMING_SM_ORDER;
476*4882a593Smuzhiyun
477*4882a593Smuzhiyun return __nand_correct_data(buf, read_ecc, calc_ecc, chip->ecc.size,
478*4882a593Smuzhiyun sm_order);
479*4882a593Smuzhiyun }
480*4882a593Smuzhiyun EXPORT_SYMBOL(nand_correct_data);
481*4882a593Smuzhiyun
/* Module metadata: license, authorship and a one-line description. */
MODULE_LICENSE("GPL");
MODULE_AUTHOR("Frans Meulenbroeks <fransmeulenbroeks@gmail.com>");
MODULE_DESCRIPTION("Generic NAND ECC support");
485