1*4882a593Smuzhiyun /*
2*4882a593Smuzhiyun * Copyright(c) 2015, 2016 Intel Corporation.
3*4882a593Smuzhiyun *
4*4882a593Smuzhiyun * This file is provided under a dual BSD/GPLv2 license. When using or
5*4882a593Smuzhiyun * redistributing this file, you may do so under either license.
6*4882a593Smuzhiyun *
7*4882a593Smuzhiyun * GPL LICENSE SUMMARY
8*4882a593Smuzhiyun *
9*4882a593Smuzhiyun * This program is free software; you can redistribute it and/or modify
10*4882a593Smuzhiyun * it under the terms of version 2 of the GNU General Public License as
11*4882a593Smuzhiyun * published by the Free Software Foundation.
12*4882a593Smuzhiyun *
13*4882a593Smuzhiyun * This program is distributed in the hope that it will be useful, but
14*4882a593Smuzhiyun * WITHOUT ANY WARRANTY; without even the implied warranty of
15*4882a593Smuzhiyun * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16*4882a593Smuzhiyun * General Public License for more details.
17*4882a593Smuzhiyun *
18*4882a593Smuzhiyun * BSD LICENSE
19*4882a593Smuzhiyun *
20*4882a593Smuzhiyun * Redistribution and use in source and binary forms, with or without
21*4882a593Smuzhiyun * modification, are permitted provided that the following conditions
22*4882a593Smuzhiyun * are met:
23*4882a593Smuzhiyun *
24*4882a593Smuzhiyun * - Redistributions of source code must retain the above copyright
25*4882a593Smuzhiyun * notice, this list of conditions and the following disclaimer.
26*4882a593Smuzhiyun * - Redistributions in binary form must reproduce the above copyright
27*4882a593Smuzhiyun * notice, this list of conditions and the following disclaimer in
28*4882a593Smuzhiyun * the documentation and/or other materials provided with the
29*4882a593Smuzhiyun * distribution.
30*4882a593Smuzhiyun * - Neither the name of Intel Corporation nor the names of its
31*4882a593Smuzhiyun * contributors may be used to endorse or promote products derived
32*4882a593Smuzhiyun * from this software without specific prior written permission.
33*4882a593Smuzhiyun *
34*4882a593Smuzhiyun * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
35*4882a593Smuzhiyun * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
36*4882a593Smuzhiyun * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
37*4882a593Smuzhiyun * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
38*4882a593Smuzhiyun * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
39*4882a593Smuzhiyun * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
40*4882a593Smuzhiyun * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
41*4882a593Smuzhiyun * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
42*4882a593Smuzhiyun * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
43*4882a593Smuzhiyun * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
44*4882a593Smuzhiyun * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
45*4882a593Smuzhiyun *
46*4882a593Smuzhiyun */
47*4882a593Smuzhiyun
48*4882a593Smuzhiyun #include "hfi.h"
49*4882a593Smuzhiyun
50*4882a593Smuzhiyun /* additive distance between non-SOP and SOP space */
51*4882a593Smuzhiyun #define SOP_DISTANCE (TXE_PIO_SIZE / 2)
52*4882a593Smuzhiyun #define PIO_BLOCK_MASK (PIO_BLOCK_SIZE - 1)
53*4882a593Smuzhiyun /* number of QUADWORDs in a block */
54*4882a593Smuzhiyun #define PIO_BLOCK_QWS (PIO_BLOCK_SIZE / sizeof(u64))
55*4882a593Smuzhiyun
/**
 * pio_copy - copy data block to MMIO space
 * @dd: hfi1 device data (unused here; kept for a uniform copy interface)
 * @pbuf: a number of blocks allocated within a PIO send context
 * @pbc: PBC to send
 * @from: source, must be 8 byte aligned
 * @count: number of DWORD (32-bit) quantities to copy from source
 *
 * Copy data from source to PIO Send Buffer memory, 8 bytes at a time.
 * Must always write full BLOCK_SIZE bytes blocks.  The first block must
 * be written to the corresponding SOP=1 address.
 *
 * Known:
 * o pbuf->start always starts on a block boundary
 * o pbuf can wrap only at a block boundary
 */
void pio_copy(struct hfi1_devdata *dd, struct pio_buf *pbuf, u64 pbc,
	      const void *from, size_t count)
{
	void __iomem *dest = pbuf->start + SOP_DISTANCE;
	void __iomem *send = dest + PIO_BLOCK_SIZE;
	void __iomem *dend;			/* 8-byte data end */

	/* write the PBC */
	writeq(pbc, dest);
	dest += sizeof(u64);

	/*
	 * calculate where the QWORD data ends - in SOP=1 space
	 * (count is in DWORDs, so count >> 1 is the whole-QWORD count;
	 * a possible odd DWORD is handled after the QWORD loops below)
	 */
	dend = dest + ((count >> 1) * sizeof(u64));

	if (dend < send) {
		/*
		 * all QWORD data is within the SOP block, does *not*
		 * reach the end of the SOP block
		 */

		while (dest < dend) {
			writeq(*(u64 *)from, dest);
			from += sizeof(u64);
			dest += sizeof(u64);
		}
		/*
		 * No boundary checks are needed here:
		 * 0. We're not on the SOP block boundary
		 * 1. The possible DWORD dangle will still be within
		 *    the SOP block
		 * 2. We cannot wrap except on a block boundary.
		 */
	} else {
		/* QWORD data extends _to_ or beyond the SOP block */

		/* write 8-byte SOP chunk data */
		while (dest < send) {
			writeq(*(u64 *)from, dest);
			from += sizeof(u64);
			dest += sizeof(u64);
		}
		/* drop out of the SOP range */
		dest -= SOP_DISTANCE;
		dend -= SOP_DISTANCE;

		/*
		 * If the wrap comes before or matches the data end,
		 * copy until the wrap, then wrap.
		 *
		 * If the data ends at the end of the SOP above and
		 * the buffer wraps, then pbuf->end == dend == dest
		 * and nothing will get written, but we will wrap in
		 * case there is a dangling DWORD.
		 */
		if (pbuf->end <= dend) {
			while (dest < pbuf->end) {
				writeq(*(u64 *)from, dest);
				from += sizeof(u64);
				dest += sizeof(u64);
			}

			dest -= pbuf->sc->size;
			dend -= pbuf->sc->size;
		}

		/* write 8-byte non-SOP, non-wrap chunk data */
		while (dest < dend) {
			writeq(*(u64 *)from, dest);
			from += sizeof(u64);
			dest += sizeof(u64);
		}
	}
	/* at this point we have wrapped if we are going to wrap */

	/* write dangling u32, if any - zero-padded up to a full QWORD */
	if (count & 1) {
		union mix val;

		val.val64 = 0;
		val.val32[0] = *(u32 *)from;
		writeq(val.val64, dest);
		dest += sizeof(u64);
	}
	/*
	 * fill in rest of block with zeros, no need to check pbuf->end
	 * as we only wrap on a block boundary
	 */
	while (((unsigned long)dest & PIO_BLOCK_MASK) != 0) {
		writeq(0, dest);
		dest += sizeof(u64);
	}

	/* finished with this buffer */
	this_cpu_dec(*pbuf->sc->buffers_allocated);
	preempt_enable();
}
167*4882a593Smuzhiyun
168*4882a593Smuzhiyun /*
169*4882a593Smuzhiyun * Handle carry bytes using shifts and masks.
170*4882a593Smuzhiyun *
 * NOTE: the unused portion of carry is expected to always be zero.
172*4882a593Smuzhiyun */
173*4882a593Smuzhiyun
174*4882a593Smuzhiyun /*
175*4882a593Smuzhiyun * "zero" shift - bit shift used to zero out upper bytes. Input is
176*4882a593Smuzhiyun * the count of LSB bytes to preserve.
177*4882a593Smuzhiyun */
178*4882a593Smuzhiyun #define zshift(x) (8 * (8 - (x)))
179*4882a593Smuzhiyun
180*4882a593Smuzhiyun /*
181*4882a593Smuzhiyun * "merge" shift - bit shift used to merge with carry bytes. Input is
182*4882a593Smuzhiyun * the LSB byte count to move beyond.
183*4882a593Smuzhiyun */
184*4882a593Smuzhiyun #define mshift(x) (8 * (x))
185*4882a593Smuzhiyun
186*4882a593Smuzhiyun /*
187*4882a593Smuzhiyun * Jump copy - no-loop copy for < 8 bytes.
188*4882a593Smuzhiyun */
jcopy(u8 * dest,const u8 * src,u32 n)189*4882a593Smuzhiyun static inline void jcopy(u8 *dest, const u8 *src, u32 n)
190*4882a593Smuzhiyun {
191*4882a593Smuzhiyun switch (n) {
192*4882a593Smuzhiyun case 7:
193*4882a593Smuzhiyun *dest++ = *src++;
194*4882a593Smuzhiyun fallthrough;
195*4882a593Smuzhiyun case 6:
196*4882a593Smuzhiyun *dest++ = *src++;
197*4882a593Smuzhiyun fallthrough;
198*4882a593Smuzhiyun case 5:
199*4882a593Smuzhiyun *dest++ = *src++;
200*4882a593Smuzhiyun fallthrough;
201*4882a593Smuzhiyun case 4:
202*4882a593Smuzhiyun *dest++ = *src++;
203*4882a593Smuzhiyun fallthrough;
204*4882a593Smuzhiyun case 3:
205*4882a593Smuzhiyun *dest++ = *src++;
206*4882a593Smuzhiyun fallthrough;
207*4882a593Smuzhiyun case 2:
208*4882a593Smuzhiyun *dest++ = *src++;
209*4882a593Smuzhiyun fallthrough;
210*4882a593Smuzhiyun case 1:
211*4882a593Smuzhiyun *dest++ = *src++;
212*4882a593Smuzhiyun }
213*4882a593Smuzhiyun }
214*4882a593Smuzhiyun
215*4882a593Smuzhiyun /*
216*4882a593Smuzhiyun * Read nbytes from "from" and and place them in the low bytes
217*4882a593Smuzhiyun * of pbuf->carry. Other bytes are left as-is. Any previous
218*4882a593Smuzhiyun * value in pbuf->carry is lost.
219*4882a593Smuzhiyun *
220*4882a593Smuzhiyun * NOTES:
221*4882a593Smuzhiyun * o do not read from from if nbytes is zero
222*4882a593Smuzhiyun * o from may _not_ be u64 aligned.
223*4882a593Smuzhiyun */
read_low_bytes(struct pio_buf * pbuf,const void * from,unsigned int nbytes)224*4882a593Smuzhiyun static inline void read_low_bytes(struct pio_buf *pbuf, const void *from,
225*4882a593Smuzhiyun unsigned int nbytes)
226*4882a593Smuzhiyun {
227*4882a593Smuzhiyun pbuf->carry.val64 = 0;
228*4882a593Smuzhiyun jcopy(&pbuf->carry.val8[0], from, nbytes);
229*4882a593Smuzhiyun pbuf->carry_bytes = nbytes;
230*4882a593Smuzhiyun }
231*4882a593Smuzhiyun
232*4882a593Smuzhiyun /*
233*4882a593Smuzhiyun * Read nbytes bytes from "from" and put them at the end of pbuf->carry.
234*4882a593Smuzhiyun * It is expected that the extra read does not overfill carry.
235*4882a593Smuzhiyun *
236*4882a593Smuzhiyun * NOTES:
237*4882a593Smuzhiyun * o from may _not_ be u64 aligned
238*4882a593Smuzhiyun * o nbytes may span a QW boundary
239*4882a593Smuzhiyun */
read_extra_bytes(struct pio_buf * pbuf,const void * from,unsigned int nbytes)240*4882a593Smuzhiyun static inline void read_extra_bytes(struct pio_buf *pbuf,
241*4882a593Smuzhiyun const void *from, unsigned int nbytes)
242*4882a593Smuzhiyun {
243*4882a593Smuzhiyun jcopy(&pbuf->carry.val8[pbuf->carry_bytes], from, nbytes);
244*4882a593Smuzhiyun pbuf->carry_bytes += nbytes;
245*4882a593Smuzhiyun }
246*4882a593Smuzhiyun
/*
 * Write a quad word using parts of pbuf->carry and the next 8 bytes of src.
 * Put the unused part of the next 8 bytes of src into the LSB bytes of
 * pbuf->carry with the upper bytes zeroed.
 *
 * NOTES:
 * o result must keep unused bytes zeroed
 * o src must be u64 aligned
 * o caller must ensure pbuf->carry_bytes is non-zero: zshift(0) is a
 *   shift by 64, which is undefined for a u64.  The only caller,
 *   mid_copy_mix(), is reached only when carry_bytes != 0 (see
 *   seg_pio_copy_mid()).
 */
static inline void merge_write8(
	struct pio_buf *pbuf,
	void __iomem *dest,
	const void *src)
{
	u64 new, temp;

	new = *(u64 *)src;
	/* low carry bytes plus shifted-up src bytes form the full QW */
	temp = pbuf->carry.val64 | (new << mshift(pbuf->carry_bytes));
	writeq(temp, dest);
	/* the src bytes not written out become the new carry */
	pbuf->carry.val64 = new >> zshift(pbuf->carry_bytes);
}
268*4882a593Smuzhiyun
269*4882a593Smuzhiyun /*
270*4882a593Smuzhiyun * Write a quad word using all bytes of carry.
271*4882a593Smuzhiyun */
carry8_write8(union mix carry,void __iomem * dest)272*4882a593Smuzhiyun static inline void carry8_write8(union mix carry, void __iomem *dest)
273*4882a593Smuzhiyun {
274*4882a593Smuzhiyun writeq(carry.val64, dest);
275*4882a593Smuzhiyun }
276*4882a593Smuzhiyun
277*4882a593Smuzhiyun /*
278*4882a593Smuzhiyun * Write a quad word using all the valid bytes of carry. If carry
279*4882a593Smuzhiyun * has zero valid bytes, nothing is written.
280*4882a593Smuzhiyun * Returns 0 on nothing written, non-zero on quad word written.
281*4882a593Smuzhiyun */
carry_write8(struct pio_buf * pbuf,void __iomem * dest)282*4882a593Smuzhiyun static inline int carry_write8(struct pio_buf *pbuf, void __iomem *dest)
283*4882a593Smuzhiyun {
284*4882a593Smuzhiyun if (pbuf->carry_bytes) {
285*4882a593Smuzhiyun /* unused bytes are always kept zeroed, so just write */
286*4882a593Smuzhiyun writeq(pbuf->carry.val64, dest);
287*4882a593Smuzhiyun return 1;
288*4882a593Smuzhiyun }
289*4882a593Smuzhiyun
290*4882a593Smuzhiyun return 0;
291*4882a593Smuzhiyun }
292*4882a593Smuzhiyun
/*
 * Segmented PIO Copy - start
 *
 * Start a PIO copy: write the PBC and all whole QWORDs of the first
 * segment; any trailing partial QWORD is saved in pbuf->carry for the
 * following seg_pio_copy_mid()/seg_pio_copy_end() calls.
 *
 * @pbuf: destination buffer
 * @pbc: the PBC for the PIO buffer
 * @from: data source, QWORD aligned
 * @nbytes: bytes to copy
 */
void seg_pio_copy_start(struct pio_buf *pbuf, u64 pbc,
			const void *from, size_t nbytes)
{
	void __iomem *dest = pbuf->start + SOP_DISTANCE;
	void __iomem *send = dest + PIO_BLOCK_SIZE;
	void __iomem *dend;			/* 8-byte data end */

	/* write the PBC */
	writeq(pbc, dest);
	dest += sizeof(u64);

	/* calculate where the QWORD data ends - in SOP=1 space */
	dend = dest + ((nbytes >> 3) * sizeof(u64));

	if (dend < send) {
		/*
		 * all QWORD data is within the SOP block, does *not*
		 * reach the end of the SOP block
		 */

		while (dest < dend) {
			writeq(*(u64 *)from, dest);
			from += sizeof(u64);
			dest += sizeof(u64);
		}
		/*
		 * No boundary checks are needed here:
		 * 0. We're not on the SOP block boundary
		 * 1. The possible DWORD dangle will still be within
		 *    the SOP block
		 * 2. We cannot wrap except on a block boundary.
		 */
	} else {
		/* QWORD data extends _to_ or beyond the SOP block */

		/* write 8-byte SOP chunk data */
		while (dest < send) {
			writeq(*(u64 *)from, dest);
			from += sizeof(u64);
			dest += sizeof(u64);
		}
		/* drop out of the SOP range */
		dest -= SOP_DISTANCE;
		dend -= SOP_DISTANCE;

		/*
		 * If the wrap comes before or matches the data end,
		 * copy until the wrap, then wrap.
		 *
		 * If the data ends at the end of the SOP above and
		 * the buffer wraps, then pbuf->end == dend == dest
		 * and nothing will get written, but we will wrap in
		 * case there is a dangling DWORD.
		 */
		if (pbuf->end <= dend) {
			while (dest < pbuf->end) {
				writeq(*(u64 *)from, dest);
				from += sizeof(u64);
				dest += sizeof(u64);
			}

			dest -= pbuf->sc->size;
			dend -= pbuf->sc->size;
		}

		/* write 8-byte non-SOP, non-wrap chunk data */
		while (dest < dend) {
			writeq(*(u64 *)from, dest);
			from += sizeof(u64);
			dest += sizeof(u64);
		}
	}
	/* at this point we have wrapped if we are going to wrap */

	/* ...but it doesn't matter as we're done writing */

	/* save dangling bytes, if any, in carry for a later call */
	read_low_bytes(pbuf, from, nbytes & 0x7);

	/* track QWs written so later calls know where to resume */
	pbuf->qw_written = 1 /*PBC*/ + (nbytes >> 3);
}
383*4882a593Smuzhiyun
/*
 * Mid copy helper, "mixed case" - source is 64-bit aligned but carry
 * bytes are non-zero.
 *
 * Whole u64s must be written to the chip, so bytes must be manually merged.
 *
 * @pbuf: destination buffer
 * @from: data source, is QWORD aligned.
 * @nbytes: bytes to copy
 *
 * Must handle nbytes < 8.
 */
static void mid_copy_mix(struct pio_buf *pbuf, const void *from, size_t nbytes)
{
	void __iomem *dest = pbuf->start + (pbuf->qw_written * sizeof(u64));
	void __iomem *dend;			/* 8-byte data end */
	unsigned long qw_to_write = nbytes >> 3;
	unsigned long bytes_left = nbytes & 0x7;

	/* calculate 8-byte data end */
	dend = dest + (qw_to_write * sizeof(u64));

	if (pbuf->qw_written < PIO_BLOCK_QWS) {
		/*
		 * Still within SOP block. We don't need to check for
		 * wrap because we are still in the first block and
		 * can only wrap on block boundaries.
		 */
		void __iomem *send;		/* SOP end */
		void __iomem *xend;

		/*
		 * calculate the end of data or end of block, whichever
		 * comes first
		 */
		send = pbuf->start + PIO_BLOCK_SIZE;
		xend = min(send, dend);

		/* shift up to SOP=1 space */
		dest += SOP_DISTANCE;
		xend += SOP_DISTANCE;

		/* write 8-byte chunk data, merging in carry bytes */
		while (dest < xend) {
			merge_write8(pbuf, dest, from);
			from += sizeof(u64);
			dest += sizeof(u64);
		}

		/* shift down to SOP=0 space */
		dest -= SOP_DISTANCE;
	}
	/*
	 * At this point dest could be (either, both, or neither):
	 * - at dend
	 * - at the wrap
	 */

	/*
	 * If the wrap comes before or matches the data end,
	 * copy until the wrap, then wrap.
	 *
	 * If dest is at the wrap, we will fall into the if,
	 * not do the loop, when wrap.
	 *
	 * If the data ends at the end of the SOP above and
	 * the buffer wraps, then pbuf->end == dend == dest
	 * and nothing will get written.
	 */
	if (pbuf->end <= dend) {
		while (dest < pbuf->end) {
			merge_write8(pbuf, dest, from);
			from += sizeof(u64);
			dest += sizeof(u64);
		}

		dest -= pbuf->sc->size;
		dend -= pbuf->sc->size;
	}

	/* write 8-byte non-SOP, non-wrap chunk data */
	while (dest < dend) {
		merge_write8(pbuf, dest, from);
		from += sizeof(u64);
		dest += sizeof(u64);
	}

	pbuf->qw_written += qw_to_write;

	/* handle carry and left-over bytes */
	if (pbuf->carry_bytes + bytes_left >= 8) {
		unsigned long nread;

		/* there is enough to fill another qw - fill carry */
		nread = 8 - pbuf->carry_bytes;
		read_extra_bytes(pbuf, from, nread);

		/*
		 * One more write - but need to make sure dest is correct.
		 * Check for wrap and the possibility the write
		 * should be in SOP space.
		 *
		 * The two checks immediately below cannot both be true, hence
		 * the else. If we have wrapped, we cannot still be within the
		 * first block. Conversely, if we are still in the first block,
		 * we cannot have wrapped. We do the wrap check first as that
		 * is more likely.
		 */
		/* adjust if we have wrapped */
		if (dest >= pbuf->end)
			dest -= pbuf->sc->size;
		/* jump to the SOP range if within the first block */
		else if (pbuf->qw_written < PIO_BLOCK_QWS)
			dest += SOP_DISTANCE;

		/* flush out full carry */
		carry8_write8(pbuf->carry, dest);
		pbuf->qw_written++;

		/* now adjust and read the rest of the bytes into carry */
		bytes_left -= nread;
		from += nread; /* from is now not aligned */
		read_low_bytes(pbuf, from, bytes_left);
	} else {
		/* not enough to fill another qw, append the rest to carry */
		read_extra_bytes(pbuf, from, bytes_left);
	}
}
512*4882a593Smuzhiyun
/*
 * Mid copy helper, "straight case" - source pointer is 64-bit aligned
 * with no carry bytes.
 *
 * @pbuf: destination buffer
 * @from: data source, is QWORD aligned
 * @nbytes: bytes to copy
 *
 * Must handle nbytes < 8.
 */
static void mid_copy_straight(struct pio_buf *pbuf,
			      const void *from, size_t nbytes)
{
	void __iomem *dest = pbuf->start + (pbuf->qw_written * sizeof(u64));
	void __iomem *dend;			/* 8-byte data end */

	/* calculate 8-byte data end */
	dend = dest + ((nbytes >> 3) * sizeof(u64));

	if (pbuf->qw_written < PIO_BLOCK_QWS) {
		/*
		 * Still within SOP block. We don't need to check for
		 * wrap because we are still in the first block and
		 * can only wrap on block boundaries.
		 */
		void __iomem *send;		/* SOP end */
		void __iomem *xend;

		/*
		 * calculate the end of data or end of block, whichever
		 * comes first
		 */
		send = pbuf->start + PIO_BLOCK_SIZE;
		xend = min(send, dend);

		/* shift up to SOP=1 space */
		dest += SOP_DISTANCE;
		xend += SOP_DISTANCE;

		/* write 8-byte chunk data */
		while (dest < xend) {
			writeq(*(u64 *)from, dest);
			from += sizeof(u64);
			dest += sizeof(u64);
		}

		/* shift down to SOP=0 space */
		dest -= SOP_DISTANCE;
	}
	/*
	 * At this point dest could be (either, both, or neither):
	 * - at dend
	 * - at the wrap
	 */

	/*
	 * If the wrap comes before or matches the data end,
	 * copy until the wrap, then wrap.
	 *
	 * If dest is at the wrap, we will fall into the if,
	 * not do the loop, when wrap.
	 *
	 * If the data ends at the end of the SOP above and
	 * the buffer wraps, then pbuf->end == dend == dest
	 * and nothing will get written.
	 */
	if (pbuf->end <= dend) {
		while (dest < pbuf->end) {
			writeq(*(u64 *)from, dest);
			from += sizeof(u64);
			dest += sizeof(u64);
		}

		dest -= pbuf->sc->size;
		dend -= pbuf->sc->size;
	}

	/* write 8-byte non-SOP, non-wrap chunk data */
	while (dest < dend) {
		writeq(*(u64 *)from, dest);
		from += sizeof(u64);
		dest += sizeof(u64);
	}

	/* we know carry_bytes was zero on entry to this routine */
	read_low_bytes(pbuf, from, nbytes & 0x7);

	pbuf->qw_written += nbytes >> 3;
}
602*4882a593Smuzhiyun
/*
 * Segmented PIO Copy - middle
 *
 * Must handle any aligned tail and any aligned source with any byte count.
 * Accumulates bytes in pbuf->carry until a full QWORD can be written to
 * the chip, aligning "from" as needed before dispatching to the mixed or
 * straight copy helpers.
 *
 * @pbuf: a number of blocks allocated within a PIO send context
 * @from: data source
 * @nbytes: number of bytes to copy
 */
void seg_pio_copy_mid(struct pio_buf *pbuf, const void *from, size_t nbytes)
{
	unsigned long from_align = (unsigned long)from & 0x7;

	if (pbuf->carry_bytes + nbytes < 8) {
		/* not enough bytes to fill a QW */
		read_extra_bytes(pbuf, from, nbytes);
		return;
	}

	if (from_align) {
		/* misaligned source pointer - align it */
		unsigned long to_align;

		/* bytes to read to align "from" */
		to_align = 8 - from_align;

		/*
		 * In the advance-to-alignment logic below, we do not need
		 * to check if we are using more than nbytes. This is because
		 * if we are here, we already know that carry+nbytes will
		 * fill at least one QW.
		 */
		if (pbuf->carry_bytes + to_align < 8) {
			/* not enough align bytes to fill a QW */
			read_extra_bytes(pbuf, from, to_align);
			from += to_align;
			nbytes -= to_align;
		} else {
			/* bytes to fill carry */
			unsigned long to_fill = 8 - pbuf->carry_bytes;
			/* bytes left over to be read */
			unsigned long extra = to_align - to_fill;
			void __iomem *dest;

			/* fill carry... */
			read_extra_bytes(pbuf, from, to_fill);
			from += to_fill;
			nbytes -= to_fill;
			/* may not be enough valid bytes left to align */
			if (extra > nbytes)
				extra = nbytes;

			/* ...now write carry */
			dest = pbuf->start + (pbuf->qw_written * sizeof(u64));

			/*
			 * The two checks immediately below cannot both be
			 * true, hence the else. If we have wrapped, we
			 * cannot still be within the first block.
			 * Conversely, if we are still in the first block, we
			 * cannot have wrapped. We do the wrap check first
			 * as that is more likely.
			 */
			/* adjust if we've wrapped */
			if (dest >= pbuf->end)
				dest -= pbuf->sc->size;
			/* jump to SOP range if within the first block */
			else if (pbuf->qw_written < PIO_BLOCK_QWS)
				dest += SOP_DISTANCE;

			carry8_write8(pbuf->carry, dest);
			pbuf->qw_written++;

			/* read any extra bytes to do final alignment */
			/* this will overwrite anything in pbuf->carry */
			read_low_bytes(pbuf, from, extra);
			from += extra;
			nbytes -= extra;
			/*
			 * If no bytes are left, return early - we are done.
			 * NOTE: This short-circuit is *required* because
			 * "extra" may have been reduced in size and "from"
			 * is not aligned, as required when leaving this
			 * if block.
			 */
			if (nbytes == 0)
				return;
		}

		/* at this point, from is QW aligned */
	}

	if (pbuf->carry_bytes)
		mid_copy_mix(pbuf, from, nbytes);
	else
		mid_copy_straight(pbuf, from, nbytes);
}
700*4882a593Smuzhiyun
701*4882a593Smuzhiyun /*
702*4882a593Smuzhiyun * Segmented PIO Copy - end
703*4882a593Smuzhiyun *
704*4882a593Smuzhiyun * Write any remainder (in pbuf->carry) and finish writing the whole block.
705*4882a593Smuzhiyun *
706*4882a593Smuzhiyun * @pbuf: a number of blocks allocated within a PIO send context
707*4882a593Smuzhiyun */
seg_pio_copy_end(struct pio_buf * pbuf)708*4882a593Smuzhiyun void seg_pio_copy_end(struct pio_buf *pbuf)
709*4882a593Smuzhiyun {
710*4882a593Smuzhiyun void __iomem *dest = pbuf->start + (pbuf->qw_written * sizeof(u64));
711*4882a593Smuzhiyun
712*4882a593Smuzhiyun /*
713*4882a593Smuzhiyun * The two checks immediately below cannot both be true, hence the
714*4882a593Smuzhiyun * else. If we have wrapped, we cannot still be within the first
715*4882a593Smuzhiyun * block. Conversely, if we are still in the first block, we
716*4882a593Smuzhiyun * cannot have wrapped. We do the wrap check first as that is
717*4882a593Smuzhiyun * more likely.
718*4882a593Smuzhiyun */
719*4882a593Smuzhiyun /* adjust if we have wrapped */
720*4882a593Smuzhiyun if (dest >= pbuf->end)
721*4882a593Smuzhiyun dest -= pbuf->sc->size;
722*4882a593Smuzhiyun /* jump to the SOP range if within the first block */
723*4882a593Smuzhiyun else if (pbuf->qw_written < PIO_BLOCK_QWS)
724*4882a593Smuzhiyun dest += SOP_DISTANCE;
725*4882a593Smuzhiyun
726*4882a593Smuzhiyun /* write final bytes, if any */
727*4882a593Smuzhiyun if (carry_write8(pbuf, dest)) {
728*4882a593Smuzhiyun dest += sizeof(u64);
729*4882a593Smuzhiyun /*
730*4882a593Smuzhiyun * NOTE: We do not need to recalculate whether dest needs
731*4882a593Smuzhiyun * SOP_DISTANCE or not.
732*4882a593Smuzhiyun *
733*4882a593Smuzhiyun * If we are in the first block and the dangle write
734*4882a593Smuzhiyun * keeps us in the same block, dest will need
735*4882a593Smuzhiyun * to retain SOP_DISTANCE in the loop below.
736*4882a593Smuzhiyun *
737*4882a593Smuzhiyun * If we are in the first block and the dangle write pushes
738*4882a593Smuzhiyun * us to the next block, then loop below will not run
739*4882a593Smuzhiyun * and dest is not used. Hence we do not need to update
740*4882a593Smuzhiyun * it.
741*4882a593Smuzhiyun *
742*4882a593Smuzhiyun * If we are past the first block, then SOP_DISTANCE
743*4882a593Smuzhiyun * was never added, so there is nothing to do.
744*4882a593Smuzhiyun */
745*4882a593Smuzhiyun }
746*4882a593Smuzhiyun
747*4882a593Smuzhiyun /* fill in rest of block */
748*4882a593Smuzhiyun while (((unsigned long)dest & PIO_BLOCK_MASK) != 0) {
749*4882a593Smuzhiyun writeq(0, dest);
750*4882a593Smuzhiyun dest += sizeof(u64);
751*4882a593Smuzhiyun }
752*4882a593Smuzhiyun
753*4882a593Smuzhiyun /* finished with this buffer */
754*4882a593Smuzhiyun this_cpu_dec(*pbuf->sc->buffers_allocated);
755*4882a593Smuzhiyun preempt_enable();
756*4882a593Smuzhiyun }
757