xref: /OK3568_Linux_fs/kernel/drivers/infiniband/hw/hfi1/pio_copy.c (revision 4882a59341e53eb6f0b4789bf948001014eff981)
1*4882a593Smuzhiyun /*
2*4882a593Smuzhiyun  * Copyright(c) 2015, 2016 Intel Corporation.
3*4882a593Smuzhiyun  *
4*4882a593Smuzhiyun  * This file is provided under a dual BSD/GPLv2 license.  When using or
5*4882a593Smuzhiyun  * redistributing this file, you may do so under either license.
6*4882a593Smuzhiyun  *
7*4882a593Smuzhiyun  * GPL LICENSE SUMMARY
8*4882a593Smuzhiyun  *
9*4882a593Smuzhiyun  * This program is free software; you can redistribute it and/or modify
10*4882a593Smuzhiyun  * it under the terms of version 2 of the GNU General Public License as
11*4882a593Smuzhiyun  * published by the Free Software Foundation.
12*4882a593Smuzhiyun  *
13*4882a593Smuzhiyun  * This program is distributed in the hope that it will be useful, but
14*4882a593Smuzhiyun  * WITHOUT ANY WARRANTY; without even the implied warranty of
15*4882a593Smuzhiyun  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
16*4882a593Smuzhiyun  * General Public License for more details.
17*4882a593Smuzhiyun  *
18*4882a593Smuzhiyun  * BSD LICENSE
19*4882a593Smuzhiyun  *
20*4882a593Smuzhiyun  * Redistribution and use in source and binary forms, with or without
21*4882a593Smuzhiyun  * modification, are permitted provided that the following conditions
22*4882a593Smuzhiyun  * are met:
23*4882a593Smuzhiyun  *
24*4882a593Smuzhiyun  *  - Redistributions of source code must retain the above copyright
25*4882a593Smuzhiyun  *    notice, this list of conditions and the following disclaimer.
26*4882a593Smuzhiyun  *  - Redistributions in binary form must reproduce the above copyright
27*4882a593Smuzhiyun  *    notice, this list of conditions and the following disclaimer in
28*4882a593Smuzhiyun  *    the documentation and/or other materials provided with the
29*4882a593Smuzhiyun  *    distribution.
30*4882a593Smuzhiyun  *  - Neither the name of Intel Corporation nor the names of its
31*4882a593Smuzhiyun  *    contributors may be used to endorse or promote products derived
32*4882a593Smuzhiyun  *    from this software without specific prior written permission.
33*4882a593Smuzhiyun  *
34*4882a593Smuzhiyun  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
35*4882a593Smuzhiyun  * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
36*4882a593Smuzhiyun  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
37*4882a593Smuzhiyun  * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
38*4882a593Smuzhiyun  * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
39*4882a593Smuzhiyun  * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
40*4882a593Smuzhiyun  * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
41*4882a593Smuzhiyun  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
42*4882a593Smuzhiyun  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
43*4882a593Smuzhiyun  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
44*4882a593Smuzhiyun  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
45*4882a593Smuzhiyun  *
46*4882a593Smuzhiyun  */
47*4882a593Smuzhiyun 
48*4882a593Smuzhiyun #include "hfi.h"
49*4882a593Smuzhiyun 
/* byte offset added to a non-SOP address to reach the matching SOP=1 address */
#define SOP_DISTANCE (TXE_PIO_SIZE / 2)
/*
 * mask of the address bits below one PIO block; a zero result means the
 * address sits on a block boundary (assumes PIO_BLOCK_SIZE is a power of
 * two - TODO confirm against its definition)
 */
#define PIO_BLOCK_MASK (PIO_BLOCK_SIZE - 1)
/* number of u64 quadwords that make up one PIO block */
#define PIO_BLOCK_QWS (PIO_BLOCK_SIZE / sizeof(u64))
55*4882a593Smuzhiyun 
56*4882a593Smuzhiyun /**
57*4882a593Smuzhiyun  * pio_copy - copy data block to MMIO space
58*4882a593Smuzhiyun  * @pbuf: a number of blocks allocated within a PIO send context
59*4882a593Smuzhiyun  * @pbc: PBC to send
60*4882a593Smuzhiyun  * @from: source, must be 8 byte aligned
61*4882a593Smuzhiyun  * @count: number of DWORD (32-bit) quantities to copy from source
62*4882a593Smuzhiyun  *
63*4882a593Smuzhiyun  * Copy data from source to PIO Send Buffer memory, 8 bytes at a time.
64*4882a593Smuzhiyun  * Must always write full BLOCK_SIZE bytes blocks.  The first block must
65*4882a593Smuzhiyun  * be written to the corresponding SOP=1 address.
66*4882a593Smuzhiyun  *
67*4882a593Smuzhiyun  * Known:
68*4882a593Smuzhiyun  * o pbuf->start always starts on a block boundary
69*4882a593Smuzhiyun  * o pbuf can wrap only at a block boundary
70*4882a593Smuzhiyun  */
pio_copy(struct hfi1_devdata * dd,struct pio_buf * pbuf,u64 pbc,const void * from,size_t count)71*4882a593Smuzhiyun void pio_copy(struct hfi1_devdata *dd, struct pio_buf *pbuf, u64 pbc,
72*4882a593Smuzhiyun 	      const void *from, size_t count)
73*4882a593Smuzhiyun {
74*4882a593Smuzhiyun 	void __iomem *dest = pbuf->start + SOP_DISTANCE;
75*4882a593Smuzhiyun 	void __iomem *send = dest + PIO_BLOCK_SIZE;
76*4882a593Smuzhiyun 	void __iomem *dend;			/* 8-byte data end */
77*4882a593Smuzhiyun 
78*4882a593Smuzhiyun 	/* write the PBC */
79*4882a593Smuzhiyun 	writeq(pbc, dest);
80*4882a593Smuzhiyun 	dest += sizeof(u64);
81*4882a593Smuzhiyun 
82*4882a593Smuzhiyun 	/* calculate where the QWORD data ends - in SOP=1 space */
83*4882a593Smuzhiyun 	dend = dest + ((count >> 1) * sizeof(u64));
84*4882a593Smuzhiyun 
85*4882a593Smuzhiyun 	if (dend < send) {
86*4882a593Smuzhiyun 		/*
87*4882a593Smuzhiyun 		 * all QWORD data is within the SOP block, does *not*
88*4882a593Smuzhiyun 		 * reach the end of the SOP block
89*4882a593Smuzhiyun 		 */
90*4882a593Smuzhiyun 
91*4882a593Smuzhiyun 		while (dest < dend) {
92*4882a593Smuzhiyun 			writeq(*(u64 *)from, dest);
93*4882a593Smuzhiyun 			from += sizeof(u64);
94*4882a593Smuzhiyun 			dest += sizeof(u64);
95*4882a593Smuzhiyun 		}
96*4882a593Smuzhiyun 		/*
97*4882a593Smuzhiyun 		 * No boundary checks are needed here:
98*4882a593Smuzhiyun 		 * 0. We're not on the SOP block boundary
99*4882a593Smuzhiyun 		 * 1. The possible DWORD dangle will still be within
100*4882a593Smuzhiyun 		 *    the SOP block
101*4882a593Smuzhiyun 		 * 2. We cannot wrap except on a block boundary.
102*4882a593Smuzhiyun 		 */
103*4882a593Smuzhiyun 	} else {
104*4882a593Smuzhiyun 		/* QWORD data extends _to_ or beyond the SOP block */
105*4882a593Smuzhiyun 
106*4882a593Smuzhiyun 		/* write 8-byte SOP chunk data */
107*4882a593Smuzhiyun 		while (dest < send) {
108*4882a593Smuzhiyun 			writeq(*(u64 *)from, dest);
109*4882a593Smuzhiyun 			from += sizeof(u64);
110*4882a593Smuzhiyun 			dest += sizeof(u64);
111*4882a593Smuzhiyun 		}
112*4882a593Smuzhiyun 		/* drop out of the SOP range */
113*4882a593Smuzhiyun 		dest -= SOP_DISTANCE;
114*4882a593Smuzhiyun 		dend -= SOP_DISTANCE;
115*4882a593Smuzhiyun 
116*4882a593Smuzhiyun 		/*
117*4882a593Smuzhiyun 		 * If the wrap comes before or matches the data end,
118*4882a593Smuzhiyun 		 * copy until until the wrap, then wrap.
119*4882a593Smuzhiyun 		 *
120*4882a593Smuzhiyun 		 * If the data ends at the end of the SOP above and
121*4882a593Smuzhiyun 		 * the buffer wraps, then pbuf->end == dend == dest
122*4882a593Smuzhiyun 		 * and nothing will get written, but we will wrap in
123*4882a593Smuzhiyun 		 * case there is a dangling DWORD.
124*4882a593Smuzhiyun 		 */
125*4882a593Smuzhiyun 		if (pbuf->end <= dend) {
126*4882a593Smuzhiyun 			while (dest < pbuf->end) {
127*4882a593Smuzhiyun 				writeq(*(u64 *)from, dest);
128*4882a593Smuzhiyun 				from += sizeof(u64);
129*4882a593Smuzhiyun 				dest += sizeof(u64);
130*4882a593Smuzhiyun 			}
131*4882a593Smuzhiyun 
132*4882a593Smuzhiyun 			dest -= pbuf->sc->size;
133*4882a593Smuzhiyun 			dend -= pbuf->sc->size;
134*4882a593Smuzhiyun 		}
135*4882a593Smuzhiyun 
136*4882a593Smuzhiyun 		/* write 8-byte non-SOP, non-wrap chunk data */
137*4882a593Smuzhiyun 		while (dest < dend) {
138*4882a593Smuzhiyun 			writeq(*(u64 *)from, dest);
139*4882a593Smuzhiyun 			from += sizeof(u64);
140*4882a593Smuzhiyun 			dest += sizeof(u64);
141*4882a593Smuzhiyun 		}
142*4882a593Smuzhiyun 	}
143*4882a593Smuzhiyun 	/* at this point we have wrapped if we are going to wrap */
144*4882a593Smuzhiyun 
145*4882a593Smuzhiyun 	/* write dangling u32, if any */
146*4882a593Smuzhiyun 	if (count & 1) {
147*4882a593Smuzhiyun 		union mix val;
148*4882a593Smuzhiyun 
149*4882a593Smuzhiyun 		val.val64 = 0;
150*4882a593Smuzhiyun 		val.val32[0] = *(u32 *)from;
151*4882a593Smuzhiyun 		writeq(val.val64, dest);
152*4882a593Smuzhiyun 		dest += sizeof(u64);
153*4882a593Smuzhiyun 	}
154*4882a593Smuzhiyun 	/*
155*4882a593Smuzhiyun 	 * fill in rest of block, no need to check pbuf->end
156*4882a593Smuzhiyun 	 * as we only wrap on a block boundary
157*4882a593Smuzhiyun 	 */
158*4882a593Smuzhiyun 	while (((unsigned long)dest & PIO_BLOCK_MASK) != 0) {
159*4882a593Smuzhiyun 		writeq(0, dest);
160*4882a593Smuzhiyun 		dest += sizeof(u64);
161*4882a593Smuzhiyun 	}
162*4882a593Smuzhiyun 
163*4882a593Smuzhiyun 	/* finished with this buffer */
164*4882a593Smuzhiyun 	this_cpu_dec(*pbuf->sc->buffers_allocated);
165*4882a593Smuzhiyun 	preempt_enable();
166*4882a593Smuzhiyun }
167*4882a593Smuzhiyun 
/*
 * Handle carry bytes using shifts and masks.
 *
 * NOTE: the value of the unused portion of carry is expected to always be
 * zero.
 */

/*
 * "zero" shift - bit count to shift by so that the upper bytes are
 * zeroed out.  Input is the number of LSB bytes to preserve.
 */
#define zshift(x) ((8 - (x)) * 8)

/*
 * "merge" shift - bit count to shift by when merging with carry bytes.
 * Input is the number of LSB bytes to move beyond.
 */
#define mshift(x) ((x) * 8)
185*4882a593Smuzhiyun 
186*4882a593Smuzhiyun /*
187*4882a593Smuzhiyun  * Jump copy - no-loop copy for < 8 bytes.
188*4882a593Smuzhiyun  */
jcopy(u8 * dest,const u8 * src,u32 n)189*4882a593Smuzhiyun static inline void jcopy(u8 *dest, const u8 *src, u32 n)
190*4882a593Smuzhiyun {
191*4882a593Smuzhiyun 	switch (n) {
192*4882a593Smuzhiyun 	case 7:
193*4882a593Smuzhiyun 		*dest++ = *src++;
194*4882a593Smuzhiyun 		fallthrough;
195*4882a593Smuzhiyun 	case 6:
196*4882a593Smuzhiyun 		*dest++ = *src++;
197*4882a593Smuzhiyun 		fallthrough;
198*4882a593Smuzhiyun 	case 5:
199*4882a593Smuzhiyun 		*dest++ = *src++;
200*4882a593Smuzhiyun 		fallthrough;
201*4882a593Smuzhiyun 	case 4:
202*4882a593Smuzhiyun 		*dest++ = *src++;
203*4882a593Smuzhiyun 		fallthrough;
204*4882a593Smuzhiyun 	case 3:
205*4882a593Smuzhiyun 		*dest++ = *src++;
206*4882a593Smuzhiyun 		fallthrough;
207*4882a593Smuzhiyun 	case 2:
208*4882a593Smuzhiyun 		*dest++ = *src++;
209*4882a593Smuzhiyun 		fallthrough;
210*4882a593Smuzhiyun 	case 1:
211*4882a593Smuzhiyun 		*dest++ = *src++;
212*4882a593Smuzhiyun 	}
213*4882a593Smuzhiyun }
214*4882a593Smuzhiyun 
215*4882a593Smuzhiyun /*
216*4882a593Smuzhiyun  * Read nbytes from "from" and and place them in the low bytes
217*4882a593Smuzhiyun  * of pbuf->carry.  Other bytes are left as-is.  Any previous
218*4882a593Smuzhiyun  * value in pbuf->carry is lost.
219*4882a593Smuzhiyun  *
220*4882a593Smuzhiyun  * NOTES:
221*4882a593Smuzhiyun  * o do not read from from if nbytes is zero
222*4882a593Smuzhiyun  * o from may _not_ be u64 aligned.
223*4882a593Smuzhiyun  */
read_low_bytes(struct pio_buf * pbuf,const void * from,unsigned int nbytes)224*4882a593Smuzhiyun static inline void read_low_bytes(struct pio_buf *pbuf, const void *from,
225*4882a593Smuzhiyun 				  unsigned int nbytes)
226*4882a593Smuzhiyun {
227*4882a593Smuzhiyun 	pbuf->carry.val64 = 0;
228*4882a593Smuzhiyun 	jcopy(&pbuf->carry.val8[0], from, nbytes);
229*4882a593Smuzhiyun 	pbuf->carry_bytes = nbytes;
230*4882a593Smuzhiyun }
231*4882a593Smuzhiyun 
232*4882a593Smuzhiyun /*
233*4882a593Smuzhiyun  * Read nbytes bytes from "from" and put them at the end of pbuf->carry.
234*4882a593Smuzhiyun  * It is expected that the extra read does not overfill carry.
235*4882a593Smuzhiyun  *
236*4882a593Smuzhiyun  * NOTES:
237*4882a593Smuzhiyun  * o from may _not_ be u64 aligned
238*4882a593Smuzhiyun  * o nbytes may span a QW boundary
239*4882a593Smuzhiyun  */
read_extra_bytes(struct pio_buf * pbuf,const void * from,unsigned int nbytes)240*4882a593Smuzhiyun static inline void read_extra_bytes(struct pio_buf *pbuf,
241*4882a593Smuzhiyun 				    const void *from, unsigned int nbytes)
242*4882a593Smuzhiyun {
243*4882a593Smuzhiyun 	jcopy(&pbuf->carry.val8[pbuf->carry_bytes], from, nbytes);
244*4882a593Smuzhiyun 	pbuf->carry_bytes += nbytes;
245*4882a593Smuzhiyun }
246*4882a593Smuzhiyun 
247*4882a593Smuzhiyun /*
248*4882a593Smuzhiyun  * Write a quad word using parts of pbuf->carry and the next 8 bytes of src.
249*4882a593Smuzhiyun  * Put the unused part of the next 8 bytes of src into the LSB bytes of
250*4882a593Smuzhiyun  * pbuf->carry with the upper bytes zeroed..
251*4882a593Smuzhiyun  *
252*4882a593Smuzhiyun  * NOTES:
253*4882a593Smuzhiyun  * o result must keep unused bytes zeroed
254*4882a593Smuzhiyun  * o src must be u64 aligned
255*4882a593Smuzhiyun  */
merge_write8(struct pio_buf * pbuf,void __iomem * dest,const void * src)256*4882a593Smuzhiyun static inline void merge_write8(
257*4882a593Smuzhiyun 	struct pio_buf *pbuf,
258*4882a593Smuzhiyun 	void __iomem *dest,
259*4882a593Smuzhiyun 	const void *src)
260*4882a593Smuzhiyun {
261*4882a593Smuzhiyun 	u64 new, temp;
262*4882a593Smuzhiyun 
263*4882a593Smuzhiyun 	new = *(u64 *)src;
264*4882a593Smuzhiyun 	temp = pbuf->carry.val64 | (new << mshift(pbuf->carry_bytes));
265*4882a593Smuzhiyun 	writeq(temp, dest);
266*4882a593Smuzhiyun 	pbuf->carry.val64 = new >> zshift(pbuf->carry_bytes);
267*4882a593Smuzhiyun }
268*4882a593Smuzhiyun 
269*4882a593Smuzhiyun /*
270*4882a593Smuzhiyun  * Write a quad word using all bytes of carry.
271*4882a593Smuzhiyun  */
carry8_write8(union mix carry,void __iomem * dest)272*4882a593Smuzhiyun static inline void carry8_write8(union mix carry, void __iomem *dest)
273*4882a593Smuzhiyun {
274*4882a593Smuzhiyun 	writeq(carry.val64, dest);
275*4882a593Smuzhiyun }
276*4882a593Smuzhiyun 
277*4882a593Smuzhiyun /*
278*4882a593Smuzhiyun  * Write a quad word using all the valid bytes of carry.  If carry
279*4882a593Smuzhiyun  * has zero valid bytes, nothing is written.
280*4882a593Smuzhiyun  * Returns 0 on nothing written, non-zero on quad word written.
281*4882a593Smuzhiyun  */
carry_write8(struct pio_buf * pbuf,void __iomem * dest)282*4882a593Smuzhiyun static inline int carry_write8(struct pio_buf *pbuf, void __iomem *dest)
283*4882a593Smuzhiyun {
284*4882a593Smuzhiyun 	if (pbuf->carry_bytes) {
285*4882a593Smuzhiyun 		/* unused bytes are always kept zeroed, so just write */
286*4882a593Smuzhiyun 		writeq(pbuf->carry.val64, dest);
287*4882a593Smuzhiyun 		return 1;
288*4882a593Smuzhiyun 	}
289*4882a593Smuzhiyun 
290*4882a593Smuzhiyun 	return 0;
291*4882a593Smuzhiyun }
292*4882a593Smuzhiyun 
293*4882a593Smuzhiyun /*
294*4882a593Smuzhiyun  * Segmented PIO Copy - start
295*4882a593Smuzhiyun  *
296*4882a593Smuzhiyun  * Start a PIO copy.
297*4882a593Smuzhiyun  *
298*4882a593Smuzhiyun  * @pbuf: destination buffer
299*4882a593Smuzhiyun  * @pbc: the PBC for the PIO buffer
300*4882a593Smuzhiyun  * @from: data source, QWORD aligned
301*4882a593Smuzhiyun  * @nbytes: bytes to copy
302*4882a593Smuzhiyun  */
seg_pio_copy_start(struct pio_buf * pbuf,u64 pbc,const void * from,size_t nbytes)303*4882a593Smuzhiyun void seg_pio_copy_start(struct pio_buf *pbuf, u64 pbc,
304*4882a593Smuzhiyun 			const void *from, size_t nbytes)
305*4882a593Smuzhiyun {
306*4882a593Smuzhiyun 	void __iomem *dest = pbuf->start + SOP_DISTANCE;
307*4882a593Smuzhiyun 	void __iomem *send = dest + PIO_BLOCK_SIZE;
308*4882a593Smuzhiyun 	void __iomem *dend;			/* 8-byte data end */
309*4882a593Smuzhiyun 
310*4882a593Smuzhiyun 	writeq(pbc, dest);
311*4882a593Smuzhiyun 	dest += sizeof(u64);
312*4882a593Smuzhiyun 
313*4882a593Smuzhiyun 	/* calculate where the QWORD data ends - in SOP=1 space */
314*4882a593Smuzhiyun 	dend = dest + ((nbytes >> 3) * sizeof(u64));
315*4882a593Smuzhiyun 
316*4882a593Smuzhiyun 	if (dend < send) {
317*4882a593Smuzhiyun 		/*
318*4882a593Smuzhiyun 		 * all QWORD data is within the SOP block, does *not*
319*4882a593Smuzhiyun 		 * reach the end of the SOP block
320*4882a593Smuzhiyun 		 */
321*4882a593Smuzhiyun 
322*4882a593Smuzhiyun 		while (dest < dend) {
323*4882a593Smuzhiyun 			writeq(*(u64 *)from, dest);
324*4882a593Smuzhiyun 			from += sizeof(u64);
325*4882a593Smuzhiyun 			dest += sizeof(u64);
326*4882a593Smuzhiyun 		}
327*4882a593Smuzhiyun 		/*
328*4882a593Smuzhiyun 		 * No boundary checks are needed here:
329*4882a593Smuzhiyun 		 * 0. We're not on the SOP block boundary
330*4882a593Smuzhiyun 		 * 1. The possible DWORD dangle will still be within
331*4882a593Smuzhiyun 		 *    the SOP block
332*4882a593Smuzhiyun 		 * 2. We cannot wrap except on a block boundary.
333*4882a593Smuzhiyun 		 */
334*4882a593Smuzhiyun 	} else {
335*4882a593Smuzhiyun 		/* QWORD data extends _to_ or beyond the SOP block */
336*4882a593Smuzhiyun 
337*4882a593Smuzhiyun 		/* write 8-byte SOP chunk data */
338*4882a593Smuzhiyun 		while (dest < send) {
339*4882a593Smuzhiyun 			writeq(*(u64 *)from, dest);
340*4882a593Smuzhiyun 			from += sizeof(u64);
341*4882a593Smuzhiyun 			dest += sizeof(u64);
342*4882a593Smuzhiyun 		}
343*4882a593Smuzhiyun 		/* drop out of the SOP range */
344*4882a593Smuzhiyun 		dest -= SOP_DISTANCE;
345*4882a593Smuzhiyun 		dend -= SOP_DISTANCE;
346*4882a593Smuzhiyun 
347*4882a593Smuzhiyun 		/*
348*4882a593Smuzhiyun 		 * If the wrap comes before or matches the data end,
349*4882a593Smuzhiyun 		 * copy until until the wrap, then wrap.
350*4882a593Smuzhiyun 		 *
351*4882a593Smuzhiyun 		 * If the data ends at the end of the SOP above and
352*4882a593Smuzhiyun 		 * the buffer wraps, then pbuf->end == dend == dest
353*4882a593Smuzhiyun 		 * and nothing will get written, but we will wrap in
354*4882a593Smuzhiyun 		 * case there is a dangling DWORD.
355*4882a593Smuzhiyun 		 */
356*4882a593Smuzhiyun 		if (pbuf->end <= dend) {
357*4882a593Smuzhiyun 			while (dest < pbuf->end) {
358*4882a593Smuzhiyun 				writeq(*(u64 *)from, dest);
359*4882a593Smuzhiyun 				from += sizeof(u64);
360*4882a593Smuzhiyun 				dest += sizeof(u64);
361*4882a593Smuzhiyun 			}
362*4882a593Smuzhiyun 
363*4882a593Smuzhiyun 			dest -= pbuf->sc->size;
364*4882a593Smuzhiyun 			dend -= pbuf->sc->size;
365*4882a593Smuzhiyun 		}
366*4882a593Smuzhiyun 
367*4882a593Smuzhiyun 		/* write 8-byte non-SOP, non-wrap chunk data */
368*4882a593Smuzhiyun 		while (dest < dend) {
369*4882a593Smuzhiyun 			writeq(*(u64 *)from, dest);
370*4882a593Smuzhiyun 			from += sizeof(u64);
371*4882a593Smuzhiyun 			dest += sizeof(u64);
372*4882a593Smuzhiyun 		}
373*4882a593Smuzhiyun 	}
374*4882a593Smuzhiyun 	/* at this point we have wrapped if we are going to wrap */
375*4882a593Smuzhiyun 
376*4882a593Smuzhiyun 	/* ...but it doesn't matter as we're done writing */
377*4882a593Smuzhiyun 
378*4882a593Smuzhiyun 	/* save dangling bytes, if any */
379*4882a593Smuzhiyun 	read_low_bytes(pbuf, from, nbytes & 0x7);
380*4882a593Smuzhiyun 
381*4882a593Smuzhiyun 	pbuf->qw_written = 1 /*PBC*/ + (nbytes >> 3);
382*4882a593Smuzhiyun }
383*4882a593Smuzhiyun 
384*4882a593Smuzhiyun /*
385*4882a593Smuzhiyun  * Mid copy helper, "mixed case" - source is 64-bit aligned but carry
386*4882a593Smuzhiyun  * bytes are non-zero.
387*4882a593Smuzhiyun  *
388*4882a593Smuzhiyun  * Whole u64s must be written to the chip, so bytes must be manually merged.
389*4882a593Smuzhiyun  *
390*4882a593Smuzhiyun  * @pbuf: destination buffer
391*4882a593Smuzhiyun  * @from: data source, is QWORD aligned.
392*4882a593Smuzhiyun  * @nbytes: bytes to copy
393*4882a593Smuzhiyun  *
394*4882a593Smuzhiyun  * Must handle nbytes < 8.
395*4882a593Smuzhiyun  */
mid_copy_mix(struct pio_buf * pbuf,const void * from,size_t nbytes)396*4882a593Smuzhiyun static void mid_copy_mix(struct pio_buf *pbuf, const void *from, size_t nbytes)
397*4882a593Smuzhiyun {
398*4882a593Smuzhiyun 	void __iomem *dest = pbuf->start + (pbuf->qw_written * sizeof(u64));
399*4882a593Smuzhiyun 	void __iomem *dend;			/* 8-byte data end */
400*4882a593Smuzhiyun 	unsigned long qw_to_write = nbytes >> 3;
401*4882a593Smuzhiyun 	unsigned long bytes_left = nbytes & 0x7;
402*4882a593Smuzhiyun 
403*4882a593Smuzhiyun 	/* calculate 8-byte data end */
404*4882a593Smuzhiyun 	dend = dest + (qw_to_write * sizeof(u64));
405*4882a593Smuzhiyun 
406*4882a593Smuzhiyun 	if (pbuf->qw_written < PIO_BLOCK_QWS) {
407*4882a593Smuzhiyun 		/*
408*4882a593Smuzhiyun 		 * Still within SOP block.  We don't need to check for
409*4882a593Smuzhiyun 		 * wrap because we are still in the first block and
410*4882a593Smuzhiyun 		 * can only wrap on block boundaries.
411*4882a593Smuzhiyun 		 */
412*4882a593Smuzhiyun 		void __iomem *send;		/* SOP end */
413*4882a593Smuzhiyun 		void __iomem *xend;
414*4882a593Smuzhiyun 
415*4882a593Smuzhiyun 		/*
416*4882a593Smuzhiyun 		 * calculate the end of data or end of block, whichever
417*4882a593Smuzhiyun 		 * comes first
418*4882a593Smuzhiyun 		 */
419*4882a593Smuzhiyun 		send = pbuf->start + PIO_BLOCK_SIZE;
420*4882a593Smuzhiyun 		xend = min(send, dend);
421*4882a593Smuzhiyun 
422*4882a593Smuzhiyun 		/* shift up to SOP=1 space */
423*4882a593Smuzhiyun 		dest += SOP_DISTANCE;
424*4882a593Smuzhiyun 		xend += SOP_DISTANCE;
425*4882a593Smuzhiyun 
426*4882a593Smuzhiyun 		/* write 8-byte chunk data */
427*4882a593Smuzhiyun 		while (dest < xend) {
428*4882a593Smuzhiyun 			merge_write8(pbuf, dest, from);
429*4882a593Smuzhiyun 			from += sizeof(u64);
430*4882a593Smuzhiyun 			dest += sizeof(u64);
431*4882a593Smuzhiyun 		}
432*4882a593Smuzhiyun 
433*4882a593Smuzhiyun 		/* shift down to SOP=0 space */
434*4882a593Smuzhiyun 		dest -= SOP_DISTANCE;
435*4882a593Smuzhiyun 	}
436*4882a593Smuzhiyun 	/*
437*4882a593Smuzhiyun 	 * At this point dest could be (either, both, or neither):
438*4882a593Smuzhiyun 	 * - at dend
439*4882a593Smuzhiyun 	 * - at the wrap
440*4882a593Smuzhiyun 	 */
441*4882a593Smuzhiyun 
442*4882a593Smuzhiyun 	/*
443*4882a593Smuzhiyun 	 * If the wrap comes before or matches the data end,
444*4882a593Smuzhiyun 	 * copy until until the wrap, then wrap.
445*4882a593Smuzhiyun 	 *
446*4882a593Smuzhiyun 	 * If dest is at the wrap, we will fall into the if,
447*4882a593Smuzhiyun 	 * not do the loop, when wrap.
448*4882a593Smuzhiyun 	 *
449*4882a593Smuzhiyun 	 * If the data ends at the end of the SOP above and
450*4882a593Smuzhiyun 	 * the buffer wraps, then pbuf->end == dend == dest
451*4882a593Smuzhiyun 	 * and nothing will get written.
452*4882a593Smuzhiyun 	 */
453*4882a593Smuzhiyun 	if (pbuf->end <= dend) {
454*4882a593Smuzhiyun 		while (dest < pbuf->end) {
455*4882a593Smuzhiyun 			merge_write8(pbuf, dest, from);
456*4882a593Smuzhiyun 			from += sizeof(u64);
457*4882a593Smuzhiyun 			dest += sizeof(u64);
458*4882a593Smuzhiyun 		}
459*4882a593Smuzhiyun 
460*4882a593Smuzhiyun 		dest -= pbuf->sc->size;
461*4882a593Smuzhiyun 		dend -= pbuf->sc->size;
462*4882a593Smuzhiyun 	}
463*4882a593Smuzhiyun 
464*4882a593Smuzhiyun 	/* write 8-byte non-SOP, non-wrap chunk data */
465*4882a593Smuzhiyun 	while (dest < dend) {
466*4882a593Smuzhiyun 		merge_write8(pbuf, dest, from);
467*4882a593Smuzhiyun 		from += sizeof(u64);
468*4882a593Smuzhiyun 		dest += sizeof(u64);
469*4882a593Smuzhiyun 	}
470*4882a593Smuzhiyun 
471*4882a593Smuzhiyun 	pbuf->qw_written += qw_to_write;
472*4882a593Smuzhiyun 
473*4882a593Smuzhiyun 	/* handle carry and left-over bytes */
474*4882a593Smuzhiyun 	if (pbuf->carry_bytes + bytes_left >= 8) {
475*4882a593Smuzhiyun 		unsigned long nread;
476*4882a593Smuzhiyun 
477*4882a593Smuzhiyun 		/* there is enough to fill another qw - fill carry */
478*4882a593Smuzhiyun 		nread = 8 - pbuf->carry_bytes;
479*4882a593Smuzhiyun 		read_extra_bytes(pbuf, from, nread);
480*4882a593Smuzhiyun 
481*4882a593Smuzhiyun 		/*
482*4882a593Smuzhiyun 		 * One more write - but need to make sure dest is correct.
483*4882a593Smuzhiyun 		 * Check for wrap and the possibility the write
484*4882a593Smuzhiyun 		 * should be in SOP space.
485*4882a593Smuzhiyun 		 *
486*4882a593Smuzhiyun 		 * The two checks immediately below cannot both be true, hence
487*4882a593Smuzhiyun 		 * the else. If we have wrapped, we cannot still be within the
488*4882a593Smuzhiyun 		 * first block. Conversely, if we are still in the first block,
489*4882a593Smuzhiyun 		 * we cannot have wrapped. We do the wrap check first as that
490*4882a593Smuzhiyun 		 * is more likely.
491*4882a593Smuzhiyun 		 */
492*4882a593Smuzhiyun 		/* adjust if we have wrapped */
493*4882a593Smuzhiyun 		if (dest >= pbuf->end)
494*4882a593Smuzhiyun 			dest -= pbuf->sc->size;
495*4882a593Smuzhiyun 		/* jump to the SOP range if within the first block */
496*4882a593Smuzhiyun 		else if (pbuf->qw_written < PIO_BLOCK_QWS)
497*4882a593Smuzhiyun 			dest += SOP_DISTANCE;
498*4882a593Smuzhiyun 
499*4882a593Smuzhiyun 		/* flush out full carry */
500*4882a593Smuzhiyun 		carry8_write8(pbuf->carry, dest);
501*4882a593Smuzhiyun 		pbuf->qw_written++;
502*4882a593Smuzhiyun 
503*4882a593Smuzhiyun 		/* now adjust and read the rest of the bytes into carry */
504*4882a593Smuzhiyun 		bytes_left -= nread;
505*4882a593Smuzhiyun 		from += nread; /* from is now not aligned */
506*4882a593Smuzhiyun 		read_low_bytes(pbuf, from, bytes_left);
507*4882a593Smuzhiyun 	} else {
508*4882a593Smuzhiyun 		/* not enough to fill another qw, append the rest to carry */
509*4882a593Smuzhiyun 		read_extra_bytes(pbuf, from, bytes_left);
510*4882a593Smuzhiyun 	}
511*4882a593Smuzhiyun }
512*4882a593Smuzhiyun 
513*4882a593Smuzhiyun /*
514*4882a593Smuzhiyun  * Mid copy helper, "straight case" - source pointer is 64-bit aligned
515*4882a593Smuzhiyun  * with no carry bytes.
516*4882a593Smuzhiyun  *
517*4882a593Smuzhiyun  * @pbuf: destination buffer
518*4882a593Smuzhiyun  * @from: data source, is QWORD aligned
519*4882a593Smuzhiyun  * @nbytes: bytes to copy
520*4882a593Smuzhiyun  *
521*4882a593Smuzhiyun  * Must handle nbytes < 8.
522*4882a593Smuzhiyun  */
mid_copy_straight(struct pio_buf * pbuf,const void * from,size_t nbytes)523*4882a593Smuzhiyun static void mid_copy_straight(struct pio_buf *pbuf,
524*4882a593Smuzhiyun 			      const void *from, size_t nbytes)
525*4882a593Smuzhiyun {
526*4882a593Smuzhiyun 	void __iomem *dest = pbuf->start + (pbuf->qw_written * sizeof(u64));
527*4882a593Smuzhiyun 	void __iomem *dend;			/* 8-byte data end */
528*4882a593Smuzhiyun 
529*4882a593Smuzhiyun 	/* calculate 8-byte data end */
530*4882a593Smuzhiyun 	dend = dest + ((nbytes >> 3) * sizeof(u64));
531*4882a593Smuzhiyun 
532*4882a593Smuzhiyun 	if (pbuf->qw_written < PIO_BLOCK_QWS) {
533*4882a593Smuzhiyun 		/*
534*4882a593Smuzhiyun 		 * Still within SOP block.  We don't need to check for
535*4882a593Smuzhiyun 		 * wrap because we are still in the first block and
536*4882a593Smuzhiyun 		 * can only wrap on block boundaries.
537*4882a593Smuzhiyun 		 */
538*4882a593Smuzhiyun 		void __iomem *send;		/* SOP end */
539*4882a593Smuzhiyun 		void __iomem *xend;
540*4882a593Smuzhiyun 
541*4882a593Smuzhiyun 		/*
542*4882a593Smuzhiyun 		 * calculate the end of data or end of block, whichever
543*4882a593Smuzhiyun 		 * comes first
544*4882a593Smuzhiyun 		 */
545*4882a593Smuzhiyun 		send = pbuf->start + PIO_BLOCK_SIZE;
546*4882a593Smuzhiyun 		xend = min(send, dend);
547*4882a593Smuzhiyun 
548*4882a593Smuzhiyun 		/* shift up to SOP=1 space */
549*4882a593Smuzhiyun 		dest += SOP_DISTANCE;
550*4882a593Smuzhiyun 		xend += SOP_DISTANCE;
551*4882a593Smuzhiyun 
552*4882a593Smuzhiyun 		/* write 8-byte chunk data */
553*4882a593Smuzhiyun 		while (dest < xend) {
554*4882a593Smuzhiyun 			writeq(*(u64 *)from, dest);
555*4882a593Smuzhiyun 			from += sizeof(u64);
556*4882a593Smuzhiyun 			dest += sizeof(u64);
557*4882a593Smuzhiyun 		}
558*4882a593Smuzhiyun 
559*4882a593Smuzhiyun 		/* shift down to SOP=0 space */
560*4882a593Smuzhiyun 		dest -= SOP_DISTANCE;
561*4882a593Smuzhiyun 	}
562*4882a593Smuzhiyun 	/*
563*4882a593Smuzhiyun 	 * At this point dest could be (either, both, or neither):
564*4882a593Smuzhiyun 	 * - at dend
565*4882a593Smuzhiyun 	 * - at the wrap
566*4882a593Smuzhiyun 	 */
567*4882a593Smuzhiyun 
568*4882a593Smuzhiyun 	/*
569*4882a593Smuzhiyun 	 * If the wrap comes before or matches the data end,
570*4882a593Smuzhiyun 	 * copy until until the wrap, then wrap.
571*4882a593Smuzhiyun 	 *
572*4882a593Smuzhiyun 	 * If dest is at the wrap, we will fall into the if,
573*4882a593Smuzhiyun 	 * not do the loop, when wrap.
574*4882a593Smuzhiyun 	 *
575*4882a593Smuzhiyun 	 * If the data ends at the end of the SOP above and
576*4882a593Smuzhiyun 	 * the buffer wraps, then pbuf->end == dend == dest
577*4882a593Smuzhiyun 	 * and nothing will get written.
578*4882a593Smuzhiyun 	 */
579*4882a593Smuzhiyun 	if (pbuf->end <= dend) {
580*4882a593Smuzhiyun 		while (dest < pbuf->end) {
581*4882a593Smuzhiyun 			writeq(*(u64 *)from, dest);
582*4882a593Smuzhiyun 			from += sizeof(u64);
583*4882a593Smuzhiyun 			dest += sizeof(u64);
584*4882a593Smuzhiyun 		}
585*4882a593Smuzhiyun 
586*4882a593Smuzhiyun 		dest -= pbuf->sc->size;
587*4882a593Smuzhiyun 		dend -= pbuf->sc->size;
588*4882a593Smuzhiyun 	}
589*4882a593Smuzhiyun 
590*4882a593Smuzhiyun 	/* write 8-byte non-SOP, non-wrap chunk data */
591*4882a593Smuzhiyun 	while (dest < dend) {
592*4882a593Smuzhiyun 		writeq(*(u64 *)from, dest);
593*4882a593Smuzhiyun 		from += sizeof(u64);
594*4882a593Smuzhiyun 		dest += sizeof(u64);
595*4882a593Smuzhiyun 	}
596*4882a593Smuzhiyun 
597*4882a593Smuzhiyun 	/* we know carry_bytes was zero on entry to this routine */
598*4882a593Smuzhiyun 	read_low_bytes(pbuf, from, nbytes & 0x7);
599*4882a593Smuzhiyun 
600*4882a593Smuzhiyun 	pbuf->qw_written += nbytes >> 3;
601*4882a593Smuzhiyun }
602*4882a593Smuzhiyun 
603*4882a593Smuzhiyun /*
604*4882a593Smuzhiyun  * Segmented PIO Copy - middle
605*4882a593Smuzhiyun  *
606*4882a593Smuzhiyun  * Must handle any aligned tail and any aligned source with any byte count.
607*4882a593Smuzhiyun  *
608*4882a593Smuzhiyun  * @pbuf: a number of blocks allocated within a PIO send context
609*4882a593Smuzhiyun  * @from: data source
610*4882a593Smuzhiyun  * @nbytes: number of bytes to copy
611*4882a593Smuzhiyun  */
seg_pio_copy_mid(struct pio_buf * pbuf,const void * from,size_t nbytes)612*4882a593Smuzhiyun void seg_pio_copy_mid(struct pio_buf *pbuf, const void *from, size_t nbytes)
613*4882a593Smuzhiyun {
614*4882a593Smuzhiyun 	unsigned long from_align = (unsigned long)from & 0x7;
615*4882a593Smuzhiyun 
616*4882a593Smuzhiyun 	if (pbuf->carry_bytes + nbytes < 8) {
617*4882a593Smuzhiyun 		/* not enough bytes to fill a QW */
618*4882a593Smuzhiyun 		read_extra_bytes(pbuf, from, nbytes);
619*4882a593Smuzhiyun 		return;
620*4882a593Smuzhiyun 	}
621*4882a593Smuzhiyun 
622*4882a593Smuzhiyun 	if (from_align) {
623*4882a593Smuzhiyun 		/* misaligned source pointer - align it */
624*4882a593Smuzhiyun 		unsigned long to_align;
625*4882a593Smuzhiyun 
626*4882a593Smuzhiyun 		/* bytes to read to align "from" */
627*4882a593Smuzhiyun 		to_align = 8 - from_align;
628*4882a593Smuzhiyun 
629*4882a593Smuzhiyun 		/*
630*4882a593Smuzhiyun 		 * In the advance-to-alignment logic below, we do not need
631*4882a593Smuzhiyun 		 * to check if we are using more than nbytes.  This is because
632*4882a593Smuzhiyun 		 * if we are here, we already know that carry+nbytes will
633*4882a593Smuzhiyun 		 * fill at least one QW.
634*4882a593Smuzhiyun 		 */
635*4882a593Smuzhiyun 		if (pbuf->carry_bytes + to_align < 8) {
636*4882a593Smuzhiyun 			/* not enough align bytes to fill a QW */
637*4882a593Smuzhiyun 			read_extra_bytes(pbuf, from, to_align);
638*4882a593Smuzhiyun 			from += to_align;
639*4882a593Smuzhiyun 			nbytes -= to_align;
640*4882a593Smuzhiyun 		} else {
641*4882a593Smuzhiyun 			/* bytes to fill carry */
642*4882a593Smuzhiyun 			unsigned long to_fill = 8 - pbuf->carry_bytes;
643*4882a593Smuzhiyun 			/* bytes left over to be read */
644*4882a593Smuzhiyun 			unsigned long extra = to_align - to_fill;
645*4882a593Smuzhiyun 			void __iomem *dest;
646*4882a593Smuzhiyun 
647*4882a593Smuzhiyun 			/* fill carry... */
648*4882a593Smuzhiyun 			read_extra_bytes(pbuf, from, to_fill);
649*4882a593Smuzhiyun 			from += to_fill;
650*4882a593Smuzhiyun 			nbytes -= to_fill;
651*4882a593Smuzhiyun 			/* may not be enough valid bytes left to align */
652*4882a593Smuzhiyun 			if (extra > nbytes)
653*4882a593Smuzhiyun 				extra = nbytes;
654*4882a593Smuzhiyun 
655*4882a593Smuzhiyun 			/* ...now write carry */
656*4882a593Smuzhiyun 			dest = pbuf->start + (pbuf->qw_written * sizeof(u64));
657*4882a593Smuzhiyun 
658*4882a593Smuzhiyun 			/*
659*4882a593Smuzhiyun 			 * The two checks immediately below cannot both be
660*4882a593Smuzhiyun 			 * true, hence the else.  If we have wrapped, we
661*4882a593Smuzhiyun 			 * cannot still be within the first block.
662*4882a593Smuzhiyun 			 * Conversely, if we are still in the first block, we
663*4882a593Smuzhiyun 			 * cannot have wrapped.  We do the wrap check first
664*4882a593Smuzhiyun 			 * as that is more likely.
665*4882a593Smuzhiyun 			 */
666*4882a593Smuzhiyun 			/* adjust if we've wrapped */
667*4882a593Smuzhiyun 			if (dest >= pbuf->end)
668*4882a593Smuzhiyun 				dest -= pbuf->sc->size;
669*4882a593Smuzhiyun 			/* jump to SOP range if within the first block */
670*4882a593Smuzhiyun 			else if (pbuf->qw_written < PIO_BLOCK_QWS)
671*4882a593Smuzhiyun 				dest += SOP_DISTANCE;
672*4882a593Smuzhiyun 
673*4882a593Smuzhiyun 			carry8_write8(pbuf->carry, dest);
674*4882a593Smuzhiyun 			pbuf->qw_written++;
675*4882a593Smuzhiyun 
676*4882a593Smuzhiyun 			/* read any extra bytes to do final alignment */
677*4882a593Smuzhiyun 			/* this will overwrite anything in pbuf->carry */
678*4882a593Smuzhiyun 			read_low_bytes(pbuf, from, extra);
679*4882a593Smuzhiyun 			from += extra;
680*4882a593Smuzhiyun 			nbytes -= extra;
681*4882a593Smuzhiyun 			/*
682*4882a593Smuzhiyun 			 * If no bytes are left, return early - we are done.
683*4882a593Smuzhiyun 			 * NOTE: This short-circuit is *required* because
684*4882a593Smuzhiyun 			 * "extra" may have been reduced in size and "from"
685*4882a593Smuzhiyun 			 * is not aligned, as required when leaving this
686*4882a593Smuzhiyun 			 * if block.
687*4882a593Smuzhiyun 			 */
688*4882a593Smuzhiyun 			if (nbytes == 0)
689*4882a593Smuzhiyun 				return;
690*4882a593Smuzhiyun 		}
691*4882a593Smuzhiyun 
692*4882a593Smuzhiyun 		/* at this point, from is QW aligned */
693*4882a593Smuzhiyun 	}
694*4882a593Smuzhiyun 
695*4882a593Smuzhiyun 	if (pbuf->carry_bytes)
696*4882a593Smuzhiyun 		mid_copy_mix(pbuf, from, nbytes);
697*4882a593Smuzhiyun 	else
698*4882a593Smuzhiyun 		mid_copy_straight(pbuf, from, nbytes);
699*4882a593Smuzhiyun }
700*4882a593Smuzhiyun 
701*4882a593Smuzhiyun /*
702*4882a593Smuzhiyun  * Segmented PIO Copy - end
703*4882a593Smuzhiyun  *
704*4882a593Smuzhiyun  * Write any remainder (in pbuf->carry) and finish writing the whole block.
705*4882a593Smuzhiyun  *
706*4882a593Smuzhiyun  * @pbuf: a number of blocks allocated within a PIO send context
707*4882a593Smuzhiyun  */
seg_pio_copy_end(struct pio_buf * pbuf)708*4882a593Smuzhiyun void seg_pio_copy_end(struct pio_buf *pbuf)
709*4882a593Smuzhiyun {
710*4882a593Smuzhiyun 	void __iomem *dest = pbuf->start + (pbuf->qw_written * sizeof(u64));
711*4882a593Smuzhiyun 
712*4882a593Smuzhiyun 	/*
713*4882a593Smuzhiyun 	 * The two checks immediately below cannot both be true, hence the
714*4882a593Smuzhiyun 	 * else.  If we have wrapped, we cannot still be within the first
715*4882a593Smuzhiyun 	 * block.  Conversely, if we are still in the first block, we
716*4882a593Smuzhiyun 	 * cannot have wrapped.  We do the wrap check first as that is
717*4882a593Smuzhiyun 	 * more likely.
718*4882a593Smuzhiyun 	 */
719*4882a593Smuzhiyun 	/* adjust if we have wrapped */
720*4882a593Smuzhiyun 	if (dest >= pbuf->end)
721*4882a593Smuzhiyun 		dest -= pbuf->sc->size;
722*4882a593Smuzhiyun 	/* jump to the SOP range if within the first block */
723*4882a593Smuzhiyun 	else if (pbuf->qw_written < PIO_BLOCK_QWS)
724*4882a593Smuzhiyun 		dest += SOP_DISTANCE;
725*4882a593Smuzhiyun 
726*4882a593Smuzhiyun 	/* write final bytes, if any */
727*4882a593Smuzhiyun 	if (carry_write8(pbuf, dest)) {
728*4882a593Smuzhiyun 		dest += sizeof(u64);
729*4882a593Smuzhiyun 		/*
730*4882a593Smuzhiyun 		 * NOTE: We do not need to recalculate whether dest needs
731*4882a593Smuzhiyun 		 * SOP_DISTANCE or not.
732*4882a593Smuzhiyun 		 *
733*4882a593Smuzhiyun 		 * If we are in the first block and the dangle write
734*4882a593Smuzhiyun 		 * keeps us in the same block, dest will need
735*4882a593Smuzhiyun 		 * to retain SOP_DISTANCE in the loop below.
736*4882a593Smuzhiyun 		 *
737*4882a593Smuzhiyun 		 * If we are in the first block and the dangle write pushes
738*4882a593Smuzhiyun 		 * us to the next block, then loop below will not run
739*4882a593Smuzhiyun 		 * and dest is not used.  Hence we do not need to update
740*4882a593Smuzhiyun 		 * it.
741*4882a593Smuzhiyun 		 *
742*4882a593Smuzhiyun 		 * If we are past the first block, then SOP_DISTANCE
743*4882a593Smuzhiyun 		 * was never added, so there is nothing to do.
744*4882a593Smuzhiyun 		 */
745*4882a593Smuzhiyun 	}
746*4882a593Smuzhiyun 
747*4882a593Smuzhiyun 	/* fill in rest of block */
748*4882a593Smuzhiyun 	while (((unsigned long)dest & PIO_BLOCK_MASK) != 0) {
749*4882a593Smuzhiyun 		writeq(0, dest);
750*4882a593Smuzhiyun 		dest += sizeof(u64);
751*4882a593Smuzhiyun 	}
752*4882a593Smuzhiyun 
753*4882a593Smuzhiyun 	/* finished with this buffer */
754*4882a593Smuzhiyun 	this_cpu_dec(*pbuf->sc->buffers_allocated);
755*4882a593Smuzhiyun 	preempt_enable();
756*4882a593Smuzhiyun }
757