xref: /OK3568_Linux_fs/kernel/arch/parisc/lib/io.c (revision 4882a59341e53eb6f0b4789bf948001014eff981)
1*4882a593Smuzhiyun // SPDX-License-Identifier: GPL-2.0
2*4882a593Smuzhiyun /*
3*4882a593Smuzhiyun  * arch/parisc/lib/io.c
4*4882a593Smuzhiyun  *
5*4882a593Smuzhiyun  * Copyright (c) Matthew Wilcox 2001 for Hewlett-Packard
6*4882a593Smuzhiyun  * Copyright (c) Randolph Chung 2001 <tausq@debian.org>
7*4882a593Smuzhiyun  *
8*4882a593Smuzhiyun  * IO accessing functions which shouldn't be inlined because they're too big
9*4882a593Smuzhiyun  */
10*4882a593Smuzhiyun 
11*4882a593Smuzhiyun #include <linux/kernel.h>
12*4882a593Smuzhiyun #include <linux/module.h>
13*4882a593Smuzhiyun #include <asm/io.h>
14*4882a593Smuzhiyun 
15*4882a593Smuzhiyun /* Copies a block of memory to a device in an efficient manner.
16*4882a593Smuzhiyun  * Assumes the device can cope with 32-bit transfers.  If it can't,
17*4882a593Smuzhiyun  * don't use this function.
18*4882a593Smuzhiyun  */
memcpy_toio(volatile void __iomem * dst,const void * src,int count)19*4882a593Smuzhiyun void memcpy_toio(volatile void __iomem *dst, const void *src, int count)
20*4882a593Smuzhiyun {
21*4882a593Smuzhiyun 	if (((unsigned long)dst & 3) != ((unsigned long)src & 3))
22*4882a593Smuzhiyun 		goto bytecopy;
23*4882a593Smuzhiyun 	while ((unsigned long)dst & 3) {
24*4882a593Smuzhiyun 		writeb(*(char *)src, dst++);
25*4882a593Smuzhiyun 		src++;
26*4882a593Smuzhiyun 		count--;
27*4882a593Smuzhiyun 	}
28*4882a593Smuzhiyun 	while (count > 3) {
29*4882a593Smuzhiyun 		__raw_writel(*(u32 *)src, dst);
30*4882a593Smuzhiyun 		src += 4;
31*4882a593Smuzhiyun 		dst += 4;
32*4882a593Smuzhiyun 		count -= 4;
33*4882a593Smuzhiyun 	}
34*4882a593Smuzhiyun  bytecopy:
35*4882a593Smuzhiyun 	while (count--) {
36*4882a593Smuzhiyun 		writeb(*(char *)src, dst++);
37*4882a593Smuzhiyun 		src++;
38*4882a593Smuzhiyun 	}
39*4882a593Smuzhiyun }
40*4882a593Smuzhiyun 
41*4882a593Smuzhiyun /*
42*4882a593Smuzhiyun ** Copies a block of memory from a device in an efficient manner.
43*4882a593Smuzhiyun ** Assumes the device can cope with 32-bit transfers.  If it can't,
44*4882a593Smuzhiyun ** don't use this function.
45*4882a593Smuzhiyun **
46*4882a593Smuzhiyun ** CR16 counts on C3000 reading 256 bytes from Symbios 896 RAM:
47*4882a593Smuzhiyun **	27341/64    = 427 cyc per int
48*4882a593Smuzhiyun **	61311/128   = 478 cyc per short
49*4882a593Smuzhiyun **	122637/256  = 479 cyc per byte
50*4882a593Smuzhiyun ** Ergo bus latencies dominant (not transfer size).
51*4882a593Smuzhiyun **      Minimize total number of transfers at cost of CPU cycles.
52*4882a593Smuzhiyun **	TODO: only look at src alignment and adjust the stores to dest.
53*4882a593Smuzhiyun */
memcpy_fromio(void * dst,const volatile void __iomem * src,int count)54*4882a593Smuzhiyun void memcpy_fromio(void *dst, const volatile void __iomem *src, int count)
55*4882a593Smuzhiyun {
56*4882a593Smuzhiyun 	/* first compare alignment of src/dst */
57*4882a593Smuzhiyun 	if ( (((unsigned long)dst ^ (unsigned long)src) & 1) || (count < 2) )
58*4882a593Smuzhiyun 		goto bytecopy;
59*4882a593Smuzhiyun 
60*4882a593Smuzhiyun 	if ( (((unsigned long)dst ^ (unsigned long)src) & 2) || (count < 4) )
61*4882a593Smuzhiyun 		goto shortcopy;
62*4882a593Smuzhiyun 
63*4882a593Smuzhiyun 	/* Then check for misaligned start address */
64*4882a593Smuzhiyun 	if ((unsigned long)src & 1) {
65*4882a593Smuzhiyun 		*(u8 *)dst = readb(src);
66*4882a593Smuzhiyun 		src++;
67*4882a593Smuzhiyun 		dst++;
68*4882a593Smuzhiyun 		count--;
69*4882a593Smuzhiyun 		if (count < 2) goto bytecopy;
70*4882a593Smuzhiyun 	}
71*4882a593Smuzhiyun 
72*4882a593Smuzhiyun 	if ((unsigned long)src & 2) {
73*4882a593Smuzhiyun 		*(u16 *)dst = __raw_readw(src);
74*4882a593Smuzhiyun 		src += 2;
75*4882a593Smuzhiyun 		dst += 2;
76*4882a593Smuzhiyun 		count -= 2;
77*4882a593Smuzhiyun 	}
78*4882a593Smuzhiyun 
79*4882a593Smuzhiyun 	while (count > 3) {
80*4882a593Smuzhiyun 		*(u32 *)dst = __raw_readl(src);
81*4882a593Smuzhiyun 		dst += 4;
82*4882a593Smuzhiyun 		src += 4;
83*4882a593Smuzhiyun 		count -= 4;
84*4882a593Smuzhiyun 	}
85*4882a593Smuzhiyun 
86*4882a593Smuzhiyun  shortcopy:
87*4882a593Smuzhiyun 	while (count > 1) {
88*4882a593Smuzhiyun 		*(u16 *)dst = __raw_readw(src);
89*4882a593Smuzhiyun 		src += 2;
90*4882a593Smuzhiyun 		dst += 2;
91*4882a593Smuzhiyun 		count -= 2;
92*4882a593Smuzhiyun 	}
93*4882a593Smuzhiyun 
94*4882a593Smuzhiyun  bytecopy:
95*4882a593Smuzhiyun 	while (count--) {
96*4882a593Smuzhiyun 		*(char *)dst = readb(src);
97*4882a593Smuzhiyun 		src++;
98*4882a593Smuzhiyun 		dst++;
99*4882a593Smuzhiyun 	}
100*4882a593Smuzhiyun }
101*4882a593Smuzhiyun 
102*4882a593Smuzhiyun /* Sets a block of memory on a device to a given value.
103*4882a593Smuzhiyun  * Assumes the device can cope with 32-bit transfers.  If it can't,
104*4882a593Smuzhiyun  * don't use this function.
105*4882a593Smuzhiyun  */
memset_io(volatile void __iomem * addr,unsigned char val,int count)106*4882a593Smuzhiyun void memset_io(volatile void __iomem *addr, unsigned char val, int count)
107*4882a593Smuzhiyun {
108*4882a593Smuzhiyun 	u32 val32 = (val << 24) | (val << 16) | (val << 8) | val;
109*4882a593Smuzhiyun 	while ((unsigned long)addr & 3) {
110*4882a593Smuzhiyun 		writeb(val, addr++);
111*4882a593Smuzhiyun 		count--;
112*4882a593Smuzhiyun 	}
113*4882a593Smuzhiyun 	while (count > 3) {
114*4882a593Smuzhiyun 		__raw_writel(val32, addr);
115*4882a593Smuzhiyun 		addr += 4;
116*4882a593Smuzhiyun 		count -= 4;
117*4882a593Smuzhiyun 	}
118*4882a593Smuzhiyun 	while (count--) {
119*4882a593Smuzhiyun 		writeb(val, addr++);
120*4882a593Smuzhiyun 	}
121*4882a593Smuzhiyun }
122*4882a593Smuzhiyun 
/*
 * Read COUNT 8-bit bytes from port PORT into memory starting at
 * DST.
 *
 * Port reads are always done one byte at a time; only the stores to
 * memory are combined into 32-bit words once DST is word aligned.
 */
void insb (unsigned long port, void *dst, unsigned long count)
{
	unsigned char *p = dst;

	/* Single-byte stores until the buffer reaches a 32-bit boundary. */
	while (((unsigned long)p) & 0x3) {
		if (!count)
			return;
		count--;
		*p = inb(port);
		p++;
	}

	/*
	 * Gather four port reads into one word store.  The first byte read
	 * lands in the most significant position.  The casts keep the
	 * shifts in unsigned arithmetic so a byte >= 0x80 is never shifted
	 * into the sign bit of a promoted int.
	 */
	while (count >= 4) {
		unsigned int w;

		count -= 4;
		w = (unsigned int)inb(port) << 24;
		w |= (unsigned int)inb(port) << 16;
		w |= (unsigned int)inb(port) << 8;
		w |= inb(port);
		*(unsigned int *) p = w;
		p += 4;
	}

	/* Fewer than four bytes left. */
	while (count) {
		--count;
		*p = inb(port);
		p++;
	}
}
158*4882a593Smuzhiyun 
159*4882a593Smuzhiyun 
/*
 * Read COUNT 16-bit words from port PORT into memory starting at
 * DST.  Works with any alignment of DST: aligned buffers are filled
 * with 32-bit stores where possible, odd-aligned buffers with 16-bit
 * stores.  This is used by the IDE driver to read disk sectors.
 * Performance is important, but the interfaces seems to be slow:
 * just using the inlined version of the inw() breaks things.
 */
void insw (unsigned long port, void *dst, unsigned long count)
{
	unsigned int l, l2;
	unsigned char *p = dst;

	if (!count)
		return;

	if ((unsigned long)p & 0x1) {
		/*
		 * Buffer 8-bit aligned.
		 * I don't bother with 32bit transfers
		 * in this case, 16bit will have to do -- DE
		 *
		 * Store the high byte of the first word on its own, then
		 * pair each low byte with the high byte of the next word.
		 */
		--count;

		l = cpu_to_le16(inw(port));
		*p = l >> 8;
		p++;
		while (count--) {
			l2 = cpu_to_le16(inw(port));
			*(unsigned short *)p = (l & 0xff) << 8 | (l2 >> 8);
			p += 2;
			l = l2;
		}
		*p = l & 0xff;
		return;
	}

	if ((unsigned long)p & 0x2) {
		/* Buffer 16-bit aligned: one word gets us to 32-bit alignment */
		*(unsigned short *)p = cpu_to_le16(inw(port));
		p += 2;
		count--;
	}

	/*
	 * Buffer 32-bit aligned: two port reads per store.  The cast keeps
	 * the shift unsigned so a value >= 0x8000 is not shifted into the
	 * sign bit of a promoted int.
	 */
	while (count >= 2) {
		count -= 2;
		l = (unsigned int)cpu_to_le16(inw(port)) << 16;
		l |= cpu_to_le16(inw(port));
		*(unsigned int *)p = l;
		p += 4;
	}
	if (count) {
		*(unsigned short *)p = cpu_to_le16(inw(port));
	}
}
230*4882a593Smuzhiyun 
231*4882a593Smuzhiyun 
232*4882a593Smuzhiyun 
/*
 * Read COUNT 32-bit words from port PORT into memory starting at
 * DST.  Any alignment of DST is accepted.  Performance is important,
 * but the interfaces seems to be slow: just using the inlined version
 * of the inl() breaks things.
 *
 * For unaligned buffers the leading part of the first word is stored
 * with narrow writes, every following store combines the leftover of
 * the previous word with the start of the next one, and the leftover
 * of the last word is written out at the end.
 */
void insl (unsigned long port, void *dst, unsigned long count)
{
	unsigned char *out = dst;
	unsigned int prev, cur;

	if (count == 0)
		return;

	switch ((unsigned long)dst & 0x3) {
	case 0x00:			/* destination is 32-bit aligned */
		for (; count != 0; count--) {
			*(unsigned int *)out = cpu_to_le32(inl(port));
			out += 4;
		}
		break;

	case 0x02:			/* destination is 16-bit aligned */
		prev = cpu_to_le32(inl(port));
		*(unsigned short *)out = prev >> 16;
		out += 2;

		/* one word is already in flight, hence count - 1 rounds */
		for (count--; count != 0; count--) {
			cur = cpu_to_le32(inl(port));
			*(unsigned int *)out = (prev & 0xffff) << 16 | (cur >> 16);
			out += 4;
			prev = cur;
		}
		*(unsigned short *)out = prev & 0xffff;
		break;

	case 0x01:			/* one byte past a 32-bit boundary */
		prev = cpu_to_le32(inl(port));
		*(unsigned char *)out = prev >> 24;
		out++;
		*(unsigned short *)out = (prev >> 8) & 0xffff;
		out += 2;

		for (count--; count != 0; count--) {
			cur = cpu_to_le32(inl(port));
			*(unsigned int *)out = (prev & 0xff) << 24 | (cur >> 8);
			out += 4;
			prev = cur;
		}
		*out = prev & 0xff;
		break;

	case 0x03:			/* one byte short of a 32-bit boundary */
		prev = cpu_to_le32(inl(port));
		*out = prev >> 24;
		out++;

		for (count--; count != 0; count--) {
			cur = cpu_to_le32(inl(port));
			*(unsigned int *)out = (prev & 0xffffff) << 8 | cur >> 24;
			out += 4;
			prev = cur;
		}
		*(unsigned short *)out = (prev >> 8) & 0xffff;
		out += 2;
		*out = prev & 0xff;
		break;
	}
}
311*4882a593Smuzhiyun 
312*4882a593Smuzhiyun 
/*
 * Like insb but in the opposite direction: write COUNT bytes from
 * SRC to port PORT.
 * Don't worry as much about doing aligned memory transfers:
 * doing byte reads the "slow" way isn't nearly as slow as
 * doing byte writes the slow way (no r-m-w cycle).
 */
void outsb(unsigned long port, const void * src, unsigned long count)
{
	const unsigned char *cursor = src;

	for (; count != 0; count--) {
		outb(*cursor, port);
		cursor++;
	}
}
330*4882a593Smuzhiyun 
/*
 * Like insw but in the opposite direction: write COUNT 16-bit words
 * from SRC to port PORT.  Works with any alignment of SRC.  This is
 * used by the IDE driver to write disk sectors.  Performance is
 * important, but the interfaces seems to be slow: just using the
 * inlined version of the outw() breaks things.
 */
void outsw (unsigned long port, const void *src, unsigned long count)
{
	unsigned int l, l2;
	const unsigned char *p = src;

	if (!count)
		return;

	if ((unsigned long)p & 0x1) {
		/*
		 * Buffer 8-bit aligned (offset 1 or 3 within a word).
		 * I don't bother with 32bit transfers
		 * in this case, 16bit will have to do -- DE
		 *
		 * l always holds the pending high byte of the next word
		 * to be written, already shifted into bits 15..8.
		 */
		l = *p << 8;
		p++;
		count--;
		while (count) {
			count--;
			l2 = *(const unsigned short *)p;
			p += 2;
			outw(le16_to_cpu(l | l2 >> 8), port);
			l = (l2 & 0xff) << 8;
		}
		/*
		 * The final byte is the low half of the last word; it must
		 * be OR-ed in unshifted or it is lost.
		 */
		l2 = *p;
		outw(le16_to_cpu(l | l2), port);
		return;
	}

	if ((unsigned long)p & 0x2) {
		/* Buffer 16-bit aligned: one word gets us to 32-bit alignment */
		outw(le16_to_cpu(*(const unsigned short *)p), port);
		p += 2;
		count--;
	}

	/* Buffer 32-bit aligned: one load feeds two port writes */
	while (count >= 2) {
		count -= 2;
		l = *(const unsigned int *)p;
		p += 4;
		outw(le16_to_cpu(l >> 16), port);
		outw(le16_to_cpu(l & 0xffff), port);
	}
	if (count) {
		outw(le16_to_cpu(*(const unsigned short *)p), port);
	}
}
401*4882a593Smuzhiyun 
402*4882a593Smuzhiyun 
/*
 * Like insl but in the opposite direction: write COUNT 32-bit words
 * from SRC to port PORT.  This is used by the IDE driver to write
 * disk sectors.  Works with any alignment in SRC.
 * Performance is important, but the interfaces seems to be slow:
 * just using the inlined version of the outl() breaks things.
 *
 * For unaligned buffers, l carries the bytes already fetched for the
 * next word, positioned in its most significant bits; each aligned
 * load supplies the remaining low bytes.
 */
void outsl (unsigned long port, const void *src, unsigned long count)
{
	unsigned int l, l2;
	const unsigned char *p = src;

	if (!count)
		return;

	switch (((unsigned long)p) & 0x3) {
	case 0x00:			/* Buffer 32-bit aligned */
		while (count--) {
			outl(le32_to_cpu(*(const unsigned int *)p), port);
			p += 4;
		}
		break;

	case 0x02:			/* Buffer 16-bit aligned */
		--count;

		l = *(const unsigned short *)p;
		p += 2;

		while (count--) {
			l2 = *(const unsigned int *)p;
			p += 4;
			outl(le32_to_cpu(l << 16 | l2 >> 16), port);
			l = l2;
		}
		l2 = *(const unsigned short *)p;
		outl(le32_to_cpu(l << 16 | l2), port);
		break;

	case 0x01:			/* Buffer 8-bit aligned */
		--count;

		/* three bytes pending: one byte load, then one halfword */
		l = (unsigned int)*p << 24;
		p++;
		l |= (unsigned int)*(const unsigned short *)p << 8;
		p += 2;

		while (count--) {
			l2 = *(const unsigned int *)p;
			p += 4;
			outl(le32_to_cpu(l | l2 >> 24), port);
			l = l2 << 8;
		}
		l2 = *p;
		outl(le32_to_cpu(l | l2), port);
		break;

	case 0x03:			/* Buffer 8-bit aligned */
		--count;

		/* one byte pending */
		l = (unsigned int)*p << 24;
		p++;

		while (count--) {
			l2 = *(const unsigned int *)p;
			p += 4;
			outl(le32_to_cpu(l | l2 >> 8), port);
			l = l2 << 24;
		}
		/*
		 * Three bytes complete the last word: the halfword belongs
		 * in bits 23..8 (below the pending byte), the final byte in
		 * bits 7..0.
		 */
		l2 = (unsigned int)*(const unsigned short *)p << 8;
		p += 2;
		l2 |= *p;
		outl(le32_to_cpu(l | l2), port);
		break;
	}
}
483*4882a593Smuzhiyun 
/* String port-input helpers exported for use by modules. */
EXPORT_SYMBOL(insb);
EXPORT_SYMBOL(insw);
EXPORT_SYMBOL(insl);

/* String port-output helpers exported for use by modules. */
EXPORT_SYMBOL(outsb);
EXPORT_SYMBOL(outsw);
EXPORT_SYMBOL(outsl);
490