xref: /OK3568_Linux_fs/kernel/arch/microblaze/lib/memcpy.c (revision 4882a59341e53eb6f0b4789bf948001014eff981)
1*4882a593Smuzhiyun /*
2*4882a593Smuzhiyun  * Copyright (C) 2008-2009 Michal Simek <monstr@monstr.eu>
3*4882a593Smuzhiyun  * Copyright (C) 2008-2009 PetaLogix
4*4882a593Smuzhiyun  * Copyright (C) 2007 John Williams
5*4882a593Smuzhiyun  *
6*4882a593Smuzhiyun  * Reasonably optimised generic C-code for memcpy on Microblaze
7*4882a593Smuzhiyun  * This is generic C code to do efficient, alignment-aware memcpy.
8*4882a593Smuzhiyun  *
9*4882a593Smuzhiyun  * It is based on demo code originally Copyright 2001 by Intel Corp, taken from
10*4882a593Smuzhiyun  * http://www.embedded.com/showArticle.jhtml?articleID=19205567
11*4882a593Smuzhiyun  *
12*4882a593Smuzhiyun  * Attempts were made, unsuccessfully, to contact the original
13*4882a593Smuzhiyun  * author of this code (Michael Morrow, Intel).  Below is the original
14*4882a593Smuzhiyun  * copyright notice.
15*4882a593Smuzhiyun  *
16*4882a593Smuzhiyun  * This software has been developed by Intel Corporation.
17*4882a593Smuzhiyun  * Intel specifically disclaims all warranties, express or
18*4882a593Smuzhiyun  * implied, and all liability, including consequential and
19*4882a593Smuzhiyun  * other indirect damages, for the use of this program, including
20*4882a593Smuzhiyun  * liability for infringement of any proprietary rights,
21*4882a593Smuzhiyun  * and including the warranties of merchantability and fitness
22*4882a593Smuzhiyun  * for a particular purpose. Intel does not assume any
23*4882a593Smuzhiyun  * responsibility for and errors which may appear in this program
24*4882a593Smuzhiyun  * not any responsibility to update it.
25*4882a593Smuzhiyun  */
26*4882a593Smuzhiyun 
27*4882a593Smuzhiyun #include <linux/export.h>
28*4882a593Smuzhiyun #include <linux/types.h>
29*4882a593Smuzhiyun #include <linux/stddef.h>
30*4882a593Smuzhiyun #include <linux/compiler.h>
31*4882a593Smuzhiyun 
32*4882a593Smuzhiyun #include <linux/string.h>
33*4882a593Smuzhiyun 
34*4882a593Smuzhiyun #ifdef __HAVE_ARCH_MEMCPY
35*4882a593Smuzhiyun #ifndef CONFIG_OPT_LIB_FUNCTION
/*
 * memcpy - plain byte-by-byte copy.
 * @v_dst: start of the destination buffer
 * @v_src: start of the source buffer
 * @c: number of bytes to copy
 *
 * This variant is built when CONFIG_OPT_LIB_FUNCTION is not defined.
 * Bytes are copied one at a time in ascending address order.
 *
 * Returns @v_dst.
 */
void *memcpy(void *v_dst, const void *v_src, __kernel_size_t c)
{
	const char *from = v_src;
	char *to = v_dst;
	__kernel_size_t i;

	/* Walk both buffers front to back, one byte per iteration. */
	for (i = 0; i < c; i++)
		to[i] = from[i];

	return v_dst;
}
47*4882a593Smuzhiyun #else /* CONFIG_OPT_LIB_FUNCTION */
/*
 * memcpy - alignment-aware, word-oriented copy.
 * @v_dst: start of the destination buffer
 * @v_src: start of the source buffer
 * @c: number of bytes to copy
 *
 * This variant is built when CONFIG_OPT_LIB_FUNCTION is defined.
 *
 * For copies of at least four bytes the destination is first advanced
 * bytewise to a 32-bit boundary, then data is stored one aligned word at a
 * time.  If the source ends up with a different byte offset than the
 * destination, every output word is assembled from two consecutive aligned
 * source words using shifts.  The remaining tail (at most three bytes) is
 * copied bytewise.
 *
 * Returns @v_dst.
 */
void *memcpy(void *v_dst, const void *v_src, __kernel_size_t c)
{
	const char *src = v_src;
	char *dst = v_dst;

	/* The following code tries to optimize the copy by using word
	 * alignment. This will work fine if both source and destination are
	 * aligned on the same boundary. However, if they are aligned on
	 * different boundaries shifts will be necessary. This might result in
	 * bad performance on MicroBlaze systems without a barrel shifter.
	 */
	const uint32_t *i_src;
	uint32_t *i_dst;

	if (likely(c >= 4)) {
		/* Exactly 32 bits wide: the shift amounts below rely on it. */
		uint32_t value, buf_hold;

		/* Align the destination to a word boundary. */
		/* This is done in an endian independent manner. */
		/* An offset of 1 needs 3 byte copies, 2 needs 2, 3 needs 1; */
		/* c >= 4 here, so the count cannot underflow. */
		switch ((unsigned long)dst & 3) {
		case 1:
			*dst++ = *src++;
			--c;
			/* fall through */
		case 2:
			*dst++ = *src++;
			--c;
			/* fall through */
		case 3:
			*dst++ = *src++;
			--c;
		}

		i_dst = (void *)dst;

		/* Choose a copy scheme based on the source */
		/* alignment relative to destination. */
		switch ((unsigned long)src & 3) {
		case 0x0:	/* Both byte offsets are aligned */
			i_src  = (const void *)src;

			for (; c >= 4; c -= 4)
				*i_dst++ = *i_src++;

			src  = (const void *)i_src;
			break;
		case 0x1:	/* Unaligned - Off by 1 */
			/*
			 * Word align the source.  The cast must be as wide
			 * as a pointer, otherwise the address is truncated.
			 * The first aligned load also picks up the byte in
			 * front of src; it is shifted out below.
			 */
			i_src = (const void *)((unsigned long)src & ~3UL);
#ifndef __MICROBLAZEEL__
			/* Load the holding buffer */
			buf_hold = *i_src++ << 8;

			for (; c >= 4; c -= 4) {
				value = *i_src++;
				*i_dst++ = buf_hold | value >> 24;
				buf_hold = value << 8;
			}
#else
			/* Load the holding buffer */
			buf_hold = (*i_src++ & 0xFFFFFF00) >> 8;

			for (; c >= 4; c -= 4) {
				value = *i_src++;
				*i_dst++ = buf_hold | ((value & 0xFF) << 24);
				buf_hold = (value & 0xFFFFFF00) >> 8;
			}
#endif
			/* Realign the source: 3 bytes are still held back */
			src = (const void *)i_src;
			src -= 3;
			break;
		case 0x2:	/* Unaligned - Off by 2 */
			/* Word align the source (pointer-width cast) */
			i_src = (const void *)((unsigned long)src & ~3UL);
#ifndef __MICROBLAZEEL__
			/* Load the holding buffer */
			buf_hold = *i_src++ << 16;

			for (; c >= 4; c -= 4) {
				value = *i_src++;
				*i_dst++ = buf_hold | value >> 16;
				buf_hold = value << 16;
			}
#else
			/* Load the holding buffer */
			buf_hold = (*i_src++ & 0xFFFF0000) >> 16;

			for (; c >= 4; c -= 4) {
				value = *i_src++;
				*i_dst++ = buf_hold | ((value & 0xFFFF) << 16);
				buf_hold = (value & 0xFFFF0000) >> 16;
			}
#endif
			/* Realign the source: 2 bytes are still held back */
			src = (const void *)i_src;
			src -= 2;
			break;
		case 0x3:	/* Unaligned - Off by 3 */
			/* Word align the source (pointer-width cast) */
			i_src = (const void *)((unsigned long)src & ~3UL);
#ifndef __MICROBLAZEEL__
			/* Load the holding buffer */
			buf_hold = *i_src++ << 24;

			for (; c >= 4; c -= 4) {
				value = *i_src++;
				*i_dst++ = buf_hold | value >> 8;
				buf_hold = value << 24;
			}
#else
			/* Load the holding buffer */
			buf_hold = (*i_src++ & 0xFF000000) >> 24;

			for (; c >= 4; c -= 4) {
				value = *i_src++;
				*i_dst++ = buf_hold | ((value & 0xFFFFFF) << 8);
				buf_hold = (value & 0xFF000000) >> 24;
			}
#endif
			/* Realign the source: 1 byte is still held back */
			src = (const void *)i_src;
			src -= 1;
			break;
		}
		dst = (void *)i_dst;
	}

	/* Finish off any remaining bytes */
	/* simple fast copy, ... unless a cache boundary is crossed */
	switch (c) {
	case 3:
		*dst++ = *src++;
		/* fall through */
	case 2:
		*dst++ = *src++;
		/* fall through */
	case 1:
		*dst++ = *src++;
	}

	return v_dst;
}
187*4882a593Smuzhiyun #endif /* CONFIG_OPT_LIB_FUNCTION */
188*4882a593Smuzhiyun EXPORT_SYMBOL(memcpy);
189*4882a593Smuzhiyun #endif /* __HAVE_ARCH_MEMCPY */
190