1*4882a593Smuzhiyun /*
2*4882a593Smuzhiyun * Copyright (C) 2008-2009 Michal Simek <monstr@monstr.eu>
3*4882a593Smuzhiyun * Copyright (C) 2008-2009 PetaLogix
4*4882a593Smuzhiyun * Copyright (C) 2007 John Williams
5*4882a593Smuzhiyun *
6*4882a593Smuzhiyun * Reasonably optimised generic C-code for memcpy on Microblaze
7*4882a593Smuzhiyun * This is generic C code to do efficient, alignment-aware memcpy.
8*4882a593Smuzhiyun *
9*4882a593Smuzhiyun * It is based on demo code originally Copyright 2001 by Intel Corp, taken from
10*4882a593Smuzhiyun * http://www.embedded.com/showArticle.jhtml?articleID=19205567
11*4882a593Smuzhiyun *
12*4882a593Smuzhiyun * Attempts were made, unsuccessfully, to contact the original
13*4882a593Smuzhiyun * author of this code (Michael Morrow, Intel). Below is the original
14*4882a593Smuzhiyun * copyright notice.
15*4882a593Smuzhiyun *
16*4882a593Smuzhiyun * This software has been developed by Intel Corporation.
17*4882a593Smuzhiyun * Intel specifically disclaims all warranties, express or
18*4882a593Smuzhiyun * implied, and all liability, including consequential and
19*4882a593Smuzhiyun * other indirect damages, for the use of this program, including
20*4882a593Smuzhiyun * liability for infringement of any proprietary rights,
21*4882a593Smuzhiyun * and including the warranties of merchantability and fitness
22*4882a593Smuzhiyun * for a particular purpose. Intel does not assume any
23*4882a593Smuzhiyun * responsibility for and errors which may appear in this program
24*4882a593Smuzhiyun * not any responsibility to update it.
25*4882a593Smuzhiyun */
26*4882a593Smuzhiyun
27*4882a593Smuzhiyun #include <linux/export.h>
28*4882a593Smuzhiyun #include <linux/types.h>
29*4882a593Smuzhiyun #include <linux/stddef.h>
30*4882a593Smuzhiyun #include <linux/compiler.h>
31*4882a593Smuzhiyun
32*4882a593Smuzhiyun #include <linux/string.h>
33*4882a593Smuzhiyun
34*4882a593Smuzhiyun #ifdef __HAVE_ARCH_MEMCPY
35*4882a593Smuzhiyun #ifndef CONFIG_OPT_LIB_FUNCTION
/*
 * memcpy - plain byte-by-byte copy.
 * @v_dst: destination buffer
 * @v_src: source buffer
 * @c: number of bytes to copy
 *
 * This is the variant built when CONFIG_OPT_LIB_FUNCTION is not defined.
 * No alignment tricks are attempted; bytes are copied in ascending address
 * order.
 *
 * Returns @v_dst.
 */
void *memcpy(void *v_dst, const void *v_src, __kernel_size_t c)
{
	const char *from = v_src;
	char *to = v_dst;
	__kernel_size_t i;

	for (i = 0; i < c; i++)
		to[i] = from[i];

	return v_dst;
}
47*4882a593Smuzhiyun #else /* CONFIG_OPT_LIB_FUNCTION */
/*
 * memcpy - alignment-aware, word-oriented copy.
 * @v_dst: destination buffer
 * @v_src: source buffer
 * @c: number of bytes to copy
 *
 * This is the variant built when CONFIG_OPT_LIB_FUNCTION is defined.
 *
 * For copies of at least 4 bytes the destination is first advanced to a
 * 4-byte boundary, after which whole 32-bit words are stored.  If the source
 * is not word aligned at that point, every output word is assembled from two
 * consecutive aligned source words using shifts; which way the shifts go
 * depends on whether __MICROBLAZEEL__ is defined.  Whatever is left
 * (0 to 3 bytes) is copied bytewise at the end.
 *
 * In the unaligned cases the source is read in whole aligned words, so up to
 * 3 bytes on either side of the requested source range may be loaded (but
 * never stored).
 *
 * Returns @v_dst.
 */
void *memcpy(void *v_dst, const void *v_src, __kernel_size_t c)
{
	const char *src = v_src;
	char *dst = v_dst;

	/* The following code tries to optimize the copy by using unsigned
	 * alignment. This will work fine if both source and destination are
	 * aligned on the same boundary. However, if they are aligned on
	 * different boundaries shifts will be necessary. This might result in
	 * bad performance on MicroBlaze systems without a barrel shifter.
	 */
	const uint32_t *i_src;
	uint32_t *i_dst;

	if (likely(c >= 4)) {
		/*
		 * value:    the aligned source word just loaded
		 * buf_hold: bytes of the previous source word that still have
		 *           to be emitted, already shifted into position
		 */
		uint32_t value, buf_hold;

		/* Align the destination to a word boundary. */
		/* This is done in an endian independent manner. */
		/* Offset 1 copies 3 bytes, offset 2 copies 2, offset 3 copies 1. */
		switch ((unsigned long)dst & 3) {
		case 1:
			*dst++ = *src++;
			--c;
			/* fall through */
		case 2:
			*dst++ = *src++;
			--c;
			/* fall through */
		case 3:
			*dst++ = *src++;
			--c;
		}

		i_dst = (void *)dst;

		/* Choose a copy scheme based on the source */
		/* alignment relative to destination. */
		switch ((unsigned long)src & 3) {
		case 0x0:	/* Both byte offsets are aligned */
			i_src  = (const void *)src;

			for (; c >= 4; c -= 4)
				*i_dst++ = *i_src++;

			src  = (const void *)i_src;
			break;
		case 0x1:	/* Unaligned - Off by 1 */
			/* Word align the source */
			i_src = (const void *) ((unsigned long)src & ~3UL);
#ifndef __MICROBLAZEEL__
			/* Load the holding buffer */
			buf_hold = *i_src++ << 8;

			for (; c >= 4; c -= 4) {
				value = *i_src++;
				*i_dst++ = buf_hold | value >> 24;
				buf_hold = value << 8;
			}
#else
			/* Load the holding buffer */
			buf_hold = (*i_src++ & 0xFFFFFF00) >> 8;

			for (; c >= 4; c -= 4) {
				value = *i_src++;
				*i_dst++ = buf_hold | ((value & 0xFF) << 24);
				buf_hold = (value & 0xFFFFFF00) >> 8;
			}
#endif
			/* Realign the source */
			/* i_src is one word ahead; 3 held bytes are unconsumed. */
			src = (const void *)i_src;
			src -= 3;
			break;
		case 0x2:	/* Unaligned - Off by 2 */
			/* Word align the source */
			i_src = (const void *) ((unsigned long)src & ~3UL);
#ifndef __MICROBLAZEEL__
			/* Load the holding buffer */
			buf_hold = *i_src++ << 16;

			for (; c >= 4; c -= 4) {
				value = *i_src++;
				*i_dst++ = buf_hold | value >> 16;
				buf_hold = value << 16;
			}
#else
			/* Load the holding buffer */
			buf_hold = (*i_src++ & 0xFFFF0000) >> 16;

			for (; c >= 4; c -= 4) {
				value = *i_src++;
				*i_dst++ = buf_hold | ((value & 0xFFFF) << 16);
				buf_hold = (value & 0xFFFF0000) >> 16;
			}
#endif
			/* Realign the source */
			/* i_src is one word ahead; 2 held bytes are unconsumed. */
			src = (const void *)i_src;
			src -= 2;
			break;
		case 0x3:	/* Unaligned - Off by 3 */
			/* Word align the source */
			i_src = (const void *) ((unsigned long)src & ~3UL);
#ifndef __MICROBLAZEEL__
			/* Load the holding buffer */
			buf_hold = *i_src++ << 24;

			for (; c >= 4; c -= 4) {
				value = *i_src++;
				*i_dst++ = buf_hold | value >> 8;
				buf_hold = value << 24;
			}
#else
			/* Load the holding buffer */
			buf_hold = (*i_src++ & 0xFF000000) >> 24;

			for (; c >= 4; c -= 4) {
				value = *i_src++;
				*i_dst++ = buf_hold | ((value & 0xFFFFFF) << 8);
				buf_hold = (value & 0xFF000000) >> 24;
			}
#endif
			/* Realign the source */
			/* i_src is one word ahead; 1 held byte is unconsumed. */
			src = (const void *)i_src;
			src -= 1;
			break;
		}
		dst = (void *)i_dst;
	}

	/* Finish off any remaining bytes */
	/* simple fast copy, ... unless a cache boundary is crossed */
	switch (c) {
	case 3:
		*dst++ = *src++;
		/* fall through */
	case 2:
		*dst++ = *src++;
		/* fall through */
	case 1:
		*dst++ = *src++;
	}

	return v_dst;
}
187*4882a593Smuzhiyun #endif /* CONFIG_OPT_LIB_FUNCTION */
188*4882a593Smuzhiyun EXPORT_SYMBOL(memcpy);
189*4882a593Smuzhiyun #endif /* __HAVE_ARCH_MEMCPY */
190