/*
 * This file is subject to the terms and conditions of the GNU General Public
 * License.  See the file "COPYING" in the main directory of this archive
 * for more details.
 *
 * Unified implementation of memcpy, memmove and the __copy_user backend.
 *
 * Copyright (C) 1998, 99, 2000, 01, 2002 Ralf Baechle (ralf@gnu.org)
 * Copyright (C) 1999, 2000, 01, 2002 Silicon Graphics, Inc.
 * Copyright (C) 2002 Broadcom, Inc.
 *   memcpy/copy_user author: Mark Vandevoorde
 *
 * Mnemonic names for arguments to memcpy/__copy_user
 */

#include <asm/asm.h>
#include <asm/asm-offsets.h>
#include <asm/export.h>
#include <asm/regdef.h>

#define dst a0
#define src a1
#define len a2

/*
 * Spec
 *
 * memcpy copies len bytes from src to dst and sets v0 to dst.
 * It assumes that
 *   - src and dst don't overlap
 *   - src is readable
 *   - dst is writable
 * memcpy uses the standard calling convention.
 *
 * __copy_user copies up to len bytes from src to dst and sets a2 (len) to
 * the number of uncopied bytes due to an exception caused by a read or write.
 * __copy_user assumes that src and dst don't overlap, and that the call is
 * implementing one of the following:
 *   copy_to_user
 *     - src is readable  (no exceptions when reading src)
 *   copy_from_user
 *     - dst is writable  (no exceptions when writing dst)
 * __copy_user uses a non-standard calling convention; see
 * arch/mips/include/asm/uaccess.h
 *
 * When an exception happens on a load, the handler must
 * ensure that all of the destination buffer is overwritten to prevent
 * leaking information to user mode programs.
 */

/*
 * Implementation
 */

/*
 * The exception handler for loads requires that:
 *  1- AT contain the address of the byte just past the end of the source
 *     of the copy,
 *  2- src_entry <= src < AT, and
 *  3- (dst - src) == (dst_entry - src_entry),
 * The _entry suffix denotes values when __copy_user was called.
 *
 * (1) is set up by uaccess.h and maintained by not writing AT in copy_user
 * (2) is met by incrementing src by the number of bytes copied
 * (3) is met by not doing loads between a pair of increments of dst and src
 *
 * The exception handlers for stores adjust len (if necessary) and return.
 * These handlers do not need to overwrite any data.
 *
 * For __rmemcpy and memmove an exception is always a kernel bug, therefore
 * they're not protected.
 */
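/*
 * Illustration only (not part of the build): the C-level contract the
 * routines below implement, paraphrasing the Spec comment above. A
 * minimal sketch, not an authoritative set of prototypes.
 *
 *	void *memcpy(void *dst, const void *src, size_t len);
 *		// returns dst; buffers must not overlap
 *
 *	// __copy_user reports its result in len (a2) rather than v0:
 *	// 0 on success, else an upper bound on the number of bytes
 *	// left uncopied when a read or write faulted.
 */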
#define EXC(inst_reg,addr,handler)		\
9:	inst_reg, addr;				\
	.section __ex_table,"a";		\
	PTR	9b, handler;			\
	.previous

/*
 * Only on 64-bit kernels can we make use of 64-bit registers.
 */

#define LOAD	ld
#define LOADL	ldl
#define LOADR	ldr
#define STOREL	sdl
#define STORER	sdr
#define STORE	sd
#define ADD	daddu
#define SUB	dsubu
#define SRL	dsrl
#define SRA	dsra
#define SLL	dsll
#define SLLV	dsllv
#define SRLV	dsrlv
#define NBYTES	8
#define LOG_NBYTES	3

/*
 * As we share the code base with the mips32 tree (which uses the o32 ABI
 * register definitions), we need to redefine the register definitions from
 * the n64 ABI register naming to the o32 ABI register naming.
 */
#undef t0
#undef t1
#undef t2
#undef t3
#define t0	$8
#define t1	$9
#define t2	$10
#define t3	$11
#define t4	$12
#define t5	$13
#define t6	$14
#define t7	$15

#ifdef CONFIG_CPU_LITTLE_ENDIAN
#define LDFIRST	LOADR
#define LDREST	LOADL
#define STFIRST	STORER
#define STREST	STOREL
#define SHIFT_DISCARD	SLLV
#else
#define LDFIRST	LOADL
#define LDREST	LOADR
#define STFIRST	STOREL
#define STREST	STORER
#define SHIFT_DISCARD	SRLV
#endif

#define FIRST(unit)	((unit)*NBYTES)
#define REST(unit)	(FIRST(unit)+NBYTES-1)
#define UNIT(unit)	FIRST(unit)

#define ADDRMASK	(NBYTES-1)

	.text
	.set	noreorder
	.set	noat

/*
 * A combined memcpy/__copy_user
 * __copy_user sets len to 0 for success; else to an upper bound of
 * the number of uncopied bytes.
 * memcpy sets v0 to dst.
 */
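/*
 * Rough shape of the dispatch below, as a sketch (the C is illustrative
 * only; the labels are the real ones used in the code):
 *
 *	if (len < NBYTES)	goto copy_bytes_checklen;
 *	if (src & ADDRMASK)	goto src_unaligned;
 *	if (len < 4*NBYTES)	goto less_than_4units;
 *	if (len < 8*NBYTES)	goto less_than_8units;
 *	if (len < 16*NBYTES)	goto cleanup_both_aligned;
 *	// otherwise: unrolled 16-word loop, prefetching 128 or 256
 *	// bytes ahead only when len guarantees those addresses are valid
 */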
	.align	5
LEAF(memcpy)					/* a0=dst a1=src a2=len */
EXPORT_SYMBOL(memcpy)
	move	v0, dst				/* return value */
__memcpy:
FEXPORT(__copy_user)
EXPORT_SYMBOL(__copy_user)
	/*
	 * Note: dst & src may be unaligned, len may be 0
	 * Temps
	 */
	#
	# Octeon doesn't care if the destination is unaligned. The hardware
	# can fix it faster than we can special case the assembly.
	#
	pref	0, 0(src)
	sltu	t0, len, NBYTES		# Check if < 1 word
	bnez	t0, copy_bytes_checklen
	 and	t0, src, ADDRMASK	# Check if src unaligned
	bnez	t0, src_unaligned
	 sltu	t0, len, 4*NBYTES	# Check if < 4 words
	bnez	t0, less_than_4units
	 sltu	t0, len, 8*NBYTES	# Check if < 8 words
	bnez	t0, less_than_8units
	 sltu	t0, len, 16*NBYTES	# Check if < 16 words
	bnez	t0, cleanup_both_aligned
	 sltu	t0, len, 128+1		# Check if len < 129
	bnez	t0, 1f			# Skip prefetch if len is too short
	 sltu	t0, len, 256+1		# Check if len < 257
	bnez	t0, 1f			# Skip prefetch if len is too short
	 pref	0, 128(src)		# We must not prefetch invalid addresses
	#
	# This is where we loop if there are more than 128 bytes left
2:	pref	0, 256(src)		# We must not prefetch invalid addresses
	#
	# This is where we loop if we can't prefetch anymore
1:
EXC(	LOAD	t0, UNIT(0)(src),	l_exc)
EXC(	LOAD	t1, UNIT(1)(src),	l_exc_copy)
EXC(	LOAD	t2, UNIT(2)(src),	l_exc_copy)
EXC(	LOAD	t3, UNIT(3)(src),	l_exc_copy)
	SUB	len, len, 16*NBYTES
EXC(	STORE	t0, UNIT(0)(dst),	s_exc_p16u)
EXC(	STORE	t1, UNIT(1)(dst),	s_exc_p15u)
EXC(	STORE	t2, UNIT(2)(dst),	s_exc_p14u)
EXC(	STORE	t3, UNIT(3)(dst),	s_exc_p13u)
EXC(	LOAD	t0, UNIT(4)(src),	l_exc_copy)
EXC(	LOAD	t1, UNIT(5)(src),	l_exc_copy)
EXC(	LOAD	t2, UNIT(6)(src),	l_exc_copy)
EXC(	LOAD	t3, UNIT(7)(src),	l_exc_copy)
EXC(	STORE	t0, UNIT(4)(dst),	s_exc_p12u)
EXC(	STORE	t1, UNIT(5)(dst),	s_exc_p11u)
EXC(	STORE	t2, UNIT(6)(dst),	s_exc_p10u)
	ADD	src, src, 16*NBYTES
EXC(	STORE	t3, UNIT(7)(dst),	s_exc_p9u)
	ADD	dst, dst, 16*NBYTES
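	#
	# Note (added illustration): src and dst were just advanced by
	# 16*NBYTES, so the second half of this iteration uses negative
	# UNIT offsets. A load fault below must first rewind both
	# pointers by 16*NBYTES (l_exc_copy_rewind16) so the byte-copy
	# fixup in l_exc_copy sees the same invariants as elsewhere.
	#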
EXC(	LOAD	t0, UNIT(-8)(src),	l_exc_copy_rewind16)
EXC(	LOAD	t1, UNIT(-7)(src),	l_exc_copy_rewind16)
EXC(	LOAD	t2, UNIT(-6)(src),	l_exc_copy_rewind16)
EXC(	LOAD	t3, UNIT(-5)(src),	l_exc_copy_rewind16)
EXC(	STORE	t0, UNIT(-8)(dst),	s_exc_p8u)
EXC(	STORE	t1, UNIT(-7)(dst),	s_exc_p7u)
EXC(	STORE	t2, UNIT(-6)(dst),	s_exc_p6u)
EXC(	STORE	t3, UNIT(-5)(dst),	s_exc_p5u)
EXC(	LOAD	t0, UNIT(-4)(src),	l_exc_copy_rewind16)
EXC(	LOAD	t1, UNIT(-3)(src),	l_exc_copy_rewind16)
EXC(	LOAD	t2, UNIT(-2)(src),	l_exc_copy_rewind16)
EXC(	LOAD	t3, UNIT(-1)(src),	l_exc_copy_rewind16)
EXC(	STORE	t0, UNIT(-4)(dst),	s_exc_p4u)
EXC(	STORE	t1, UNIT(-3)(dst),	s_exc_p3u)
EXC(	STORE	t2, UNIT(-2)(dst),	s_exc_p2u)
EXC(	STORE	t3, UNIT(-1)(dst),	s_exc_p1u)
	sltu	t0, len, 256+1		# See if we can prefetch more
	beqz	t0, 2b
	 sltu	t0, len, 128		# See if we can loop one more time
	beqz	t0, 1b
	 nop
	#
	# Jump here if there are less than 16*NBYTES left.
	#
cleanup_both_aligned:
	beqz	len, done
	 sltu	t0, len, 8*NBYTES
	bnez	t0, less_than_8units
	 nop
EXC(	LOAD	t0, UNIT(0)(src),	l_exc)
EXC(	LOAD	t1, UNIT(1)(src),	l_exc_copy)
EXC(	LOAD	t2, UNIT(2)(src),	l_exc_copy)
EXC(	LOAD	t3, UNIT(3)(src),	l_exc_copy)
	SUB	len, len, 8*NBYTES
EXC(	STORE	t0, UNIT(0)(dst),	s_exc_p8u)
EXC(	STORE	t1, UNIT(1)(dst),	s_exc_p7u)
EXC(	STORE	t2, UNIT(2)(dst),	s_exc_p6u)
EXC(	STORE	t3, UNIT(3)(dst),	s_exc_p5u)
EXC(	LOAD	t0, UNIT(4)(src),	l_exc_copy)
EXC(	LOAD	t1, UNIT(5)(src),	l_exc_copy)
EXC(	LOAD	t2, UNIT(6)(src),	l_exc_copy)
EXC(	LOAD	t3, UNIT(7)(src),	l_exc_copy)
EXC(	STORE	t0, UNIT(4)(dst),	s_exc_p4u)
EXC(	STORE	t1, UNIT(5)(dst),	s_exc_p3u)
EXC(	STORE	t2, UNIT(6)(dst),	s_exc_p2u)
EXC(	STORE	t3, UNIT(7)(dst),	s_exc_p1u)
	ADD	src, src, 8*NBYTES
	beqz	len, done
	 ADD	dst, dst, 8*NBYTES
	#
	# Jump here if there are less than 8*NBYTES left.
	#
less_than_8units:
	sltu	t0, len, 4*NBYTES
	bnez	t0, less_than_4units
	 nop
EXC(	LOAD	t0, UNIT(0)(src),	l_exc)
EXC(	LOAD	t1, UNIT(1)(src),	l_exc_copy)
EXC(	LOAD	t2, UNIT(2)(src),	l_exc_copy)
EXC(	LOAD	t3, UNIT(3)(src),	l_exc_copy)
	SUB	len, len, 4*NBYTES
EXC(	STORE	t0, UNIT(0)(dst),	s_exc_p4u)
EXC(	STORE	t1, UNIT(1)(dst),	s_exc_p3u)
EXC(	STORE	t2, UNIT(2)(dst),	s_exc_p2u)
EXC(	STORE	t3, UNIT(3)(dst),	s_exc_p1u)
	ADD	src, src, 4*NBYTES
	beqz	len, done
	 ADD	dst, dst, 4*NBYTES
	#
	# Jump here if there are less than 4*NBYTES left. This means
	# we may need to copy up to 3 NBYTES words.
	#
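	#
	# Equivalent C for this tail, as an illustration only:
	#
	#	while (len >= NBYTES) {		/* at most 3 iterations */
	#		*(u64 *)dst = *(u64 *)src;
	#		src += NBYTES; dst += NBYTES; len -= NBYTES;
	#	}
	#	/* then fall through to the byte-at-a-time loop */
	#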
less_than_4units:
	sltu	t0, len, 1*NBYTES
	bnez	t0, copy_bytes_checklen
	 nop
	#
	# 1) Copy NBYTES, then check length again
	#
EXC(	LOAD	t0, 0(src),		l_exc)
	SUB	len, len, NBYTES
	sltu	t1, len, 8
EXC(	STORE	t0, 0(dst),		s_exc_p1u)
	ADD	src, src, NBYTES
	bnez	t1, copy_bytes_checklen
	 ADD	dst, dst, NBYTES
	#
	# 2) Copy NBYTES, then check length again
	#
EXC(	LOAD	t0, 0(src),		l_exc)
	SUB	len, len, NBYTES
	sltu	t1, len, 8
EXC(	STORE	t0, 0(dst),		s_exc_p1u)
	ADD	src, src, NBYTES
	bnez	t1, copy_bytes_checklen
	 ADD	dst, dst, NBYTES
	#
	# 3) Copy NBYTES, then check length again
	#
EXC(	LOAD	t0, 0(src),		l_exc)
	SUB	len, len, NBYTES
	ADD	src, src, NBYTES
	ADD	dst, dst, NBYTES
	b	copy_bytes_checklen
EXC(	 STORE	t0, -8(dst),		s_exc_p1u)

src_unaligned:
#define rem t8
	SRL	t0, len, LOG_NBYTES+2	# +2 for 4 units/iter
	beqz	t0, cleanup_src_unaligned
	 and	rem, len, (4*NBYTES-1)	# rem = len % 4*NBYTES
1:
/*
 * Avoid consecutive LD*'s to the same register since some mips
 * implementations can't issue them in the same cycle.
 * It's OK to load FIRST(N+1) before REST(N) because the two addresses
 * are to the same unit (unless src is aligned, but it's not).
 */
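/*
 * Background note (added, informational): MIPS ldl/ldr load the two
 * halves of an unaligned doubleword into a single register, so each
 * LDFIRST/LDREST pair below assembles 8 unaligned source bytes that
 * one aligned STORE can then write out. FIRST(N) and REST(N) address
 * the two ends of unit N.
 */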
EXC(	LDFIRST	t0, FIRST(0)(src),	l_exc)
EXC(	LDFIRST	t1, FIRST(1)(src),	l_exc_copy)
	SUB	len, len, 4*NBYTES
EXC(	LDREST	t0, REST(0)(src),	l_exc_copy)
EXC(	LDREST	t1, REST(1)(src),	l_exc_copy)
EXC(	LDFIRST	t2, FIRST(2)(src),	l_exc_copy)
EXC(	LDFIRST	t3, FIRST(3)(src),	l_exc_copy)
EXC(	LDREST	t2, REST(2)(src),	l_exc_copy)
EXC(	LDREST	t3, REST(3)(src),	l_exc_copy)
	ADD	src, src, 4*NBYTES
EXC(	STORE	t0, UNIT(0)(dst),	s_exc_p4u)
EXC(	STORE	t1, UNIT(1)(dst),	s_exc_p3u)
EXC(	STORE	t2, UNIT(2)(dst),	s_exc_p2u)
EXC(	STORE	t3, UNIT(3)(dst),	s_exc_p1u)
	bne	len, rem, 1b
	 ADD	dst, dst, 4*NBYTES

cleanup_src_unaligned:
	beqz	len, done
	 and	rem, len, NBYTES-1	# rem = len % NBYTES
	beq	rem, len, copy_bytes
	 nop
1:
EXC(	LDFIRST	t0, FIRST(0)(src),	l_exc)
EXC(	LDREST	t0, REST(0)(src),	l_exc_copy)
	SUB	len, len, NBYTES
EXC(	STORE	t0, 0(dst),		s_exc_p1u)
	ADD	src, src, NBYTES
	bne	len, rem, 1b
	 ADD	dst, dst, NBYTES

copy_bytes_checklen:
	beqz	len, done
	 nop
copy_bytes:
	/* 0 < len < NBYTES */
#define COPY_BYTE(N)			\
EXC(	lb	t0, N(src), l_exc);	\
	SUB	len, len, 1;		\
	beqz	len, done;		\
EXC(	 sb	t0, N(dst), s_exc_p1)

	COPY_BYTE(0)
	COPY_BYTE(1)
	COPY_BYTE(2)
	COPY_BYTE(3)
	COPY_BYTE(4)
	COPY_BYTE(5)
EXC(	lb	t0, NBYTES-2(src), l_exc)
	SUB	len, len, 1
	jr	ra
EXC(	 sb	t0, NBYTES-2(dst), s_exc_p1)
done:
	jr	ra
	 nop
	END(memcpy)

l_exc_copy_rewind16:
	/* Rewind src and dst by 16*NBYTES for l_exc_copy */
	SUB	src, src, 16*NBYTES
	SUB	dst, dst, 16*NBYTES
l_exc_copy:
	/*
	 * Copy bytes from src until faulting load address (or until a
	 * lb faults)
	 *
	 * When reached by a faulting LDFIRST/LDREST, THREAD_BUADDR($28)
	 * may be more than a byte beyond the last address.
	 * Hence, the lb below may get an exception.
	 *
	 * Assumes src < THREAD_BUADDR($28)
	 */
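	/*
	 * Equivalent logic in C, as a sketch (added illustration; the
	 * field access paraphrases the asm-offsets used below and is an
	 * assumption of this sketch, not authoritative):
	 *
	 *	end = $28->task->thread.bad_uaddr;	// THREAD_BUADDR
	 *	while (src != end)
	 *		*dst++ = *src++;  // the lb may fault again -> l_exc
	 *	len = AT - end;		  // bytes left uncopied (in l_exc)
	 */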
	LOAD	t0, TI_TASK($28)
	LOAD	t0, THREAD_BUADDR(t0)
1:
EXC(	lb	t1, 0(src),	l_exc)
	ADD	src, src, 1
	sb	t1, 0(dst)	# can't fault -- we're copy_from_user
	bne	src, t0, 1b
	 ADD	dst, dst, 1
l_exc:
	LOAD	t0, TI_TASK($28)
	LOAD	t0, THREAD_BUADDR(t0)	# t0 is just past last good address
	SUB	len, AT, t0		# len = number of uncopied bytes
	jr	ra
	 nop


#define SEXC(n)				\
s_exc_p ## n ## u:			\
	jr	ra;			\
	 ADD	len, len, n*NBYTES

SEXC(16)
SEXC(15)
SEXC(14)
SEXC(13)
SEXC(12)
SEXC(11)
SEXC(10)
SEXC(9)
SEXC(8)
SEXC(7)
SEXC(6)
SEXC(5)
SEXC(4)
SEXC(3)
SEXC(2)
SEXC(1)

s_exc_p1:
	jr	ra
	 ADD	len, len, 1
s_exc:
	jr	ra
	 nop

	.align	5
LEAF(memmove)
EXPORT_SYMBOL(memmove)
	ADD	t0, a0, a2
	ADD	t1, a1, a2
	sltu	t0, a1, t0			# dst + len <= src -> memcpy
	sltu	t1, a0, t1			# dst >= src + len -> memcpy
	and	t0, t1
	beqz	t0, __memcpy
	 move	v0, a0				/* return value */
	beqz	a2, r_out
	END(memmove)

	/* fall through to __rmemcpy */
LEAF(__rmemcpy)					/* a0=dst a1=src a2=len */
	sltu	t0, a1, a0
	beqz	t0, r_end_bytes_up		# src >= dst
	 nop
	ADD	a0, a2				# dst = dst + len
	ADD	a1, a2				# src = src + len

r_end_bytes:
	lb	t0, -1(a1)
	SUB	a2, a2, 0x1
	sb	t0, -1(a0)
	SUB	a1, a1, 0x1
	bnez	a2, r_end_bytes
	 SUB	a0, a0, 0x1

r_out:
	jr	ra
	 move	a2, zero

r_end_bytes_up:
	lb	t0, (a1)
	SUB	a2, a2, 0x1
	sb	t0, (a0)
	ADD	a1, a1, 0x1
	bnez	a2, r_end_bytes_up
	 ADD	a0, a0, 0x1

	jr	ra
	 move	a2, zero
	END(__rmemcpy)
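/*
 * Note (added illustration): memmove above branches to __memcpy when
 * the regions do not overlap, i.e. when (src < dst + len) &&
 * (dst < src + len) is false. Otherwise it falls through to __rmemcpy,
 * which copies backwards (r_end_bytes) when src < dst and forwards
 * (r_end_bytes_up) when src >= dst, one byte at a time.
 */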