/*
 * This file is subject to the terms and conditions of the GNU General Public
 * License.  See the file "COPYING" in the main directory of this archive
 * for more details.
 *
 * Unified implementation of memcpy, memmove and the __copy_user backend.
 *
 * Copyright (C) 1998, 99, 2000, 01, 2002 Ralf Baechle (ralf@gnu.org)
 * Copyright (C) 1999, 2000, 01, 2002 Silicon Graphics, Inc.
 * Copyright (C) 2002 Broadcom, Inc.
 *   memcpy/copy_user author: Mark Vandevoorde
 * Copyright (C) 2007  Maciej W. Rozycki
 * Copyright (C) 2014 Imagination Technologies Ltd.
 *
 * Mnemonic names for arguments to memcpy/__copy_user
 */

/*
 * Hack to resolve longstanding prefetch issue
 *
 * Prefetching may be fatal on some systems if we're prefetching beyond the
 * end of memory.  It's also a seriously bad idea on non dma-coherent systems.
 */
#ifdef CONFIG_DMA_NONCOHERENT
#undef CONFIG_CPU_HAS_PREFETCH
#endif
#ifdef CONFIG_MIPS_MALTA
#undef CONFIG_CPU_HAS_PREFETCH
#endif
#ifdef CONFIG_CPU_MIPSR6
#undef CONFIG_CPU_HAS_PREFETCH
#endif

#include <asm/asm.h>
#include <asm/asm-offsets.h>
#include <asm/export.h>
#include <asm/regdef.h>

#define dst a0
#define src a1
#define len a2

/*
 * Spec
 *
 * memcpy copies len bytes from src to dst and sets v0 to dst.
 * It assumes that
 *   - src and dst don't overlap
 *   - src is readable
 *   - dst is writable
 * memcpy uses the standard calling convention
 *
 * __copy_user copies up to len bytes from src to dst and sets a2 (len) to
 * the number of uncopied bytes due to an exception caused by a read or write.
 * __copy_user assumes that src and dst don't overlap, and that the call is
 * implementing one of the following:
 *   copy_to_user
 *     - src is readable  (no exceptions when reading src)
 *   copy_from_user
 *     - dst is writable  (no exceptions when writing dst)
 * __copy_user uses a non-standard calling convention; see
 * include/asm-mips/uaccess.h
 *
 * When an exception happens on a load, the handler must
 * ensure that all of the destination buffer is overwritten to prevent
 * leaking information to user mode programs.
 */
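
/*
 * Illustrative C-level view of the __copy_user contract above (a sketch
 * only, not the real wrapper; the actual callers and their non-standard
 * calling convention live in include/asm-mips/uaccess.h):
 *
 *	size_t not_copied = __copy_user(to, from, n);
 *	// not_copied == 0 on success, otherwise an upper bound on the
 *	// number of bytes that were not transferred
 */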

/*
 * Implementation
 */

/*
 * The exception handler for loads requires that:
 *  1- AT contain the address of the byte just past the end of the source
 *     of the copy,
 *  2- src_entry <= src < AT, and
 *  3- (dst - src) == (dst_entry - src_entry),
 * The _entry suffix denotes values when __copy_user was called.
 *
 * (1) is set up by uaccess.h and maintained by not writing AT in copy_user
 * (2) is met by incrementing src by the number of bytes copied
 * (3) is met by not doing loads between a pair of increments of dst and src
 *
 * The exception handlers for stores adjust len (if necessary) and return.
 * These handlers do not need to overwrite any data.
 *
 * For __rmemcpy and memmove an exception is always a kernel bug, therefore
 * they're not protected.
 */

/* Instruction type */
#define LD_INSN 1
#define ST_INSN 2
/* Prefetch type */
#define SRC_PREFETCH 1
#define DST_PREFETCH 2
#define LEGACY_MODE 1
#define EVA_MODE    2
#define USEROP   1
#define KERNELOP 2

/*
 * Wrapper to add an entry in the exception table
 * in case the insn causes a memory exception.
 * Arguments:
 * insn    : Load/store instruction
 * type    : Instruction type
 * reg     : Register
 * addr    : Address
 * handler : Exception handler
 */

#define EXC(insn, type, reg, addr, handler)			\
	.if \mode == LEGACY_MODE;				\
9:		insn reg, addr;					\
		.section __ex_table,"a";			\
		PTR	9b, handler;				\
		.previous;					\
	/* This is assembled in EVA mode */			\
	.else;							\
		/* If loading from user or storing to user */	\
		.if ((\from == USEROP) && (type == LD_INSN)) || \
		    ((\to == USEROP) && (type == ST_INSN));	\
9:			__BUILD_EVA_INSN(insn##e, reg, addr);	\
			.section __ex_table,"a";		\
			PTR	9b, handler;			\
			.previous;				\
		.else;						\
			/*					\
			 *  Still in EVA, but no need for	\
			 * exception handler or EVA insn	\
			 */					\
			insn reg, addr;				\
		.endif;						\
	.endif

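/*
 * Roughly what EXC() expands to in LEGACY_MODE (an illustrative sketch;
 * the register and handler names are chosen only as an example):
 *
 *	9:	lw	t0, 0(src)
 *		.section __ex_table,"a"
 *		PTR	9b, .Ll_exc		# fault at 9b -> run handler
 *		.previous
 *
 * In EVA_MODE the user-side accesses instead use the EVA variants
 * (lwe, swe, ...) via __BUILD_EVA_INSN.
 */
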
/*
 * Only the 64-bit kernel can make use of 64-bit registers.
 */
#ifdef CONFIG_64BIT
#define USE_DOUBLE
#endif

#ifdef USE_DOUBLE

#define LOADK  ld /* No exception */
#define LOAD(reg, addr, handler)	EXC(ld, LD_INSN, reg, addr, handler)
#define LOADL(reg, addr, handler)	EXC(ldl, LD_INSN, reg, addr, handler)
#define LOADR(reg, addr, handler)	EXC(ldr, LD_INSN, reg, addr, handler)
#define STOREL(reg, addr, handler)	EXC(sdl, ST_INSN, reg, addr, handler)
#define STORER(reg, addr, handler)	EXC(sdr, ST_INSN, reg, addr, handler)
#define STORE(reg, addr, handler)	EXC(sd, ST_INSN, reg, addr, handler)
#define ADD    daddu
#define SUB    dsubu
#define SRL    dsrl
#define SRA    dsra
#define SLL    dsll
#define SLLV   dsllv
#define SRLV   dsrlv
#define NBYTES 8
#define LOG_NBYTES 3

/*
 * As we are sharing code base with the mips32 tree (which uses the o32 ABI
 * register definitions), we need to redefine the register definitions from
 * the n64 ABI register naming to the o32 ABI register naming.
 */
#undef t0
#undef t1
#undef t2
#undef t3
#define t0	$8
#define t1	$9
#define t2	$10
#define t3	$11
#define t4	$12
#define t5	$13
#define t6	$14
#define t7	$15

#else

#define LOADK  lw /* No exception */
#define LOAD(reg, addr, handler)	EXC(lw, LD_INSN, reg, addr, handler)
#define LOADL(reg, addr, handler)	EXC(lwl, LD_INSN, reg, addr, handler)
#define LOADR(reg, addr, handler)	EXC(lwr, LD_INSN, reg, addr, handler)
#define STOREL(reg, addr, handler)	EXC(swl, ST_INSN, reg, addr, handler)
#define STORER(reg, addr, handler)	EXC(swr, ST_INSN, reg, addr, handler)
#define STORE(reg, addr, handler)	EXC(sw, ST_INSN, reg, addr, handler)
#define ADD    addu
#define SUB    subu
#define SRL    srl
#define SLL    sll
#define SRA    sra
#define SLLV   sllv
#define SRLV   srlv
#define NBYTES 4
#define LOG_NBYTES 2

#endif /* USE_DOUBLE */

#define LOADB(reg, addr, handler)	EXC(lb, LD_INSN, reg, addr, handler)
#define STOREB(reg, addr, handler)	EXC(sb, ST_INSN, reg, addr, handler)

#ifdef CONFIG_CPU_HAS_PREFETCH
# define _PREF(hint, addr, type)					\
	.if \mode == LEGACY_MODE;					\
		kernel_pref(hint, addr);				\
	.else;								\
		.if ((\from == USEROP) && (type == SRC_PREFETCH)) ||	\
		    ((\to == USEROP) && (type == DST_PREFETCH));	\
			/*						\
			 * PREFE has only 9 bits for the offset		\
			 * compared to PREF which has 16, so it may	\
			 * need to use the $at register but this	\
			 * register should remain intact because it's	\
			 * used later on. Therefore use $v1.		\
			 */						\
			.set at=v1;					\
			user_pref(hint, addr);				\
			.set noat;					\
		.else;							\
			kernel_pref(hint, addr);			\
		.endif;							\
	.endif
#else
# define _PREF(hint, addr, type)
#endif

#define PREFS(hint, addr) _PREF(hint, addr, SRC_PREFETCH)
#define PREFD(hint, addr) _PREF(hint, addr, DST_PREFETCH)

#ifdef CONFIG_CPU_LITTLE_ENDIAN
#define LDFIRST LOADR
#define LDREST	LOADL
#define STFIRST STORER
#define STREST	STOREL
#define SHIFT_DISCARD SLLV
#else
#define LDFIRST LOADL
#define LDREST	LOADR
#define STFIRST STOREL
#define STREST	STORER
#define SHIFT_DISCARD SRLV
#endif

#define FIRST(unit) ((unit)*NBYTES)
#define REST(unit)  (FIRST(unit)+NBYTES-1)
#define UNIT(unit)  FIRST(unit)

#define ADDRMASK (NBYTES-1)

	.text
	.set	noreorder
#ifndef CONFIG_CPU_DADDI_WORKAROUNDS
	.set	noat
#else
	.set	at=v1
#endif

	.align	5

	/*
	 * Macro to build the __copy_user common code
	 * Arguments:
	 * mode : LEGACY_MODE or EVA_MODE
	 * from : Source operand. USEROP or KERNELOP
	 * to   : Destination operand. USEROP or KERNELOP
	 */
	.macro __BUILD_COPY_USER mode, from, to

	/* initialize __memcpy if this is the first time we execute this macro */
	.ifnotdef __memcpy
	.set __memcpy, 1
	.hidden __memcpy /* make sure it does not leak */
	.endif

	/*
	 * Note: dst & src may be unaligned, len may be 0
	 * Temps
	 */
#define rem t8

	R10KCBARRIER(0(ra))
	/*
	 * The "issue break"s below are very approximate.
	 * Issue delays for dcache fills will perturb the schedule, as will
	 * load queue full replay traps, etc.
	 *
	 * If len < NBYTES use byte operations.
	 */
	PREFS(	0, 0(src) )
	PREFD(	1, 0(dst) )
	sltu	t2, len, NBYTES
	and	t1, dst, ADDRMASK
	PREFS(	0, 1*32(src) )
	PREFD(	1, 1*32(dst) )
	bnez	t2, .Lcopy_bytes_checklen\@
	and	t0, src, ADDRMASK
	PREFS(	0, 2*32(src) )
	PREFD(	1, 2*32(dst) )
#ifndef CONFIG_CPU_NO_LOAD_STORE_LR
	bnez	t1, .Ldst_unaligned\@
	nop
	bnez	t0, .Lsrc_unaligned_dst_aligned\@
#else /* CONFIG_CPU_NO_LOAD_STORE_LR */
	or	t0, t0, t1
	bnez	t0, .Lcopy_unaligned_bytes\@
#endif /* CONFIG_CPU_NO_LOAD_STORE_LR */
	/*
	 * use delay slot for fall-through
	 * src and dst are aligned; need to compute rem
	 */
.Lboth_aligned\@:
	SRL	t0, len, LOG_NBYTES+3		# +3 for 8 units/iter
	beqz	t0, .Lcleanup_both_aligned\@	# len < 8*NBYTES
	and	rem, len, (8*NBYTES-1)		# rem = len % (8*NBYTES)
	PREFS(	0, 3*32(src) )
	PREFD(	1, 3*32(dst) )
	.align	4
1:
	R10KCBARRIER(0(ra))
	LOAD(t0, UNIT(0)(src), .Ll_exc\@)
	LOAD(t1, UNIT(1)(src), .Ll_exc_copy\@)
	LOAD(t2, UNIT(2)(src), .Ll_exc_copy\@)
	LOAD(t3, UNIT(3)(src), .Ll_exc_copy\@)
	SUB	len, len, 8*NBYTES
	LOAD(t4, UNIT(4)(src), .Ll_exc_copy\@)
	LOAD(t7, UNIT(5)(src), .Ll_exc_copy\@)
	STORE(t0, UNIT(0)(dst),	.Ls_exc_p8u\@)
	STORE(t1, UNIT(1)(dst),	.Ls_exc_p7u\@)
	LOAD(t0, UNIT(6)(src), .Ll_exc_copy\@)
	LOAD(t1, UNIT(7)(src), .Ll_exc_copy\@)
	ADD	src, src, 8*NBYTES
	ADD	dst, dst, 8*NBYTES
	STORE(t2, UNIT(-6)(dst), .Ls_exc_p6u\@)
	STORE(t3, UNIT(-5)(dst), .Ls_exc_p5u\@)
	STORE(t4, UNIT(-4)(dst), .Ls_exc_p4u\@)
	STORE(t7, UNIT(-3)(dst), .Ls_exc_p3u\@)
	STORE(t0, UNIT(-2)(dst), .Ls_exc_p2u\@)
	STORE(t1, UNIT(-1)(dst), .Ls_exc_p1u\@)
	PREFS(	0, 8*32(src) )
	PREFD(	1, 8*32(dst) )
	bne	len, rem, 1b
	nop

	/*
	 * len == rem == the number of bytes left to copy < 8*NBYTES
	 */
.Lcleanup_both_aligned\@:
	beqz	len, .Ldone\@
	sltu	t0, len, 4*NBYTES
	bnez	t0, .Lless_than_4units\@
	and	rem, len, (NBYTES-1)	# rem = len % NBYTES
	/*
	 * len >= 4*NBYTES
	 */
	LOAD( t0, UNIT(0)(src),	.Ll_exc\@)
	LOAD( t1, UNIT(1)(src),	.Ll_exc_copy\@)
	LOAD( t2, UNIT(2)(src),	.Ll_exc_copy\@)
	LOAD( t3, UNIT(3)(src),	.Ll_exc_copy\@)
	SUB	len, len, 4*NBYTES
	ADD	src, src, 4*NBYTES
	R10KCBARRIER(0(ra))
	STORE(t0, UNIT(0)(dst),	.Ls_exc_p4u\@)
	STORE(t1, UNIT(1)(dst),	.Ls_exc_p3u\@)
	STORE(t2, UNIT(2)(dst),	.Ls_exc_p2u\@)
	STORE(t3, UNIT(3)(dst),	.Ls_exc_p1u\@)
	.set	reorder				/* DADDI_WAR */
	ADD	dst, dst, 4*NBYTES
	beqz	len, .Ldone\@
	.set	noreorder
.Lless_than_4units\@:
	/*
	 * rem = len % NBYTES
	 */
	beq	rem, len, .Lcopy_bytes\@
	nop
1:
	R10KCBARRIER(0(ra))
	LOAD(t0, 0(src), .Ll_exc\@)
	ADD	src, src, NBYTES
	SUB	len, len, NBYTES
	STORE(t0, 0(dst), .Ls_exc_p1u\@)
	.set	reorder				/* DADDI_WAR */
	ADD	dst, dst, NBYTES
	bne	rem, len, 1b
	.set	noreorder

#ifndef CONFIG_CPU_NO_LOAD_STORE_LR
	/*
	 * src and dst are aligned, need to copy rem bytes (rem < NBYTES)
	 * A loop would do only a byte at a time with possible branch
	 * mispredicts.  Can't do an explicit LOAD dst,mask,or,STORE
	 * because can't assume read-access to dst.  Instead, use
	 * STREST dst, which doesn't require read access to dst.
	 *
	 * This code should perform better than a simple loop on modern,
	 * wide-issue mips processors because the code has fewer branches and
	 * more instruction-level parallelism.
	 */
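	/*
	 * Example (a sketch of the trick above): with NBYTES == 4 and
	 * len == 3, 'bits' below becomes 8, SHIFT_DISCARD drops the one
	 * unwanted byte from t0, and STREST at dst+len-1 then stores just
	 * the remaining three bytes -- dst itself is never read.
	 */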
#define bits t2
	beqz	len, .Ldone\@
	ADD	t1, dst, len	# t1 is just past last byte of dst
	li	bits, 8*NBYTES
	SLL	rem, len, 3	# rem = number of bits to keep
	LOAD(t0, 0(src), .Ll_exc\@)
	SUB	bits, bits, rem	# bits = number of bits to discard
	SHIFT_DISCARD t0, t0, bits
	STREST(t0, -1(t1), .Ls_exc\@)
	jr	ra
	move	len, zero
.Ldst_unaligned\@:
	/*
	 * dst is unaligned
	 * t0 = src & ADDRMASK
	 * t1 = dst & ADDRMASK; T1 > 0
	 * len >= NBYTES
	 *
	 * Copy enough bytes to align dst
	 * Set match = (src and dst have same alignment)
	 */
#define match rem
	LDFIRST(t3, FIRST(0)(src), .Ll_exc\@)
	ADD	t2, zero, NBYTES
	LDREST(t3, REST(0)(src), .Ll_exc_copy\@)
	SUB	t2, t2, t1	# t2 = number of bytes copied
	xor	match, t0, t1
	R10KCBARRIER(0(ra))
	STFIRST(t3, FIRST(0)(dst), .Ls_exc\@)
	beq	len, t2, .Ldone\@
	SUB	len, len, t2
	ADD	dst, dst, t2
	beqz	match, .Lboth_aligned\@
	ADD	src, src, t2

.Lsrc_unaligned_dst_aligned\@:
	SRL	t0, len, LOG_NBYTES+2	# +2 for 4 units/iter
	PREFS(	0, 3*32(src) )
	beqz	t0, .Lcleanup_src_unaligned\@
	and	rem, len, (4*NBYTES-1)	# rem = len % 4*NBYTES
	PREFD(	1, 3*32(dst) )
1:
/*
 * Avoid consecutive LD*'s to the same register since some mips
 * implementations can't issue them in the same cycle.
 * It's OK to load FIRST(N+1) before REST(N) because the two addresses
 * are to the same unit (unless src is aligned, but it's not).
 */
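/*
 * Each LDFIRST/LDREST pair below assembles one unaligned NBYTES-sized
 * word from src (via the lwl/lwr or ldl/ldr pair selected above); the
 * aligned STOREs then write each word out whole.
 */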
	R10KCBARRIER(0(ra))
	LDFIRST(t0, FIRST(0)(src), .Ll_exc\@)
	LDFIRST(t1, FIRST(1)(src), .Ll_exc_copy\@)
	SUB	len, len, 4*NBYTES
	LDREST(t0, REST(0)(src), .Ll_exc_copy\@)
	LDREST(t1, REST(1)(src), .Ll_exc_copy\@)
	LDFIRST(t2, FIRST(2)(src), .Ll_exc_copy\@)
	LDFIRST(t3, FIRST(3)(src), .Ll_exc_copy\@)
	LDREST(t2, REST(2)(src), .Ll_exc_copy\@)
	LDREST(t3, REST(3)(src), .Ll_exc_copy\@)
	PREFS(	0, 9*32(src) )		# 0 is PREF_LOAD  (not streamed)
	ADD	src, src, 4*NBYTES
#ifdef CONFIG_CPU_SB1
	nop				# improves slotting
#endif
	STORE(t0, UNIT(0)(dst),	.Ls_exc_p4u\@)
	STORE(t1, UNIT(1)(dst),	.Ls_exc_p3u\@)
	STORE(t2, UNIT(2)(dst),	.Ls_exc_p2u\@)
	STORE(t3, UNIT(3)(dst),	.Ls_exc_p1u\@)
	PREFD(	1, 9*32(dst) )		# 1 is PREF_STORE (not streamed)
	.set	reorder				/* DADDI_WAR */
	ADD	dst, dst, 4*NBYTES
	bne	len, rem, 1b
	.set	noreorder

.Lcleanup_src_unaligned\@:
	beqz	len, .Ldone\@
	and	rem, len, NBYTES-1	# rem = len % NBYTES
	beq	rem, len, .Lcopy_bytes\@
	nop
1:
	R10KCBARRIER(0(ra))
	LDFIRST(t0, FIRST(0)(src), .Ll_exc\@)
	LDREST(t0, REST(0)(src), .Ll_exc_copy\@)
	ADD	src, src, NBYTES
	SUB	len, len, NBYTES
	STORE(t0, 0(dst), .Ls_exc_p1u\@)
	.set	reorder				/* DADDI_WAR */
	ADD	dst, dst, NBYTES
	bne	len, rem, 1b
	.set	noreorder

#endif /* !CONFIG_CPU_NO_LOAD_STORE_LR */
.Lcopy_bytes_checklen\@:
	beqz	len, .Ldone\@
	nop
.Lcopy_bytes\@:
	/* 0 < len < NBYTES */
	R10KCBARRIER(0(ra))
#define COPY_BYTE(N)			\
	LOADB(t0, N(src), .Ll_exc\@);	\
	SUB	len, len, 1;		\
	beqz	len, .Ldone\@;		\
	STOREB(t0, N(dst), .Ls_exc_p1\@)

	COPY_BYTE(0)
	COPY_BYTE(1)
#ifdef USE_DOUBLE
	COPY_BYTE(2)
	COPY_BYTE(3)
	COPY_BYTE(4)
	COPY_BYTE(5)
#endif
	LOADB(t0, NBYTES-2(src), .Ll_exc\@)
	SUB	len, len, 1
	jr	ra
	STOREB(t0, NBYTES-2(dst), .Ls_exc_p1\@)
.Ldone\@:
	jr	ra
	nop

#ifdef CONFIG_CPU_NO_LOAD_STORE_LR
.Lcopy_unaligned_bytes\@:
1:
	COPY_BYTE(0)
	COPY_BYTE(1)
	COPY_BYTE(2)
	COPY_BYTE(3)
	COPY_BYTE(4)
	COPY_BYTE(5)
	COPY_BYTE(6)
	COPY_BYTE(7)
	ADD	src, src, 8
	b	1b
	ADD	dst, dst, 8
#endif /* CONFIG_CPU_NO_LOAD_STORE_LR */
	.if __memcpy == 1
	END(memcpy)
	.set __memcpy, 0
	.hidden __memcpy
	.endif

.Ll_exc_copy\@:
	/*
	 * Copy bytes from src until faulting load address (or until a
	 * lb faults)
	 *
	 * When reached by a faulting LDFIRST/LDREST, THREAD_BUADDR($28)
	 * may be more than a byte beyond the last address.
	 * Hence, the lb below may get an exception.
	 *
	 * Assumes src < THREAD_BUADDR($28)
	 */
	LOADK	t0, TI_TASK($28)
	nop
	LOADK	t0, THREAD_BUADDR(t0)
1:
	LOADB(t1, 0(src), .Ll_exc\@)
	ADD	src, src, 1
	sb	t1, 0(dst)	# can't fault -- we're copy_from_user
	.set	reorder				/* DADDI_WAR */
	ADD	dst, dst, 1
	bne	src, t0, 1b
	.set	noreorder
.Ll_exc\@:
	LOADK	t0, TI_TASK($28)
	nop
	LOADK	t0, THREAD_BUADDR(t0)	# t0 is just past last good address
	nop
	SUB	len, AT, t0		# len number of uncopied bytes
	jr	ra
	nop

#define SEXC(n)							\
	.set	reorder;			/* DADDI_WAR */	\
.Ls_exc_p ## n ## u\@:						\
	ADD	len, len, n*NBYTES;				\
	jr	ra;						\
	.set	noreorder

SEXC(8)
SEXC(7)
SEXC(6)
SEXC(5)
SEXC(4)
SEXC(3)
SEXC(2)
SEXC(1)

.Ls_exc_p1\@:
	.set	reorder				/* DADDI_WAR */
	ADD	len, len, 1
	jr	ra
	.set	noreorder
.Ls_exc\@:
	jr	ra
	nop
	.endm

#ifndef CONFIG_HAVE_PLAT_MEMCPY
	.align	5
LEAF(memmove)
EXPORT_SYMBOL(memmove)
	ADD	t0, a0, a2
	ADD	t1, a1, a2
	sltu	t0, a1, t0			# dst + len <= src -> memcpy
	sltu	t1, a0, t1			# dst >= src + len -> memcpy
	and	t0, t1
	beqz	t0, .L__memcpy
	move	v0, a0				/* return value */
	beqz	a2, .Lr_out
	END(memmove)
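
/*
 * C-level sketch of the dispatch above (illustrative only, not the real
 * wrapper):
 *
 *	if (dst + len <= src || src + len <= dst)
 *		return memcpy(dst, src, len);	// regions don't overlap
 *	// otherwise fall through to __rmemcpy, which copies backwards
 *	// when dst > src, so overlapping regions are handled safely
 */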
	/* fall through to __rmemcpy */
LEAF(__rmemcpy)					/* a0=dst a1=src a2=len */
	sltu	t0, a1, a0
	beqz	t0, .Lr_end_bytes_up		# src >= dst
	nop
	ADD	a0, a2				# dst = dst + len
	ADD	a1, a2				# src = src + len

.Lr_end_bytes:
	R10KCBARRIER(0(ra))
	lb	t0, -1(a1)
	SUB	a2, a2, 0x1
	sb	t0, -1(a0)
	SUB	a1, a1, 0x1
	.set	reorder				/* DADDI_WAR */
	SUB	a0, a0, 0x1
	bnez	a2, .Lr_end_bytes
	.set	noreorder

.Lr_out:
	jr	ra
	move	a2, zero

.Lr_end_bytes_up:
	R10KCBARRIER(0(ra))
	lb	t0, (a1)
	SUB	a2, a2, 0x1
	sb	t0, (a0)
	ADD	a1, a1, 0x1
	.set	reorder				/* DADDI_WAR */
	ADD	a0, a0, 0x1
	bnez	a2, .Lr_end_bytes_up
	.set	noreorder

	jr	ra
	move	a2, zero
	END(__rmemcpy)

/*
 * A combined memcpy/__copy_user
 * __copy_user sets len to 0 for success; else to an upper bound of
 * the number of uncopied bytes.
 * memcpy sets v0 to dst.
 */
	.align	5
LEAF(memcpy)					/* a0=dst a1=src a2=len */
EXPORT_SYMBOL(memcpy)
	move	v0, dst				/* return value */
.L__memcpy:
FEXPORT(__copy_user)
EXPORT_SYMBOL(__copy_user)
	/* Legacy Mode, user <-> user */
	__BUILD_COPY_USER LEGACY_MODE USEROP USEROP

#endif

#ifdef CONFIG_EVA

/*
 * For EVA we need distinct symbols for reading and writing to user space.
 * This is because we need to use specific EVA instructions to perform the
 * virtual <-> physical translation when a virtual address is actually in user
 * space.
 */

/*
 * __copy_from_user (EVA)
 */

LEAF(__copy_from_user_eva)
EXPORT_SYMBOL(__copy_from_user_eva)
	__BUILD_COPY_USER EVA_MODE USEROP KERNELOP
END(__copy_from_user_eva)



/*
 * __copy_to_user (EVA)
 */

LEAF(__copy_to_user_eva)
EXPORT_SYMBOL(__copy_to_user_eva)
__BUILD_COPY_USER EVA_MODE KERNELOP USEROP
END(__copy_to_user_eva)

/*
 * __copy_in_user (EVA)
 */

LEAF(__copy_in_user_eva)
EXPORT_SYMBOL(__copy_in_user_eva)
__BUILD_COPY_USER EVA_MODE USEROP USEROP
END(__copy_in_user_eva)

#endif