/* SPDX-License-Identifier: GPL-2.0-or-later */
/*
 * User Space Access Routines
 *
 * Copyright (C) 2000-2002 Hewlett-Packard (John Marvin)
 * Copyright (C) 2000 Richard Hirst <rhirst with parisc-linux.org>
 * Copyright (C) 2001 Matthieu Delahaye <delahaym at esiee.fr>
 * Copyright (C) 2003 Randolph Chung <tausq with parisc-linux.org>
 * Copyright (C) 2017 Helge Deller <deller@gmx.de>
 * Copyright (C) 2017 John David Anglin <dave.anglin@bell.net>
 */

/*
 * These routines still have plenty of room for optimization
 * (word & doubleword load/store, dual issue, store hints, etc.).
 */

/*
 * The following routines assume that space register 3 (sr3) contains
 * the space id associated with the current user's address space.
 */


	.text

#include <asm/assembly.h>
#include <asm/errno.h>
#include <linux/linkage.h>

	/*
	 * get_sr gets the appropriate space value into
	 * sr1 for kernel/user space access, depending
	 * on the flag stored in the task structure.
	 */

	.macro	get_sr
	mfctl	%cr30,%r1
	ldw	TI_SEGMENT(%r1),%r22
	mfsp	%sr3,%r1
	or,<>	%r22,%r0,%r0
	copy	%r0,%r1
	mtsp	%r1,%sr1
	.endm
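
	/*
	 * Note on the macro above: the "or,<>" tests the segment flag
	 * in %r22 and nullifies the following "copy" when the flag is
	 * non-zero (a user segment), so %r1 keeps the space id just
	 * read from sr3.  For a kernel segment the copy executes and
	 * mtsp loads sr1 with space 0.
	 */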

	/*
	 * unsigned long lclear_user(void *to, unsigned long n)
	 *
	 * Returns 0 for success;
	 * otherwise, returns the number of bytes not transferred.
	 */

ENTRY_CFI(lclear_user)
	comib,=,n	0,%r25,$lclu_done
	get_sr
$lclu_loop:
	addib,<>	-1,%r25,$lclu_loop
1:	stbs,ma		%r0,1(%sr1,%r26)

$lclu_done:
	bv		%r0(%r2)
	copy		%r25,%r28
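
	/*
	 * Fault fixup: when the store at 1: faults, the addib in the
	 * loop has already decremented %r25, so add that byte back
	 * before returning the count of bytes not cleared.
	 */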
2:	b		$lclu_done
	ldo		1(%r25),%r25

	ASM_EXCEPTIONTABLE_ENTRY(1b,2b)
ENDPROC_CFI(lclear_user)


	/*
	 * long lstrnlen_user(char *s, long n)
	 *
	 * Returns 0 if an exception occurs before a zero byte or N is reached,
	 *         N+1 if N would be exceeded,
	 *         else strlen + 1 (i.e. includes the zero byte).
	 */

ENTRY_CFI(lstrnlen_user)
	comib,=		0,%r25,$lslen_nzero
	copy		%r26,%r24
	get_sr
1:	ldbs,ma		1(%sr1,%r26),%r1
$lslen_loop:
	comib,=,n	0,%r1,$lslen_done
	addib,<>	-1,%r25,$lslen_loop
2:	ldbs,ma		1(%sr1,%r26),%r1
$lslen_done:
	bv		%r0(%r2)
	sub		%r26,%r24,%r28

$lslen_nzero:
	b		$lslen_done
	ldo		1(%r26),%r26	/* special case for N == 0 */

3:	b		$lslen_done
	copy		%r24,%r26	/* reset r26 so 0 is returned on fault */

	ASM_EXCEPTIONTABLE_ENTRY(1b,3b)
	ASM_EXCEPTIONTABLE_ENTRY(2b,3b)

ENDPROC_CFI(lstrnlen_user)


/*
 * unsigned long pa_memcpy(void *dstp, const void *srcp, unsigned long len)
 *
 * Inputs:
 * - sr1 already contains the space of the source region
 * - sr2 already contains the space of the destination region
 *
 * Returns:
 * - number of bytes that could not be copied.
 *   On success, this will be zero.
 *
 * This code is based on a C implementation of a copy routine written by
 * Randolph Chung, which in turn was derived from glibc.
 *
 * Several strategies are used to get the best performance under various
 * conditions. In the optimal case, we copy in loops that move 32 or 16 bytes
 * at a time using general registers.  Unaligned copies are handled either by
 * aligning the destination and then using a shift-and-write method, or in a
 * few cases by falling back to a byte-at-a-time copy.
 *
 * Testing with various alignments and buffer sizes shows that this code is
 * often >10x faster than a simple byte-at-a-time copy, even for strangely
 * aligned operands. It is interesting to note that the glibc version of memcpy
 * (written in C) is actually quite fast already. This routine is able to beat
 * it by 30-40% for aligned copies because of the loop unrolling, but in some
 * cases the glibc version is still slightly faster. This lends more
 * credibility to the idea that gcc can generate very good code as long as we
 * are careful.
 *
 * Possible optimizations:
 * - add cache prefetching
 * - try not to use the post-increment address modifiers; they may create
 *   additional interlocks. The assumption is that those were only efficient
 *   on old machines (pre-PA8000 processors).
 */
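
/*
 * Explanatory sketch (not from the original source): in rough C, and
 * assuming 32-bit big-endian words, the shift-and-write path below
 * merges adjacent source words with a funnel shift so that every store
 * to the word-aligned dst is a full word.  The names "off", "ws", "wd",
 * "words", "prev" and "cur" are illustrative only; "off" is non-zero on
 * this path, since the equal-alignment case was handled earlier:
 *
 *	unsigned long off = (unsigned long)src & 3;	// byte misalignment
 *	unsigned int *ws = (unsigned int *)((unsigned long)src & ~3UL);
 *	unsigned int *wd = (unsigned int *)dst;		// already word-aligned
 *	unsigned int prev = *ws++, cur;
 *
 *	while (words--) {
 *		cur = *ws++;
 *		// shrpw equivalent: top bytes from prev, the rest from cur
 *		*wd++ = (prev << (8 * off)) | (cur >> (32 - 8 * off));
 *		prev = cur;
 *	}
 *
 * The assembly unrolls this four words per iteration and enters the
 * unrolled body at .Lcase0-.Lcase3 according to (words mod 4), so the
 * rotation through registers a0..a3 stays consistent.
 */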

	dst = arg0
	src = arg1
	len = arg2
	end = arg3
	t1  = r19
	t2  = r20
	t3  = r21
	t4  = r22
	srcspc = sr1
	dstspc = sr2

	t0 = r1
	a1 = t1
	a2 = t2
	a3 = t3
	a0 = t4

	save_src = ret0
	save_dst = ret1
	save_len = r31

ENTRY_CFI(pa_memcpy)
	/* Last destination address */
	add	dst,len,end

	/* short copy with less than 16 bytes? */
	cmpib,COND(>>=),n 15,len,.Lbyte_loop

	/* same alignment? */
	xor	src,dst,t0
	extru	t0,31,2,t1
	cmpib,<>,n	0,t1,.Lunaligned_copy

#ifdef CONFIG_64BIT
	/* only do 64-bit copies if we can get aligned. */
	extru	t0,31,3,t1
	cmpib,<>,n	0,t1,.Lalign_loop32

	/* loop until we are 64-bit aligned */
.Lalign_loop64:
	extru	dst,31,3,t1
	cmpib,=,n	0,t1,.Lcopy_loop_16_start
20:	ldb,ma	1(srcspc,src),t1
21:	stb,ma	t1,1(dstspc,dst)
	b	.Lalign_loop64
	ldo	-1(len),len

	ASM_EXCEPTIONTABLE_ENTRY(20b,.Lcopy_done)
	ASM_EXCEPTIONTABLE_ENTRY(21b,.Lcopy_done)
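
	/*
	 * Note: the loop below copies 32 bytes per iteration as four
	 * doubleword load/store pairs.  If the second load of a pair
	 * (11: or 15:) faults, t1 has been loaded but not yet stored;
	 * .Lcopy16_fault stores it first so the returned count of
	 * uncopied bytes stays exact.
	 */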
.Lcopy_loop_16_start:
	ldi	31,t0
.Lcopy_loop_16:
	cmpb,COND(>>=),n t0,len,.Lword_loop

10:	ldd	0(srcspc,src),t1
11:	ldd	8(srcspc,src),t2
	ldo	16(src),src
12:	std,ma	t1,8(dstspc,dst)
13:	std,ma	t2,8(dstspc,dst)
14:	ldd	0(srcspc,src),t1
15:	ldd	8(srcspc,src),t2
	ldo	16(src),src
16:	std,ma	t1,8(dstspc,dst)
17:	std,ma	t2,8(dstspc,dst)

	ASM_EXCEPTIONTABLE_ENTRY(10b,.Lcopy_done)
	ASM_EXCEPTIONTABLE_ENTRY(11b,.Lcopy16_fault)
	ASM_EXCEPTIONTABLE_ENTRY(12b,.Lcopy_done)
	ASM_EXCEPTIONTABLE_ENTRY(13b,.Lcopy_done)
	ASM_EXCEPTIONTABLE_ENTRY(14b,.Lcopy_done)
	ASM_EXCEPTIONTABLE_ENTRY(15b,.Lcopy16_fault)
	ASM_EXCEPTIONTABLE_ENTRY(16b,.Lcopy_done)
	ASM_EXCEPTIONTABLE_ENTRY(17b,.Lcopy_done)

	b	.Lcopy_loop_16
	ldo	-32(len),len

.Lword_loop:
	cmpib,COND(>>=),n 3,len,.Lbyte_loop
20:	ldw,ma	4(srcspc,src),t1
21:	stw,ma	t1,4(dstspc,dst)
	b	.Lword_loop
	ldo	-4(len),len

	ASM_EXCEPTIONTABLE_ENTRY(20b,.Lcopy_done)
	ASM_EXCEPTIONTABLE_ENTRY(21b,.Lcopy_done)

#endif /* CONFIG_64BIT */

	/* loop until we are 32-bit aligned */
.Lalign_loop32:
	extru	dst,31,2,t1
	cmpib,=,n	0,t1,.Lcopy_loop_8
20:	ldb,ma	1(srcspc,src),t1
21:	stb,ma	t1,1(dstspc,dst)
	b	.Lalign_loop32
	ldo	-1(len),len

	ASM_EXCEPTIONTABLE_ENTRY(20b,.Lcopy_done)
	ASM_EXCEPTIONTABLE_ENTRY(21b,.Lcopy_done)


.Lcopy_loop_8:
	cmpib,COND(>>=),n 15,len,.Lbyte_loop

10:	ldw	0(srcspc,src),t1
11:	ldw	4(srcspc,src),t2
12:	stw,ma	t1,4(dstspc,dst)
13:	stw,ma	t2,4(dstspc,dst)
14:	ldw	8(srcspc,src),t1
15:	ldw	12(srcspc,src),t2
	ldo	16(src),src
16:	stw,ma	t1,4(dstspc,dst)
17:	stw,ma	t2,4(dstspc,dst)

	ASM_EXCEPTIONTABLE_ENTRY(10b,.Lcopy_done)
	ASM_EXCEPTIONTABLE_ENTRY(11b,.Lcopy8_fault)
	ASM_EXCEPTIONTABLE_ENTRY(12b,.Lcopy_done)
	ASM_EXCEPTIONTABLE_ENTRY(13b,.Lcopy_done)
	ASM_EXCEPTIONTABLE_ENTRY(14b,.Lcopy_done)
	ASM_EXCEPTIONTABLE_ENTRY(15b,.Lcopy8_fault)
	ASM_EXCEPTIONTABLE_ENTRY(16b,.Lcopy_done)
	ASM_EXCEPTIONTABLE_ENTRY(17b,.Lcopy_done)

	b	.Lcopy_loop_8
	ldo	-16(len),len

.Lbyte_loop:
	cmpclr,COND(<>)	len,%r0,%r0
	b,n	.Lcopy_done
20:	ldb	0(srcspc,src),t1
	ldo	1(src),src
21:	stb,ma	t1,1(dstspc,dst)
	b	.Lbyte_loop
	ldo	-1(len),len

	ASM_EXCEPTIONTABLE_ENTRY(20b,.Lcopy_done)
	ASM_EXCEPTIONTABLE_ENTRY(21b,.Lcopy_done)

.Lcopy_done:
	bv	%r0(%r2)
	sub	end,dst,ret0


	/* src and dst are not aligned the same way. */
	/* We need to go the hard way. */
.Lunaligned_copy:
	/* align until dst is 32-bit word-aligned */
	extru	dst,31,2,t1
	cmpib,=,n	0,t1,.Lcopy_dstaligned
20:	ldb	0(srcspc,src),t1
	ldo	1(src),src
21:	stb,ma	t1,1(dstspc,dst)
	b	.Lunaligned_copy
	ldo	-1(len),len

	ASM_EXCEPTIONTABLE_ENTRY(20b,.Lcopy_done)
	ASM_EXCEPTIONTABLE_ENTRY(21b,.Lcopy_done)

.Lcopy_dstaligned:

	/* store src, dst and len in a safe place */
	copy	src,save_src
	copy	dst,save_dst
	copy	len,save_len

	/* len now needs to hold the number of words to copy */
	SHRREG	len,2,len

	/*
	 * Copy from a not-aligned src to an aligned dst using shifts.
	 * Handles 4 words per loop.
	 */

	depw,z	src,28,2,t0
	subi	32,t0,t0
	mtsar	t0
	extru	len,31,2,t0
	cmpib,=	2,t0,.Lcase2
	/* Make src aligned by rounding it down. */
	depi	0,31,2,src

	cmpiclr,<>	3,t0,%r0
	b,n	.Lcase3
	cmpiclr,<>	1,t0,%r0
	b,n	.Lcase1
.Lcase0:
	cmpb,COND(=)	%r0,len,.Lcda_finish
	nop

1:	ldw,ma	4(srcspc,src), a3
	ASM_EXCEPTIONTABLE_ENTRY(1b,.Lcda_rdfault)
1:	ldw,ma	4(srcspc,src), a0
	ASM_EXCEPTIONTABLE_ENTRY(1b,.Lcda_rdfault)
	b,n	.Ldo3
.Lcase1:
1:	ldw,ma	4(srcspc,src), a2
	ASM_EXCEPTIONTABLE_ENTRY(1b,.Lcda_rdfault)
1:	ldw,ma	4(srcspc,src), a3
	ASM_EXCEPTIONTABLE_ENTRY(1b,.Lcda_rdfault)
	ldo	-1(len),len
	cmpb,COND(=),n	%r0,len,.Ldo0
.Ldo4:
1:	ldw,ma	4(srcspc,src), a0
	ASM_EXCEPTIONTABLE_ENTRY(1b,.Lcda_rdfault)
	shrpw	a2, a3, %sar, t0
1:	stw,ma	t0, 4(dstspc,dst)
	ASM_EXCEPTIONTABLE_ENTRY(1b,.Lcopy_done)
.Ldo3:
1:	ldw,ma	4(srcspc,src), a1
	ASM_EXCEPTIONTABLE_ENTRY(1b,.Lcda_rdfault)
	shrpw	a3, a0, %sar, t0
1:	stw,ma	t0, 4(dstspc,dst)
	ASM_EXCEPTIONTABLE_ENTRY(1b,.Lcopy_done)
.Ldo2:
1:	ldw,ma	4(srcspc,src), a2
	ASM_EXCEPTIONTABLE_ENTRY(1b,.Lcda_rdfault)
	shrpw	a0, a1, %sar, t0
1:	stw,ma	t0, 4(dstspc,dst)
	ASM_EXCEPTIONTABLE_ENTRY(1b,.Lcopy_done)
.Ldo1:
1:	ldw,ma	4(srcspc,src), a3
	ASM_EXCEPTIONTABLE_ENTRY(1b,.Lcda_rdfault)
	shrpw	a1, a2, %sar, t0
1:	stw,ma	t0, 4(dstspc,dst)
	ASM_EXCEPTIONTABLE_ENTRY(1b,.Lcopy_done)
	ldo	-4(len),len
	cmpb,COND(<>)	%r0,len,.Ldo4
	nop
.Ldo0:
	shrpw	a2, a3, %sar, t0
1:	stw,ma	t0, 4(dstspc,dst)
	ASM_EXCEPTIONTABLE_ENTRY(1b,.Lcopy_done)
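
	/*
	 * Note: both read faults in the shift loop above and the normal
	 * loop exit land here.  The number of bytes already stored is
	 * dst - save_dst; src and len are rebuilt from it so that
	 * .Lbyte_loop can copy any tail bytes or report the remainder.
	 */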
.Lcda_rdfault:
.Lcda_finish:
	/* calculate new src, dst and len and jump to byte-copy loop */
	sub	dst,save_dst,t0
	add	save_src,t0,src
	b	.Lbyte_loop
	sub	save_len,t0,len

.Lcase3:
1:	ldw,ma	4(srcspc,src), a0
	ASM_EXCEPTIONTABLE_ENTRY(1b,.Lcda_rdfault)
1:	ldw,ma	4(srcspc,src), a1
	ASM_EXCEPTIONTABLE_ENTRY(1b,.Lcda_rdfault)
	b	.Ldo2
	ldo	1(len),len
.Lcase2:
1:	ldw,ma	4(srcspc,src), a1
	ASM_EXCEPTIONTABLE_ENTRY(1b,.Lcda_rdfault)
1:	ldw,ma	4(srcspc,src), a2
	ASM_EXCEPTIONTABLE_ENTRY(1b,.Lcda_rdfault)
	b	.Ldo1
	ldo	2(len),len


	/* fault exception fixup handlers: */
#ifdef CONFIG_64BIT
.Lcopy16_fault:
	b	.Lcopy_done
10:	std,ma	t1,8(dstspc,dst)
	ASM_EXCEPTIONTABLE_ENTRY(10b,.Lcopy_done)
#endif

.Lcopy8_fault:
	b	.Lcopy_done
10:	stw,ma	t1,4(dstspc,dst)
	ASM_EXCEPTIONTABLE_ENTRY(10b,.Lcopy_done)
ENDPROC_CFI(pa_memcpy)

	.end