/* SPDX-License-Identifier: GPL-2.0-only */
/*
 *  linux/arch/arm/lib/copy_template.s
 *
 *  Code template for optimized memory copy functions
 *
 *  Author:	Nicolas Pitre
 *  Created:	Sep 28, 2005
 *  Copyright:	MontaVista Software, Inc.
 */

/*
 * Theory of operation
 * -------------------
 *
 * This file provides the core code for a forward memory copy used in
 * the implementation of memcpy(), copy_to_user() and copy_from_user().
 *
 * The including file must define the following accessor macros
 * according to the need of the given function:
 *
 * ldr1w ptr reg abort
 *
 *	This loads one word from 'ptr', stores it in 'reg' and increments
 *	'ptr' to the next word. The 'abort' argument is used for fixup tables.
 *
 * ldr4w ptr reg1 reg2 reg3 reg4 abort
 * ldr8w ptr reg1 reg2 reg3 reg4 reg5 reg6 reg7 reg8 abort
 *
 *	This loads four or eight words starting from 'ptr', stores them
 *	in provided registers and increments 'ptr' past those words.
 *	The 'abort' argument is used for fixup tables.
 *
 * ldr1b ptr reg cond abort
 *
 *	Similar to ldr1w, but it loads a byte and increments 'ptr' one byte.
 *	It also must apply the condition code if provided, otherwise the
 *	"al" condition is assumed by default.
 *
 * str1w ptr reg abort
 * str8w ptr reg1 reg2 reg3 reg4 reg5 reg6 reg7 reg8 abort
 * str1b ptr reg cond abort
 *
 *	Same as their ldr* counterparts, but data is stored to 'ptr' location
 *	rather than being loaded.
 *
 * enter reg1 reg2
 *
 *	Preserve the provided registers on the stack plus any additional
 *	data as needed by the implementation including this code. Called
 *	upon code entry.
 *
 * usave reg1 reg2
 *
 *	Unwind annotation macro corresponding to the 'enter' macro.
 *	It tells the unwinder which of the provided registers, plus any
 *	additional data, were preserved on the stack by a prior 'enter'
 *	macro.
 *
 * exit reg1 reg2
 *
 *	Restore registers with the values previously saved with the
 *	'enter' macro. Called upon code termination.
63*4882a593Smuzhiyun * 64*4882a593Smuzhiyun * LDR1W_SHIFT 65*4882a593Smuzhiyun * STR1W_SHIFT 66*4882a593Smuzhiyun * 67*4882a593Smuzhiyun * Correction to be applied to the "ip" register when branching into 68*4882a593Smuzhiyun * the ldr1w or str1w instructions (some of these macros may expand to 69*4882a593Smuzhiyun * than one 32bit instruction in Thumb-2) 70*4882a593Smuzhiyun */ 71*4882a593Smuzhiyun 72*4882a593Smuzhiyun 73*4882a593Smuzhiyun UNWIND( .fnstart ) 74*4882a593Smuzhiyun enter r4, lr 75*4882a593Smuzhiyun UNWIND( .fnend ) 76*4882a593Smuzhiyun 77*4882a593Smuzhiyun UNWIND( .fnstart ) 78*4882a593Smuzhiyun usave r4, lr @ in first stmdb block 79*4882a593Smuzhiyun 80*4882a593Smuzhiyun subs r2, r2, #4 81*4882a593Smuzhiyun blt 8f 82*4882a593Smuzhiyun ands ip, r0, #3 83*4882a593Smuzhiyun PLD( pld [r1, #0] ) 84*4882a593Smuzhiyun bne 9f 85*4882a593Smuzhiyun ands ip, r1, #3 86*4882a593Smuzhiyun bne 10f 87*4882a593Smuzhiyun 88*4882a593Smuzhiyun1: subs r2, r2, #(28) 89*4882a593Smuzhiyun stmfd sp!, {r5 - r8} 90*4882a593Smuzhiyun UNWIND( .fnend ) 91*4882a593Smuzhiyun 92*4882a593Smuzhiyun UNWIND( .fnstart ) 93*4882a593Smuzhiyun usave r4, lr 94*4882a593Smuzhiyun UNWIND( .save {r5 - r8} ) @ in second stmfd block 95*4882a593Smuzhiyun blt 5f 96*4882a593Smuzhiyun 97*4882a593Smuzhiyun CALGN( ands ip, r0, #31 ) 98*4882a593Smuzhiyun CALGN( rsb r3, ip, #32 ) 99*4882a593Smuzhiyun CALGN( sbcsne r4, r3, r2 ) @ C is always set here 100*4882a593Smuzhiyun CALGN( bcs 2f ) 101*4882a593Smuzhiyun CALGN( adr r4, 6f ) 102*4882a593Smuzhiyun CALGN( subs r2, r2, r3 ) @ C gets set 103*4882a593Smuzhiyun CALGN( add pc, r4, ip ) 104*4882a593Smuzhiyun 105*4882a593Smuzhiyun PLD( pld [r1, #0] ) 106*4882a593Smuzhiyun2: PLD( subs r2, r2, #96 ) 107*4882a593Smuzhiyun PLD( pld [r1, #28] ) 108*4882a593Smuzhiyun PLD( blt 4f ) 109*4882a593Smuzhiyun PLD( pld [r1, #60] ) 110*4882a593Smuzhiyun PLD( pld [r1, #92] ) 111*4882a593Smuzhiyun 112*4882a593Smuzhiyun3: PLD( pld [r1, #124] ) 113*4882a593Smuzhiyun4: ldr8w r1, 
r3, r4, r5, r6, r7, r8, ip, lr, abort=20f 114*4882a593Smuzhiyun subs r2, r2, #32 115*4882a593Smuzhiyun str8w r0, r3, r4, r5, r6, r7, r8, ip, lr, abort=20f 116*4882a593Smuzhiyun bge 3b 117*4882a593Smuzhiyun PLD( cmn r2, #96 ) 118*4882a593Smuzhiyun PLD( bge 4b ) 119*4882a593Smuzhiyun 120*4882a593Smuzhiyun5: ands ip, r2, #28 121*4882a593Smuzhiyun rsb ip, ip, #32 122*4882a593Smuzhiyun#if LDR1W_SHIFT > 0 123*4882a593Smuzhiyun lsl ip, ip, #LDR1W_SHIFT 124*4882a593Smuzhiyun#endif 125*4882a593Smuzhiyun addne pc, pc, ip @ C is always clear here 126*4882a593Smuzhiyun b 7f 127*4882a593Smuzhiyun6: 128*4882a593Smuzhiyun .rept (1 << LDR1W_SHIFT) 129*4882a593Smuzhiyun W(nop) 130*4882a593Smuzhiyun .endr 131*4882a593Smuzhiyun ldr1w r1, r3, abort=20f 132*4882a593Smuzhiyun ldr1w r1, r4, abort=20f 133*4882a593Smuzhiyun ldr1w r1, r5, abort=20f 134*4882a593Smuzhiyun ldr1w r1, r6, abort=20f 135*4882a593Smuzhiyun ldr1w r1, r7, abort=20f 136*4882a593Smuzhiyun ldr1w r1, r8, abort=20f 137*4882a593Smuzhiyun ldr1w r1, lr, abort=20f 138*4882a593Smuzhiyun 139*4882a593Smuzhiyun#if LDR1W_SHIFT < STR1W_SHIFT 140*4882a593Smuzhiyun lsl ip, ip, #STR1W_SHIFT - LDR1W_SHIFT 141*4882a593Smuzhiyun#elif LDR1W_SHIFT > STR1W_SHIFT 142*4882a593Smuzhiyun lsr ip, ip, #LDR1W_SHIFT - STR1W_SHIFT 143*4882a593Smuzhiyun#endif 144*4882a593Smuzhiyun add pc, pc, ip 145*4882a593Smuzhiyun nop 146*4882a593Smuzhiyun .rept (1 << STR1W_SHIFT) 147*4882a593Smuzhiyun W(nop) 148*4882a593Smuzhiyun .endr 149*4882a593Smuzhiyun str1w r0, r3, abort=20f 150*4882a593Smuzhiyun str1w r0, r4, abort=20f 151*4882a593Smuzhiyun str1w r0, r5, abort=20f 152*4882a593Smuzhiyun str1w r0, r6, abort=20f 153*4882a593Smuzhiyun str1w r0, r7, abort=20f 154*4882a593Smuzhiyun str1w r0, r8, abort=20f 155*4882a593Smuzhiyun str1w r0, lr, abort=20f 156*4882a593Smuzhiyun 157*4882a593Smuzhiyun CALGN( bcs 2b ) 158*4882a593Smuzhiyun 159*4882a593Smuzhiyun7: ldmfd sp!, {r5 - r8} 160*4882a593Smuzhiyun UNWIND( .fnend ) @ end of second stmfd block 161*4882a593Smuzhiyun 
162*4882a593Smuzhiyun UNWIND( .fnstart ) 163*4882a593Smuzhiyun usave r4, lr @ still in first stmdb block 164*4882a593Smuzhiyun8: movs r2, r2, lsl #31 165*4882a593Smuzhiyun ldr1b r1, r3, ne, abort=21f 166*4882a593Smuzhiyun ldr1b r1, r4, cs, abort=21f 167*4882a593Smuzhiyun ldr1b r1, ip, cs, abort=21f 168*4882a593Smuzhiyun str1b r0, r3, ne, abort=21f 169*4882a593Smuzhiyun str1b r0, r4, cs, abort=21f 170*4882a593Smuzhiyun str1b r0, ip, cs, abort=21f 171*4882a593Smuzhiyun 172*4882a593Smuzhiyun exit r4, pc 173*4882a593Smuzhiyun 174*4882a593Smuzhiyun9: rsb ip, ip, #4 175*4882a593Smuzhiyun cmp ip, #2 176*4882a593Smuzhiyun ldr1b r1, r3, gt, abort=21f 177*4882a593Smuzhiyun ldr1b r1, r4, ge, abort=21f 178*4882a593Smuzhiyun ldr1b r1, lr, abort=21f 179*4882a593Smuzhiyun str1b r0, r3, gt, abort=21f 180*4882a593Smuzhiyun str1b r0, r4, ge, abort=21f 181*4882a593Smuzhiyun subs r2, r2, ip 182*4882a593Smuzhiyun str1b r0, lr, abort=21f 183*4882a593Smuzhiyun blt 8b 184*4882a593Smuzhiyun ands ip, r1, #3 185*4882a593Smuzhiyun beq 1b 186*4882a593Smuzhiyun 187*4882a593Smuzhiyun10: bic r1, r1, #3 188*4882a593Smuzhiyun cmp ip, #2 189*4882a593Smuzhiyun ldr1w r1, lr, abort=21f 190*4882a593Smuzhiyun beq 17f 191*4882a593Smuzhiyun bgt 18f 192*4882a593Smuzhiyun UNWIND( .fnend ) 193*4882a593Smuzhiyun 194*4882a593Smuzhiyun 195*4882a593Smuzhiyun .macro forward_copy_shift pull push 196*4882a593Smuzhiyun 197*4882a593Smuzhiyun UNWIND( .fnstart ) 198*4882a593Smuzhiyun usave r4, lr @ still in first stmdb block 199*4882a593Smuzhiyun subs r2, r2, #28 200*4882a593Smuzhiyun blt 14f 201*4882a593Smuzhiyun 202*4882a593Smuzhiyun CALGN( ands ip, r0, #31 ) 203*4882a593Smuzhiyun CALGN( rsb ip, ip, #32 ) 204*4882a593Smuzhiyun CALGN( sbcsne r4, ip, r2 ) @ C is always set here 205*4882a593Smuzhiyun CALGN( subcc r2, r2, ip ) 206*4882a593Smuzhiyun CALGN( bcc 15f ) 207*4882a593Smuzhiyun 208*4882a593Smuzhiyun11: stmfd sp!, {r5 - r9} 209*4882a593Smuzhiyun UNWIND( .fnend ) 210*4882a593Smuzhiyun 211*4882a593Smuzhiyun UNWIND( 
.fnstart ) 212*4882a593Smuzhiyun usave r4, lr 213*4882a593Smuzhiyun UNWIND( .save {r5 - r9} ) @ in new second stmfd block 214*4882a593Smuzhiyun PLD( pld [r1, #0] ) 215*4882a593Smuzhiyun PLD( subs r2, r2, #96 ) 216*4882a593Smuzhiyun PLD( pld [r1, #28] ) 217*4882a593Smuzhiyun PLD( blt 13f ) 218*4882a593Smuzhiyun PLD( pld [r1, #60] ) 219*4882a593Smuzhiyun PLD( pld [r1, #92] ) 220*4882a593Smuzhiyun 221*4882a593Smuzhiyun12: PLD( pld [r1, #124] ) 222*4882a593Smuzhiyun13: ldr4w r1, r4, r5, r6, r7, abort=19f 223*4882a593Smuzhiyun mov r3, lr, lspull #\pull 224*4882a593Smuzhiyun subs r2, r2, #32 225*4882a593Smuzhiyun ldr4w r1, r8, r9, ip, lr, abort=19f 226*4882a593Smuzhiyun orr r3, r3, r4, lspush #\push 227*4882a593Smuzhiyun mov r4, r4, lspull #\pull 228*4882a593Smuzhiyun orr r4, r4, r5, lspush #\push 229*4882a593Smuzhiyun mov r5, r5, lspull #\pull 230*4882a593Smuzhiyun orr r5, r5, r6, lspush #\push 231*4882a593Smuzhiyun mov r6, r6, lspull #\pull 232*4882a593Smuzhiyun orr r6, r6, r7, lspush #\push 233*4882a593Smuzhiyun mov r7, r7, lspull #\pull 234*4882a593Smuzhiyun orr r7, r7, r8, lspush #\push 235*4882a593Smuzhiyun mov r8, r8, lspull #\pull 236*4882a593Smuzhiyun orr r8, r8, r9, lspush #\push 237*4882a593Smuzhiyun mov r9, r9, lspull #\pull 238*4882a593Smuzhiyun orr r9, r9, ip, lspush #\push 239*4882a593Smuzhiyun mov ip, ip, lspull #\pull 240*4882a593Smuzhiyun orr ip, ip, lr, lspush #\push 241*4882a593Smuzhiyun str8w r0, r3, r4, r5, r6, r7, r8, r9, ip, abort=19f 242*4882a593Smuzhiyun bge 12b 243*4882a593Smuzhiyun PLD( cmn r2, #96 ) 244*4882a593Smuzhiyun PLD( bge 13b ) 245*4882a593Smuzhiyun 246*4882a593Smuzhiyun ldmfd sp!, {r5 - r9} 247*4882a593Smuzhiyun UNWIND( .fnend ) @ end of the second stmfd block 248*4882a593Smuzhiyun 249*4882a593Smuzhiyun UNWIND( .fnstart ) 250*4882a593Smuzhiyun usave r4, lr @ still in first stmdb block 251*4882a593Smuzhiyun14: ands ip, r2, #28 252*4882a593Smuzhiyun beq 16f 253*4882a593Smuzhiyun 254*4882a593Smuzhiyun15: mov r3, lr, lspull #\pull 
255*4882a593Smuzhiyun ldr1w r1, lr, abort=21f 256*4882a593Smuzhiyun subs ip, ip, #4 257*4882a593Smuzhiyun orr r3, r3, lr, lspush #\push 258*4882a593Smuzhiyun str1w r0, r3, abort=21f 259*4882a593Smuzhiyun bgt 15b 260*4882a593Smuzhiyun CALGN( cmp r2, #0 ) 261*4882a593Smuzhiyun CALGN( bge 11b ) 262*4882a593Smuzhiyun 263*4882a593Smuzhiyun16: sub r1, r1, #(\push / 8) 264*4882a593Smuzhiyun b 8b 265*4882a593Smuzhiyun UNWIND( .fnend ) 266*4882a593Smuzhiyun 267*4882a593Smuzhiyun .endm 268*4882a593Smuzhiyun 269*4882a593Smuzhiyun 270*4882a593Smuzhiyun forward_copy_shift pull=8 push=24 271*4882a593Smuzhiyun 272*4882a593Smuzhiyun17: forward_copy_shift pull=16 push=16 273*4882a593Smuzhiyun 274*4882a593Smuzhiyun18: forward_copy_shift pull=24 push=8 275*4882a593Smuzhiyun 276*4882a593Smuzhiyun 277*4882a593Smuzhiyun/* 278*4882a593Smuzhiyun * Abort preamble and completion macros. 279*4882a593Smuzhiyun * If a fixup handler is required then those macros must surround it. 280*4882a593Smuzhiyun * It is assumed that the fixup code will handle the private part of 281*4882a593Smuzhiyun * the exit macro. 282*4882a593Smuzhiyun */ 283*4882a593Smuzhiyun 284*4882a593Smuzhiyun .macro copy_abort_preamble 285*4882a593Smuzhiyun19: ldmfd sp!, {r5 - r9} 286*4882a593Smuzhiyun b 21f 287*4882a593Smuzhiyun20: ldmfd sp!, {r5 - r8} 288*4882a593Smuzhiyun21: 289*4882a593Smuzhiyun .endm 290*4882a593Smuzhiyun 291*4882a593Smuzhiyun .macro copy_abort_end 292*4882a593Smuzhiyun ldmfd sp!, {r4, pc} 293*4882a593Smuzhiyun .endm 294*4882a593Smuzhiyun 295