/* SPDX-License-Identifier: GPL-2.0 */
/*
 * Copyright (C) IBM Corporation, 2011
 * Derived from copyuser_power7.s by Anton Blanchard <anton@au.ibm.com>
 * Author - Balbir Singh <bsingharora@gmail.com>
 */
#include <asm/ppc_asm.h>
#include <asm/errno.h>
#include <asm/export.h>

/*
 * Fault-annotation macros.
 *
 * Each errN macro drops a numeric local label at the current location,
 * i.e. on the instruction written right after "errN;", and hands that
 * label plus a fix-up label to EX_TABLE.  Presumably EX_TABLE (defined in
 * the included kernel headers) records an exception-table entry so that a
 * fault on the tagged instruction resumes at the fix-up label.
 *
 *   err1 - fault while no stack frame is live        -> .Ldo_err1
 *   err2 - fault while the r14-r22 save frame is live -> .Ldo_err2
 *   err3 - fault inside the byte-wise recovery loop   -> .Ldone
 */
	.macro err1
100:
	EX_TABLE(100b,.Ldo_err1)
	.endm

	.macro err2
200:
	EX_TABLE(200b,.Ldo_err2)
	.endm

	.macro err3
300:	EX_TABLE(300b,.Ldone)
	.endm

/*
 * Fix-up for a fault taken while the stack frame is live: reload r14-r22
 * from the frame, pop the frame, then fall through to .Ldo_err1.
 */
.Ldo_err2:
	ld	r22,STK_REG(R22)(r1)
	ld	r21,STK_REG(R21)(r1)
	ld	r20,STK_REG(R20)(r1)
	ld	r19,STK_REG(R19)(r1)
	ld	r18,STK_REG(R18)(r1)
	ld	r17,STK_REG(R17)(r1)
	ld	r16,STK_REG(R16)(r1)
	ld	r15,STK_REG(R15)(r1)
	ld	r14,STK_REG(R14)(r1)
	addi	r1,r1,STACKFRAMESIZE
/*
 * Fix-up for a fault taken with no frame live.  r7 holds the number of
 * bytes not yet accounted as copied and r3/r4 the current destination and
 * source cursors.  Retry one byte at a time with CTR = r7; if the retry
 * completes, return 0.
 *
 * NOTE(review): in the 64B/128B bulk sections r4 is advanced before the
 * stores while r3 and r7 are only updated after them.  If a store (rather
 * than a load) were to fault there, this retry would read from a source
 * offset that is ahead of the destination.  Confirm that only loads are
 * expected to fault in those sections.
 */
.Ldo_err1:
	/* Do a byte by byte copy to get the exact remaining size */
	mtctr	r7
46:
err3;	lbz	r0,0(r4)
	addi	r4,r4,1
err3;	stb	r0,0(r3)
	addi	r3,r3,1
	bdnz	46b
	li	r3,0
	blr

/*
 * A byte access in the recovery loop faulted.  CTR still counts the bytes
 * that were not copied (bdnz has not yet run for the faulting byte), so
 * return it in r3.
 */
.Ldone:
	mfctr	r3
	blr


/*
 * copy_mc_generic
 *
 * In:   r3 = destination cursor (written with stb/sth/stw/std)
 *       r4 = source cursor      (read with lbz/lhz/lwz/ld)
 *       r5 = byte count
 * Out:  r3 = 0 when every byte was copied, otherwise the number of bytes
 *            left uncopied as reported by the fix-up path above.
 * Uses: r0, r6-r12 as scratch; r7 tracks bytes still to copy for the
 *       fix-up path; r14-r22 are saved to a STACKFRAMESIZE frame before
 *       use and restored afterwards; CTR; cr0 and cr7.
 *       LR is stored to the caller's frame but never modified or reloaded
 *       here.
 *
 * Strategy: copies shorter than 16 bytes go straight to the tail code.
 * Otherwise copy 1/2/4 bytes as needed to make the source 8-byte aligned,
 * move 128-byte chunks in a CTR loop, then peel off 64/32/16 bytes and
 * finally 8/4/2/1 bytes, each step selected by a bit of the remaining
 * length moved into cr7 with mtocrf.
 */
_GLOBAL(copy_mc_generic)
	mr	r7,r5			/* r7 = bytes remaining (for fix-up) */
	cmpldi	r5,16
	blt	.Lshort_copy

.Lcopy:
	/* Get the source 8B aligned */
	neg	r6,r4
	mtocrf	0x01,r6			/* cr7 = low 4 bits of -src */
	clrldi	r6,r6,(64-3)		/* r6 = bytes needed to align (0-7) */

	bf	cr7*4+3,1f		/* 1-byte step needed? */
err1;	lbz	r0,0(r4)
	addi	r4,r4,1
err1;	stb	r0,0(r3)
	addi	r3,r3,1
	subi	r7,r7,1

1:	bf	cr7*4+2,2f		/* 2-byte step needed? */
err1;	lhz	r0,0(r4)
	addi	r4,r4,2
err1;	sth	r0,0(r3)
	addi	r3,r3,2
	subi	r7,r7,2

2:	bf	cr7*4+1,3f		/* 4-byte step needed? */
err1;	lwz	r0,0(r4)
	addi	r4,r4,4
err1;	stw	r0,0(r3)
	addi	r3,r3,4
	subi	r7,r7,4

3:	sub	r5,r5,r6		/* r5 = length left after alignment */
	/*
	 * The cr0 result of this compare is consumed by the "blt 5f" below;
	 * the frame setup in between does not write cr0.
	 */
	cmpldi	r5,128

	/* Open a frame and save r14-r22, which the bulk copies use */
	mflr	r0
	stdu	r1,-STACKFRAMESIZE(r1)
	std	r14,STK_REG(R14)(r1)
	std	r15,STK_REG(R15)(r1)
	std	r16,STK_REG(R16)(r1)
	std	r17,STK_REG(R17)(r1)
	std	r18,STK_REG(R18)(r1)
	std	r19,STK_REG(R19)(r1)
	std	r20,STK_REG(R20)(r1)
	std	r21,STK_REG(R21)(r1)
	std	r22,STK_REG(R22)(r1)
	std	r0,STACKFRAMESIZE+16(r1)

	blt	5f			/* fewer than 128 bytes left */
	srdi	r6,r5,7
	mtctr	r6			/* CTR = number of 128B chunks */

	/* Now do cacheline (128B) sized loads and stores. */
	.align	5
4:
err2;	ld	r0,0(r4)
err2;	ld	r6,8(r4)
err2;	ld	r8,16(r4)
err2;	ld	r9,24(r4)
err2;	ld	r10,32(r4)
err2;	ld	r11,40(r4)
err2;	ld	r12,48(r4)
err2;	ld	r14,56(r4)
err2;	ld	r15,64(r4)
err2;	ld	r16,72(r4)
err2;	ld	r17,80(r4)
err2;	ld	r18,88(r4)
err2;	ld	r19,96(r4)
err2;	ld	r20,104(r4)
err2;	ld	r21,112(r4)
err2;	ld	r22,120(r4)
	addi	r4,r4,128
err2;	std	r0,0(r3)
err2;	std	r6,8(r3)
err2;	std	r8,16(r3)
err2;	std	r9,24(r3)
err2;	std	r10,32(r3)
err2;	std	r11,40(r3)
err2;	std	r12,48(r3)
err2;	std	r14,56(r3)
err2;	std	r15,64(r3)
err2;	std	r16,72(r3)
err2;	std	r17,80(r3)
err2;	std	r18,88(r3)
err2;	std	r19,96(r3)
err2;	std	r20,104(r3)
err2;	std	r21,112(r3)
err2;	std	r22,120(r3)
	addi	r3,r3,128
	subi	r7,r7,128
	bdnz	4b

	clrldi	r5,r5,(64-7)		/* r5 = length mod 128 */

	/* Up to 127B to go */
5:	srdi	r6,r5,4
	mtocrf	0x01,r6			/* cr7 = 64/32/16-byte bits of r5 */

6:	bf	cr7*4+1,7f		/* 64-byte chunk present? */
err2;	ld	r0,0(r4)
err2;	ld	r6,8(r4)
err2;	ld	r8,16(r4)
err2;	ld	r9,24(r4)
err2;	ld	r10,32(r4)
err2;	ld	r11,40(r4)
err2;	ld	r12,48(r4)
err2;	ld	r14,56(r4)
	addi	r4,r4,64
err2;	std	r0,0(r3)
err2;	std	r6,8(r3)
err2;	std	r8,16(r3)
err2;	std	r9,24(r3)
err2;	std	r10,32(r3)
err2;	std	r11,40(r3)
err2;	std	r12,48(r3)
err2;	std	r14,56(r3)
	addi	r3,r3,64
	subi	r7,r7,64

	/*
	 * Done with r14-r22: restore them and pop the frame.  From here on
	 * faults are tagged err1 because no frame is live any more.
	 */
7:	ld	r14,STK_REG(R14)(r1)
	ld	r15,STK_REG(R15)(r1)
	ld	r16,STK_REG(R16)(r1)
	ld	r17,STK_REG(R17)(r1)
	ld	r18,STK_REG(R18)(r1)
	ld	r19,STK_REG(R19)(r1)
	ld	r20,STK_REG(R20)(r1)
	ld	r21,STK_REG(R21)(r1)
	ld	r22,STK_REG(R22)(r1)
	addi	r1,r1,STACKFRAMESIZE

	/* Up to 63B to go */
	bf	cr7*4+2,8f		/* 32-byte chunk present? */
err1;	ld	r0,0(r4)
err1;	ld	r6,8(r4)
err1;	ld	r8,16(r4)
err1;	ld	r9,24(r4)
	addi	r4,r4,32
err1;	std	r0,0(r3)
err1;	std	r6,8(r3)
err1;	std	r8,16(r3)
err1;	std	r9,24(r3)
	addi	r3,r3,32
	subi	r7,r7,32

	/* Up to 31B to go */
8:	bf	cr7*4+3,9f		/* 16-byte chunk present? */
err1;	ld	r0,0(r4)
err1;	ld	r6,8(r4)
	addi	r4,r4,16
err1;	std	r0,0(r3)
err1;	std	r6,8(r3)
	addi	r3,r3,16
	subi	r7,r7,16

9:	clrldi	r5,r5,(64-4)		/* r5 = length mod 16 */

	/* Up to 15B to go */
.Lshort_copy:
	mtocrf	0x01,r5			/* cr7 = 8/4/2/1-byte bits of r5 */
	bf	cr7*4+0,12f		/* 8 bytes present? */
err1;	lwz	r0,0(r4)	/* Less chance of a reject with word ops */
err1;	lwz	r6,4(r4)
	addi	r4,r4,8
err1;	stw	r0,0(r3)
err1;	stw	r6,4(r3)
	addi	r3,r3,8
	subi	r7,r7,8

12:	bf	cr7*4+1,13f		/* 4 bytes present? */
err1;	lwz	r0,0(r4)
	addi	r4,r4,4
err1;	stw	r0,0(r3)
	addi	r3,r3,4
	subi	r7,r7,4

13:	bf	cr7*4+2,14f		/* 2 bytes present? */
err1;	lhz	r0,0(r4)
	addi	r4,r4,2
err1;	sth	r0,0(r3)
	addi	r3,r3,2
	subi	r7,r7,2

14:	bf	cr7*4+3,15f		/* final byte present? */
err1;	lbz	r0,0(r4)
err1;	stb	r0,0(r3)

15:	li	r3,0			/* everything copied */
	blr

EXPORT_SYMBOL_GPL(copy_mc_generic);