/* SPDX-License-Identifier: GPL-2.0 */
/*
 * arch/alpha/lib/memmove.S
 *
 * Barely optimized memmove routine for Alpha EV5.
 *
 * This is hand-massaged output from the original memcpy.c.  We defer to
 * memcpy whenever possible; the backwards copy loops are not unrolled.
 */
#include <asm/export.h>
	.set noat
	.set noreorder
	.text

/*
 * memmove: copy n bytes between buffers that may overlap.
 *
 * In:
 *	$16	dest
 *	$17	src
 *	$18	n (byte count)
 *	$26	return address
 *	$27	base from which ldgp derives gp ($29)
 * Out:
 *	$0	the original dest
 * Used internally:
 *	$4, $5		running dest / src pointers
 *	$1, $2, $3, $6	temporaries
 *	$18		counted down as the number of bytes still to copy
 *
 * Strategy:
 *  - If the two ranges do not overlap, branch to memcpy.
 *  - If dest <= src, copy upward starting at the first byte
 *    ($memmove_up); otherwise copy downward starting at the last byte.
 *  - In either direction, when src and dest have the same offset within
 *    a quadword, copy single bytes until dest is 8-byte aligned, then
 *    whole quadwords, then the 0..7 remaining bytes.  When the offsets
 *    differ, the whole range is copied one byte at a time.
 *
 * A single byte is moved by loading the quadwords that contain the
 * source and destination bytes (ldq_u), extracting the source byte
 * (extbl), moving it to the destination byte lane (insbl), clearing that
 * lane in the destination quadword (mskbl), merging (bis) and storing
 * the quadword back (stq_u).
 *
 * The instructions are written in groups of four, and the nop / unop /
 * fnop fillers only pad those groups -- presumably to match EV5 issue
 * slots, which this file does not state.  Do not drop or reorder them
 * without measuring.
 */
	.align 4
	.globl memmove
	.ent memmove
memmove:
	ldgp $29, 0($27)
	unop
	nop
	.prologue 1

	addq $16,$18,$4			/* $4 = dest + n */
	addq $17,$18,$5			/* $5 = src + n */
	cmpule $4,$17,$1		/*  dest + n <= src  */
	cmpule $5,$16,$2		/*  dest >= src + n  */

	bis $1,$2,$1			/* non-zero: the ranges do not overlap */
	mov $16,$0			/* $0 = dest, set before any branch */
	xor $16,$17,$2
	/*
	 * Non-overlapping ranges are handed to memcpy.  !samegp requests
	 * the branch relocation for a target sharing this routine's gp
	 * (see the GNU as Alpha relocation notes).
	 */
	bne $1,memcpy			!samegp

	and $2,7,$2			/* Test for src/dest co-alignment.  */
	and $16,7,$1			/* $1 = dest & 7, used by the up path */
	cmpule $16,$17,$3
	bne $3,$memmove_up		/* dest <= src: copy upward */

	/* dest > src: copy downward, starting from the ends in $4 / $5. */
	and $4,7,$1			/* $1 = (dest + n) & 7 */
	bne $2,$misaligned_dn
	unop
	beq $1,$skip_aligned_byte_loop_head_dn

/* Copy single bytes downward until $4 is 8-byte aligned or n runs out. */
$aligned_byte_loop_head_dn:
	lda $4,-1($4)
	lda $5,-1($5)
	unop
	ble $18,$egress

	ldq_u $3,0($5)			/* quadword holding the source byte */
	ldq_u $2,0($4)			/* quadword holding the dest byte */
	lda $18,-1($18)
	extbl $3,$5,$1			/* $1 = the source byte */

	insbl $1,$4,$1			/* move it to the dest byte lane */
	mskbl $2,$4,$2			/* clear that lane in the dest quadword */
	bis $1,$2,$1
	and $4,7,$6			/* $6 == 0 once $4 is 8-byte aligned */

	stq_u $1,0($4)
	bne $6,$aligned_byte_loop_head_dn

/*
 * $4 is 8-byte aligned here, and so is $5 because src and dest are
 * co-aligned.  Bias the count by -8 so that the sign of $18 tells
 * whether a whole quadword is still left to copy.
 */
$skip_aligned_byte_loop_head_dn:
	lda $18,-8($18)
	blt $18,$skip_aligned_word_loop_dn

/* Copy one quadword per iteration, moving downward. */
$aligned_word_loop_dn:
	ldq $1,-8($5)
	nop
	lda $5,-8($5)
	lda $18,-8($18)

	stq $1,-8($4)
	nop
	lda $4,-8($4)
	bge $18,$aligned_word_loop_dn

$skip_aligned_word_loop_dn:
	lda $18,8($18)			/* undo the bias: 0..7 bytes remain */
	bgt $18,$byte_loop_tail_dn
	unop
	ret $31,($26),1

	.align 4
$misaligned_dn:
	nop
	fnop
	unop
	beq $18,$egress

/*
 * Copy the remaining $18 bytes downward, one at a time.  The loop copies
 * a byte before it tests the count, so both entries above only arrive
 * here with $18 != 0.
 */
$byte_loop_tail_dn:
	ldq_u $3,-1($5)
	ldq_u $2,-1($4)
	lda $5,-1($5)
	lda $4,-1($4)

	lda $18,-1($18)
	extbl $3,$5,$1
	insbl $1,$4,$1
	mskbl $2,$4,$2

	bis $1,$2,$1
	stq_u $1,0($4)
	bgt $18,$byte_loop_tail_dn
	br $egress

/*
 * dest <= src: copy upward from the start.  On entry $1 = dest & 7 and
 * $2 is non-zero when src and dest are not co-aligned.
 */
$memmove_up:
	mov $16,$4
	mov $17,$5
	bne $2,$misaligned_up
	beq $1,$skip_aligned_byte_loop_head_up

/* Copy single bytes upward until $4 is 8-byte aligned or n runs out. */
$aligned_byte_loop_head_up:
	unop
	ble $18,$egress
	ldq_u $3,0($5)
	ldq_u $2,0($4)

	lda $18,-1($18)
	extbl $3,$5,$1
	insbl $1,$4,$1
	mskbl $2,$4,$2

	bis $1,$2,$1
	lda $5,1($5)
	stq_u $1,0($4)
	lda $4,1($4)

	and $4,7,$6			/* $6 == 0 once $4 is 8-byte aligned */
	bne $6,$aligned_byte_loop_head_up

/* Same -8 bias on the count as in the downward path. */
$skip_aligned_byte_loop_head_up:
	lda $18,-8($18)
	blt $18,$skip_aligned_word_loop_up

/* Copy one quadword per iteration, moving upward. */
$aligned_word_loop_up:
	ldq $1,0($5)
	nop
	lda $5,8($5)
	lda $18,-8($18)

	stq $1,0($4)
	nop
	lda $4,8($4)
	bge $18,$aligned_word_loop_up

$skip_aligned_word_loop_up:
	lda $18,8($18)			/* undo the bias: 0..7 bytes remain */
	bgt $18,$byte_loop_tail_up
	unop
	ret $31,($26),1

	.align 4
$misaligned_up:
	nop
	fnop
	unop
	beq $18,$egress

/*
 * Copy the remaining $18 bytes upward, one at a time.  As in the
 * downward tail, a byte is copied before the count is tested, so this
 * is only entered with $18 != 0.  Falls through to $egress when done.
 */
$byte_loop_tail_up:
	ldq_u $3,0($5)
	ldq_u $2,0($4)
	lda $18,-1($18)
	extbl $3,$5,$1

	insbl $1,$4,$1
	mskbl $2,$4,$2
	bis $1,$2,$1
	stq_u $1,0($4)

	lda $5,1($5)
	lda $4,1($4)
	nop
	bgt $18,$byte_loop_tail_up

$egress:
	ret $31,($26),1
	nop
	nop
	nop

	.end memmove
	EXPORT_SYMBOL(memmove)