/* SPDX-License-Identifier: GPL-2.0-only */
/*
 * Copyright (C) 2013 Regents of the University of California
 */

#include <linux/linkage.h>
#include <asm/asm.h>

/*
 * void *memcpy(void *, const void *, size_t)
 *
 * Forward copy of a2 bytes from a1 to a0; returns the original
 * destination pointer.
 *
 * Register usage:
 *   a0      destination on entry; never written, so it is still the
 *           return value at every "ret"
 *   a1      running source pointer
 *   a2      remaining byte count
 *   t6      running destination pointer (working copy of a0)
 *   a3      end-of-phase source address / scratch
 *   a4-a7,
 *   t0-t5   data and scratch
 *
 * Phases:
 *   1:  byte copy until the source reaches an SZREG boundary
 *   3:  unrolled copy of 16*SZREG bytes per iteration
 *   7:  32-bit word copy of the remainder (needs 4-byte co-alignment)
 *   5:  byte copy of the remainder (fallback)
 *
 * SZREG, REG_L and REG_S are provided by the included headers;
 * presumably the register size in bytes and the matching register-width
 * load/store -- confirm against <asm/asm.h>.
 */
ENTRY(__memcpy)
WEAK(memcpy)
	move t6, a0  /* Preserve return value */

	/* Defer to byte-oriented copy for small sizes (count < 128) */
	sltiu a3, a2, 128
	bnez a3, 4f
	/* Use word-oriented copy only if low-order bits match */
	andi a3, t6, SZREG-1
	andi a4, a1, SZREG-1
	bne a3, a4, 4f

	beqz a3, 2f  /* Skip if already aligned */
	/*
	 * Compute the next SZREG-aligned address above the source address.
	 * The source is known to be misaligned here, so the result is
	 * strictly greater than a1. The destination shares the same
	 * low-order bits, so it becomes aligned at the same time.
	 */
	andi a3, a1, ~(SZREG-1)
	addi a3, a3, SZREG
	/* Handle initial misalignment; a4 = number of bytes copied below */
	sub a4, a3, a1
1:
	lb a5, 0(a1)
	addi a1, a1, 1
	sb a5, 0(t6)
	addi t6, t6, 1
	bltu a1, a3, 1b
	sub a2, a2, a4  /* Update count */

2:
	/*
	 * a4 = count rounded down to a multiple of 16*SZREG. It can be
	 * zero even though count >= 128 on entry, because the alignment
	 * loop above may have consumed some bytes.
	 */
	andi a4, a2, ~((16*SZREG)-1)
	beqz a4, 4f
	add a3, a1, a4  /* a3 = source address at which the block loop stops */
3:
	/* Copy 16 registers per iteration: load 10, store 10, load 6, store 6 */
	REG_L a4,       0(a1)
	REG_L a5,   SZREG(a1)
	REG_L a6, 2*SZREG(a1)
	REG_L a7, 3*SZREG(a1)
	REG_L t0, 4*SZREG(a1)
	REG_L t1, 5*SZREG(a1)
	REG_L t2, 6*SZREG(a1)
	REG_L t3, 7*SZREG(a1)
	REG_L t4, 8*SZREG(a1)
	REG_L t5, 9*SZREG(a1)
	REG_S a4,       0(t6)
	REG_S a5,   SZREG(t6)
	REG_S a6, 2*SZREG(t6)
	REG_S a7, 3*SZREG(t6)
	REG_S t0, 4*SZREG(t6)
	REG_S t1, 5*SZREG(t6)
	REG_S t2, 6*SZREG(t6)
	REG_S t3, 7*SZREG(t6)
	REG_S t4, 8*SZREG(t6)
	REG_S t5, 9*SZREG(t6)
	REG_L a4, 10*SZREG(a1)
	REG_L a5, 11*SZREG(a1)
	REG_L a6, 12*SZREG(a1)
	REG_L a7, 13*SZREG(a1)
	REG_L t0, 14*SZREG(a1)
	REG_L t1, 15*SZREG(a1)
	addi a1, a1, 16*SZREG
	REG_S a4, 10*SZREG(t6)
	REG_S a5, 11*SZREG(t6)
	REG_S a6, 12*SZREG(t6)
	REG_S a7, 13*SZREG(t6)
	REG_S t0, 14*SZREG(t6)
	REG_S t1, 15*SZREG(t6)
	addi t6, t6, 16*SZREG
	bltu a1, a3, 3b
	andi a2, a2, (16*SZREG)-1  /* Update count */

4:
	/*
	 * Handle trailing misalignment. This is also the whole copy for
	 * small sizes and for buffers whose low-order bits differ.
	 */
	beqz a2, 6f
	add a3, a1, a2  /* a3 = one past the last source byte */

	/*
	 * Use word-oriented copy if co-aligned to word boundary: source,
	 * destination and end address must all be multiples of 4, which
	 * also makes the remaining length a multiple of 4 so the loop
	 * below stops exactly at a3.
	 */
	or a5, a1, t6
	or a5, a5, a3
	andi a5, a5, 3
	bnez a5, 5f
7:
	lw a4, 0(a1)
	addi a1, a1, 4
	sw a4, 0(t6)
	addi t6, t6, 4
	bltu a1, a3, 7b

	ret

5:
	/* Byte-at-a-time fallback for the remainder */
	lb a4, 0(a1)
	addi a1, a1, 1
	sb a4, 0(t6)
	addi t6, t6, 1
	bltu a1, a3, 5b
6:
	ret
END(__memcpy)