/* SPDX-License-Identifier: GPL-2.0-only */
/*
 * Copyright (C) 2004, 2007-2010, 2011-2012 Synopsys, Inc. (www.synopsys.com)
 */

/* If dst and src are 4 byte aligned, copy 8 bytes at a time.
   If the src is 4, but not 8 byte aligned, we first read 4 bytes to get
   it 8 byte aligned.  Thus, we can do a little read-ahead, without
   dereferencing a cache line that we should not touch.
   Note that short and long instructions have been scheduled to avoid
   branch stalls.
   The bne_s to r3z could be made unaligned & long to avoid a stall
   there, but it is not likely to be taken often, and it would also be
   likely to cost an unaligned mispredict at the next call.
   A C sketch of the word-at-a-time zero-byte test used below appears
   at the end of this file.  */

#include <linux/linkage.h>

ENTRY_CFI(strcpy)
	/* If either dst (r0) or src (r1) is not word aligned, fall back
	   to the byte-at-a-time loop.  r10 is the write pointer so that
	   r0 survives untouched as the return value.  */
	or	r2,r0,r1
	bmsk_s	r2,r2,1
	brne.d	r2,0,charloop
	mov_s	r10,r0		/* delay slot */
	/* r8 = 0x01010101 and r12 = ror(r8) = 0x80808080 are the
	   constants of the zero-byte test
	   (x - 0x01010101) & ~x & 0x80808080.  */
	ld_s	r3,[r1,0]
	mov	r8,0x01010101
	bbit0.d	r1,2,loop_start
	ror	r12,r8		/* delay slot */
	sub	r2,r3,r8
	bic_s	r2,r2,r3
	tst_s	r2,r12
	bne	r3z
	mov_s	r4,r3
	.balign 4
loop:
	ld.a	r3,[r1,4]
	st.ab	r4,[r10,4]
loop_start:
	ld.a	r4,[r1,4]
	/* Zero-byte test on r3; on a hit, r3 holds the word containing
	   the terminating NUL and is stored byte by byte at r3z.  */
	sub	r2,r3,r8
	bic_s	r2,r2,r3
	tst_s	r2,r12
	bne_s	r3z
	st.ab	r3,[r10,4]
	/* Same test on r4, the word read ahead.  */
	sub	r2,r4,r8
	bic	r2,r2,r4
	tst	r2,r12
	beq	loop
	mov_s	r3,r4
/* Store the bytes of the final word one at a time, up to and including
   the NUL: peel off the next byte in string order (the lowest byte on
   little endian, the highest on big endian) and loop while it is
   nonzero; the delay-slot stb also writes the NUL itself.  */
#ifdef __LITTLE_ENDIAN__
r3z:	bmsk.f	r1,r3,7
	lsr_s	r3,r3,8
#else
r3z:	lsr.f	r1,r3,24
	asl_s	r3,r3,8
#endif
	bne.d	r3z
	stb.ab	r1,[r10,1]	/* delay slot */
	j_s	[blink]

	.balign	4
charloop:
	/* Byte-at-a-time copy for unaligned operands.  */
	ldb.ab	r3,[r1,1]
	brne.d	r3,0,charloop
	stb.ab	r3,[r10,1]	/* delay slot */
	j	[blink]
END_CFI(strcpy)
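
/* Reference only, not part of the build: a minimal C sketch of the
   word-at-a-time zero-byte test that the r8/r12 constants implement
   above.  The helper name is made up for illustration.

	// A word x contains a 0x00 byte iff
	// (x - 0x01010101) & ~x & 0x80808080 is nonzero: subtracting
	// 0x01 from each byte turns a 0x00 byte into 0xff, setting its
	// top bit, and masking with ~x discards bytes whose top bit
	// was already set in x.
	static inline int word_has_zero_byte(unsigned int x)
	{
		return ((x - 0x01010101u) & ~x & 0x80808080u) != 0;
	}
*/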