/*
 * Copyright (C) 2004, 2007-2010, 2011-2014 Synopsys, Inc. All rights reserved.
 *
 * SPDX-License-Identifier: GPL-2.0+
 */

/*
 * If dst and src are 4 byte aligned, copy 8 bytes at a time.
 * If the src is 4, but not 8 byte aligned, we first read 4 bytes to get
 * it 8 byte aligned.  Thus, we can do a little read-ahead, without
 * dereferencing a cache line that we should not touch.
 * Note that short and long instructions have been scheduled to avoid
 * branch stalls.
 * The bne_s to r3z could be made unaligned & long to avoid a stall
 * there, but it is not likely to be taken often, and it would also be likely
 * to cost an unaligned mispredict at the next call.
 */

/*
 * char *strcpy(char *dst, const char *src)
 *
 * In:       %r0 = dst, %r1 = src
 * Out:      %r0 = dst (this routine never writes %r0)
 * Clobbers: %r1, %r2, %r3, %r4, %r8, %r10, %r12 and the status flags
 *
 * Register roles:
 *   %r10  running destination pointer
 *   %r1   running source pointer (reused as a byte scratch in r3z)
 *   %r3   current source word / byte
 *   %r4   read-ahead source word
 *   %r8   0x01010101
 *   %r12  0x80808080 (%r8 rotated right by one bit)
 *   %r2   scratch for the alignment and zero-byte tests
 *
 * Zero-byte test used on a word w:
 *   ((w - 0x01010101) & ~w) & 0x80808080 is non-zero iff some byte of w is 0.
 */

.global strcpy
.align 4
strcpy:
	or	%r2, %r0, %r1
	bmsk_s	%r2, %r2, 1		/* r2 = (dst | src) & 3 */
	brne.d	%r2, 0, charloop	/* either pointer not word aligned: copy bytewise */
	mov_s	%r10, %r0		/* delay slot: work on a copy, keep r0 = dst */
	ld_s	%r3, [%r1, 0]		/* first source word */
	mov	%r8, 0x01010101
	bbit0.d	%r1, 2, loop_start	/* bit 2 of src clear: src already 8 byte aligned */
	ror	%r12, %r8		/* delay slot: r12 = 0x80808080 */

	/* src is 4 but not 8 byte aligned: test the single leading word. */
	sub	%r2, %r3, %r8
	bic_s	%r2, %r2, %r3		/* r2 = (r3 - 0x01010101) & ~r3 */
	tst_s	%r2,%r12
	bne	r3z			/* leading word holds the terminator */
	mov_s	%r4,%r3			/* loop stores r4 first, so hand the word over */
	.balign 4
loop:
	ld.a	%r3, [%r1, 4]		/* advance src by 4, then load the next word */
	st.ab	%r4, [%r10, 4]		/* store the previous (zero-free) word, dst += 4 */
loop_start:
	ld.a	%r4, [%r1, 4]		/* read-ahead: the word following r3 */
	sub	%r2, %r3, %r8
	bic_s	%r2, %r2, %r3
	tst_s	%r2, %r12
	bne_s	r3z			/* r3 contains a zero byte */
	st.ab	%r3, [%r10, 4]		/* r3 is zero-free: store it whole */
	sub	%r2, %r4, %r8
	bic	%r2, %r2, %r4
	tst	%r2, %r12
	beq	loop			/* r4 is zero-free too: keep going */
	mov_s	%r3, %r4		/* fall-through only: the terminator is in r4 */

	/*
	 * Tail: r3 holds a word containing the terminator.  Emit it one byte
	 * at a time in memory order, stopping after the zero byte is stored.
	 */
#ifdef __LITTLE_ENDIAN__
r3z:	bmsk.f	%r1, %r3, 7		/* r1 = lowest byte of r3, flags set from it */
	lsr_s	%r3, %r3, 8		/* bring the next byte down */
#else /* __BIG_ENDIAN__ */
r3z:	lsr.f	%r1, %r3, 24		/* r1 = highest byte of r3, flags set from it */
	asl_s	%r3, %r3, 8		/* bring the next byte up */
#endif /* _ENDIAN__ */
	bne.d	r3z			/* repeat while the byte just extracted is non-zero */
	stb.ab	%r1, [%r10, 1]		/* delay slot: always store it, terminator included */
	j_s	[%blink]

	/* Unaligned fallback: plain byte-by-byte copy. */
	.balign 4
charloop:
	ldb.ab	%r3, [%r1, 1]		/* r3 = *src++ */
	brne.d	%r3, 0, charloop	/* loop until the byte just loaded is zero */
	stb.ab	%r3, [%r10, 1]		/* delay slot: *dst++ = r3, terminator included */
	j	[%blink]