/*
 *  arch/xtensa/lib/memset.S
 *
 *  ANSI C standard library function memset
 *  (Well, almost.  .fixup code might return zero.)
 *
 *  This file is subject to the terms and conditions of the GNU General
 *  Public License.  See the file "COPYING" in the main directory of
 *  this archive for more details.
 *
 *  Copyright (C) 2002 Tensilica Inc.
 */

#include <linux/linkage.h>
#include <asm/asmmacro.h>
#include <asm/core.h>

/*
 * void *memset(void *dst, int c, size_t length)
 *
 * The algorithm is as follows:
 *   Create a word with c in all byte positions.
 *   If the destination is aligned,
 *     do 16B chunks with a loop, and then finish up with
 *     8B, 4B, 2B, and 1B stores conditional on the length.
 *   If the destination is unaligned, align it by conditionally
 *     setting 1B and 2B and then go to the aligned case.
 *   This code tries to use fall-through branches for the common
 *     case of an aligned destination (except for the branches to
 *     the alignment labels).
 */
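/*
 * Roughly equivalent C, as an illustrative sketch only: it ignores the
 * EX() fixup handling, and "memset_sketch" is a hypothetical name used
 * here for illustration, not something defined by this file.
 *
 *	void *memset_sketch(void *dst, int c, size_t length)
 *	{
 *		unsigned char *p = dst;
 *		unsigned int w = (unsigned char)c;	// 32-bit word of c
 *
 *		w |= w << 8;				// replicate c into
 *		w |= w << 16;				// all four bytes
 *
 *		if (length < 8) {			// short: bytewise
 *			while (length--)
 *				*p++ = c;
 *			return dst;
 *		}
 *		if ((unsigned long)p & 1) {		// align to 2 bytes
 *			*p++ = c;
 *			length--;
 *		}
 *		if ((unsigned long)p & 2) {		// align to 4 bytes
 *			*(unsigned short *)p = w;
 *			p += 2;
 *			length -= 2;
 *		}
 *		for (size_t n = length >> 4; n; n--, p += 16) {
 *			((unsigned int *)p)[0] = w;	// 16B per iteration
 *			((unsigned int *)p)[1] = w;
 *			((unsigned int *)p)[2] = w;
 *			((unsigned int *)p)[3] = w;
 *		}
 *		if (length & 8) {			// 8B tail
 *			((unsigned int *)p)[0] = w;
 *			((unsigned int *)p)[1] = w;
 *			p += 8;
 *		}
 *		if (length & 4) {			// 4B tail
 *			*(unsigned int *)p = w;
 *			p += 4;
 *		}
 *		if (length & 2) {			// 2B tail
 *			*(unsigned short *)p = w;
 *			p += 2;
 *		}
 *		if (length & 1)				// last byte
 *			*p = c;
 *		return dst;
 *	}
 */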

.text
ENTRY(__memset)
WEAK(memset)
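	# memset is declared weak so another implementation (e.g. an
	# instrumented one such as KASAN's) can override it at link time;
	# __memset remains the uninstrumented entry point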

	abi_entry_default
	# a2/ dst, a3/ c, a4/ length
	extui	a3, a3, 0, 8	# mask to just 8 bits
	slli	a7, a3, 8	# duplicate character in all bytes of word
	or	a3, a3, a7	# ...
	slli	a7, a3, 16	# ...
	or	a3, a3, a7	# ...
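				# (e.g. c = 0xAB: extui leaves 0x000000AB,
				# the first slli/or pair gives 0x0000ABAB,
				# the second pair gives 0xABABABAB)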
	mov	a5, a2		# copy dst so that a2 is return value
	movi	a6, 3		# for alignment tests
	bany	a2, a6, .Ldstunaligned # if dst is unaligned
.L0:	# return here from .Ldstunaligned when dst is aligned
	srli	a7, a4, 4	# number of loop iterations with 16B
				# per iteration
	bnez	a4, .Laligned
	abi_ret_default

/*
 * Destination is word-aligned.
 */
	# set 16 bytes per iteration for word-aligned dst
	.align	4		# 1 mod 4 alignment for LOOPNEZ
	.byte	0		# (0 mod 4 alignment for LBEG)
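				# (loopnez is a 3-byte instruction, so
				# starting it at an address that is 1 mod 4
				# puts the loop body, and hence LBEG, on a
				# 4-byte boundary)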
.Laligned:
#if XCHAL_HAVE_LOOPS
	loopnez	a7, .Loop1done
#else /* !XCHAL_HAVE_LOOPS */
	beqz	a7, .Loop1done
	slli	a6, a7, 4
	add	a6, a6, a5	# a6 = end of last 16B chunk
#endif /* !XCHAL_HAVE_LOOPS */
.Loop1:
EX(10f) s32i	a3, a5,  0
EX(10f) s32i	a3, a5,  4
EX(10f) s32i	a3, a5,  8
EX(10f) s32i	a3, a5, 12
	addi	a5, a5, 16
#if !XCHAL_HAVE_LOOPS
	blt	a5, a6, .Loop1
#endif /* !XCHAL_HAVE_LOOPS */
.Loop1done:
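	# fewer than 16 bytes remain; bits 3..0 of a4 select the 8-, 4-,
	# 2- and 1-byte tail stores below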
	bbci.l	a4, 3, .L2
	# set 8 bytes
EX(10f) s32i	a3, a5,  0
EX(10f) s32i	a3, a5,  4
	addi	a5, a5,  8
.L2:
	bbci.l	a4, 2, .L3
	# set 4 bytes
EX(10f) s32i	a3, a5,  0
	addi	a5, a5,  4
.L3:
	bbci.l	a4, 1, .L4
	# set 2 bytes
EX(10f) s16i	a3, a5,  0
	addi	a5, a5,  2
.L4:
	bbci.l	a4, 0, .L5
	# set 1 byte
EX(10f) s8i	a3, a5,  0
.L5:
.Lret1:
	abi_ret_default

/*
 * Destination is unaligned
 */
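/*
 * (e.g. dst % 4 == 1: store 1 byte, then 2 bytes at .L20, then rejoin .L0;
 *  dst % 4 == 2: branch straight to the 2-byte store at .L20;
 *  dst % 4 == 3: the single byte store already aligns dst.)
 */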

.Ldstunaligned:
	bltui	a4, 8, .Lbyteset	# set short lengths byte by byte
	bbci.l	a5, 0, .L20		# branch if dst is already half-word aligned
	# dst is only byte aligned
	# set 1 byte
EX(10f) s8i	a3, a5,  0
	addi	a5, a5,  1
	addi	a4, a4, -1
	# now retest if dst aligned
	bbci.l	a5, 1, .L0	# if now aligned, return to main algorithm
.L20:
	# dst half-aligned
	# set 2 bytes
EX(10f) s16i	a3, a5,  0
	addi	a5, a5,  2
	addi	a4, a4, -2
	j	.L0		# dst is now aligned, return to main algorithm

/*
 * Byte by byte set
 */
	.align	4
	.byte	0		# 1 mod 4 alignment for LOOPNEZ
				# (0 mod 4 alignment for LBEG)
.Lbyteset:
#if XCHAL_HAVE_LOOPS
	loopnez	a4, .Lbytesetdone
#else /* !XCHAL_HAVE_LOOPS */
	beqz	a4, .Lbytesetdone
	add	a6, a5, a4	# a6 = ending address
#endif /* !XCHAL_HAVE_LOOPS */
.Lbyteloop:
EX(10f) s8i	a3, a5, 0
	addi	a5, a5, 1
#if !XCHAL_HAVE_LOOPS
	blt	a5, a6, .Lbyteloop
#endif /* !XCHAL_HAVE_LOOPS */
.Lbytesetdone:
	abi_ret_default

ENDPROC(__memset)

	.section .fixup, "ax"
	.align	4

/* We return zero if a failure occurred. */
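/*
 * Each EX(10f)-wrapped store above adds an exception-table entry pointing
 * at label 10 below, so a store that faults (typically on user memory)
 * lands here instead of oopsing and the caller sees a return value of
 * zero rather than the destination pointer.
 */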

10:
	movi	a2, 0
	abi_ret_default