xref: /OK3568_Linux_fs/kernel/arch/riscv/lib/memset.S (revision 4882a59341e53eb6f0b4789bf948001014eff981)
1*4882a593Smuzhiyun/* SPDX-License-Identifier: GPL-2.0-only */
2*4882a593Smuzhiyun/*
3*4882a593Smuzhiyun * Copyright (C) 2013 Regents of the University of California
4*4882a593Smuzhiyun */
5*4882a593Smuzhiyun
6*4882a593Smuzhiyun
7*4882a593Smuzhiyun#include <linux/linkage.h>
8*4882a593Smuzhiyun#include <asm/asm.h>
9*4882a593Smuzhiyun
10*4882a593Smuzhiyun/*
 * void *memset(void *, int, size_t)
 *
 * Fill a2 bytes starting at a0 with the low byte of a1.
 *
 * In:   a0 = destination, a1 = fill value, a2 = byte count
 * Out:  a0 = destination (a0 is never written; t0 is the moving cursor)
 * Uses: t0, a1, a2, a3, a4, a5 as scratch; the stack is not touched.
 *
 * Structure:
 *   - counts below 16 go straight to the tail code at 4:
 *   - otherwise byte stores up to the next SZREG boundary (1:),
 *   - then REG_S stores in an unrolled run of 32 (3:), entered
 *     part-way through on the first pass to absorb the remainder,
 *   - then byte stores for whatever is left (5:).
 *
 * SZREG and REG_S are not defined in this file (presumably they come
 * from <asm/asm.h>).
 */
11*4882a593SmuzhiyunENTRY(__memset)
12*4882a593SmuzhiyunWEAK(memset)
13*4882a593Smuzhiyun	move t0, a0  /* Preserve return value */
14*4882a593Smuzhiyun
15*4882a593Smuzhiyun	/* Defer to byte-oriented fill for small sizes */
	/* a3 = 1 when a2 < 16 (unsigned compare), otherwise 0 */
16*4882a593Smuzhiyun	sltiu a3, a2, 16
17*4882a593Smuzhiyun	bnez a3, 4f
18*4882a593Smuzhiyun
19*4882a593Smuzhiyun	/*
20*4882a593Smuzhiyun	 * Round to nearest XLEN-aligned address
21*4882a593Smuzhiyun	 * greater than or equal to start address
22*4882a593Smuzhiyun	 */
23*4882a593Smuzhiyun	addi a3, t0, SZREG-1
24*4882a593Smuzhiyun	andi a3, a3, ~(SZREG-1)
25*4882a593Smuzhiyun	beq a3, t0, 2f  /* Skip if already aligned */
26*4882a593Smuzhiyun	/* Handle initial misalignment */
	/* a4 = number of bytes between t0 and the aligned address in a3 */
27*4882a593Smuzhiyun	sub a4, a3, t0
28*4882a593Smuzhiyun1:
	/* Store one byte at a time until t0 reaches a3 */
29*4882a593Smuzhiyun	sb a1, 0(t0)
30*4882a593Smuzhiyun	addi t0, t0, 1
31*4882a593Smuzhiyun	bltu t0, a3, 1b
32*4882a593Smuzhiyun	sub a2, a2, a4  /* Update count */
33*4882a593Smuzhiyun
34*4882a593Smuzhiyun2: /* Duff's device with 32 XLEN stores per iteration */
35*4882a593Smuzhiyun	/* Broadcast value into all bytes */
	/*
	 * Keep only the low byte, then double the filled width with each
	 * shift/or pair: 1 -> 2 -> 4 bytes, and 4 -> 8 under CONFIG_64BIT.
	 * a3 is used as a temporary here.
	 */
36*4882a593Smuzhiyun	andi a1, a1, 0xff
37*4882a593Smuzhiyun	slli a3, a1, 8
38*4882a593Smuzhiyun	or a1, a3, a1
39*4882a593Smuzhiyun	slli a3, a1, 16
40*4882a593Smuzhiyun	or a1, a3, a1
41*4882a593Smuzhiyun#ifdef CONFIG_64BIT
42*4882a593Smuzhiyun	slli a3, a1, 32
43*4882a593Smuzhiyun	or a1, a3, a1
44*4882a593Smuzhiyun#endif
45*4882a593Smuzhiyun
46*4882a593Smuzhiyun	/* Calculate end address */
	/*
	 * a4 = remaining count rounded down to a multiple of SZREG;
	 * a3 = t0 + a4, the address at which the REG_S stores must stop.
	 */
47*4882a593Smuzhiyun	andi a4, a2, ~(SZREG-1)
48*4882a593Smuzhiyun	add a3, t0, a4
49*4882a593Smuzhiyun
	/*
	 * a4 is already a multiple of SZREG, so masking with 31*SZREG
	 * leaves a4 modulo 32*SZREG: the bytes that do not fill a whole
	 * pass through the 32-store run.
	 */
50*4882a593Smuzhiyun	andi a4, a4, 31*SZREG  /* Calculate remainder */
51*4882a593Smuzhiyun	beqz a4, 3f            /* Shortcut if no remainder */
	/*
	 * a4 = 32*SZREG - remainder, i.e. the number of data bytes
	 * covered by the stores that are skipped on the first pass.
	 */
52*4882a593Smuzhiyun	neg a4, a4
53*4882a593Smuzhiyun	addi a4, a4, 32*SZREG  /* Calculate initial offset */
54*4882a593Smuzhiyun
55*4882a593Smuzhiyun	/* Adjust start address with offset */
	/*
	 * t0 is moved back by a4 so that the first store actually
	 * executed (whose displacement equals a4) writes to the address
	 * t0 held before this adjustment.
	 */
56*4882a593Smuzhiyun	sub t0, t0, a4
57*4882a593Smuzhiyun
58*4882a593Smuzhiyun	/* Jump into loop body */
59*4882a593Smuzhiyun	/* Assumes 32-bit instruction lengths */
	/*
	 * Turn skipped data bytes into skipped code bytes.  Each REG_S
	 * is taken to occupy 4 bytes of code; the halving under
	 * CONFIG_64BIT implies each store writes 8 bytes there
	 * (presumably SZREG == 8 on 64-bit and 4 otherwise, where no
	 * scaling is needed -- confirm against the SZREG definition).
	 */
60*4882a593Smuzhiyun	la a5, 3f
61*4882a593Smuzhiyun#ifdef CONFIG_64BIT
62*4882a593Smuzhiyun	srli a4, a4, 1
63*4882a593Smuzhiyun#endif
64*4882a593Smuzhiyun	add a5, a5, a4
65*4882a593Smuzhiyun	jr a5
	/*
	 * The computed jump above lands inside the run of 32 stores that
	 * follows.  Do not insert, remove or reorder instructions between
	 * 3: and the addi that closes the run, and keep every store the
	 * same encoded size.
	 */
66*4882a593Smuzhiyun3:
67*4882a593Smuzhiyun	REG_S a1,        0(t0)
68*4882a593Smuzhiyun	REG_S a1,    SZREG(t0)
69*4882a593Smuzhiyun	REG_S a1,  2*SZREG(t0)
70*4882a593Smuzhiyun	REG_S a1,  3*SZREG(t0)
71*4882a593Smuzhiyun	REG_S a1,  4*SZREG(t0)
72*4882a593Smuzhiyun	REG_S a1,  5*SZREG(t0)
73*4882a593Smuzhiyun	REG_S a1,  6*SZREG(t0)
74*4882a593Smuzhiyun	REG_S a1,  7*SZREG(t0)
75*4882a593Smuzhiyun	REG_S a1,  8*SZREG(t0)
76*4882a593Smuzhiyun	REG_S a1,  9*SZREG(t0)
77*4882a593Smuzhiyun	REG_S a1, 10*SZREG(t0)
78*4882a593Smuzhiyun	REG_S a1, 11*SZREG(t0)
79*4882a593Smuzhiyun	REG_S a1, 12*SZREG(t0)
80*4882a593Smuzhiyun	REG_S a1, 13*SZREG(t0)
81*4882a593Smuzhiyun	REG_S a1, 14*SZREG(t0)
82*4882a593Smuzhiyun	REG_S a1, 15*SZREG(t0)
83*4882a593Smuzhiyun	REG_S a1, 16*SZREG(t0)
84*4882a593Smuzhiyun	REG_S a1, 17*SZREG(t0)
85*4882a593Smuzhiyun	REG_S a1, 18*SZREG(t0)
86*4882a593Smuzhiyun	REG_S a1, 19*SZREG(t0)
87*4882a593Smuzhiyun	REG_S a1, 20*SZREG(t0)
88*4882a593Smuzhiyun	REG_S a1, 21*SZREG(t0)
89*4882a593Smuzhiyun	REG_S a1, 22*SZREG(t0)
90*4882a593Smuzhiyun	REG_S a1, 23*SZREG(t0)
91*4882a593Smuzhiyun	REG_S a1, 24*SZREG(t0)
92*4882a593Smuzhiyun	REG_S a1, 25*SZREG(t0)
93*4882a593Smuzhiyun	REG_S a1, 26*SZREG(t0)
94*4882a593Smuzhiyun	REG_S a1, 27*SZREG(t0)
95*4882a593Smuzhiyun	REG_S a1, 28*SZREG(t0)
96*4882a593Smuzhiyun	REG_S a1, 29*SZREG(t0)
97*4882a593Smuzhiyun	REG_S a1, 30*SZREG(t0)
98*4882a593Smuzhiyun	REG_S a1, 31*SZREG(t0)
	/* Advance past the 32 slots and repeat while t0 is below the end in a3 */
99*4882a593Smuzhiyun	addi t0, t0, 32*SZREG
100*4882a593Smuzhiyun	bltu t0, a3, 3b
	/* Only the part of the count smaller than SZREG is still unfilled */
101*4882a593Smuzhiyun	andi a2, a2, SZREG-1  /* Update count */
102*4882a593Smuzhiyun
103*4882a593Smuzhiyun4:
104*4882a593Smuzhiyun	/* Handle trailing misalignment */
	/*
	 * Reached either after the REG_S loop or directly from the
	 * small-size check.  In the latter case a1 has not been masked;
	 * sb stores only its low byte, so the result is the same.
	 */
105*4882a593Smuzhiyun	beqz a2, 6f
	/* a3 = one past the last byte to fill */
106*4882a593Smuzhiyun	add a3, t0, a2
107*4882a593Smuzhiyun5:
108*4882a593Smuzhiyun	sb a1, 0(t0)
109*4882a593Smuzhiyun	addi t0, t0, 1
110*4882a593Smuzhiyun	bltu t0, a3, 5b
111*4882a593Smuzhiyun6:
	/* a0 still holds the original destination pointer */
112*4882a593Smuzhiyun	ret
113*4882a593SmuzhiyunEND(__memset)
114