xref: /OK3568_Linux_fs/kernel/arch/openrisc/lib/memset.S (revision 4882a59341e53eb6f0b4789bf948001014eff981)
/* SPDX-License-Identifier: GPL-2.0-or-later */
/*
 * OpenRISC memset.S
 *
 * Hand-optimized assembler version of memset for OpenRISC.
 * Algorithm inspired by several other arch-specific memset routines
 * in the kernel tree.
 *
 * Copyright (C) 2015 Olof Kindgren <olof.kindgren@gmail.com>
 */

12*4882a593Smuzhiyun	.global memset
13*4882a593Smuzhiyun	.type	memset, @function
14*4882a593Smuzhiyunmemset:
15*4882a593Smuzhiyun	/* arguments:
16*4882a593Smuzhiyun	 * r3 = *s
17*4882a593Smuzhiyun	 * r4 = c
18*4882a593Smuzhiyun	 * r5 = n
19*4882a593Smuzhiyun	 * r13, r15, r17, r19 used as temp regs
20*4882a593Smuzhiyun	*/
21*4882a593Smuzhiyun
22*4882a593Smuzhiyun	/* Exit if n == 0 */
23*4882a593Smuzhiyun	l.sfeqi		r5, 0
24*4882a593Smuzhiyun	l.bf		4f
25*4882a593Smuzhiyun
26*4882a593Smuzhiyun	/* Truncate c to char */
27*4882a593Smuzhiyun	l.andi  	r13, r4, 0xff
28*4882a593Smuzhiyun
29*4882a593Smuzhiyun	/* Skip word extension if c is 0 */
30*4882a593Smuzhiyun	l.sfeqi		r13, 0
31*4882a593Smuzhiyun	l.bf		1f
32*4882a593Smuzhiyun	/* Check for at least two whole words (8 bytes) */
33*4882a593Smuzhiyun	 l.sfleui	r5, 7
34*4882a593Smuzhiyun
35*4882a593Smuzhiyun	/* Extend char c to 32-bit word cccc in r13 */
36*4882a593Smuzhiyun	l.slli		r15, r13, 16  // r13 = 000c, r15 = 0c00
37*4882a593Smuzhiyun	l.or		r13, r13, r15 // r13 = 0c0c, r15 = 0c00
38*4882a593Smuzhiyun	l.slli		r15, r13, 8   // r13 = 0c0c, r15 = c0c0
39*4882a593Smuzhiyun	l.or		r13, r13, r15 // r13 = cccc, r15 = c0c0
40*4882a593Smuzhiyun
41*4882a593Smuzhiyun1:	l.addi		r19, r3, 0 // Set r19 = src
42*4882a593Smuzhiyun	/* Jump to byte copy loop if less than two words */
43*4882a593Smuzhiyun	l.bf		3f
44*4882a593Smuzhiyun	 l.or		r17, r5, r0 // Set r17 = n
45*4882a593Smuzhiyun
46*4882a593Smuzhiyun	/* Mask out two LSBs to check alignment */
47*4882a593Smuzhiyun	l.andi		r15, r3, 0x3
48*4882a593Smuzhiyun
49*4882a593Smuzhiyun	/* lsb == 00, jump to word copy loop */
50*4882a593Smuzhiyun	l.sfeqi		r15, 0
51*4882a593Smuzhiyun	l.bf		2f
52*4882a593Smuzhiyun	 l.addi		r19, r3, 0 // Set r19 = src
53*4882a593Smuzhiyun
54*4882a593Smuzhiyun	/* lsb == 01,10 or 11 */
55*4882a593Smuzhiyun	l.sb		0(r3), r13   // *src = c
56*4882a593Smuzhiyun	l.addi		r17, r17, -1 // Decrease n
57*4882a593Smuzhiyun
58*4882a593Smuzhiyun	l.sfeqi		r15, 3
59*4882a593Smuzhiyun	l.bf		2f
60*4882a593Smuzhiyun	 l.addi		r19, r3, 1  // src += 1
61*4882a593Smuzhiyun
62*4882a593Smuzhiyun	/* lsb == 01 or 10 */
63*4882a593Smuzhiyun	l.sb		1(r3), r13   // *(src+1) = c
64*4882a593Smuzhiyun	l.addi		r17, r17, -1 // Decrease n
65*4882a593Smuzhiyun
66*4882a593Smuzhiyun	l.sfeqi		r15, 2
67*4882a593Smuzhiyun	l.bf		2f
68*4882a593Smuzhiyun	 l.addi		r19, r3, 2  // src += 2
69*4882a593Smuzhiyun
70*4882a593Smuzhiyun	/* lsb == 01 */
71*4882a593Smuzhiyun	l.sb		2(r3), r13   // *(src+2) = c
72*4882a593Smuzhiyun	l.addi		r17, r17, -1 // Decrease n
73*4882a593Smuzhiyun	l.addi		r19, r3, 3   // src += 3
74*4882a593Smuzhiyun
75*4882a593Smuzhiyun	/* Word copy loop */
76*4882a593Smuzhiyun2:	l.sw		0(r19), r13  // *src = cccc
77*4882a593Smuzhiyun	l.addi		r17, r17, -4 // Decrease n
78*4882a593Smuzhiyun	l.sfgeui	r17, 4
79*4882a593Smuzhiyun	l.bf		2b
80*4882a593Smuzhiyun	 l.addi		r19, r19, 4  // Increase src
81*4882a593Smuzhiyun
82*4882a593Smuzhiyun	/* When n > 0, copy the remaining bytes, otherwise jump to exit */
83*4882a593Smuzhiyun	l.sfeqi		r17, 0
84*4882a593Smuzhiyun	l.bf		4f
85*4882a593Smuzhiyun
86*4882a593Smuzhiyun	/* Byte copy loop */
87*4882a593Smuzhiyun3:	l.addi		r17, r17, -1 // Decrease n
88*4882a593Smuzhiyun	l.sb		0(r19), r13  // *src = cccc
89*4882a593Smuzhiyun	l.sfnei		r17, 0
90*4882a593Smuzhiyun	l.bf		3b
91*4882a593Smuzhiyun	 l.addi		r19, r19, 1  // Increase src
92*4882a593Smuzhiyun
93*4882a593Smuzhiyun4:	l.jr		r9
94*4882a593Smuzhiyun	 l.ori		r11, r3, 0
95