/* SPDX-License-Identifier: GPL-2.0 */
/*
 * "memset" implementation for SH4
 *
 * Copyright (C) 1999 Niibe Yutaka
 * Copyright (c) 2009 STMicroelectronics Limited
 * Author: Stuart Menefy <stuart.menefy@st.com>
 */

/*
 * void *memset(void *s, int c, size_t n);
 */
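
/*
 * Rough C-level sketch of the strategy used below (reference only, not
 * built): fill backwards from s + n, byte-fill down to a 4-byte boundary,
 * replicate c into a 32-bit pattern, store words (and, for lengths of 64
 * or more, whole 32-byte cache lines via movca.l), then byte-fill the
 * tail.  The helper name and the 32-bit unsigned long assumption are
 * illustrative, not part of this file.
 *
 *	void *memset_sketch(void *s, int c, size_t n)
 *	{
 *		unsigned char *p = (unsigned char *)s + n;
 *		unsigned long v = (unsigned char)c * 0x01010101UL;
 *
 *		if (n >= 12) {				// worth word stores
 *			while ((unsigned long)p & 3) {	// align top pointer
 *				*--p = c;
 *				n--;
 *			}
 *			while (n >= 8) {		// 8 bytes per turn
 *				p -= 8;
 *				*(unsigned long *)(p + 4) = v;
 *				*(unsigned long *)p = v;
 *				n -= 8;
 *			}
 *		}
 *		while (n--)				// 0..7 remaining bytes
 *			*--p = c;
 *		return s;
 *	}
 */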

#include <linux/linkage.h>

ENTRY(memset)
	mov	#12,r0
	add	r6,r4
	cmp/gt	r6,r0
	bt/s	40f		! if it's too small, set a byte at once
	 mov	r4,r0
	and	#3,r0
	cmp/eq	#0,r0
	bt/s	2f		! It's aligned
	 sub	r0,r6
1:
	dt	r0
	bf/s	1b
	 mov.b	r5,@-r4
2:				! make VVVV
	extu.b	r5,r5
	swap.b	r5,r0		!   V0
	or	r0,r5		!   VV
	swap.w	r5,r0		! VV00
	or	r0,r5		! VVVV

	! Check if enough bytes need to be copied to be worth the big loop
	mov	#0x40, r0	! (MT)
	cmp/gt	r6,r0		! (MT) 64 > len => slow loop

	bt/s	22f
	 mov	r6,r0

	! align the dst to the cache block size if necessary
	mov	r4, r3
	mov	#~(0x1f), r1

	and	r3, r1
	cmp/eq	r3, r1

	bt/s	11f		! dst is already aligned
	 sub	r1, r3		! r3-r1 -> r3
	shlr2	r3		! number of loops

10:	mov.l	r5,@-r4
	dt	r3
	bf/s	10b
	 add	#-4, r6

11:	! dst is 32byte aligned
	mov	r6,r2
	mov	#-5,r0
	shld	r0,r2		! number of loops

	add	#-32, r4
	mov	r5, r0
12:
	movca.l	r0,@r4
	mov.l	r5,@(4, r4)
	mov.l	r5,@(8, r4)
	mov.l	r5,@(12,r4)
	mov.l	r5,@(16,r4)
	mov.l	r5,@(20,r4)
	add	#-0x20, r6
	mov.l	r5,@(24,r4)
	dt	r2
	mov.l	r5,@(28,r4)
	bf/s	12b
	 add	#-32, r4

	add	#32, r4
	mov	#8, r0
	cmp/ge	r0, r6
	bf	40f

	mov	r6,r0
22:
	shlr2	r0
	shlr	r0		! r0 = r6 >> 3
3:
	dt	r0
	mov.l	r5,@-r4		! set 8-byte at once
	bf/s	3b
	 mov.l	r5,@-r4
	!
	mov	#7,r0
	and	r0,r6

	! fill bytes (length may be zero)
40:	tst	r6,r6
	bt	5f
4:
	dt	r6
	bf/s	4b
	 mov.b	r5,@-r4
5:
	rts
	 mov	r4,r0