/*
 *  arch/xtensa/lib/memset.S
 *
 *  ANSI C standard library function memset
 *  (Well, almost.  .fixup code might return zero.)
 *
 *  This file is subject to the terms and conditions of the GNU General
 *  Public License.  See the file "COPYING" in the main directory of
 *  this archive for more details.
 *
 *  Copyright (C) 2002 Tensilica Inc.
 */

#include <linux/linkage.h>
#include <asm/asmmacro.h>
#include <asm/core.h>

/*
 * void *memset(void *dst, int c, size_t length)
 *
 * The algorithm is as follows:
 *   Create a word with c in all byte positions.
 *   If the destination is aligned, do 16B chunks with a loop, and
 *     then finish up with 8B, 4B, 2B, and 1B stores conditional on
 *     the length.
 *   If the destination is unaligned, align it by conditionally
 *     setting 1B and 2B, then go to the aligned case.
 *   This code tries to use fall-through branches for the common
 *     case of an aligned destination (except for the branches to
 *     the alignment labels).
 */

.text
ENTRY(__memset)
WEAK(memset)

	abi_entry_default
	# a2/ dst, a3/ c, a4/ length
	extui	a3, a3, 0, 8	# mask to just 8 bits
	slli	a7, a3, 8	# duplicate character in all bytes of word
	or	a3, a3, a7	# ...
	slli	a7, a3, 16	# ...
	or	a3, a3, a7	# ...
	mov	a5, a2		# copy dst so that a2 is return value
	movi	a6, 3		# for alignment tests
	bany	a2, a6, .Ldstunaligned	# if dst is unaligned
.L0:	# return here from .Ldstunaligned when dst is aligned
	srli	a7, a4, 4	# number of loop iterations with 16B
				# per iteration
	bnez	a4, .Laligned
	abi_ret_default

/*
 * Destination is word-aligned.
 */
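/*
 * For readers, an illustrative C equivalent of the aligned fast path
 * below (a sketch only; "memset_sketch" is a hypothetical name, and the
 * code assumes dst is already word-aligned):
 *
 *	void *memset_sketch(void *dst, int c, size_t n)
 *	{
 *		char *p = dst;
 *		uint32_t w = (uint8_t)c;
 *
 *		w |= w << 8;
 *		w |= w << 16;				// c in all four bytes
 *		for (; n >= 16; n -= 16, p += 16) {	// .Loop1
 *			((uint32_t *)p)[0] = w;
 *			((uint32_t *)p)[1] = w;
 *			((uint32_t *)p)[2] = w;
 *			((uint32_t *)p)[3] = w;
 *		}
 *		if (n & 8) { ((uint32_t *)p)[0] = w; ((uint32_t *)p)[1] = w; p += 8; }
 *		if (n & 4) { *(uint32_t *)p = w; p += 4; }
 *		if (n & 2) { *(uint16_t *)p = (uint16_t)w; p += 2; }
 *		if (n & 1) { *p = (char)w; }
 *		return dst;
 *	}
 */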
	# set 16 bytes per iteration for word-aligned dst
	.align	4		# 1 mod 4 alignment for LOOPNEZ
	.byte	0		# (0 mod 4 alignment for LBEG)
.Laligned:
#if XCHAL_HAVE_LOOPS
	loopnez	a7, .Loop1done
#else /* !XCHAL_HAVE_LOOPS */
	beqz	a7, .Loop1done
	slli	a6, a7, 4
	add	a6, a6, a5	# a6 = end of last 16B chunk
#endif /* !XCHAL_HAVE_LOOPS */
.Loop1:
EX(10f)	s32i	a3, a5, 0
EX(10f)	s32i	a3, a5, 4
EX(10f)	s32i	a3, a5, 8
EX(10f)	s32i	a3, a5, 12
	addi	a5, a5, 16
#if !XCHAL_HAVE_LOOPS
	blt	a5, a6, .Loop1
#endif /* !XCHAL_HAVE_LOOPS */
.Loop1done:
	bbci.l	a4, 3, .L2
	# set 8 bytes
EX(10f)	s32i	a3, a5, 0
EX(10f)	s32i	a3, a5, 4
	addi	a5, a5, 8
.L2:
	bbci.l	a4, 2, .L3
	# set 4 bytes
EX(10f)	s32i	a3, a5, 0
	addi	a5, a5, 4
.L3:
	bbci.l	a4, 1, .L4
	# set 2 bytes
EX(10f)	s16i	a3, a5, 0
	addi	a5, a5, 2
.L4:
	bbci.l	a4, 0, .L5
	# set 1 byte
EX(10f)	s8i	a3, a5, 0
.L5:
.Lret1:
	abi_ret_default

/*
 * Destination is unaligned
 */

.Ldstunaligned:
	bltui	a4, 8, .Lbyteset	# do short copies byte by byte
	bbci.l	a5, 0, .L20		# branch if dst is half-word aligned
	# dst is only byte-aligned
	# set 1 byte
EX(10f)	s8i	a3, a5, 0
	addi	a5, a5, 1
	addi	a4, a4, -1
	# now retest if dst is aligned
	bbci.l	a5, 1, .L0	# if now aligned, return to main algorithm
.L20:
	# dst half-aligned
	# set 2 bytes
EX(10f)	s16i	a3, a5, 0
	addi	a5, a5, 2
	addi	a4, a4, -2
	j	.L0		# dst is now aligned, return to main algorithm

/*
 * Byte by byte set
 */
	.align	4
	.byte	0		# 1 mod 4 alignment for LOOPNEZ
				# (0 mod 4 alignment for LBEG)
.Lbyteset:
#if XCHAL_HAVE_LOOPS
	loopnez	a4, .Lbytesetdone
#else /* !XCHAL_HAVE_LOOPS */
	beqz	a4, .Lbytesetdone
	add	a6, a5, a4	# a6 = ending address
#endif /* !XCHAL_HAVE_LOOPS */
.Lbyteloop:
EX(10f)	s8i	a3, a5, 0
	addi	a5, a5, 1
#if !XCHAL_HAVE_LOOPS
	blt	a5, a6, .Lbyteloop
#endif /* !XCHAL_HAVE_LOOPS */
.Lbytesetdone:
	abi_ret_default

ENDPROC(__memset)
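/*
 * Each store above is wrapped in EX(10f): the macro records the address
 * of the following instruction in the kernel exception table, so a
 * store that faults (e.g. on an unmapped user page) resumes at local
 * label "10" in the .fixup section below instead of oopsing.  Roughly
 * (an illustrative sketch; see asm/asmmacro.h for the real definition):
 *
 *	#define EX(handler)				\
 *		.pushsection __ex_table, "a";		\
 *		.long	97f, handler;			\
 *		.popsection;				\
 *	97:
 */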
	.section .fixup, "ax"
	.align	4

/* We return zero if a failure occurred. */

10:
	movi	a2, 0
	abi_ret_default
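/*
 * Caller-side sketch (hypothetical; no such caller appears in this
 * file): because the fixup replaces the return value in a2 with 0, a
 * wrapper that sets user memory can detect a faulted store like this:
 *
 *	if (__memset(uaddr, 0, len) == NULL)
 *		return -EFAULT;		// a store faulted mid-set
 */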