/*
 * linux/arch/arm/lib/memset.S
 *
 * Copyright (C) 1995-2000 Russell King
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 as
 * published by the Free Software Foundation.
 *
 * ASM optimised string functions
 */
#include <linux/linkage.h>
#include <asm/assembler.h>

	.text
	.align	5			@ align entry to a 32-byte boundary

	.syntax unified			@ unified ARM/Thumb mnemonics (stmiage etc.)
#if CONFIG_IS_ENABLED(SYS_THUMB_BUILD) && !defined(MEMSET_NO_THUMB_BUILD)
	.thumb				@ build as Thumb-2 when the image is a Thumb build
	.thumb_func
#endif
/*
 * void *memset(void *s, int c, size_t n)
 *
 * AAPCS in:  r0 = s (destination), r1 = c (fill byte, low 8 bits used),
 *            r2 = n (byte count)
 * Out:       r0 = s, unchanged — all stores go through ip so the
 *            return value never has to be restored.
 * Clobbers:  r3, ip, flags; the bulk loops additionally save/restore
 *            r8/lr (and r4-r7 in the cache-aligning variant) on the stack.
 */
ENTRY(memset)
	ands	r3, r0, #3		@ 1 unaligned? (r3 = s & 3, sets Z)
	mov	ip, r0			@ preserve r0 as return value
	bne	6f			@ 1 not word-aligned: store 1-3 bytes first
/*
 * we know that the pointer in ip is aligned to a word boundary.
 */
1:	orr	r1, r1, r1, lsl #8	@ replicate fill byte into all
	orr	r1, r1, r1, lsl #16	@ four byte lanes of r1
	mov	r3, r1			@ second word-sized copy of the pattern
	cmp	r2, #16
	blt	4f			@ fewer than 16 bytes: word/byte tail only

#if ! CALGN(1)+0
/* CALGN() comes from <asm/assembler.h>; presumably non-zero selects the
 * cache-line-aligning variant below on CPUs that benefit — TODO confirm. */

/*
 * We need 2 extra registers for this loop - use r8 and the LR
 */
	stmfd	sp!, {r8, lr}
	mov	r8, r1			@ four registers all hold the pattern,
	mov	lr, r1			@ so each stm writes 16 bytes

2:	subs	r2, r2, #64		@ flags from here steer the ge/eq stores
	stmiage	ip!, {r1, r3, r8, lr}	@ 64 bytes at a time.
	stmiage	ip!, {r1, r3, r8, lr}	@ (executed only while r2 >= 0)
	stmiage	ip!, {r1, r3, r8, lr}
	stmiage	ip!, {r1, r3, r8, lr}
	bgt	2b
	ldmfdeq	sp!, {r8, pc}		@ Now <64 bytes to go.
/*
 * No need to correct the count; we're only testing bits from now on
 */
	tst	r2, #32			@ bit 5 set: 32 more bytes to store
	stmiane	ip!, {r1, r3, r8, lr}
	stmiane	ip!, {r1, r3, r8, lr}
	tst	r2, #16			@ bit 4 set: 16 more bytes
	stmiane	ip!, {r1, r3, r8, lr}
	ldmfd	sp!, {r8, lr}

#else

/*
 * This version aligns the destination pointer in order to write
 * whole cache lines at once.
 */

	stmfd	sp!, {r4-r8, lr}
	mov	r4, r1			@ r1, r3-r8, lr all carry the fill
	mov	r5, r1			@ pattern: one stm = 32 bytes = one
	mov	r6, r1			@ cache line
	mov	r7, r1
	mov	r8, r1
	mov	lr, r1

	cmp	r2, #96			@ only worth aligning for larger sets
	tstgt	ip, #31			@ ...and only if not already 32-aligned
	ble	3f

	and	r8, ip, #31		@ r8 = ip mod 32
	rsb	r8, r8, #32		@ r8 = bytes needed to reach 32-alignment
	sub	r2, r2, r8		@ charge them to the count up front
	movs	r8, r8, lsl #(32 - 4)	@ bit4 -> C, bit3 -> N: flags now encode r8
	stmiacs	ip!, {r4, r5, r6, r7}	@ C set (bit 4): store 16 bytes
	stmiami	ip!, {r4, r5}		@ N set (bit 3): store 8 bytes
	tst	r8, #(1 << 30)		@ original bit 2, shifted up by 28
	mov	r8, r1			@ restore pattern before the main loop
	strne	r1, [ip], #4		@ bit 2: store the final aligning word

3:	subs	r2, r2, #64		@ main loop: 64 bytes (2 lines) per pass
	stmiage	ip!, {r1, r3-r8, lr}
	stmiage	ip!, {r1, r3-r8, lr}
	bgt	3b
	ldmfdeq	sp!, {r4-r8, pc}	@ exactly done: pop and return

	tst	r2, #32			@ as above: remaining count tested bitwise
	stmiane	ip!, {r1, r3-r8, lr}
	tst	r2, #16
	stmiane	ip!, {r4-r7}
	ldmfd	sp!, {r4-r8, lr}

#endif

4:	tst	r2, #8			@ bit 3: store 8 bytes
	stmiane	ip!, {r1, r3}
	tst	r2, #4			@ bit 2: store 4 bytes
	strne	r1, [ip], #4
/*
 * When we get here, we've got less than 4 bytes to zero. We
 * may have an unaligned pointer as well.
 */
5:	tst	r2, #2			@ bit 1: store 2 bytes
	strbne	r1, [ip], #1
	strbne	r1, [ip], #1
	tst	r2, #1			@ bit 0: store the last byte
	strbne	r1, [ip], #1
	ret	lr			@ r0 still holds the original dest

/*
 * Unaligned entry: r3 = s & 3 (1..3). Store 1-3 bytes to word-align ip,
 * then rejoin the fast path at 1. If fewer than 4 bytes were requested,
 * fall back to the byte tail at 5 with the count untouched.
 */
6:	subs	r2, r2, #4		@ 1 do we have enough
	blt	5b			@ 1 bytes to align with?
	cmp	r3, #2			@ 1
	strblt	r1, [ip], #1		@ 1 r3 == 1: 3 alignment bytes
	strble	r1, [ip], #1		@ 1 r3 <= 2: at least 2 bytes
	strb	r1, [ip], #1		@ 1 always at least 1 byte
	add	r2, r2, r3		@ 1 (r2 = r2 - (4 - r3))
	b	1b
ENDPROC(memset)