/*
 * linux/arch/arm/lib/memset.S
 *
 * Copyright (C) 1995-2000 Russell King
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 as
 * published by the Free Software Foundation.
 *
 *  ASM optimised string functions
 */
#include <linux/linkage.h>
#include <asm/assembler.h>

	.text
	.align	5

	.syntax unified
#ifdef CONFIG_SYS_THUMB_BUILD
	.thumb
	.thumb_func
#endif
ENTRY(memset)
	ands	r3, r0, #3		@ 1 unaligned?
	mov	ip, r0			@ preserve r0 as return value
	bne	6f			@ 1
/*
 * we know that the pointer in ip is aligned to a word boundary.
 */
1:	orr	r1, r1, r1, lsl #8
	orr	r1, r1, r1, lsl #16
	mov	r3, r1
	cmp	r2, #16
	blt	4f

#if ! CALGN(1)+0

/*
 * We need 2 extra registers for this loop - use r8 and the LR
 */
	stmfd	sp!, {r8, lr}
	mov	r8, r1
	mov	lr, r1

2:	subs	r2, r2, #64
	stmiage	ip!, {r1, r3, r8, lr}	@ 64 bytes at a time.
	stmiage	ip!, {r1, r3, r8, lr}
	stmiage	ip!, {r1, r3, r8, lr}
	stmiage	ip!, {r1, r3, r8, lr}
	bgt	2b
	ldmfdeq	sp!, {r8, pc}		@ Now <64 bytes to go.
/*
 * No need to correct the count; we're only testing bits from now on
 */
	tst	r2, #32
	stmiane	ip!, {r1, r3, r8, lr}
	stmiane	ip!, {r1, r3, r8, lr}
	tst	r2, #16
	stmiane	ip!, {r1, r3, r8, lr}
	ldmfd	sp!, {r8, lr}

#else

/*
 * This version aligns the destination pointer in order to write
 * whole cache lines at once.
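 *
 * The pre-alignment code below stores up to 28 bytes (in conditional
 * 16-, 8- and 4-byte chunks, selected by the flags set from the
 * shifted alignment count in r8) so that the main loop starts on a
 * 32-byte boundary.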
 */

	stmfd	sp!, {r4-r8, lr}
	mov	r4, r1
	mov	r5, r1
	mov	r6, r1
	mov	r7, r1
	mov	r8, r1
	mov	lr, r1

	cmp	r2, #96
	tstgt	ip, #31
	ble	3f

	and	r8, ip, #31
	rsb	r8, r8, #32
	sub	r2, r2, r8
	movs	r8, r8, lsl #(32 - 4)
	stmiacs	ip!, {r4, r5, r6, r7}
	stmiami	ip!, {r4, r5}
	tst	r8, #(1 << 30)
	mov	r8, r1
	strne	r1, [ip], #4

3:	subs	r2, r2, #64
	stmiage	ip!, {r1, r3-r8, lr}
	stmiage	ip!, {r1, r3-r8, lr}
	bgt	3b
	ldmfdeq	sp!, {r4-r8, pc}

	tst	r2, #32
	stmiane	ip!, {r1, r3-r8, lr}
	tst	r2, #16
	stmiane	ip!, {r4-r7}
	ldmfd	sp!, {r4-r8, lr}

#endif

4:	tst	r2, #8
	stmiane	ip!, {r1, r3}
	tst	r2, #4
	strne	r1, [ip], #4
/*
 * When we get here, we've got less than 4 bytes to zero. We
 * may have an unaligned pointer as well.
 */
5:	tst	r2, #2
	strbne	r1, [ip], #1
	strbne	r1, [ip], #1
	tst	r2, #1
	strbne	r1, [ip], #1
	ret	lr

6:	subs	r2, r2, #4		@ 1 do we have enough
	blt	5b			@ 1 bytes to align with?
	cmp	r3, #2			@ 1
	strblt	r1, [ip], #1		@ 1
	strble	r1, [ip], #1		@ 1
	strb	r1, [ip], #1		@ 1
	add	r2, r2, r3		@ 1 (r2 = r2 - (4 - r3))
	b	1b
ENDPROC(memset)