/* SPDX-License-Identifier: GPL-2.0 */
/*
 * linux/arch/alpha/lib/memset.S
 *
 * This is an efficient (and small) implementation of the C library "memset()"
 * function for the alpha.
 *
 *	(C) Copyright 1996 Linus Torvalds
 *
 * This routine is "moral-ware": you are free to use it any way you wish, and
 * the only obligation I put on you is a moral one: if you make any improvements
 * to the routine, please send me your improvements for me to use similarly.
 *
 * The scheduling comments are according to the EV5 documentation (and done by
 * hand, so they might well be incorrect, please do tell me about it..)
 */

/*
 * Entry points and register usage (as established by the code below):
 *
 *   ___memset / __memset / memset
 *	$16 = destination, $17 = fill value (only the low byte is used),
 *	$18 = byte count.  The low byte of $17 is replicated into all eight
 *	bytes and control falls through into __constant_c_memset.
 *
 *   __constant_c_memset
 *	Same as above, but $17 must already hold the complete 64-bit fill
 *	pattern; no replication is done.
 *
 *   __memset16
 *	$17 = 16-bit fill value; it is replicated into the four 16-bit lanes
 *	of a quadword and control branches to __constant_c_memset.  $18 is
 *	passed through unchanged, i.e. it is still a byte count.
 *	NOTE(review): the pattern phase only lines up with the destination
 *	if $16 is at least 2-byte aligned - confirm against callers.
 *
 * All entry points return the original destination in $0 and return
 * through $26.  Scratch registers used: $1-$7.  A count <= 0 (signed
 * compare) stores nothing.
 *
 * The "E0"/"E1" annotations name the EV5 issue pipe each instruction was
 * scheduled for; ".." marks the second instruction of a dual-issue pair.
 * The instruction order is part of that schedule - do not reorder casually.
 */
#include <asm/export.h>
	/* Hand-scheduled code: no assembler temporaries, no reordering. */
	.set noat
	.set noreorder
.text
	.globl memset
	.globl __memset
	.globl ___memset
	.globl __memset16
	.globl __constant_c_memset

	.ent ___memset
.align 5
___memset:
	.frame $30,0,$26,0
	.prologue 0

	/*
	 * Replicate the low byte of $17 across the quadword:
	 * byte -> 16 bits -> 32 bits -> 64 bits.
	 */
	and $17,255,$1		/* E1 */
	insbl $17,1,$17		/* .. E0 */
	bis $17,$1,$17		/* E0 (p-c latency, next cycle) */
	sll $17,16,$1		/* E1 (p-c latency, next cycle) */

	bis $17,$1,$17		/* E0 (p-c latency, next cycle) */
	sll $17,32,$1		/* E1 (p-c latency, next cycle) */
	bis $17,$1,$17		/* E0 (p-c latency, next cycle) */
	/* Load into the zero register $31: discarded, fills the issue slot. */
	ldq_u $31,0($30)	/* .. E1 */

	/* Falls through: $17 now holds the full 64-bit pattern. */
.align 5
__constant_c_memset:
	/*
	 * $6 = end address (one past the last byte), $0 = return value,
	 * $1 = dest ^ end, used below to see whether both lie in the same
	 * aligned quadword.
	 */
	addq $18,$16,$6		/* E0 */
	bis $16,$16,$0		/* .. E1 */
	xor $16,$6,$1		/* E0 */
	ble $18,end		/* .. E1 */

	/*
	 * Ignore the byte-within-quad bits of dest ^ end; zero means the
	 * whole region sits inside one quadword.  Otherwise $3 = dest & 7,
	 * and an already aligned destination skips the head fix-up.
	 */
	bic $1,7,$1		/* E0 */
	beq $1,within_one_quad	/* .. E1 (note EV5 zero-latency forwarding) */
	and $16,7,$3		/* E0 */
	beq $3,aligned		/* .. E1 (note EV5 zero-latency forwarding) */

	/*
	 * Unaligned head: read the quadword containing dest, keep its bytes
	 * below dest (mskql), merge in the pattern shifted up to dest's byte
	 * offset (insql) and write the quadword back through the saved
	 * pointer $5.
	 */
	ldq_u $4,0($16)		/* E0 */
	bis $16,$16,$5		/* .. E1 */
	insql $17,$16,$2	/* E0 */
	subq $3,8,$3		/* .. E1 */

	addq $18,$3,$18		/* E0	$18 is new count ($3 is negative) */
	mskql $4,$16,$4		/* .. E1 (and possible load stall) */
	subq $16,$3,$16		/* E0	$16 is new aligned destination */
	bis $2,$4,$1		/* .. E1 */

	/* The bis/ldq_u with $31 as destination are scheduling filler. */
	bis $31,$31,$31		/* E0 */
	ldq_u $31,0($30)	/* .. E1 */
	stq_u $1,0($5)		/* E0 */
	bis $31,$31,$31		/* .. E1 */

.align 4
aligned:
	/*
	 * $3 = number of whole quadwords, $18 = leftover tail bytes (0..7),
	 * $5 = running store pointer.
	 */
	sra $18,3,$3		/* E0 */
	and $18,7,$18		/* .. E1 */
	bis $16,$16,$5		/* E0 */
	beq $3,no_quad		/* .. E1 */

.align 3
loop:
	/* One full quadword of pattern per iteration. */
	stq $17,0($5)		/* E0 */
	subq $3,1,$3		/* .. E1 */
	addq $5,8,$5		/* E0 */
	bne $3,loop		/* .. E1 */

no_quad:
	/*
	 * Tail: nothing left means done.  Otherwise read the final quadword,
	 * keep its bytes at and above the end offset (mskqh), fill the bytes
	 * below it with pattern (insqh) and write it back.
	 */
	bis $31,$31,$31		/* E0 */
	beq $18,end		/* .. E1 */
	ldq $7,0($5)		/* E0 */
	mskqh $7,$6,$2		/* .. E1 (and load stall) */

	insqh $17,$6,$4		/* E0 */
	bis $2,$4,$1		/* .. E1 */
	stq $1,0($5)		/* E0 */
	ret $31,($26),1		/* .. E1 */

.align 3
within_one_quad:
	/*
	 * Start and end share one quadword.  First build "old bytes below
	 * dest + pattern from dest upward" in $2 ...
	 */
	ldq_u $1,0($16)		/* E0 */
	insql $17,$16,$2	/* E1 */
	mskql $1,$16,$4		/* E0 (after load stall) */
	bis $2,$4,$2		/* E0 */

	/*
	 * ... then cut that off at the end offset and restore the original
	 * bytes from the end offset upward before storing.
	 */
	mskql $2,$6,$4		/* E0 */
	mskqh $1,$6,$2		/* .. E1 */
	bis $2,$4,$1		/* E0 */
	stq_u $1,0($16)		/* E0 */

end:
	ret $31,($26),1		/* E1 */
	.end ___memset
EXPORT_SYMBOL(___memset)
EXPORT_SYMBOL(__constant_c_memset)

	.align 5
	.ent __memset16
__memset16:
	.prologue 0

	/*
	 * Place the low 16 bits of $17 at byte offsets 0, 2, 4 and 6 and OR
	 * the four copies together to form the 64-bit pattern in $17.
	 */
	inswl $17,0,$1		/* E0 */
	inswl $17,2,$2		/* E0 */
	inswl $17,4,$3		/* E0 */
	or $1,$2,$1		/* .. E1 */
	inswl $17,6,$4		/* E0 */
	or $1,$3,$1		/* .. E1 */
	or $1,$4,$17		/* E0 */
	br __constant_c_memset	/* .. E1 */

	.end __memset16
EXPORT_SYMBOL(__memset16)

/* memset and __memset are plain aliases of ___memset. */
memset = ___memset
__memset = ___memset
	EXPORT_SYMBOL(memset)
	EXPORT_SYMBOL(__memset)