/* SPDX-License-Identifier: GPL-2.0 */
/*
 * blockops.S: Common block zero optimized routines.
 *
 * Copyright (C) 1996 David S. Miller (davem@caip.rutgers.edu)
 */

#include <linux/linkage.h>
#include <asm/page.h>
#include <asm/export.h>

	/* Zero out 64 bytes of memory at (buf + offset).
	 * Assumes %g1 contains zero.
	 *
	 * Each std stores the 8-byte register pair %g0/%g1, which is
	 * why %g1 must be cleared by the caller (%g0 always reads as
	 * zero).  Eight stores cover 0x00..0x3f, issued from the
	 * highest offset down to the lowest.
	 */
#define BLAST_BLOCK(buf, offset) \
	std	%g0, [buf + offset + 0x38]; \
	std	%g0, [buf + offset + 0x30]; \
	std	%g0, [buf + offset + 0x28]; \
	std	%g0, [buf + offset + 0x20]; \
	std	%g0, [buf + offset + 0x18]; \
	std	%g0, [buf + offset + 0x10]; \
	std	%g0, [buf + offset + 0x08]; \
	std	%g0, [buf + offset + 0x00];

	/* Copy 32 bytes of memory at (src + offset) to
	 * (dst + offset).
	 *
	 * All four 8-byte loads are issued before any store.  Only
	 * t0, t2, t4 and t6 appear in the expansion; ldd/std operate
	 * on even/odd register pairs, so t1, t3, t5 and t7 name the
	 * odd partner of each pair and are clobbered implicitly.
	 */
#define MIRROR_BLOCK(dst, src, offset, t0, t1, t2, t3, t4, t5, t6, t7) \
	ldd	[src + offset + 0x18], t0; \
	ldd	[src + offset + 0x10], t2; \
	ldd	[src + offset + 0x08], t4; \
	ldd	[src + offset + 0x00], t6; \
	std	t0, [dst + offset + 0x18]; \
	std	t2, [dst + offset + 0x10]; \
	std	t4, [dst + offset + 0x08]; \
	std	t6, [dst + offset + 0x00];

	/* Profiling evidence indicates that memset() is
	 * commonly called for blocks of size PAGE_SIZE,
	 * and (2 * PAGE_SIZE) (for kernel stacks)
	 * and with a second arg of zero.  We assume in
	 * all of these cases that the buffer is aligned
	 * on at least an 8 byte boundary.
	 *
	 * Therefore we special case them to make them
	 * as fast as possible.
	 */

	.text

	/* bzero_1page: zero PAGE_SIZE bytes starting at %o0.
	 *
	 * In:       %o0 = buf (at least 8-byte aligned, see above)
	 * Clobbers: %g1, %g2, %o0, %o1 and the integer condition codes.
	 *
	 * The loop runs (PAGE_SIZE >> 8) times; each pass zeroes
	 * 4 * 64 = 256 bytes and then advances %o0 by 0x100.
	 */
ENTRY(bzero_1page)
/* NOTE: If you change the number of insns of this routine, please check
 * arch/sparc/mm/hypersparc.S */
	/* %o0 = buf */
	or	%g0, %g0, %g1		/* %g1 = 0, required by BLAST_BLOCK */
	/* %o1 = buf.  Not read again in this routine; kept because of
	 * the insn-count NOTE above (presumably relied on by
	 * hypersparc.S -- verify before removing).
	 */
	or	%o0, %g0, %o1
	or	%g0, (PAGE_SIZE >> 8), %g2	/* %g2 = 256-byte chunks left */
1:
	BLAST_BLOCK(%o0, 0x00)
	BLAST_BLOCK(%o0, 0x40)
	BLAST_BLOCK(%o0, 0x80)
	BLAST_BLOCK(%o0, 0xc0)
	subcc	%g2, 1, %g2
	bne	1b
	 add	%o0, 0x100, %o0		/* delay slot: next 256-byte chunk */

	retl
	 nop
ENDPROC(bzero_1page)
EXPORT_SYMBOL(bzero_1page)

	/* __copy_1page: copy PAGE_SIZE bytes from %o1 to %o0.
	 *
	 * In:       %o0 = dst, %o1 = src
	 * Clobbers: %g1-%g5, %o0-%o5 and the integer condition codes.
	 *
	 * The loop runs (PAGE_SIZE >> 8) times; each pass copies
	 * 8 * 32 = 256 bytes and then advances both pointers by 0x100.
	 */
ENTRY(__copy_1page)
/* NOTE: If you change the number of insns of this routine, please check
 * arch/sparc/mm/hypersparc.S */
	/* %o0 = dst, %o1 = src */
	or	%g0, (PAGE_SIZE >> 8), %g1	/* %g1 = 256-byte chunks left */
1:
	MIRROR_BLOCK(%o0, %o1, 0x00, %o2, %o3, %o4, %o5, %g2, %g3, %g4, %g5)
	MIRROR_BLOCK(%o0, %o1, 0x20, %o2, %o3, %o4, %o5, %g2, %g3, %g4, %g5)
	MIRROR_BLOCK(%o0, %o1, 0x40, %o2, %o3, %o4, %o5, %g2, %g3, %g4, %g5)
	MIRROR_BLOCK(%o0, %o1, 0x60, %o2, %o3, %o4, %o5, %g2, %g3, %g4, %g5)
	MIRROR_BLOCK(%o0, %o1, 0x80, %o2, %o3, %o4, %o5, %g2, %g3, %g4, %g5)
	MIRROR_BLOCK(%o0, %o1, 0xa0, %o2, %o3, %o4, %o5, %g2, %g3, %g4, %g5)
	MIRROR_BLOCK(%o0, %o1, 0xc0, %o2, %o3, %o4, %o5, %g2, %g3, %g4, %g5)
	MIRROR_BLOCK(%o0, %o1, 0xe0, %o2, %o3, %o4, %o5, %g2, %g3, %g4, %g5)
	subcc	%g1, 1, %g1
	/* Plain add (not addcc), so the condition codes set by subcc
	 * are still intact when bne tests them.
	 */
	add	%o0, 0x100, %o0
	bne	1b
	 add	%o1, 0x100, %o1		/* delay slot: advance src */

	retl
	 nop
ENDPROC(__copy_1page)
EXPORT_SYMBOL(__copy_1page)