/* SPDX-License-Identifier: GPL-2.0 */
/*
 * memscan.S: Optimized memscan for Sparc64.
 *
 * Copyright (C) 1997,1998 Jakub Jelinek (jj@ultra.linux.cz)
 * Copyright (C) 1998 David S. Miller (davem@redhat.com)
 */

	#include <asm/export.h>

/*
 * Layout convention used below: an instruction indented by one extra
 * space sits in the delay slot of the branch/return directly above it.
 * With the ",a" (annul) suffix, the delay slot of a conditional branch
 * only executes when the branch is taken.
 *
 * HI_MAGIC has bit 7 of every byte set; LO_MAGIC has bit 0 of every byte
 * set.  The code builds HI_MAGIC in a register and derives the LO_MAGIC
 * value from it with a shift, so LO_MAGIC itself is not referenced.
 *
 * ASI_PL is the address space identifier used for the 8-byte loads; the
 * byte checks below treat the least significant byte of the loaded word
 * as the byte at the lowest address.
 */
#define HI_MAGIC	0x8080808080808080
#define LO_MAGIC	0x0101010101010101
#define ASI_PL		0x88

	.text
	.align		32
	.globl		__memscan_zero, __memscan_generic
	.type		__memscan_zero,#function
	.type		__memscan_generic,#function
	.globl		memscan
	EXPORT_SYMBOL(__memscan_zero)
	EXPORT_SYMBOL(__memscan_generic)

/*
 * __memscan_zero: find the first zero byte in a buffer.
 *
 * In:	%o0 = bufp, %o1 = size
 * Out:	%o0 = address of the first zero byte, or bufp + size when no
 *	zero byte lies inside the buffer.  When size <= 0, bufp is
 *	returned unchanged.
 * Clobbers: %o1-%o5, %g2, %g3, %g7, condition codes.
 *
 * Leading bytes are checked one at a time until the pointer is 8-byte
 * aligned; after that whole 8-byte words are loaded.  The word loop can
 * therefore read bytes beyond bufp + size inside the last aligned word;
 * the returned pointer is clamped to bufp + size at local label 1.
 */
__memscan_zero:
	/* %o0 = bufp, %o1 = size */
	brlez,pn	%o1, szzero		/* size <= 0: return bufp */
	 andcc		%o0, 7, %g0		/* is bufp 8-byte aligned? */
	be,pt		%icc, we_are_aligned
	 sethi		%hi(HI_MAGIC), %o4	/* always runs: first half of the 32-bit magic */
	ldub		[%o0], %o5
	/* Byte-at-a-time loop until %o0 is 8-byte aligned. */
1:	subcc		%o1, 1, %o1		/* one byte consumed; flags used by "be" below */
	brz,pn		%o5, 10f		/* byte was zero: 10f undoes the increment */
	 add		%o0, 1, %o0

	be,pn		%xcc, szzero		/* size exhausted: %o0 == bufp + size */
	 andcc		%o0, 7, %g0		/* re-test alignment for the next branch */
	bne,a,pn	%icc, 1b		/* still unaligned: fetch next byte and loop */
	 ldub		[%o0], %o5
we_are_aligned:
	ldxa		[%o0] ASI_PL, %o5	/* first aligned 8-byte word */
	or		%o4, %lo(HI_MAGIC), %o3	/* %o3 = low 32 bits of HI_MAGIC */
	sllx		%o3, 32, %o4
	or		%o4, %o3, %o3		/* %o3 = 0x8080808080808080 */

	srlx		%o3, 7, %o2		/* %o2 = 0x0101010101010101 */
	/*
	 * Word loop.  On entry %o5 holds the word at %o0.  %o0 and %o1 are
	 * advanced past the word before it is tested, so inside check_bytes
	 * the word's first byte lives at %o0 - 8.
	 */
msloop:
	sub		%o1, 8, %o1
	add		%o0, 8, %o0
	sub		%o5, %o2, %o4
	xor		%o4, %o5, %o4		/* bits that the subtraction flipped */
	andcc		%o4, %o3, %g3		/* non-zero: word may contain a zero byte */
	bne,pn		%xcc, check_bytes
	 srlx		%o4, 32, %g3		/* keep upper half of the xor for later */

	brgz,a,pt	%o1, msloop		/* bytes left: load next word and loop */
	 ldxa		[%o0] ASI_PL, %o5
	/*
	 * Falls through with the flags of the andcc above still live (result
	 * was zero) once size is used up; the path below then ends at the
	 * clamp in label 1.
	 */
check_bytes:
	bne,a,pn	%icc, 2f		/* candidate in the low 4 bytes */
	 andcc		%o5, 0xff, %g0		/* test byte 0 (only if branch taken) */
	add		%o0, -5, %g2		/* pre-biased: first "inc" at 3 gives byte 4's address */
	ba,pt		%xcc, 3f		/* only the high 4 bytes need checking */
	 srlx		%o5, 32, %g7

	/*
	 * Bytes 0..3.  %g2 tracks the address of the byte under test; each
	 * "inc" in a (non-annulled) delay slot runs whether or not the
	 * branch is taken.
	 */
2:	srlx		%o5, 8, %g7
	be,pn		%icc, 1f		/* byte 0 is zero */
	 add		%o0, -8, %g2		/* %g2 = address of byte 0 */
	andcc		%g7, 0xff, %g0
	srlx		%g7, 8, %g7
	be,pn		%icc, 1f		/* byte 1 is zero */
	 inc		%g2
	andcc		%g7, 0xff, %g0

	srlx		%g7, 8, %g7
	be,pn		%icc, 1f		/* byte 2 is zero */
	 inc		%g2
	andcc		%g7, 0xff, %g0
	srlx		%g7, 8, %g7
	be,pn		%icc, 1f		/* byte 3 is zero */
	 inc		%g2
	andcc		%g3, %o3, %g0		/* any candidate in the high 4 bytes? */

	be,a,pn		%icc, 2f		/* none: skip straight to the loop tail */
	 mov		%o0, %g2
	/* Bytes 4..7.  %g7 holds the word shifted right by 32 here. */
3:	andcc		%g7, 0xff, %g0
	srlx		%g7, 8, %g7
	be,pn		%icc, 1f		/* byte 4 is zero */
	 inc		%g2
	andcc		%g7, 0xff, %g0
	srlx		%g7, 8, %g7

	be,pn		%icc, 1f		/* byte 5 is zero */
	 inc		%g2
	andcc		%g7, 0xff, %g0
	srlx		%g7, 8, %g7
	be,pn		%icc, 1f		/* byte 6 is zero */
	 inc		%g2
	andcc		%g7, 0xff, %g0
	srlx		%g7, 8, %g7

	be,pn		%icc, 1f		/* byte 7 is zero */
	 inc		%g2
	/* No zero byte in this word after all: continue or finish. */
2:	brgz,a,pt	%o1, msloop
	 ldxa		[%o0] ASI_PL, %o5
	inc		%g2			/* push %g2 to/past the end so the clamp picks %o0 */
	/*
	 * %o1 is the (possibly negative) remaining count, so %o0 + %o1 is
	 * bufp + size.  Return %g2 unless it lies beyond that end.
	 */
1:	add		%o0, %o1, %o0
	cmp		%g2, %o0
	retl

	 movle		%xcc, %g2, %o0
	/* Zero byte found in the unaligned prefix: step back onto it. */
10:	retl
	 sub		%o0, 1, %o0
szzero:	retl
	 nop

/*
 * memscan / __memscan_generic: find the first byte equal to c.
 *
 * In:	%o0 = addr, %o1 = c, %o2 = size
 * Out:	%o0 = address of the first byte equal to c, or addr + size when
 *	no byte matches.  When size == 0, addr is returned unchanged.
 * Clobbers: %o3, %o4, %o5, condition codes.
 *
 * %o3 holds addr + size and %o4 counts up from -size to 0, so
 * [%o3 + %o4] walks the buffer and the loop ends when %o4 reaches 0.
 * Each byte is loaded zero-extended and compared with c using the
 * 32-bit condition codes.
 */
memscan:
__memscan_generic:
	/* %o0 = addr, %o1 = c, %o2 = size */
	brz,pn		%o2, 3f
	 add		%o0, %o2, %o3		/* %o3 = addr + size */
	ldub		[%o0], %o5
	sub		%g0, %o2, %o4		/* %o4 = -size */
1:
	cmp		%o5, %o1
	be,pn		%icc, 2f		/* match (decided by cmp, not by the addcc) */
	 addcc		%o4, 1, %o4		/* advance index; flags feed the "bne" below */
	bne,a,pt	%xcc, 1b		/* index != 0: load next byte and loop */
	 ldub		[%o3 + %o4], %o5
	retl					/* no match: delay slot below yields %o3 + 0 */
	/* The delay slot is the same as the next insn, this is just to make it look more awful */
2:
	 add		%o3, %o4, %o0		/* delay slot of the retl above AND first insn of 2: */
	retl
	 sub		%o0, 1, %o0		/* %o4 was already advanced past the match */
3:
	retl
	 nop