/* SPDX-License-Identifier: GPL-2.0 */
/*
 * arch/sparc64/lib/xor.S
 *
 * High speed xor_block operation for RAID4/5 utilizing the
 * UltraSparc Visual Instruction Set and Niagara store-init/twin-load.
 *
 * Copyright (C) 1997, 1999 Jakub Jelinek (jj@ultra.linux.cz)
 * Copyright (C) 2006 David S. Miller <davem@davemloft.net>
 */

#include <linux/linkage.h>
#include <asm/visasm.h>
#include <asm/asi.h>
#include <asm/dcu.h>
#include <asm/spitfire.h>
#include <asm/export.h>

/*
 *	Requirements:
 *	!(((long)dest | (long)sourceN) & (64 - 1)) &&
 *	!(len & 127) && len >= 256
 */
	.text

	/* VIS versions. */

/*
 * Conventions shared by the xor_vis_N routines below:
 *
 *  - Argument 0 is the length in bytes, argument 1 is the destination
 *    (which is also XORed in as the first operand), arguments 2.. are
 *    the remaining sources.
 *  - %fprs is sampled first.  When FPRS_FEF or FPRS_DU is set, the code
 *    jumps to VISenter (defined elsewhere) with %g7 holding the address
 *    of label 0; presumably VISenter preserves the live FPU state and
 *    returns through %g7 -- confirm against its definition.
 *  - %asi is saved in %g1, switched to ASI_BLK_P for the 64-byte
 *    ldda/stda transfers, and restored on exit.
 *  - On exit %fprs is written with 0.
 *  - The length is pre-decremented by one pass worth of bytes so that the
 *    loop exits with the final pass already loaded; the straight-line tail
 *    after the loop finishes it.  The loop only terminates when the counter
 *    reaches exactly zero, hence the length requirements above.
 *  - An instruction indented by one extra space sits in the delay slot of
 *    the branch/jump/return immediately above it.
 */

/*
 * xor_vis_2: dest ^= src, 128 bytes (two 64-byte blocks) per loop pass.
 *
 * In:	%o0 = bytes, %o1 = dest, %o2 = src
 * Scratch: %o5, %g1, %g7, %f0-%f62
 */
ENTRY(xor_vis_2)
	rd	%fprs, %o5
	andcc	%o5, FPRS_FEF|FPRS_DU, %g0
	be,pt	%icc, 0f			/* neither bit set: skip VISenter */
	 sethi	%hi(VISenter), %g1
	jmpl	%g1 + %lo(VISenter), %g7	/* %g7 = address of this jmpl */
	 add	%g7, 8, %g7			/* ... advanced to label 0 */
0:	wr	%g0, FPRS_FEF, %fprs
	rd	%asi, %g1			/* save caller's %asi */
	wr	%g0, ASI_BLK_P, %asi
	membar	#LoadStore|#StoreLoad|#StoreStore
	sub	%o0, 128, %o0			/* last 128 bytes go to the tail */
	ldda	[%o1] %asi, %f0
	ldda	[%o2] %asi, %f16

2:	ldda	[%o1 + 64] %asi, %f32
	fxor	%f0, %f16, %f16
	fxor	%f2, %f18, %f18
	fxor	%f4, %f20, %f20
	fxor	%f6, %f22, %f22
	fxor	%f8, %f24, %f24
	fxor	%f10, %f26, %f26
	fxor	%f12, %f28, %f28
	fxor	%f14, %f30, %f30
	stda	%f16, [%o1] %asi
	ldda	[%o2 + 64] %asi, %f48
	ldda	[%o1 + 128] %asi, %f0		/* first dest block of next pass */
	fxor	%f32, %f48, %f48
	fxor	%f34, %f50, %f50
	add	%o1, 128, %o1
	fxor	%f36, %f52, %f52
	add	%o2, 128, %o2
	fxor	%f38, %f54, %f54
	subcc	%o0, 128, %o0
	fxor	%f40, %f56, %f56
	fxor	%f42, %f58, %f58
	fxor	%f44, %f60, %f60
	fxor	%f46, %f62, %f62
	stda	%f48, [%o1 - 64] %asi		/* %o1 already advanced by 128 */
	bne,pt	%xcc, 2b
	 ldda	[%o2] %asi, %f16		/* first src block of next pass */

	/* Tail: final 128 bytes, first halves already in %f0 / %f16. */
	ldda	[%o1 + 64] %asi, %f32
	fxor	%f0, %f16, %f16
	fxor	%f2, %f18, %f18
	fxor	%f4, %f20, %f20
	fxor	%f6, %f22, %f22
	fxor	%f8, %f24, %f24
	fxor	%f10, %f26, %f26
	fxor	%f12, %f28, %f28
	fxor	%f14, %f30, %f30
	stda	%f16, [%o1] %asi
	ldda	[%o2 + 64] %asi, %f48
	/* NOTE(review): presumably waits for the block loads above to land. */
	membar	#Sync
	fxor	%f32, %f48, %f48
	fxor	%f34, %f50, %f50
	fxor	%f36, %f52, %f52
	fxor	%f38, %f54, %f54
	fxor	%f40, %f56, %f56
	fxor	%f42, %f58, %f58
	fxor	%f44, %f60, %f60
	fxor	%f46, %f62, %f62
	stda	%f48, [%o1 + 64] %asi
	membar	#Sync|#StoreStore|#StoreLoad
	wr	%g1, %g0, %asi			/* restore caller's %asi */
	retl
	 wr	%g0, 0, %fprs
ENDPROC(xor_vis_2)
EXPORT_SYMBOL(xor_vis_2)

/*
 * xor_vis_3: dest ^= src1 ^ src2, 64 bytes per loop pass.
 *
 * In:	%o0 = bytes, %o1 = dest, %o2 = src1, %o3 = src2
 * Scratch: %o5, %g1, %g7, %f0-%f62
 * The running result is accumulated in %f48-%f62.
 */
ENTRY(xor_vis_3)
	rd	%fprs, %o5
	andcc	%o5, FPRS_FEF|FPRS_DU, %g0
	be,pt	%icc, 0f			/* neither bit set: skip VISenter */
	 sethi	%hi(VISenter), %g1
	jmpl	%g1 + %lo(VISenter), %g7	/* %g7 = address of this jmpl */
	 add	%g7, 8, %g7			/* ... advanced to label 0 */
0:	wr	%g0, FPRS_FEF, %fprs
	rd	%asi, %g1			/* save caller's %asi */
	wr	%g0, ASI_BLK_P, %asi
	membar	#LoadStore|#StoreLoad|#StoreStore
	sub	%o0, 64, %o0			/* last 64 bytes go to the tail */
	ldda	[%o1] %asi, %f0
	ldda	[%o2] %asi, %f16

3:	ldda	[%o3] %asi, %f32
	fxor	%f0, %f16, %f48
	fxor	%f2, %f18, %f50
	add	%o1, 64, %o1
	fxor	%f4, %f20, %f52
	fxor	%f6, %f22, %f54
	add	%o2, 64, %o2
	fxor	%f8, %f24, %f56
	fxor	%f10, %f26, %f58
	fxor	%f12, %f28, %f60
	fxor	%f14, %f30, %f62
	ldda	[%o1] %asi, %f0			/* dest block of next pass */
	fxor	%f48, %f32, %f48
	fxor	%f50, %f34, %f50
	fxor	%f52, %f36, %f52
	fxor	%f54, %f38, %f54
	add	%o3, 64, %o3
	fxor	%f56, %f40, %f56
	fxor	%f58, %f42, %f58
	subcc	%o0, 64, %o0
	fxor	%f60, %f44, %f60
	fxor	%f62, %f46, %f62
	stda	%f48, [%o1 - 64] %asi		/* %o1 already advanced by 64 */
	bne,pt	%xcc, 3b
	 ldda	[%o2] %asi, %f16		/* src1 block of next pass */

	/* Tail: final 64 bytes, dest/src1 already in %f0 / %f16. */
	ldda	[%o3] %asi, %f32
	fxor	%f0, %f16, %f48
	fxor	%f2, %f18, %f50
	fxor	%f4, %f20, %f52
	fxor	%f6, %f22, %f54
	fxor	%f8, %f24, %f56
	fxor	%f10, %f26, %f58
	fxor	%f12, %f28, %f60
	fxor	%f14, %f30, %f62
	/* NOTE(review): presumably waits for the block load above to land. */
	membar	#Sync
	fxor	%f48, %f32, %f48
	fxor	%f50, %f34, %f50
	fxor	%f52, %f36, %f52
	fxor	%f54, %f38, %f54
	fxor	%f56, %f40, %f56
	fxor	%f58, %f42, %f58
	fxor	%f60, %f44, %f60
	fxor	%f62, %f46, %f62
	stda	%f48, [%o1] %asi
	membar	#Sync|#StoreStore|#StoreLoad
	wr	%g1, %g0, %asi			/* restore caller's %asi */
	retl
	 wr	%g0, 0, %fprs
ENDPROC(xor_vis_3)
EXPORT_SYMBOL(xor_vis_3)

/*
 * xor_vis_4: dest ^= src1 ^ src2 ^ src3, 64 bytes per loop pass.
 *
 * In:	%o0 = bytes, %o1 = dest, %o2 = src1, %o3 = src2, %o4 = src3
 * Scratch: %o5, %g1, %g7, %f0-%f62
 * The partial result ripples %f16.. -> %f32.. -> %f48.., which is stored.
 */
ENTRY(xor_vis_4)
	rd	%fprs, %o5
	andcc	%o5, FPRS_FEF|FPRS_DU, %g0
	be,pt	%icc, 0f			/* neither bit set: skip VISenter */
	 sethi	%hi(VISenter), %g1
	jmpl	%g1 + %lo(VISenter), %g7	/* %g7 = address of this jmpl */
	 add	%g7, 8, %g7			/* ... advanced to label 0 */
0:	wr	%g0, FPRS_FEF, %fprs
	rd	%asi, %g1			/* save caller's %asi */
	wr	%g0, ASI_BLK_P, %asi
	membar	#LoadStore|#StoreLoad|#StoreStore
	sub	%o0, 64, %o0			/* last 64 bytes go to the tail */
	ldda	[%o1] %asi, %f0
	ldda	[%o2] %asi, %f16

4:	ldda	[%o3] %asi, %f32
	fxor	%f0, %f16, %f16
	fxor	%f2, %f18, %f18
	add	%o1, 64, %o1
	fxor	%f4, %f20, %f20
	fxor	%f6, %f22, %f22
	add	%o2, 64, %o2
	fxor	%f8, %f24, %f24
	fxor	%f10, %f26, %f26
	fxor	%f12, %f28, %f28
	fxor	%f14, %f30, %f30
	ldda	[%o4] %asi, %f48
	fxor	%f16, %f32, %f32
	fxor	%f18, %f34, %f34
	fxor	%f20, %f36, %f36
	fxor	%f22, %f38, %f38
	add	%o3, 64, %o3
	fxor	%f24, %f40, %f40
	fxor	%f26, %f42, %f42
	fxor	%f28, %f44, %f44
	fxor	%f30, %f46, %f46
	ldda	[%o1] %asi, %f0			/* dest block of next pass */
	fxor	%f32, %f48, %f48
	fxor	%f34, %f50, %f50
	fxor	%f36, %f52, %f52
	add	%o4, 64, %o4
	fxor	%f38, %f54, %f54
	fxor	%f40, %f56, %f56
	fxor	%f42, %f58, %f58
	subcc	%o0, 64, %o0
	fxor	%f44, %f60, %f60
	fxor	%f46, %f62, %f62
	stda	%f48, [%o1 - 64] %asi		/* %o1 already advanced by 64 */
	bne,pt	%xcc, 4b
	 ldda	[%o2] %asi, %f16		/* src1 block of next pass */

	/* Tail: final 64 bytes, dest/src1 already in %f0 / %f16. */
	ldda	[%o3] %asi, %f32
	fxor	%f0, %f16, %f16
	fxor	%f2, %f18, %f18
	fxor	%f4, %f20, %f20
	fxor	%f6, %f22, %f22
	fxor	%f8, %f24, %f24
	fxor	%f10, %f26, %f26
	fxor	%f12, %f28, %f28
	fxor	%f14, %f30, %f30
	ldda	[%o4] %asi, %f48
	fxor	%f16, %f32, %f32
	fxor	%f18, %f34, %f34
	fxor	%f20, %f36, %f36
	fxor	%f22, %f38, %f38
	fxor	%f24, %f40, %f40
	fxor	%f26, %f42, %f42
	fxor	%f28, %f44, %f44
	fxor	%f30, %f46, %f46
	/* NOTE(review): presumably waits for the block load above to land. */
	membar	#Sync
	fxor	%f32, %f48, %f48
	fxor	%f34, %f50, %f50
	fxor	%f36, %f52, %f52
	fxor	%f38, %f54, %f54
	fxor	%f40, %f56, %f56
	fxor	%f42, %f58, %f58
	fxor	%f44, %f60, %f60
	fxor	%f46, %f62, %f62
	stda	%f48, [%o1] %asi
	membar	#Sync|#StoreStore|#StoreLoad
	wr	%g1, %g0, %asi			/* restore caller's %asi */
	retl
	 wr	%g0, 0, %fprs
ENDPROC(xor_vis_4)
EXPORT_SYMBOL(xor_vis_4)

/*
 * xor_vis_5: dest ^= src1 ^ src2 ^ src3 ^ src4, 64 bytes per loop pass.
 *
 * Opens a register window, so the caller's %o0-%o5 are seen as %i0-%i5:
 * In:	%i0 = bytes, %i1 = dest, %i2 = src1, %i3 = src2, %i4 = src3,
 *	%i5 = src4
 * Scratch: %o5 (of the new window), %g1, %g7, %f0-%f62
 * The running result is accumulated in %f48-%f62; %f16.. and %f32.. are
 * reused alternately as load targets for the sources.
 */
ENTRY(xor_vis_5)
	save	%sp, -192, %sp
	rd	%fprs, %o5
	andcc	%o5, FPRS_FEF|FPRS_DU, %g0
	be,pt	%icc, 0f			/* neither bit set: skip VISenter */
	 sethi	%hi(VISenter), %g1
	jmpl	%g1 + %lo(VISenter), %g7	/* %g7 = address of this jmpl */
	 add	%g7, 8, %g7			/* ... advanced to label 0 */
0:	wr	%g0, FPRS_FEF, %fprs
	rd	%asi, %g1			/* save caller's %asi */
	wr	%g0, ASI_BLK_P, %asi
	membar	#LoadStore|#StoreLoad|#StoreStore
	sub	%i0, 64, %i0			/* last 64 bytes go to the tail */
	ldda	[%i1] %asi, %f0
	ldda	[%i2] %asi, %f16

5:	ldda	[%i3] %asi, %f32
	fxor	%f0, %f16, %f48
	fxor	%f2, %f18, %f50
	add	%i1, 64, %i1
	fxor	%f4, %f20, %f52
	fxor	%f6, %f22, %f54
	add	%i2, 64, %i2
	fxor	%f8, %f24, %f56
	fxor	%f10, %f26, %f58
	fxor	%f12, %f28, %f60
	fxor	%f14, %f30, %f62
	ldda	[%i4] %asi, %f16
	fxor	%f48, %f32, %f48
	fxor	%f50, %f34, %f50
	fxor	%f52, %f36, %f52
	fxor	%f54, %f38, %f54
	add	%i3, 64, %i3
	fxor	%f56, %f40, %f56
	fxor	%f58, %f42, %f58
	fxor	%f60, %f44, %f60
	fxor	%f62, %f46, %f62
	ldda	[%i5] %asi, %f32
	fxor	%f48, %f16, %f48
	fxor	%f50, %f18, %f50
	add	%i4, 64, %i4
	fxor	%f52, %f20, %f52
	fxor	%f54, %f22, %f54
	add	%i5, 64, %i5
	fxor	%f56, %f24, %f56
	fxor	%f58, %f26, %f58
	fxor	%f60, %f28, %f60
	fxor	%f62, %f30, %f62
	ldda	[%i1] %asi, %f0			/* dest block of next pass */
	fxor	%f48, %f32, %f48
	fxor	%f50, %f34, %f50
	fxor	%f52, %f36, %f52
	fxor	%f54, %f38, %f54
	fxor	%f56, %f40, %f56
	fxor	%f58, %f42, %f58
	subcc	%i0, 64, %i0
	fxor	%f60, %f44, %f60
	fxor	%f62, %f46, %f62
	stda	%f48, [%i1 - 64] %asi		/* %i1 already advanced by 64 */
	bne,pt	%xcc, 5b
	 ldda	[%i2] %asi, %f16		/* src1 block of next pass */

	/* Tail: final 64 bytes, dest/src1 already in %f0 / %f16. */
	ldda	[%i3] %asi, %f32
	fxor	%f0, %f16, %f48
	fxor	%f2, %f18, %f50
	fxor	%f4, %f20, %f52
	fxor	%f6, %f22, %f54
	fxor	%f8, %f24, %f56
	fxor	%f10, %f26, %f58
	fxor	%f12, %f28, %f60
	fxor	%f14, %f30, %f62
	ldda	[%i4] %asi, %f16
	fxor	%f48, %f32, %f48
	fxor	%f50, %f34, %f50
	fxor	%f52, %f36, %f52
	fxor	%f54, %f38, %f54
	fxor	%f56, %f40, %f56
	fxor	%f58, %f42, %f58
	fxor	%f60, %f44, %f60
	fxor	%f62, %f46, %f62
	ldda	[%i5] %asi, %f32
	fxor	%f48, %f16, %f48
	fxor	%f50, %f18, %f50
	fxor	%f52, %f20, %f52
	fxor	%f54, %f22, %f54
	fxor	%f56, %f24, %f56
	fxor	%f58, %f26, %f58
	fxor	%f60, %f28, %f60
	fxor	%f62, %f30, %f62
	/* NOTE(review): presumably waits for the block load above to land. */
	membar	#Sync
	fxor	%f48, %f32, %f48
	fxor	%f50, %f34, %f50
	fxor	%f52, %f36, %f52
	fxor	%f54, %f38, %f54
	fxor	%f56, %f40, %f56
	fxor	%f58, %f42, %f58
	fxor	%f60, %f44, %f60
	fxor	%f62, %f46, %f62
	stda	%f48, [%i1] %asi
	membar	#Sync|#StoreStore|#StoreLoad
	wr	%g1, %g0, %asi			/* restore caller's %asi */
	wr	%g0, 0, %fprs
	ret
	 restore
ENDPROC(xor_vis_5)
EXPORT_SYMBOL(xor_vis_5)

	/* Niagara versions. */

/*
 * Conventions shared by the xor_niagara_N routines below:
 *
 *  - Each routine opens a register window, so the caller's %o0.. arrive
 *    as %i0..; the pointers are then moved out of the way (dest -> %i0,
 *    first source -> %i1, further sources -> %l7, %l6, %l5) so that the
 *    remaining registers can hold data.
 *  - %g1 = bytes >> 6 is the number of 64-byte passes.  The loop ends only
 *    when %g1 reaches exactly zero, so bytes must be at least 64.
 *  - %asi is saved in %g7, switched to ASI_BLK_INIT_QUAD_LDD_P for the
 *    loop, and restored on exit.  With that ASI each ldda fills an
 *    even/odd register pair with 16 bytes (see the per-line comments).
 *  - An instruction indented by one extra space sits in the delay slot of
 *    the branch/return immediately above it.
 */

/* xor_niagara_2: dest ^= src, 64 bytes per loop pass. */
ENTRY(xor_niagara_2) /* %o0=bytes, %o1=dest, %o2=src */
	save		%sp, -192, %sp
	prefetch	[%i1], #n_writes
	prefetch	[%i2], #one_read
	rd		%asi, %g7		/* save caller's %asi */
	wr		%g0, ASI_BLK_INIT_QUAD_LDD_P, %asi
	srlx		%i0, 6, %g1		/* %g1 = number of 64-byte passes */
	mov		%i1, %i0		/* %i0 = dest */
	mov		%i2, %i1		/* %i1 = src */
1:	ldda		[%i1 + 0x00] %asi, %i2	/* %i2/%i3 = src  + 0x00 */
	ldda		[%i1 + 0x10] %asi, %i4	/* %i4/%i5 = src  + 0x10 */
	ldda		[%i1 + 0x20] %asi, %g2	/* %g2/%g3 = src  + 0x20 */
	ldda		[%i1 + 0x30] %asi, %l0	/* %l0/%l1 = src  + 0x30 */
	prefetch	[%i1 + 0x40], #one_read
	ldda		[%i0 + 0x00] %asi, %o0	/* %o0/%o1 = dest + 0x00 */
	ldda		[%i0 + 0x10] %asi, %o2	/* %o2/%o3 = dest + 0x10 */
	ldda		[%i0 + 0x20] %asi, %o4	/* %o4/%o5 = dest + 0x20 */
	ldda		[%i0 + 0x30] %asi, %l2	/* %l2/%l3 = dest + 0x30 */
	prefetch	[%i0 + 0x40], #n_writes
	xor		%o0, %i2, %o0
	xor		%o1, %i3, %o1
	stxa		%o0, [%i0 + 0x00] %asi
	stxa		%o1, [%i0 + 0x08] %asi
	xor		%o2, %i4, %o2
	xor		%o3, %i5, %o3
	stxa		%o2, [%i0 + 0x10] %asi
	stxa		%o3, [%i0 + 0x18] %asi
	xor		%o4, %g2, %o4
	xor		%o5, %g3, %o5
	stxa		%o4, [%i0 + 0x20] %asi
	stxa		%o5, [%i0 + 0x28] %asi
	xor		%l2, %l0, %l2
	xor		%l3, %l1, %l3
	stxa		%l2, [%i0 + 0x30] %asi
	stxa		%l3, [%i0 + 0x38] %asi
	add		%i0, 0x40, %i0
	subcc		%g1, 1, %g1
	bne,pt		%xcc, 1b
	 add		%i1, 0x40, %i1
	membar		#Sync
	wr		%g7, 0x0, %asi		/* restore caller's %asi */
	ret
	 restore
ENDPROC(xor_niagara_2)
EXPORT_SYMBOL(xor_niagara_2)

/*
 * xor_niagara_3: dest ^= src1 ^ src2, 64 bytes per loop pass.
 * Each pass is done as four 16-byte steps; loads for the next step are
 * issued between the xor/store groups of the current one.
 */
ENTRY(xor_niagara_3) /* %o0=bytes, %o1=dest, %o2=src1, %o3=src2 */
	save		%sp, -192, %sp
	prefetch	[%i1], #n_writes
	prefetch	[%i2], #one_read
	prefetch	[%i3], #one_read
	rd		%asi, %g7		/* save caller's %asi */
	wr		%g0, ASI_BLK_INIT_QUAD_LDD_P, %asi
	srlx		%i0, 6, %g1		/* %g1 = number of 64-byte passes */
	mov		%i1, %i0		/* %i0 = dest */
	mov		%i2, %i1		/* %i1 = src1 */
	mov		%i3, %l7		/* %l7 = src2 */
1:	ldda		[%i1 + 0x00] %asi, %i2	/* %i2/%i3 = src1 + 0x00 */
	ldda		[%i1 + 0x10] %asi, %i4	/* %i4/%i5 = src1 + 0x10 */
	ldda		[%l7 + 0x00] %asi, %g2	/* %g2/%g3 = src2 + 0x00 */
	ldda		[%l7 + 0x10] %asi, %l0	/* %l0/%l1 = src2 + 0x10 */
	ldda		[%i0 + 0x00] %asi, %o0	/* %o0/%o1 = dest + 0x00 */
	ldda		[%i0 + 0x10] %asi, %o2	/* %o2/%o3 = dest + 0x10 */
	xor		%g2, %i2, %g2
	xor		%g3, %i3, %g3
	xor		%o0, %g2, %o0
	xor		%o1, %g3, %o1
	stxa		%o0, [%i0 + 0x00] %asi
	stxa		%o1, [%i0 + 0x08] %asi
	ldda		[%i1 + 0x20] %asi, %i2	/* %i2/%i3 = src1 + 0x20 */
	ldda		[%l7 + 0x20] %asi, %g2	/* %g2/%g3 = src2 + 0x20 */
	ldda		[%i0 + 0x20] %asi, %o0	/* %o0/%o1 = dest + 0x20 */
	xor		%l0, %i4, %l0
	xor		%l1, %i5, %l1
	xor		%o2, %l0, %o2
	xor		%o3, %l1, %o3
	stxa		%o2, [%i0 + 0x10] %asi
	stxa		%o3, [%i0 + 0x18] %asi
	ldda		[%i1 + 0x30] %asi, %i4	/* %i4/%i5 = src1 + 0x30 */
	ldda		[%l7 + 0x30] %asi, %l0	/* %l0/%l1 = src2 + 0x30 */
	ldda		[%i0 + 0x30] %asi, %o2	/* %o2/%o3 = dest + 0x30 */
	prefetch	[%i1 + 0x40], #one_read
	prefetch	[%l7 + 0x40], #one_read
	prefetch	[%i0 + 0x40], #n_writes
	xor		%g2, %i2, %g2
	xor		%g3, %i3, %g3
	xor		%o0, %g2, %o0
	xor		%o1, %g3, %o1
	stxa		%o0, [%i0 + 0x20] %asi
	stxa		%o1, [%i0 + 0x28] %asi
	xor		%l0, %i4, %l0
	xor		%l1, %i5, %l1
	xor		%o2, %l0, %o2
	xor		%o3, %l1, %o3
	stxa		%o2, [%i0 + 0x30] %asi
	stxa		%o3, [%i0 + 0x38] %asi
	add		%i0, 0x40, %i0
	add		%i1, 0x40, %i1
	subcc		%g1, 1, %g1
	bne,pt		%xcc, 1b
	 add		%l7, 0x40, %l7
	membar		#Sync
	wr		%g7, 0x0, %asi		/* restore caller's %asi */
	ret
	 restore
ENDPROC(xor_niagara_3)
EXPORT_SYMBOL(xor_niagara_3)

/*
 * xor_niagara_4: dest ^= src1 ^ src2 ^ src3, 64 bytes per loop pass.
 * Each 16-byte step folds src1 into the src2 pair, that into the src3
 * pair, and that into the dest pair, which is then stored.
 */
ENTRY(xor_niagara_4) /* %o0=bytes, %o1=dest, %o2=src1, %o3=src2, %o4=src3 */
	save		%sp, -192, %sp
	prefetch	[%i1], #n_writes
	prefetch	[%i2], #one_read
	prefetch	[%i3], #one_read
	prefetch	[%i4], #one_read
	rd		%asi, %g7		/* save caller's %asi */
	wr		%g0, ASI_BLK_INIT_QUAD_LDD_P, %asi
	srlx		%i0, 6, %g1		/* %g1 = number of 64-byte passes */
	mov		%i1, %i0		/* %i0 = dest */
	mov		%i2, %i1		/* %i1 = src1 */
	mov		%i3, %l7		/* %l7 = src2 */
	mov		%i4, %l6		/* %l6 = src3 */
1:	ldda		[%i1 + 0x00] %asi, %i2	/* %i2/%i3 = src1 + 0x00 */
	ldda		[%l7 + 0x00] %asi, %i4	/* %i4/%i5 = src2 + 0x00 */
	ldda		[%l6 + 0x00] %asi, %g2	/* %g2/%g3 = src3 + 0x00 */
	ldda		[%i0 + 0x00] %asi, %l0	/* %l0/%l1 = dest + 0x00 */
	xor		%i4, %i2, %i4
	xor		%i5, %i3, %i5
	ldda		[%i1 + 0x10] %asi, %i2	/* %i2/%i3 = src1 + 0x10 */
	xor		%g2, %i4, %g2
	xor		%g3, %i5, %g3
	ldda		[%l7 + 0x10] %asi, %i4	/* %i4/%i5 = src2 + 0x10 */
	xor		%l0, %g2, %l0
	xor		%l1, %g3, %l1
	stxa		%l0, [%i0 + 0x00] %asi
	stxa		%l1, [%i0 + 0x08] %asi
	ldda		[%l6 + 0x10] %asi, %g2	/* %g2/%g3 = src3 + 0x10 */
	ldda		[%i0 + 0x10] %asi, %l0	/* %l0/%l1 = dest + 0x10 */

	xor		%i4, %i2, %i4
	xor		%i5, %i3, %i5
	ldda		[%i1 + 0x20] %asi, %i2	/* %i2/%i3 = src1 + 0x20 */
	xor		%g2, %i4, %g2
	xor		%g3, %i5, %g3
	ldda		[%l7 + 0x20] %asi, %i4	/* %i4/%i5 = src2 + 0x20 */
	xor		%l0, %g2, %l0
	xor		%l1, %g3, %l1
	stxa		%l0, [%i0 + 0x10] %asi
	stxa		%l1, [%i0 + 0x18] %asi
	ldda		[%l6 + 0x20] %asi, %g2	/* %g2/%g3 = src3 + 0x20 */
	ldda		[%i0 + 0x20] %asi, %l0	/* %l0/%l1 = dest + 0x20 */

	xor		%i4, %i2, %i4
	xor		%i5, %i3, %i5
	ldda		[%i1 + 0x30] %asi, %i2	/* %i2/%i3 = src1 + 0x30 */
	xor		%g2, %i4, %g2
	xor		%g3, %i5, %g3
	ldda		[%l7 + 0x30] %asi, %i4	/* %i4/%i5 = src2 + 0x30 */
	xor		%l0, %g2, %l0
	xor		%l1, %g3, %l1
	stxa		%l0, [%i0 + 0x20] %asi
	stxa		%l1, [%i0 + 0x28] %asi
	ldda		[%l6 + 0x30] %asi, %g2	/* %g2/%g3 = src3 + 0x30 */
	ldda		[%i0 + 0x30] %asi, %l0	/* %l0/%l1 = dest + 0x30 */

	prefetch	[%i1 + 0x40], #one_read
	prefetch	[%l7 + 0x40], #one_read
	prefetch	[%l6 + 0x40], #one_read
	prefetch	[%i0 + 0x40], #n_writes

	xor		%i4, %i2, %i4
	xor		%i5, %i3, %i5
	xor		%g2, %i4, %g2
	xor		%g3, %i5, %g3
	xor		%l0, %g2, %l0
	xor		%l1, %g3, %l1
	stxa		%l0, [%i0 + 0x30] %asi
	stxa		%l1, [%i0 + 0x38] %asi

	add		%i0, 0x40, %i0
	add		%i1, 0x40, %i1
	add		%l7, 0x40, %l7
	subcc		%g1, 1, %g1
	bne,pt		%xcc, 1b
	 add		%l6, 0x40, %l6
	membar		#Sync
	wr		%g7, 0x0, %asi		/* restore caller's %asi */
	ret
	 restore
ENDPROC(xor_niagara_4)
EXPORT_SYMBOL(xor_niagara_4)

/*
 * xor_niagara_5: dest ^= src1 ^ src2 ^ src3 ^ src4, 64 bytes per loop pass.
 * Each 16-byte step folds src1 -> src2 -> src3 -> src4 -> dest pair, and
 * the dest pair is then stored.
 */
ENTRY(xor_niagara_5) /* %o0=bytes, %o1=dest, %o2=src1, %o3=src2, %o4=src3, %o5=src4 */
	save		%sp, -192, %sp
	prefetch	[%i1], #n_writes
	prefetch	[%i2], #one_read
	prefetch	[%i3], #one_read
	prefetch	[%i4], #one_read
	prefetch	[%i5], #one_read
	rd		%asi, %g7		/* save caller's %asi */
	wr		%g0, ASI_BLK_INIT_QUAD_LDD_P, %asi
	srlx		%i0, 6, %g1		/* %g1 = number of 64-byte passes */
	mov		%i1, %i0		/* %i0 = dest */
	mov		%i2, %i1		/* %i1 = src1 */
	mov		%i3, %l7		/* %l7 = src2 */
	mov		%i4, %l6		/* %l6 = src3 */
	mov		%i5, %l5		/* %l5 = src4 */
1:	ldda		[%i1 + 0x00] %asi, %i2	/* %i2/%i3 = src1 + 0x00 */
	ldda		[%l7 + 0x00] %asi, %i4	/* %i4/%i5 = src2 + 0x00 */
	ldda		[%l6 + 0x00] %asi, %g2	/* %g2/%g3 = src3 + 0x00 */
	ldda		[%l5 + 0x00] %asi, %l0	/* %l0/%l1 = src4 + 0x00 */
	ldda		[%i0 + 0x00] %asi, %l2	/* %l2/%l3 = dest + 0x00 */
	xor		%i4, %i2, %i4
	xor		%i5, %i3, %i5
	ldda		[%i1 + 0x10] %asi, %i2	/* %i2/%i3 = src1 + 0x10 */
	xor		%g2, %i4, %g2
	xor		%g3, %i5, %g3
	ldda		[%l7 + 0x10] %asi, %i4	/* %i4/%i5 = src2 + 0x10 */
	xor		%l0, %g2, %l0
	xor		%l1, %g3, %l1
	ldda		[%l6 + 0x10] %asi, %g2	/* %g2/%g3 = src3 + 0x10 */
	xor		%l2, %l0, %l2
	xor		%l3, %l1, %l3
	stxa		%l2, [%i0 + 0x00] %asi
	stxa		%l3, [%i0 + 0x08] %asi
	ldda		[%l5 + 0x10] %asi, %l0	/* %l0/%l1 = src4 + 0x10 */
	ldda		[%i0 + 0x10] %asi, %l2	/* %l2/%l3 = dest + 0x10 */

	xor		%i4, %i2, %i4
	xor		%i5, %i3, %i5
	ldda		[%i1 + 0x20] %asi, %i2	/* %i2/%i3 = src1 + 0x20 */
	xor		%g2, %i4, %g2
	xor		%g3, %i5, %g3
	ldda		[%l7 + 0x20] %asi, %i4	/* %i4/%i5 = src2 + 0x20 */
	xor		%l0, %g2, %l0
	xor		%l1, %g3, %l1
	ldda		[%l6 + 0x20] %asi, %g2	/* %g2/%g3 = src3 + 0x20 */
	xor		%l2, %l0, %l2
	xor		%l3, %l1, %l3
	stxa		%l2, [%i0 + 0x10] %asi
	stxa		%l3, [%i0 + 0x18] %asi
	ldda		[%l5 + 0x20] %asi, %l0	/* %l0/%l1 = src4 + 0x20 */
	ldda		[%i0 + 0x20] %asi, %l2	/* %l2/%l3 = dest + 0x20 */

	xor		%i4, %i2, %i4
	xor		%i5, %i3, %i5
	ldda		[%i1 + 0x30] %asi, %i2	/* %i2/%i3 = src1 + 0x30 */
	xor		%g2, %i4, %g2
	xor		%g3, %i5, %g3
	ldda		[%l7 + 0x30] %asi, %i4	/* %i4/%i5 = src2 + 0x30 */
	xor		%l0, %g2, %l0
	xor		%l1, %g3, %l1
	ldda		[%l6 + 0x30] %asi, %g2	/* %g2/%g3 = src3 + 0x30 */
	xor		%l2, %l0, %l2
	xor		%l3, %l1, %l3
	stxa		%l2, [%i0 + 0x20] %asi
	stxa		%l3, [%i0 + 0x28] %asi
	ldda		[%l5 + 0x30] %asi, %l0	/* %l0/%l1 = src4 + 0x30 */
	ldda		[%i0 + 0x30] %asi, %l2	/* %l2/%l3 = dest + 0x30 */

	prefetch	[%i1 + 0x40], #one_read
	prefetch	[%l7 + 0x40], #one_read
	prefetch	[%l6 + 0x40], #one_read
	prefetch	[%l5 + 0x40], #one_read
	prefetch	[%i0 + 0x40], #n_writes

	xor		%i4, %i2, %i4
	xor		%i5, %i3, %i5
	xor		%g2, %i4, %g2
	xor		%g3, %i5, %g3
	xor		%l0, %g2, %l0
	xor		%l1, %g3, %l1
	xor		%l2, %l0, %l2
	xor		%l3, %l1, %l3
	stxa		%l2, [%i0 + 0x30] %asi
	stxa		%l3, [%i0 + 0x38] %asi

	add		%i0, 0x40, %i0
	add		%i1, 0x40, %i1
	add		%l7, 0x40, %l7
	add		%l6, 0x40, %l6
	subcc		%g1, 1, %g1
	bne,pt		%xcc, 1b
	 add		%l5, 0x40, %l5
	membar		#Sync
	wr		%g7, 0x0, %asi		/* restore caller's %asi */
	ret
	 restore
ENDPROC(xor_niagara_5)
EXPORT_SYMBOL(xor_niagara_5)