1*4882a593Smuzhiyun /* SPDX-License-Identifier: GPL-2.0-or-later */ 2*4882a593Smuzhiyun /* 3*4882a593Smuzhiyun * include/asm-alpha/xor.h 4*4882a593Smuzhiyun * 5*4882a593Smuzhiyun * Optimized RAID-5 checksumming functions for alpha EV5 and EV6 6*4882a593Smuzhiyun */ 7*4882a593Smuzhiyun 8*4882a593Smuzhiyun extern void xor_alpha_2(unsigned long, unsigned long *, unsigned long *); 9*4882a593Smuzhiyun extern void xor_alpha_3(unsigned long, unsigned long *, unsigned long *, 10*4882a593Smuzhiyun unsigned long *); 11*4882a593Smuzhiyun extern void xor_alpha_4(unsigned long, unsigned long *, unsigned long *, 12*4882a593Smuzhiyun unsigned long *, unsigned long *); 13*4882a593Smuzhiyun extern void xor_alpha_5(unsigned long, unsigned long *, unsigned long *, 14*4882a593Smuzhiyun unsigned long *, unsigned long *, unsigned long *); 15*4882a593Smuzhiyun 16*4882a593Smuzhiyun extern void xor_alpha_prefetch_2(unsigned long, unsigned long *, 17*4882a593Smuzhiyun unsigned long *); 18*4882a593Smuzhiyun extern void xor_alpha_prefetch_3(unsigned long, unsigned long *, 19*4882a593Smuzhiyun unsigned long *, unsigned long *); 20*4882a593Smuzhiyun extern void xor_alpha_prefetch_4(unsigned long, unsigned long *, 21*4882a593Smuzhiyun unsigned long *, unsigned long *, 22*4882a593Smuzhiyun unsigned long *); 23*4882a593Smuzhiyun extern void xor_alpha_prefetch_5(unsigned long, unsigned long *, 24*4882a593Smuzhiyun unsigned long *, unsigned long *, 25*4882a593Smuzhiyun unsigned long *, unsigned long *); 26*4882a593Smuzhiyun 27*4882a593Smuzhiyun asm(" \n\ 28*4882a593Smuzhiyun .text \n\ 29*4882a593Smuzhiyun .align 3 \n\ 30*4882a593Smuzhiyun .ent xor_alpha_2 \n\ 31*4882a593Smuzhiyun xor_alpha_2: \n\ 32*4882a593Smuzhiyun .prologue 0 \n\ 33*4882a593Smuzhiyun srl $16, 6, $16 \n\ 34*4882a593Smuzhiyun .align 4 \n\ 35*4882a593Smuzhiyun 2: \n\ 36*4882a593Smuzhiyun ldq $0,0($17) \n\ 37*4882a593Smuzhiyun ldq $1,0($18) \n\ 38*4882a593Smuzhiyun ldq $2,8($17) \n\ 39*4882a593Smuzhiyun ldq $3,8($18) \n\ 40*4882a593Smuzhiyun \n\ 41*4882a593Smuzhiyun ldq $4,16($17) \n\ 42*4882a593Smuzhiyun ldq $5,16($18) \n\ 43*4882a593Smuzhiyun ldq $6,24($17) \n\ 44*4882a593Smuzhiyun ldq $7,24($18) \n\ 45*4882a593Smuzhiyun \n\ 46*4882a593Smuzhiyun ldq $19,32($17) \n\ 47*4882a593Smuzhiyun ldq $20,32($18) \n\ 48*4882a593Smuzhiyun ldq $21,40($17) \n\ 49*4882a593Smuzhiyun ldq $22,40($18) \n\ 50*4882a593Smuzhiyun \n\ 51*4882a593Smuzhiyun ldq $23,48($17) \n\ 52*4882a593Smuzhiyun ldq $24,48($18) \n\ 53*4882a593Smuzhiyun ldq $25,56($17) \n\ 54*4882a593Smuzhiyun xor $0,$1,$0 # 7 cycles from $1 load \n\ 55*4882a593Smuzhiyun \n\ 56*4882a593Smuzhiyun ldq $27,56($18) \n\ 57*4882a593Smuzhiyun xor $2,$3,$2 \n\ 58*4882a593Smuzhiyun stq $0,0($17) \n\ 59*4882a593Smuzhiyun xor $4,$5,$4 \n\ 60*4882a593Smuzhiyun \n\ 61*4882a593Smuzhiyun stq $2,8($17) \n\ 62*4882a593Smuzhiyun xor $6,$7,$6 \n\ 63*4882a593Smuzhiyun stq $4,16($17) \n\ 64*4882a593Smuzhiyun xor $19,$20,$19 \n\ 65*4882a593Smuzhiyun \n\ 66*4882a593Smuzhiyun stq $6,24($17) \n\ 67*4882a593Smuzhiyun xor $21,$22,$21 \n\ 68*4882a593Smuzhiyun stq $19,32($17) \n\ 69*4882a593Smuzhiyun xor $23,$24,$23 \n\ 70*4882a593Smuzhiyun \n\ 71*4882a593Smuzhiyun stq $21,40($17) \n\ 72*4882a593Smuzhiyun xor $25,$27,$25 \n\ 73*4882a593Smuzhiyun stq $23,48($17) \n\ 74*4882a593Smuzhiyun subq $16,1,$16 \n\ 75*4882a593Smuzhiyun \n\ 76*4882a593Smuzhiyun stq $25,56($17) \n\ 77*4882a593Smuzhiyun addq $17,64,$17 \n\ 78*4882a593Smuzhiyun addq $18,64,$18 \n\ 79*4882a593Smuzhiyun bgt $16,2b \n\ 80*4882a593Smuzhiyun \n\ 81*4882a593Smuzhiyun ret \n\ 82*4882a593Smuzhiyun .end xor_alpha_2 \n\ 83*4882a593Smuzhiyun \n\ 84*4882a593Smuzhiyun .align 3 \n\ 85*4882a593Smuzhiyun .ent xor_alpha_3 \n\ 86*4882a593Smuzhiyun xor_alpha_3: \n\ 87*4882a593Smuzhiyun .prologue 0 \n\ 88*4882a593Smuzhiyun srl $16, 6, $16 \n\ 89*4882a593Smuzhiyun .align 4 \n\ 90*4882a593Smuzhiyun 3: \n\ 91*4882a593Smuzhiyun ldq $0,0($17) \n\ 92*4882a593Smuzhiyun ldq $1,0($18) \n\ 93*4882a593Smuzhiyun ldq $2,0($19) \n\ 94*4882a593Smuzhiyun ldq $3,8($17) \n\ 95*4882a593Smuzhiyun \n\ 96*4882a593Smuzhiyun ldq $4,8($18) \n\ 97*4882a593Smuzhiyun ldq $6,16($17) \n\ 98*4882a593Smuzhiyun ldq $7,16($18) \n\ 99*4882a593Smuzhiyun ldq $21,24($17) \n\ 100*4882a593Smuzhiyun \n\ 101*4882a593Smuzhiyun ldq $22,24($18) \n\ 102*4882a593Smuzhiyun ldq $24,32($17) \n\ 103*4882a593Smuzhiyun ldq $25,32($18) \n\ 104*4882a593Smuzhiyun ldq $5,8($19) \n\ 105*4882a593Smuzhiyun \n\ 106*4882a593Smuzhiyun ldq $20,16($19) \n\ 107*4882a593Smuzhiyun ldq $23,24($19) \n\ 108*4882a593Smuzhiyun ldq $27,32($19) \n\ 109*4882a593Smuzhiyun nop \n\ 110*4882a593Smuzhiyun \n\ 111*4882a593Smuzhiyun xor $0,$1,$1 # 8 cycles from $0 load \n\ 112*4882a593Smuzhiyun xor $3,$4,$4 # 6 cycles from $4 load \n\ 113*4882a593Smuzhiyun xor $6,$7,$7 # 6 cycles from $7 load \n\ 114*4882a593Smuzhiyun xor $21,$22,$22 # 5 cycles from $22 load \n\ 115*4882a593Smuzhiyun \n\ 116*4882a593Smuzhiyun xor $1,$2,$2 # 9 cycles from $2 load \n\ 117*4882a593Smuzhiyun xor $24,$25,$25 # 5 cycles from $25 load \n\ 118*4882a593Smuzhiyun stq $2,0($17) \n\ 119*4882a593Smuzhiyun xor $4,$5,$5 # 6 cycles from $5 load \n\ 120*4882a593Smuzhiyun \n\ 121*4882a593Smuzhiyun stq $5,8($17) \n\ 122*4882a593Smuzhiyun xor $7,$20,$20 # 7 cycles from $20 load \n\ 123*4882a593Smuzhiyun stq $20,16($17) \n\ 124*4882a593Smuzhiyun xor $22,$23,$23 # 7 cycles from $23 load \n\ 125*4882a593Smuzhiyun \n\ 126*4882a593Smuzhiyun stq $23,24($17) \n\ 127*4882a593Smuzhiyun xor $25,$27,$27 # 7 cycles from $27 load \n\ 128*4882a593Smuzhiyun stq $27,32($17) \n\ 129*4882a593Smuzhiyun nop \n\ 130*4882a593Smuzhiyun \n\ 131*4882a593Smuzhiyun ldq $0,40($17) \n\ 132*4882a593Smuzhiyun ldq $1,40($18) \n\ 133*4882a593Smuzhiyun ldq $3,48($17) \n\ 134*4882a593Smuzhiyun ldq $4,48($18) \n\ 135*4882a593Smuzhiyun \n\ 136*4882a593Smuzhiyun ldq $6,56($17) \n\ 137*4882a593Smuzhiyun ldq $7,56($18) \n\ 138*4882a593Smuzhiyun ldq $2,40($19) \n\ 139*4882a593Smuzhiyun ldq $5,48($19) \n\ 140*4882a593Smuzhiyun \n\ 141*4882a593Smuzhiyun ldq $20,56($19) \n\ 142*4882a593Smuzhiyun xor $0,$1,$1 # 4 cycles from $1 load \n\ 143*4882a593Smuzhiyun xor $3,$4,$4 # 5 cycles from $4 load \n\ 144*4882a593Smuzhiyun xor $6,$7,$7 # 5 cycles from $7 load \n\ 145*4882a593Smuzhiyun \n\ 146*4882a593Smuzhiyun xor $1,$2,$2 # 4 cycles from $2 load \n\ 147*4882a593Smuzhiyun xor $4,$5,$5 # 5 cycles from $5 load \n\ 148*4882a593Smuzhiyun stq $2,40($17) \n\ 149*4882a593Smuzhiyun xor $7,$20,$20 # 4 cycles from $20 load \n\ 150*4882a593Smuzhiyun \n\ 151*4882a593Smuzhiyun stq $5,48($17) \n\ 152*4882a593Smuzhiyun subq $16,1,$16 \n\ 153*4882a593Smuzhiyun stq $20,56($17) \n\ 154*4882a593Smuzhiyun addq $19,64,$19 \n\ 155*4882a593Smuzhiyun \n\ 156*4882a593Smuzhiyun addq $18,64,$18 \n\ 157*4882a593Smuzhiyun addq $17,64,$17 \n\ 158*4882a593Smuzhiyun bgt $16,3b \n\ 159*4882a593Smuzhiyun ret \n\ 160*4882a593Smuzhiyun .end xor_alpha_3 \n\ 161*4882a593Smuzhiyun \n\ 162*4882a593Smuzhiyun .align 3 \n\ 163*4882a593Smuzhiyun .ent xor_alpha_4 \n\ 164*4882a593Smuzhiyun xor_alpha_4: \n\ 165*4882a593Smuzhiyun .prologue 0 \n\ 166*4882a593Smuzhiyun srl $16, 6, $16 \n\ 167*4882a593Smuzhiyun .align 4 \n\ 168*4882a593Smuzhiyun 4: \n\ 169*4882a593Smuzhiyun ldq $0,0($17) \n\ 170*4882a593Smuzhiyun ldq $1,0($18) \n\ 171*4882a593Smuzhiyun ldq $2,0($19) \n\ 172*4882a593Smuzhiyun ldq $3,0($20) \n\ 173*4882a593Smuzhiyun \n\ 174*4882a593Smuzhiyun ldq $4,8($17) \n\ 175*4882a593Smuzhiyun ldq $5,8($18) \n\ 176*4882a593Smuzhiyun ldq $6,8($19) \n\ 177*4882a593Smuzhiyun ldq $7,8($20) \n\ 178*4882a593Smuzhiyun \n\ 179*4882a593Smuzhiyun ldq $21,16($17) \n\ 180*4882a593Smuzhiyun ldq $22,16($18) \n\ 181*4882a593Smuzhiyun ldq $23,16($19) \n\ 182*4882a593Smuzhiyun ldq $24,16($20) \n\ 183*4882a593Smuzhiyun \n\ 184*4882a593Smuzhiyun ldq $25,24($17) \n\ 185*4882a593Smuzhiyun xor $0,$1,$1 # 6 cycles from $1 load \n\ 186*4882a593Smuzhiyun ldq $27,24($18) \n\ 187*4882a593Smuzhiyun xor $2,$3,$3 # 6 cycles from $3 load \n\ 188*4882a593Smuzhiyun \n\ 189*4882a593Smuzhiyun ldq $0,24($19) \n\ 190*4882a593Smuzhiyun xor $1,$3,$3 \n\ 191*4882a593Smuzhiyun ldq $1,24($20) \n\ 192*4882a593Smuzhiyun xor $4,$5,$5 # 7 cycles from $5 load \n\ 193*4882a593Smuzhiyun \n\ 194*4882a593Smuzhiyun stq $3,0($17) \n\ 195*4882a593Smuzhiyun xor $6,$7,$7 \n\ 196*4882a593Smuzhiyun xor $21,$22,$22 # 7 cycles from $22 load \n\ 197*4882a593Smuzhiyun xor $5,$7,$7 \n\ 198*4882a593Smuzhiyun \n\ 199*4882a593Smuzhiyun stq $7,8($17) \n\ 200*4882a593Smuzhiyun xor $23,$24,$24 # 7 cycles from $24 load \n\ 201*4882a593Smuzhiyun ldq $2,32($17) \n\ 202*4882a593Smuzhiyun xor $22,$24,$24 \n\ 203*4882a593Smuzhiyun \n\ 204*4882a593Smuzhiyun ldq $3,32($18) \n\ 205*4882a593Smuzhiyun ldq $4,32($19) \n\ 206*4882a593Smuzhiyun ldq $5,32($20) \n\ 207*4882a593Smuzhiyun xor $25,$27,$27 # 8 cycles from $27 load \n\ 208*4882a593Smuzhiyun \n\ 209*4882a593Smuzhiyun ldq $6,40($17) \n\ 210*4882a593Smuzhiyun ldq $7,40($18) \n\ 211*4882a593Smuzhiyun ldq $21,40($19) \n\ 212*4882a593Smuzhiyun ldq $22,40($20) \n\ 213*4882a593Smuzhiyun \n\ 214*4882a593Smuzhiyun stq $24,16($17) \n\ 215*4882a593Smuzhiyun xor $0,$1,$1 # 9 cycles from $1 load \n\ 216*4882a593Smuzhiyun xor $2,$3,$3 # 5 cycles from $3 load \n\ 217*4882a593Smuzhiyun xor $27,$1,$1 \n\ 218*4882a593Smuzhiyun \n\ 219*4882a593Smuzhiyun stq $1,24($17) \n\ 220*4882a593Smuzhiyun xor $4,$5,$5 # 5 cycles from $5 load \n\ 221*4882a593Smuzhiyun ldq $23,48($17) \n\ 222*4882a593Smuzhiyun ldq $24,48($18) \n\ 223*4882a593Smuzhiyun \n\ 224*4882a593Smuzhiyun ldq $25,48($19) \n\ 225*4882a593Smuzhiyun xor $3,$5,$5 \n\ 226*4882a593Smuzhiyun ldq $27,48($20) \n\ 227*4882a593Smuzhiyun ldq $0,56($17) \n\ 228*4882a593Smuzhiyun \n\ 229*4882a593Smuzhiyun ldq $1,56($18) \n\ 230*4882a593Smuzhiyun ldq $2,56($19) \n\ 231*4882a593Smuzhiyun xor $6,$7,$7 # 8 cycles from $6 load \n\ 232*4882a593Smuzhiyun ldq $3,56($20) \n\ 233*4882a593Smuzhiyun \n\ 234*4882a593Smuzhiyun stq $5,32($17) \n\ 235*4882a593Smuzhiyun xor $21,$22,$22 # 8 cycles from $22 load \n\ 236*4882a593Smuzhiyun xor $7,$22,$22 \n\ 237*4882a593Smuzhiyun xor $23,$24,$24 # 5 cycles from $24 load \n\ 238*4882a593Smuzhiyun \n\ 239*4882a593Smuzhiyun stq $22,40($17) \n\ 240*4882a593Smuzhiyun xor $25,$27,$27 # 5 cycles from $27 load \n\ 241*4882a593Smuzhiyun xor $24,$27,$27 \n\ 242*4882a593Smuzhiyun xor $0,$1,$1 # 5 cycles from $1 load \n\ 243*4882a593Smuzhiyun \n\ 244*4882a593Smuzhiyun stq $27,48($17) \n\ 245*4882a593Smuzhiyun xor $2,$3,$3 # 4 cycles from $3 load \n\ 246*4882a593Smuzhiyun xor $1,$3,$3 \n\ 247*4882a593Smuzhiyun subq $16,1,$16 \n\ 248*4882a593Smuzhiyun \n\ 249*4882a593Smuzhiyun stq $3,56($17) \n\ 250*4882a593Smuzhiyun addq $20,64,$20 \n\ 251*4882a593Smuzhiyun addq $19,64,$19 \n\ 252*4882a593Smuzhiyun addq $18,64,$18 \n\ 253*4882a593Smuzhiyun \n\ 254*4882a593Smuzhiyun addq $17,64,$17 \n\ 255*4882a593Smuzhiyun bgt $16,4b \n\ 256*4882a593Smuzhiyun ret \n\ 257*4882a593Smuzhiyun .end xor_alpha_4 \n\ 258*4882a593Smuzhiyun \n\ 259*4882a593Smuzhiyun .align 3 \n\ 260*4882a593Smuzhiyun .ent xor_alpha_5 \n\ 261*4882a593Smuzhiyun xor_alpha_5: \n\ 262*4882a593Smuzhiyun .prologue 0 \n\ 263*4882a593Smuzhiyun srl $16, 6, $16 \n\ 264*4882a593Smuzhiyun .align 4 \n\ 265*4882a593Smuzhiyun 5: \n\ 266*4882a593Smuzhiyun ldq $0,0($17) \n\ 267*4882a593Smuzhiyun ldq $1,0($18) \n\ 268*4882a593Smuzhiyun ldq $2,0($19) \n\ 269*4882a593Smuzhiyun ldq $3,0($20) \n\ 270*4882a593Smuzhiyun \n\ 271*4882a593Smuzhiyun ldq $4,0($21) \n\ 272*4882a593Smuzhiyun ldq $5,8($17) \n\ 273*4882a593Smuzhiyun ldq $6,8($18) \n\ 274*4882a593Smuzhiyun ldq $7,8($19) \n\ 275*4882a593Smuzhiyun \n\ 276*4882a593Smuzhiyun ldq $22,8($20) \n\ 277*4882a593Smuzhiyun ldq $23,8($21) \n\ 278*4882a593Smuzhiyun ldq $24,16($17) \n\ 279*4882a593Smuzhiyun ldq $25,16($18) \n\ 280*4882a593Smuzhiyun \n\ 281*4882a593Smuzhiyun ldq $27,16($19) \n\ 282*4882a593Smuzhiyun xor $0,$1,$1 # 6 cycles from $1 load \n\ 283*4882a593Smuzhiyun ldq $28,16($20) \n\ 284*4882a593Smuzhiyun xor $2,$3,$3 # 6 cycles from $3 load \n\ 285*4882a593Smuzhiyun \n\ 286*4882a593Smuzhiyun ldq $0,16($21) \n\ 287*4882a593Smuzhiyun xor $1,$3,$3 \n\ 288*4882a593Smuzhiyun ldq $1,24($17) \n\ 289*4882a593Smuzhiyun xor $3,$4,$4 # 7 cycles from $4 load \n\ 290*4882a593Smuzhiyun \n\ 291*4882a593Smuzhiyun stq $4,0($17) \n\ 292*4882a593Smuzhiyun xor $5,$6,$6 # 7 cycles from $6 load \n\ 293*4882a593Smuzhiyun xor $7,$22,$22 # 7 cycles from $22 load \n\ 294*4882a593Smuzhiyun xor $6,$23,$23 # 7 cycles from $23 load \n\ 295*4882a593Smuzhiyun \n\ 296*4882a593Smuzhiyun ldq $2,24($18) \n\ 297*4882a593Smuzhiyun xor $22,$23,$23 \n\ 298*4882a593Smuzhiyun ldq $3,24($19) \n\ 299*4882a593Smuzhiyun xor $24,$25,$25 # 8 cycles from $25 load \n\ 300*4882a593Smuzhiyun \n\ 301*4882a593Smuzhiyun stq $23,8($17) \n\ 302*4882a593Smuzhiyun xor $25,$27,$27 # 8 cycles from $27 load \n\ 303*4882a593Smuzhiyun ldq $4,24($20) \n\ 304*4882a593Smuzhiyun xor $28,$0,$0 # 7 cycles from $0 load \n\ 305*4882a593Smuzhiyun \n\ 306*4882a593Smuzhiyun ldq $5,24($21) \n\ 307*4882a593Smuzhiyun xor $27,$0,$0 \n\ 308*4882a593Smuzhiyun ldq $6,32($17) \n\ 309*4882a593Smuzhiyun ldq $7,32($18) \n\ 310*4882a593Smuzhiyun \n\ 311*4882a593Smuzhiyun stq $0,16($17) \n\ 312*4882a593Smuzhiyun xor $1,$2,$2 # 6 cycles from $2 load \n\ 313*4882a593Smuzhiyun ldq $22,32($19) \n\ 314*4882a593Smuzhiyun xor $3,$4,$4 # 4 cycles from $4 load \n\ 315*4882a593Smuzhiyun \n\ 316*4882a593Smuzhiyun ldq $23,32($20) \n\ 317*4882a593Smuzhiyun xor $2,$4,$4 \n\ 318*4882a593Smuzhiyun ldq $24,32($21) \n\ 319*4882a593Smuzhiyun ldq $25,40($17) \n\ 320*4882a593Smuzhiyun \n\ 321*4882a593Smuzhiyun ldq $27,40($18) \n\ 322*4882a593Smuzhiyun ldq $28,40($19) \n\ 323*4882a593Smuzhiyun ldq $0,40($20) \n\ 324*4882a593Smuzhiyun xor $4,$5,$5 # 7 cycles from $5 load \n\ 325*4882a593Smuzhiyun \n\ 326*4882a593Smuzhiyun stq $5,24($17) \n\ 327*4882a593Smuzhiyun xor $6,$7,$7 # 7 cycles from $7 load \n\ 328*4882a593Smuzhiyun ldq $1,40($21) \n\ 329*4882a593Smuzhiyun ldq $2,48($17) \n\ 330*4882a593Smuzhiyun \n\ 331*4882a593Smuzhiyun ldq $3,48($18) \n\ 332*4882a593Smuzhiyun xor $7,$22,$22 # 7 cycles from $22 load \n\ 333*4882a593Smuzhiyun ldq $4,48($19) \n\ 334*4882a593Smuzhiyun xor $23,$24,$24 # 6 cycles from $24 load \n\ 335*4882a593Smuzhiyun \n\ 336*4882a593Smuzhiyun ldq $5,48($20) \n\ 337*4882a593Smuzhiyun xor $22,$24,$24 \n\ 338*4882a593Smuzhiyun ldq $6,48($21) \n\ 339*4882a593Smuzhiyun xor $25,$27,$27 # 7 cycles from $27 load \n\ 340*4882a593Smuzhiyun \n\ 341*4882a593Smuzhiyun stq $24,32($17) \n\ 342*4882a593Smuzhiyun xor $27,$28,$28 # 8 cycles from $28 load \n\ 343*4882a593Smuzhiyun ldq $7,56($17) \n\ 344*4882a593Smuzhiyun xor $0,$1,$1 # 6 cycles from $1 load \n\ 345*4882a593Smuzhiyun \n\ 346*4882a593Smuzhiyun ldq $22,56($18) \n\ 347*4882a593Smuzhiyun ldq $23,56($19) \n\ 348*4882a593Smuzhiyun ldq $24,56($20) \n\ 349*4882a593Smuzhiyun ldq $25,56($21) \n\ 350*4882a593Smuzhiyun \n\ 351*4882a593Smuzhiyun xor $28,$1,$1 \n\ 352*4882a593Smuzhiyun xor $2,$3,$3 # 9 cycles from $3 load \n\ 353*4882a593Smuzhiyun xor $3,$4,$4 # 9 cycles from $4 load \n\ 354*4882a593Smuzhiyun xor $5,$6,$6 # 8 cycles from $6 load \n\ 355*4882a593Smuzhiyun \n\ 356*4882a593Smuzhiyun stq $1,40($17) \n\ 357*4882a593Smuzhiyun xor $4,$6,$6 \n\ 358*4882a593Smuzhiyun xor $7,$22,$22 # 7 cycles from $22 load \n\ 359*4882a593Smuzhiyun xor $23,$24,$24 # 6 cycles from $24 load \n\ 360*4882a593Smuzhiyun \n\ 361*4882a593Smuzhiyun stq $6,48($17) \n\ 362*4882a593Smuzhiyun xor $22,$24,$24 \n\ 363*4882a593Smuzhiyun subq $16,1,$16 \n\ 364*4882a593Smuzhiyun xor $24,$25,$25 # 8 cycles from $25 load \n\ 365*4882a593Smuzhiyun \n\ 366*4882a593Smuzhiyun stq $25,56($17) \n\ 367*4882a593Smuzhiyun addq $21,64,$21 \n\ 368*4882a593Smuzhiyun addq $20,64,$20 \n\ 369*4882a593Smuzhiyun addq $19,64,$19 \n\ 370*4882a593Smuzhiyun \n\ 371*4882a593Smuzhiyun addq $18,64,$18 \n\ 372*4882a593Smuzhiyun addq $17,64,$17 \n\ 373*4882a593Smuzhiyun bgt $16,5b \n\ 374*4882a593Smuzhiyun ret \n\ 375*4882a593Smuzhiyun .end xor_alpha_5 \n\ 376*4882a593Smuzhiyun \n\ 377*4882a593Smuzhiyun .align 3 \n\ 378*4882a593Smuzhiyun .ent xor_alpha_prefetch_2 \n\ 379*4882a593Smuzhiyun xor_alpha_prefetch_2: \n\ 380*4882a593Smuzhiyun .prologue 0 \n\ 381*4882a593Smuzhiyun srl $16, 6, $16 \n\ 382*4882a593Smuzhiyun \n\ 383*4882a593Smuzhiyun ldq $31, 0($17) \n\ 384*4882a593Smuzhiyun ldq $31, 0($18) \n\ 385*4882a593Smuzhiyun \n\ 386*4882a593Smuzhiyun ldq $31, 64($17) \n\ 387*4882a593Smuzhiyun ldq $31, 64($18) \n\ 388*4882a593Smuzhiyun \n\ 389*4882a593Smuzhiyun ldq $31, 128($17) \n\ 390*4882a593Smuzhiyun ldq $31, 128($18) \n\ 391*4882a593Smuzhiyun \n\ 392*4882a593Smuzhiyun ldq $31, 192($17) \n\ 393*4882a593Smuzhiyun ldq $31, 192($18) \n\ 394*4882a593Smuzhiyun .align 4 \n\ 395*4882a593Smuzhiyun 2: \n\ 396*4882a593Smuzhiyun ldq $0,0($17) \n\ 397*4882a593Smuzhiyun ldq $1,0($18) \n\ 398*4882a593Smuzhiyun ldq $2,8($17) \n\ 399*4882a593Smuzhiyun ldq $3,8($18) \n\ 400*4882a593Smuzhiyun \n\ 401*4882a593Smuzhiyun ldq $4,16($17) \n\ 402*4882a593Smuzhiyun ldq $5,16($18) \n\ 403*4882a593Smuzhiyun ldq $6,24($17) \n\ 404*4882a593Smuzhiyun ldq $7,24($18) \n\ 405*4882a593Smuzhiyun \n\ 406*4882a593Smuzhiyun ldq $19,32($17) \n\ 407*4882a593Smuzhiyun ldq $20,32($18) \n\ 408*4882a593Smuzhiyun ldq $21,40($17) \n\ 409*4882a593Smuzhiyun ldq $22,40($18) \n\ 410*4882a593Smuzhiyun \n\ 411*4882a593Smuzhiyun ldq $23,48($17) \n\ 412*4882a593Smuzhiyun ldq $24,48($18) \n\ 413*4882a593Smuzhiyun ldq $25,56($17) \n\ 414*4882a593Smuzhiyun ldq $27,56($18) \n\ 415*4882a593Smuzhiyun \n\ 416*4882a593Smuzhiyun ldq $31,256($17) \n\ 417*4882a593Smuzhiyun xor $0,$1,$0 # 8 cycles from $1 load \n\ 418*4882a593Smuzhiyun ldq $31,256($18) \n\ 419*4882a593Smuzhiyun xor $2,$3,$2 \n\ 420*4882a593Smuzhiyun \n\ 421*4882a593Smuzhiyun stq $0,0($17) \n\ 422*4882a593Smuzhiyun xor $4,$5,$4 \n\ 423*4882a593Smuzhiyun stq $2,8($17) \n\ 424*4882a593Smuzhiyun xor $6,$7,$6 \n\ 425*4882a593Smuzhiyun \n\ 426*4882a593Smuzhiyun stq $4,16($17) \n\ 427*4882a593Smuzhiyun xor $19,$20,$19 \n\ 428*4882a593Smuzhiyun stq $6,24($17) \n\ 429*4882a593Smuzhiyun xor $21,$22,$21 \n\ 430*4882a593Smuzhiyun \n\ 431*4882a593Smuzhiyun stq $19,32($17) \n\ 432*4882a593Smuzhiyun xor $23,$24,$23 \n\ 433*4882a593Smuzhiyun stq $21,40($17) \n\ 434*4882a593Smuzhiyun xor $25,$27,$25 \n\ 435*4882a593Smuzhiyun \n\ 436*4882a593Smuzhiyun stq $23,48($17) \n\ 437*4882a593Smuzhiyun subq $16,1,$16 \n\ 438*4882a593Smuzhiyun stq $25,56($17) \n\ 439*4882a593Smuzhiyun addq $17,64,$17 \n\ 440*4882a593Smuzhiyun \n\ 441*4882a593Smuzhiyun addq $18,64,$18 \n\ 442*4882a593Smuzhiyun bgt $16,2b \n\ 443*4882a593Smuzhiyun ret \n\ 444*4882a593Smuzhiyun .end xor_alpha_prefetch_2 \n\ 445*4882a593Smuzhiyun \n\ 446*4882a593Smuzhiyun .align 3 \n\ 447*4882a593Smuzhiyun .ent xor_alpha_prefetch_3 \n\ 448*4882a593Smuzhiyun xor_alpha_prefetch_3: \n\ 449*4882a593Smuzhiyun .prologue 0 \n\ 450*4882a593Smuzhiyun srl $16, 6, $16 \n\ 451*4882a593Smuzhiyun \n\ 452*4882a593Smuzhiyun ldq $31, 0($17) \n\ 453*4882a593Smuzhiyun ldq $31, 0($18) \n\ 454*4882a593Smuzhiyun ldq $31, 0($19) \n\ 455*4882a593Smuzhiyun \n\ 456*4882a593Smuzhiyun ldq $31, 64($17) \n\ 457*4882a593Smuzhiyun ldq $31, 64($18) \n\ 458*4882a593Smuzhiyun ldq $31, 64($19) \n\ 459*4882a593Smuzhiyun \n\ 460*4882a593Smuzhiyun ldq $31, 128($17) \n\ 461*4882a593Smuzhiyun ldq $31, 128($18) \n\ 462*4882a593Smuzhiyun ldq $31, 128($19) \n\ 463*4882a593Smuzhiyun \n\ 464*4882a593Smuzhiyun ldq $31, 192($17) \n\ 465*4882a593Smuzhiyun ldq $31, 192($18) \n\ 466*4882a593Smuzhiyun ldq $31, 192($19) \n\ 467*4882a593Smuzhiyun .align 4 \n\ 468*4882a593Smuzhiyun 3: \n\ 469*4882a593Smuzhiyun ldq $0,0($17) \n\ 470*4882a593Smuzhiyun ldq $1,0($18) \n\ 471*4882a593Smuzhiyun ldq $2,0($19) \n\ 472*4882a593Smuzhiyun ldq $3,8($17) \n\ 473*4882a593Smuzhiyun \n\ 474*4882a593Smuzhiyun ldq $4,8($18) \n\ 475*4882a593Smuzhiyun ldq $6,16($17) \n\ 476*4882a593Smuzhiyun ldq $7,16($18) \n\ 477*4882a593Smuzhiyun ldq $21,24($17) \n\ 478*4882a593Smuzhiyun \n\ 479*4882a593Smuzhiyun ldq $22,24($18) \n\ 480*4882a593Smuzhiyun ldq $24,32($17) \n\ 481*4882a593Smuzhiyun ldq $25,32($18) \n\ 482*4882a593Smuzhiyun ldq $5,8($19) \n\ 483*4882a593Smuzhiyun \n\ 484*4882a593Smuzhiyun ldq $20,16($19) \n\ 485*4882a593Smuzhiyun ldq $23,24($19) \n\ 486*4882a593Smuzhiyun ldq $27,32($19) \n\ 487*4882a593Smuzhiyun nop \n\ 488*4882a593Smuzhiyun \n\ 489*4882a593Smuzhiyun xor $0,$1,$1 # 8 cycles from $0 load \n\ 490*4882a593Smuzhiyun xor $3,$4,$4 # 7 cycles from $4 load \n\ 491*4882a593Smuzhiyun xor $6,$7,$7 # 6 cycles from $7 load \n\ 492*4882a593Smuzhiyun xor $21,$22,$22 # 5 cycles from $22 load \n\ 493*4882a593Smuzhiyun \n\ 494*4882a593Smuzhiyun xor $1,$2,$2 # 9 cycles from $2 load \n\ 495*4882a593Smuzhiyun xor $24,$25,$25 # 5 cycles from $25 load \n\ 496*4882a593Smuzhiyun stq $2,0($17) \n\ 497*4882a593Smuzhiyun xor $4,$5,$5 # 6 cycles from $5 load \n\ 498*4882a593Smuzhiyun \n\ 499*4882a593Smuzhiyun stq $5,8($17) \n\ 500*4882a593Smuzhiyun xor $7,$20,$20 # 7 cycles from $20 load \n\ 501*4882a593Smuzhiyun stq $20,16($17) \n\ 502*4882a593Smuzhiyun xor $22,$23,$23 # 7 cycles from $23 load \n\ 503*4882a593Smuzhiyun \n\ 504*4882a593Smuzhiyun stq $23,24($17) \n\ 505*4882a593Smuzhiyun xor $25,$27,$27 # 7 cycles from $27 load \n\ 506*4882a593Smuzhiyun stq $27,32($17) \n\ 507*4882a593Smuzhiyun nop \n\ 508*4882a593Smuzhiyun \n\ 509*4882a593Smuzhiyun ldq $0,40($17) \n\ 510*4882a593Smuzhiyun ldq $1,40($18) \n\ 511*4882a593Smuzhiyun ldq $3,48($17) \n\ 512*4882a593Smuzhiyun ldq $4,48($18) \n\ 513*4882a593Smuzhiyun \n\ 514*4882a593Smuzhiyun ldq $6,56($17) \n\ 515*4882a593Smuzhiyun ldq $7,56($18) \n\ 516*4882a593Smuzhiyun ldq $2,40($19) \n\ 517*4882a593Smuzhiyun ldq $5,48($19) \n\ 518*4882a593Smuzhiyun \n\ 519*4882a593Smuzhiyun ldq $20,56($19) \n\ 520*4882a593Smuzhiyun ldq $31,256($17) \n\ 521*4882a593Smuzhiyun ldq $31,256($18) \n\ 522*4882a593Smuzhiyun ldq $31,256($19) \n\ 523*4882a593Smuzhiyun \n\ 524*4882a593Smuzhiyun xor $0,$1,$1 # 6 cycles from $1 load \n\ 525*4882a593Smuzhiyun xor $3,$4,$4 # 5 cycles from $4 load \n\ 526*4882a593Smuzhiyun xor $6,$7,$7 # 5 cycles from $7 load \n\ 527*4882a593Smuzhiyun xor $1,$2,$2 # 4 cycles from $2 load \n\ 528*4882a593Smuzhiyun \n\ 529*4882a593Smuzhiyun xor $4,$5,$5 # 5 cycles from $5 load \n\ 530*4882a593Smuzhiyun xor $7,$20,$20 # 4 cycles from $20 load \n\ 531*4882a593Smuzhiyun stq $2,40($17) \n\ 532*4882a593Smuzhiyun subq $16,1,$16 \n\ 533*4882a593Smuzhiyun \n\ 534*4882a593Smuzhiyun stq $5,48($17) \n\ 535*4882a593Smuzhiyun addq $19,64,$19 \n\ 536*4882a593Smuzhiyun stq $20,56($17) \n\ 537*4882a593Smuzhiyun addq $18,64,$18 \n\ 538*4882a593Smuzhiyun \n\ 539*4882a593Smuzhiyun addq $17,64,$17 \n\ 540*4882a593Smuzhiyun bgt $16,3b \n\ 541*4882a593Smuzhiyun ret \n\ 542*4882a593Smuzhiyun .end xor_alpha_prefetch_3 \n\ 543*4882a593Smuzhiyun \n\ 544*4882a593Smuzhiyun .align 3 \n\ 545*4882a593Smuzhiyun .ent xor_alpha_prefetch_4 \n\ 546*4882a593Smuzhiyun xor_alpha_prefetch_4: \n\ 547*4882a593Smuzhiyun .prologue 0 \n\ 548*4882a593Smuzhiyun srl $16, 6, $16 \n\ 549*4882a593Smuzhiyun \n\ 550*4882a593Smuzhiyun ldq $31, 0($17) \n\ 551*4882a593Smuzhiyun ldq $31, 0($18) \n\ 552*4882a593Smuzhiyun ldq $31, 0($19) \n\ 553*4882a593Smuzhiyun ldq $31, 0($20) \n\ 554*4882a593Smuzhiyun \n\ 555*4882a593Smuzhiyun ldq $31, 64($17) \n\ 556*4882a593Smuzhiyun ldq $31, 64($18) \n\ 557*4882a593Smuzhiyun ldq $31, 64($19) \n\ 558*4882a593Smuzhiyun ldq $31, 64($20) \n\ 559*4882a593Smuzhiyun \n\ 560*4882a593Smuzhiyun ldq $31, 128($17) \n\ 561*4882a593Smuzhiyun ldq $31, 128($18) \n\ 562*4882a593Smuzhiyun ldq $31, 128($19) \n\ 563*4882a593Smuzhiyun ldq $31, 128($20) \n\ 564*4882a593Smuzhiyun \n\ 565*4882a593Smuzhiyun ldq $31, 192($17) \n\ 566*4882a593Smuzhiyun ldq $31, 192($18) \n\ 567*4882a593Smuzhiyun ldq $31, 192($19) \n\ 568*4882a593Smuzhiyun ldq $31, 192($20) \n\ 569*4882a593Smuzhiyun .align 4 \n\ 570*4882a593Smuzhiyun 4: \n\ 571*4882a593Smuzhiyun ldq $0,0($17) \n\ 572*4882a593Smuzhiyun ldq $1,0($18) \n\ 573*4882a593Smuzhiyun ldq $2,0($19) \n\ 574*4882a593Smuzhiyun ldq $3,0($20) \n\ 575*4882a593Smuzhiyun \n\ 576*4882a593Smuzhiyun ldq $4,8($17) \n\ 577*4882a593Smuzhiyun ldq $5,8($18) \n\ 578*4882a593Smuzhiyun ldq $6,8($19) \n\ 579*4882a593Smuzhiyun ldq $7,8($20) \n\ 580*4882a593Smuzhiyun \n\ 581*4882a593Smuzhiyun ldq $21,16($17) \n\ 582*4882a593Smuzhiyun ldq $22,16($18) \n\ 583*4882a593Smuzhiyun ldq $23,16($19) \n\ 584*4882a593Smuzhiyun ldq $24,16($20) \n\ 585*4882a593Smuzhiyun \n\ 586*4882a593Smuzhiyun ldq $25,24($17) \n\ 587*4882a593Smuzhiyun xor $0,$1,$1 # 6 cycles from $1 load \n\ 588*4882a593Smuzhiyun ldq $27,24($18) \n\ 589*4882a593Smuzhiyun xor $2,$3,$3 # 6 cycles from $3 load \n\ 590*4882a593Smuzhiyun \n\ 591*4882a593Smuzhiyun ldq $0,24($19) \n\ 592*4882a593Smuzhiyun xor $1,$3,$3 \n\ 593*4882a593Smuzhiyun ldq $1,24($20) \n\ 594*4882a593Smuzhiyun xor $4,$5,$5 # 7 cycles from $5 load \n\ 595*4882a593Smuzhiyun \n\ 596*4882a593Smuzhiyun stq $3,0($17) \n\ 597*4882a593Smuzhiyun xor $6,$7,$7 \n\ 598*4882a593Smuzhiyun xor $21,$22,$22 # 7 cycles from $22 load \n\ 599*4882a593Smuzhiyun xor $5,$7,$7 \n\ 600*4882a593Smuzhiyun \n\ 601*4882a593Smuzhiyun stq $7,8($17) \n\ 602*4882a593Smuzhiyun xor $23,$24,$24 # 7 cycles from $24 load \n\ 603*4882a593Smuzhiyun ldq $2,32($17) \n\ 604*4882a593Smuzhiyun xor $22,$24,$24 \n\ 605*4882a593Smuzhiyun \n\ 606*4882a593Smuzhiyun ldq $3,32($18) \n\ 607*4882a593Smuzhiyun ldq $4,32($19) \n\ 608*4882a593Smuzhiyun ldq $5,32($20) \n\ 609*4882a593Smuzhiyun xor $25,$27,$27 # 8 cycles from $27 load \n\ 610*4882a593Smuzhiyun \n\ 611*4882a593Smuzhiyun ldq $6,40($17) \n\ 612*4882a593Smuzhiyun ldq $7,40($18) \n\ 613*4882a593Smuzhiyun ldq $21,40($19) \n\ 614*4882a593Smuzhiyun ldq $22,40($20) \n\ 615*4882a593Smuzhiyun \n\ 616*4882a593Smuzhiyun stq $24,16($17) \n\ 617*4882a593Smuzhiyun xor $0,$1,$1 # 9 cycles from $1 load \n\ 618*4882a593Smuzhiyun xor $2,$3,$3 # 5 cycles from $3 load \n\ 619*4882a593Smuzhiyun xor $27,$1,$1 \n\ 620*4882a593Smuzhiyun \n\ 621*4882a593Smuzhiyun stq $1,24($17) \n\ 622*4882a593Smuzhiyun xor $4,$5,$5 # 5 cycles from $5 load \n\ 623*4882a593Smuzhiyun ldq $23,48($17) \n\ 624*4882a593Smuzhiyun xor $3,$5,$5 \n\ 625*4882a593Smuzhiyun \n\ 626*4882a593Smuzhiyun ldq $24,48($18) \n\ 627*4882a593Smuzhiyun ldq $25,48($19) \n\ 628*4882a593Smuzhiyun ldq $27,48($20) \n\ 629*4882a593Smuzhiyun ldq $0,56($17) \n\ 630*4882a593Smuzhiyun \n\ 631*4882a593Smuzhiyun ldq $1,56($18) \n\ 632*4882a593Smuzhiyun ldq $2,56($19) \n\ 633*4882a593Smuzhiyun ldq $3,56($20) \n\ 634*4882a593Smuzhiyun xor $6,$7,$7 # 8 cycles from $6 load \n\ 635*4882a593Smuzhiyun \n\ 636*4882a593Smuzhiyun ldq $31,256($17) \n\ 637*4882a593Smuzhiyun xor $21,$22,$22 # 8 cycles from $22 load \n\ 638*4882a593Smuzhiyun ldq $31,256($18) \n\ 639*4882a593Smuzhiyun xor $7,$22,$22 \n\ 640*4882a593Smuzhiyun \n\ 641*4882a593Smuzhiyun ldq $31,256($19) \n\ 642*4882a593Smuzhiyun xor $23,$24,$24 # 6 cycles from $24 load \n\ 643*4882a593Smuzhiyun ldq $31,256($20) \n\ 644*4882a593Smuzhiyun xor $25,$27,$27 # 6 cycles from $27 load \n\ 645*4882a593Smuzhiyun \n\ 646*4882a593Smuzhiyun stq $5,32($17) \n\ 647*4882a593Smuzhiyun xor $24,$27,$27 \n\ 648*4882a593Smuzhiyun xor $0,$1,$1 # 7 cycles from $1 load \n\ 649*4882a593Smuzhiyun xor $2,$3,$3 # 6 cycles from $3 load \n\ 650*4882a593Smuzhiyun \n\ 651*4882a593Smuzhiyun stq $22,40($17) \n\ 652*4882a593Smuzhiyun xor $1,$3,$3 \n\ 653*4882a593Smuzhiyun stq $27,48($17) \n\ 654*4882a593Smuzhiyun subq $16,1,$16 \n\ 655*4882a593Smuzhiyun \n\ 656*4882a593Smuzhiyun stq $3,56($17) \n\ 657*4882a593Smuzhiyun addq $20,64,$20 \n\ 658*4882a593Smuzhiyun addq $19,64,$19 \n\ 659*4882a593Smuzhiyun addq $18,64,$18 \n\ 660*4882a593Smuzhiyun \n\ 661*4882a593Smuzhiyun addq $17,64,$17 \n\ 662*4882a593Smuzhiyun bgt $16,4b \n\ 663*4882a593Smuzhiyun ret \n\ 664*4882a593Smuzhiyun .end xor_alpha_prefetch_4 \n\ 665*4882a593Smuzhiyun \n\ 666*4882a593Smuzhiyun .align 3 \n\ 667*4882a593Smuzhiyun .ent xor_alpha_prefetch_5 \n\ 668*4882a593Smuzhiyun xor_alpha_prefetch_5: \n\ 669*4882a593Smuzhiyun .prologue 0 \n\ 670*4882a593Smuzhiyun srl $16, 6, $16 \n\ 671*4882a593Smuzhiyun \n\ 672*4882a593Smuzhiyun ldq $31, 0($17) \n\ 673*4882a593Smuzhiyun ldq $31, 0($18) \n\ 674*4882a593Smuzhiyun ldq $31, 0($19) \n\ 675*4882a593Smuzhiyun ldq $31, 0($20) \n\ 676*4882a593Smuzhiyun ldq $31, 0($21) \n\ 677*4882a593Smuzhiyun \n\ 678*4882a593Smuzhiyun ldq $31, 64($17) \n\ 679*4882a593Smuzhiyun ldq $31, 64($18) \n\ 680*4882a593Smuzhiyun ldq $31, 64($19) \n\ 681*4882a593Smuzhiyun ldq $31, 64($20) \n\ 682*4882a593Smuzhiyun ldq $31, 64($21) \n\ 683*4882a593Smuzhiyun \n\ 684*4882a593Smuzhiyun ldq $31, 128($17) \n\ 685*4882a593Smuzhiyun ldq $31, 128($18) \n\ 686*4882a593Smuzhiyun ldq $31, 128($19) \n\ 687*4882a593Smuzhiyun ldq $31, 128($20) \n\ 688*4882a593Smuzhiyun ldq $31, 128($21) \n\ 689*4882a593Smuzhiyun \n\ 690*4882a593Smuzhiyun ldq $31, 192($17) \n\ 691*4882a593Smuzhiyun ldq $31, 192($18) \n\ 692*4882a593Smuzhiyun ldq $31, 192($19) \n\ 693*4882a593Smuzhiyun ldq $31, 192($20) \n\ 694*4882a593Smuzhiyun ldq $31, 192($21) \n\ 695*4882a593Smuzhiyun .align 4 \n\ 696*4882a593Smuzhiyun 5: \n\ 697*4882a593Smuzhiyun ldq $0,0($17) \n\ 698*4882a593Smuzhiyun ldq $1,0($18) \n\ 699*4882a593Smuzhiyun ldq $2,0($19) \n\ 700*4882a593Smuzhiyun ldq $3,0($20) \n\ 701*4882a593Smuzhiyun \n\ 702*4882a593Smuzhiyun ldq $4,0($21) \n\ 703*4882a593Smuzhiyun ldq $5,8($17) \n\ 704*4882a593Smuzhiyun ldq $6,8($18) \n\ 705*4882a593Smuzhiyun ldq $7,8($19) \n\ 706*4882a593Smuzhiyun \n\ 707*4882a593Smuzhiyun ldq $22,8($20) \n\ 708*4882a593Smuzhiyun ldq $23,8($21) \n\ 709*4882a593Smuzhiyun ldq $24,16($17) \n\ 710*4882a593Smuzhiyun ldq $25,16($18) \n\ 711*4882a593Smuzhiyun \n\ 712*4882a593Smuzhiyun ldq $27,16($19) \n\ 713*4882a593Smuzhiyun xor $0,$1,$1 # 6 cycles from $1 load \n\ 714*4882a593Smuzhiyun ldq $28,16($20) \n\ 715*4882a593Smuzhiyun xor $2,$3,$3 # 6 cycles from $3 load \n\ 716*4882a593Smuzhiyun \n\ 717*4882a593Smuzhiyun ldq $0,16($21) \n\ 718*4882a593Smuzhiyun xor $1,$3,$3 \n\ 719*4882a593Smuzhiyun ldq $1,24($17) \n\ 720*4882a593Smuzhiyun xor $3,$4,$4 # 7 cycles from $4 load \n\ 721*4882a593Smuzhiyun \n\ 722*4882a593Smuzhiyun stq $4,0($17) \n\ 723*4882a593Smuzhiyun xor $5,$6,$6 # 7 cycles from $6 load \n\ 724*4882a593Smuzhiyun xor $7,$22,$22 # 7 cycles from $22 load \n\ 725*4882a593Smuzhiyun xor $6,$23,$23 # 7 cycles from $23 load \n\ 726*4882a593Smuzhiyun \n\ 727*4882a593Smuzhiyun ldq $2,24($18) \n\ 728*4882a593Smuzhiyun xor $22,$23,$23 \n\ 729*4882a593Smuzhiyun ldq $3,24($19) \n\ 730*4882a593Smuzhiyun xor $24,$25,$25 # 8 cycles from $25 load \n\ 731*4882a593Smuzhiyun \n\ 732*4882a593Smuzhiyun stq $23,8($17) \n\ 733*4882a593Smuzhiyun xor $25,$27,$27 # 8 cycles from $27 load \n\ 734*4882a593Smuzhiyun ldq $4,24($20) \n\ 735*4882a593Smuzhiyun xor $28,$0,$0 # 7 cycles from $0 load \n\ 736*4882a593Smuzhiyun \n\ 737*4882a593Smuzhiyun ldq $5,24($21) \n\ 738*4882a593Smuzhiyun xor $27,$0,$0 \n\ 739*4882a593Smuzhiyun ldq $6,32($17) \n\ 740*4882a593Smuzhiyun ldq $7,32($18) \n\ 741*4882a593Smuzhiyun \n\ 742*4882a593Smuzhiyun stq $0,16($17) \n\ 743*4882a593Smuzhiyun xor $1,$2,$2 # 6 cycles from $2 load \n\ 744*4882a593Smuzhiyun ldq $22,32($19) \n\ 745*4882a593Smuzhiyun xor $3,$4,$4 # 4 cycles from $4 load \n\ 746*4882a593Smuzhiyun \n\ 747*4882a593Smuzhiyun ldq $23,32($20) \n\ 748*4882a593Smuzhiyun xor $2,$4,$4 \n\ 749*4882a593Smuzhiyun ldq $24,32($21) \n\ 750*4882a593Smuzhiyun ldq $25,40($17) \n\ 751*4882a593Smuzhiyun \n\ 752*4882a593Smuzhiyun ldq $27,40($18) \n\ 753*4882a593Smuzhiyun ldq $28,40($19) \n\ 754*4882a593Smuzhiyun ldq $0,40($20) \n\ 755*4882a593Smuzhiyun xor $4,$5,$5 # 7 cycles from $5 load \n\ 756*4882a593Smuzhiyun \n\ 757*4882a593Smuzhiyun stq $5,24($17) \n\ 758*4882a593Smuzhiyun xor $6,$7,$7 # 7 cycles from $7 load \n\ 759*4882a593Smuzhiyun ldq $1,40($21) \n\ 760*4882a593Smuzhiyun ldq $2,48($17) \n\ 761*4882a593Smuzhiyun \n\ 762*4882a593Smuzhiyun ldq $3,48($18) \n\ 763*4882a593Smuzhiyun xor $7,$22,$22 # 7 cycles from $22 load \n\ 764*4882a593Smuzhiyun ldq $4,48($19) \n\ 765*4882a593Smuzhiyun xor $23,$24,$24 # 6 cycles from $24 load \n\ 766*4882a593Smuzhiyun \n\ 767*4882a593Smuzhiyun ldq $5,48($20) \n\ 768*4882a593Smuzhiyun xor $22,$24,$24 \n\ 769*4882a593Smuzhiyun ldq $6,48($21) \n\ 770*4882a593Smuzhiyun xor $25,$27,$27 # 7 cycles from $27 load \n\ 771*4882a593Smuzhiyun \n\ 772*4882a593Smuzhiyun stq $24,32($17) \n\ 773*4882a593Smuzhiyun xor $27,$28,$28 # 8 cycles from $28 load \n\ 774*4882a593Smuzhiyun ldq $7,56($17) \n\ 775*4882a593Smuzhiyun xor $0,$1,$1 # 6 cycles from $1 load \n\ 776*4882a593Smuzhiyun \n\ 777*4882a593Smuzhiyun ldq $22,56($18) \n\ 778*4882a593Smuzhiyun ldq $23,56($19) \n\ 779*4882a593Smuzhiyun ldq $24,56($20) \n\ 780*4882a593Smuzhiyun ldq $25,56($21) \n\ 781*4882a593Smuzhiyun \n\ 782*4882a593Smuzhiyun ldq $31,256($17) \n\ 783*4882a593Smuzhiyun xor $28,$1,$1 \n\ 784*4882a593Smuzhiyun ldq $31,256($18) \n\ 785*4882a593Smuzhiyun xor $2,$3,$3 # 9 cycles from $3 load \n\ 786*4882a593Smuzhiyun \n\ 787*4882a593Smuzhiyun ldq $31,256($19) \n\ 788*4882a593Smuzhiyun xor $3,$4,$4 # 9 cycles from $4 load \n\ 789*4882a593Smuzhiyun ldq $31,256($20) \n\ 790*4882a593Smuzhiyun xor $5,$6,$6 # 8 cycles from $6 load \n\ 791*4882a593Smuzhiyun \n\ 792*4882a593Smuzhiyun stq $1,40($17) \n\ 793*4882a593Smuzhiyun xor $4,$6,$6 \n\ 794*4882a593Smuzhiyun xor $7,$22,$22 # 7 cycles from $22 load \n\ 795*4882a593Smuzhiyun xor $23,$24,$24 # 6 cycles from $24 load \n\ 796*4882a593Smuzhiyun \n\ 797*4882a593Smuzhiyun stq $6,48($17) \n\ 798*4882a593Smuzhiyun xor $22,$24,$24 \n\ 799*4882a593Smuzhiyun ldq $31,256($21) \n\ 800*4882a593Smuzhiyun xor $24,$25,$25 # 8 cycles from $25 load \n\ 801*4882a593Smuzhiyun \n\ 802*4882a593Smuzhiyun stq $25,56($17) \n\ 803*4882a593Smuzhiyun subq $16,1,$16 \n\ 804*4882a593Smuzhiyun addq $21,64,$21 \n\ 805*4882a593Smuzhiyun addq $20,64,$20 \n\ 806*4882a593Smuzhiyun \n\ 807*4882a593Smuzhiyun addq $19,64,$19 \n\ 808*4882a593Smuzhiyun addq $18,64,$18 \n\ 809*4882a593Smuzhiyun addq $17,64,$17 \n\ 810*4882a593Smuzhiyun bgt $16,5b \n\ 811*4882a593Smuzhiyun \n\ 812*4882a593Smuzhiyun ret \n\ 813*4882a593Smuzhiyun .end xor_alpha_prefetch_5 \n\ 814*4882a593Smuzhiyun "); 815*4882a593Smuzhiyun 816*4882a593Smuzhiyun static struct xor_block_template xor_block_alpha = { 817*4882a593Smuzhiyun .name = "alpha", 818*4882a593Smuzhiyun .do_2 = xor_alpha_2, 819*4882a593Smuzhiyun .do_3 = xor_alpha_3, 820*4882a593Smuzhiyun .do_4 = xor_alpha_4, 821*4882a593Smuzhiyun .do_5 = xor_alpha_5, 822*4882a593Smuzhiyun }; 823*4882a593Smuzhiyun 824*4882a593Smuzhiyun static struct xor_block_template xor_block_alpha_prefetch = { 825*4882a593Smuzhiyun .name = "alpha prefetch", 826*4882a593Smuzhiyun .do_2 = xor_alpha_prefetch_2, 827*4882a593Smuzhiyun .do_3 = xor_alpha_prefetch_3, 828*4882a593Smuzhiyun .do_4 = xor_alpha_prefetch_4, 829*4882a593Smuzhiyun .do_5 = xor_alpha_prefetch_5, 830*4882a593Smuzhiyun }; 831*4882a593Smuzhiyun 832*4882a593Smuzhiyun /* For grins, also test the generic routines. */ 833*4882a593Smuzhiyun #include <asm-generic/xor.h> 834*4882a593Smuzhiyun 835*4882a593Smuzhiyun #undef XOR_TRY_TEMPLATES 836*4882a593Smuzhiyun #define XOR_TRY_TEMPLATES \ 837*4882a593Smuzhiyun do { \ 838*4882a593Smuzhiyun xor_speed(&xor_block_8regs); \ 839*4882a593Smuzhiyun xor_speed(&xor_block_32regs); \ 840*4882a593Smuzhiyun xor_speed(&xor_block_alpha); \ 841*4882a593Smuzhiyun xor_speed(&xor_block_alpha_prefetch); \ 842*4882a593Smuzhiyun } while (0) 843*4882a593Smuzhiyun 844*4882a593Smuzhiyun /* Force the use of alpha_prefetch if EV6, as it is significantly 845*4882a593Smuzhiyun faster in the cold cache case. */ 846*4882a593Smuzhiyun #define XOR_SELECT_TEMPLATE(FASTEST) \ 847*4882a593Smuzhiyun (implver() == IMPLVER_EV6 ? &xor_block_alpha_prefetch : FASTEST) 848