/* SPDX-License-Identifier: GPL-2.0-or-later */
/*
 * arch/ia64/lib/xor.S
 *
 * Optimized RAID-5 checksumming functions for IA-64.
 */

#include <asm/asmmacro.h>
#include <asm/export.h>

/*
 * Overview (applies to all four routines in this file)
 *
 * xor_ia64_N XORs N buffers together one 64-bit word at a time and writes
 * the result back over the first buffer:
 *
 *	in0	 length in bytes; shifted right by 3 to get the word count
 *	in1	 first source, also the destination (copied to r16 AND r8)
 *	in2..	 remaining sources, one per additional input register
 *
 * NOTE(review): presumably declared on the C side as
 *	void xor_ia64_N(unsigned long bytes, unsigned long *p1, ...);
 * confirm against the caller's prototype.
 *
 * Every routine is the same software-pipelined loop closed by br.ctop:
 *
 *	ar.lc  = word count - 1
 *	ar.ec  = 6 + 2		(number of pipeline stages to drain)
 *	pr.rot = 1 << 16	(p16, i.e. p[0], set; other rotating
 *				 predicates cleared)
 *
 *	stage p[0]	post-incrementing ld8.nta from each source
 *	stage p[6]	XOR of the words loaded six rotations earlier
 *	stage p[6+1]	st8.nta of d[1], which is the d[0] computed on the
 *			previous trip (br.ctop rotates d[0] into d[1])
 *
 * The stage distance of 6 is presumably chosen to cover load latency.
 *
 * alloc makes the whole register frame rotating, which includes the input
 * registers, so every argument is copied into a static register
 * (r8, r16..r20) before the loop starts.
 *
 * ar.lc and pr are saved in r30 / r29 and restored before returning; the
 * previous ar.pfs is placed in r31 by alloc.  The .save/.fframe/.prologue/
 * .body directives only describe this to the unwinder.
 *
 * NOTE(review): there is no guard for a length below 8 bytes -- the word
 * count would be 0 and ar.lc would be loaded with -1.  "shr" is also the
 * signed shift.  Callers are presumably expected to pass a positive
 * multiple of 8; verify.
 */

/*
 * xor_ia64_2: p1[i] ^= p2[i]
 *
 *	in0 = bytes, in1 = p1 (source + destination), in2 = p2
 *
 * Static registers used: r8 (store pointer), r16/r17 (load pointers),
 * r29/r30/r31 (saved pr / ar.lc / ar.pfs).
 */
GLOBAL_ENTRY(xor_ia64_2)
	.prologue
	.fframe 0
	.save ar.pfs, r31
	alloc r31 = ar.pfs, 3, 0, 13, 16	/* 16-register frame, all rotating */
	.save ar.lc, r30
	mov r30 = ar.lc
	.save pr, r29
	mov r29 = pr
	;;
	.body
	mov r8 = in1				/* store pointer (destination) */
	mov ar.ec = 6 + 2			/* epilogue count = pipeline stages */
	shr in0 = in0, 3			/* bytes -> 64-bit words */
	;;
	adds in0 = -1, in0			/* ar.lc wants trip count - 1 */
	mov r16 = in1				/* load pointer, source 1 */
	mov r17 = in2				/* load pointer, source 2 */
	;;
	mov ar.lc = in0
	mov pr.rot = 1 << 16			/* enable stage 0 only */
	;;
	.rotr s1[6+1], s2[6+1], d[2]
	.rotp p[6+2]
0:
(p[0])	ld8.nta s1[0] = [r16], 8
(p[0])	ld8.nta s2[0] = [r17], 8
(p[6])	xor d[0] = s1[6], s2[6]
(p[6+1])st8.nta [r8] = d[1], 8			/* result from the previous trip */
	nop.f 0
	br.ctop.dptk.few 0b
	;;
	mov ar.lc = r30				/* restore caller's loop count */
	mov pr = r29, -1			/* restore all predicates */
	br.ret.sptk.few rp
END(xor_ia64_2)
EXPORT_SYMBOL(xor_ia64_2)

/*
 * xor_ia64_3: p1[i] ^= p2[i] ^ p3[i]
 *
 *	in0 = bytes, in1 = p1 (source + destination), in2 = p2, in3 = p3
 *
 * Static registers used: r8 (store pointer), r16..r18 (load pointers),
 * r29/r30/r31 (saved pr / ar.lc / ar.pfs).
 */
GLOBAL_ENTRY(xor_ia64_3)
	.prologue
	.fframe 0
	.save ar.pfs, r31
	alloc r31 = ar.pfs, 4, 0, 20, 24	/* 24-register frame, all rotating */
	.save ar.lc, r30
	mov r30 = ar.lc
	.save pr, r29
	mov r29 = pr
	;;
	.body
	mov r8 = in1				/* store pointer (destination) */
	mov ar.ec = 6 + 2			/* epilogue count = pipeline stages */
	shr in0 = in0, 3			/* bytes -> 64-bit words */
	;;
	adds in0 = -1, in0			/* ar.lc wants trip count - 1 */
	mov r16 = in1				/* load pointer, source 1 */
	mov r17 = in2				/* load pointer, source 2 */
	;;
	mov r18 = in3				/* load pointer, source 3 */
	mov ar.lc = in0
	mov pr.rot = 1 << 16			/* enable stage 0 only */
	;;
	.rotr s1[6+1], s2[6+1], s3[6+1], d[2]
	.rotp p[6+2]
0:
(p[0])	ld8.nta s1[0] = [r16], 8
(p[0])	ld8.nta s2[0] = [r17], 8
(p[6])	xor d[0] = s1[6], s2[6]
	;;					/* d[0] is consumed below */
(p[0])	ld8.nta s3[0] = [r18], 8
(p[6+1])st8.nta [r8] = d[1], 8			/* result from the previous trip */
(p[6])	xor d[0] = d[0], s3[6]
	br.ctop.dptk.few 0b
	;;
	mov ar.lc = r30				/* restore caller's loop count */
	mov pr = r29, -1			/* restore all predicates */
	br.ret.sptk.few rp
END(xor_ia64_3)
EXPORT_SYMBOL(xor_ia64_3)

/*
 * xor_ia64_4: p1[i] ^= p2[i] ^ p3[i] ^ p4[i]
 *
 *	in0 = bytes, in1 = p1 (source + destination), in2..in4 = p2..p4
 *
 * Static registers used: r8 (store pointer), r16..r19 (load pointers),
 * r20 (partial XOR of sources 3 and 4),
 * r29/r30/r31 (saved pr / ar.lc / ar.pfs).
 */
GLOBAL_ENTRY(xor_ia64_4)
	.prologue
	.fframe 0
	.save ar.pfs, r31
	alloc r31 = ar.pfs, 5, 0, 27, 32	/* 32-register frame, all rotating */
	.save ar.lc, r30
	mov r30 = ar.lc
	.save pr, r29
	mov r29 = pr
	;;
	.body
	mov r8 = in1				/* store pointer (destination) */
	mov ar.ec = 6 + 2			/* epilogue count = pipeline stages */
	shr in0 = in0, 3			/* bytes -> 64-bit words */
	;;
	adds in0 = -1, in0			/* ar.lc wants trip count - 1 */
	mov r16 = in1				/* load pointer, source 1 */
	mov r17 = in2				/* load pointer, source 2 */
	;;
	mov r18 = in3				/* load pointer, source 3 */
	mov ar.lc = in0
	mov pr.rot = 1 << 16			/* enable stage 0 only */
	mov r19 = in4				/* load pointer, source 4 */
	;;
	.rotr s1[6+1], s2[6+1], s3[6+1], s4[6+1], d[2]
	.rotp p[6+2]
0:
(p[0])	ld8.nta s1[0] = [r16], 8
(p[0])	ld8.nta s2[0] = [r17], 8
(p[6])	xor d[0] = s1[6], s2[6]
(p[0])	ld8.nta s3[0] = [r18], 8
(p[0])	ld8.nta s4[0] = [r19], 8
(p[6])	xor r20 = s3[6], s4[6]			/* second pair, static temporary */
	;;					/* d[0] and r20 are consumed below */
(p[6+1])st8.nta [r8] = d[1], 8			/* result from the previous trip */
(p[6])	xor d[0] = d[0], r20
	br.ctop.dptk.few 0b
	;;
	mov ar.lc = r30				/* restore caller's loop count */
	mov pr = r29, -1			/* restore all predicates */
	br.ret.sptk.few rp
END(xor_ia64_4)
EXPORT_SYMBOL(xor_ia64_4)

/*
 * xor_ia64_5: p1[i] ^= p2[i] ^ p3[i] ^ p4[i] ^ p5[i]
 *
 *	in0 = bytes, in1 = p1 (source + destination), in2..in5 = p2..p5
 *
 * Static registers used: r8 (store pointer), r16..r20 (load pointers),
 * r21 (partial XOR of sources 3 and 4),
 * r29/r30/r31 (saved pr / ar.lc / ar.pfs).
 */
GLOBAL_ENTRY(xor_ia64_5)
	.prologue
	.fframe 0
	.save ar.pfs, r31
	alloc r31 = ar.pfs, 6, 0, 34, 40	/* 40-register frame, all rotating */
	.save ar.lc, r30
	mov r30 = ar.lc
	.save pr, r29
	mov r29 = pr
	;;
	.body
	mov r8 = in1				/* store pointer (destination) */
	mov ar.ec = 6 + 2			/* epilogue count = pipeline stages */
	shr in0 = in0, 3			/* bytes -> 64-bit words */
	;;
	adds in0 = -1, in0			/* ar.lc wants trip count - 1 */
	mov r16 = in1				/* load pointer, source 1 */
	mov r17 = in2				/* load pointer, source 2 */
	;;
	mov r18 = in3				/* load pointer, source 3 */
	mov ar.lc = in0
	mov pr.rot = 1 << 16			/* enable stage 0 only */
	mov r19 = in4				/* load pointer, source 4 */
	mov r20 = in5				/* load pointer, source 5 */
	;;
	.rotr s1[6+1], s2[6+1], s3[6+1], s4[6+1], s5[6+1], d[2]
	.rotp p[6+2]
0:
(p[0])	ld8.nta s1[0] = [r16], 8
(p[0])	ld8.nta s2[0] = [r17], 8
(p[6])	xor d[0] = s1[6], s2[6]
(p[0])	ld8.nta s3[0] = [r18], 8
(p[0])	ld8.nta s4[0] = [r19], 8
(p[6])	xor r21 = s3[6], s4[6]			/* second pair, static temporary */
	;;					/* d[0] and r21 are consumed below */
(p[0])	ld8.nta s5[0] = [r20], 8
(p[6+1])st8.nta [r8] = d[1], 8			/* result from the previous trip */
(p[6])	xor d[0] = d[0], r21
	;;					/* d[0] is updated once more */
(p[6])	xor d[0] = d[0], s5[6]
	nop.f 0
	br.ctop.dptk.few 0b
	;;
	mov ar.lc = r30				/* restore caller's loop count */
	mov pr = r29, -1			/* restore all predicates */
	br.ret.sptk.few rp
END(xor_ia64_5)
EXPORT_SYMBOL(xor_ia64_5)