1*4882a593Smuzhiyun// SPDX-License-Identifier: GPL-2.0 2*4882a593Smuzhiyun/* 3*4882a593Smuzhiyun * raid6_vx$#.c 4*4882a593Smuzhiyun * 5*4882a593Smuzhiyun * $#-way unrolled RAID6 gen/xor functions for s390 6*4882a593Smuzhiyun * based on the vector facility 7*4882a593Smuzhiyun * 8*4882a593Smuzhiyun * Copyright IBM Corp. 2016 9*4882a593Smuzhiyun * Author(s): Martin Schwidefsky <schwidefsky@de.ibm.com> 10*4882a593Smuzhiyun * 11*4882a593Smuzhiyun * This file is postprocessed using unroll.awk. 12*4882a593Smuzhiyun */ 13*4882a593Smuzhiyun 14*4882a593Smuzhiyun#include <linux/raid/pq.h> 15*4882a593Smuzhiyun#include <asm/fpu/api.h> 16*4882a593Smuzhiyun 17*4882a593Smuzhiyunasm(".include \"asm/vx-insn.h\"\n"); 18*4882a593Smuzhiyun 19*4882a593Smuzhiyun#define NSIZE 16 20*4882a593Smuzhiyun 21*4882a593Smuzhiyunstatic inline void LOAD_CONST(void) 22*4882a593Smuzhiyun{ 23*4882a593Smuzhiyun asm volatile("VREPIB %v24,7"); 24*4882a593Smuzhiyun asm volatile("VREPIB %v25,0x1d"); 25*4882a593Smuzhiyun} 26*4882a593Smuzhiyun 27*4882a593Smuzhiyun/* 28*4882a593Smuzhiyun * The SHLBYTE() operation shifts each of the 16 bytes in 29*4882a593Smuzhiyun * vector register y left by 1 bit and stores the result in 30*4882a593Smuzhiyun * vector register x. 31*4882a593Smuzhiyun */ 32*4882a593Smuzhiyunstatic inline void SHLBYTE(int x, int y) 33*4882a593Smuzhiyun{ 34*4882a593Smuzhiyun asm volatile ("VAB %0,%1,%1" : : "i" (x), "i" (y)); 35*4882a593Smuzhiyun} 36*4882a593Smuzhiyun 37*4882a593Smuzhiyun/* 38*4882a593Smuzhiyun * For each of the 16 bytes in the vector register y the MASK() 39*4882a593Smuzhiyun * operation returns 0xFF if the high bit of the byte is 1, 40*4882a593Smuzhiyun * or 0x00 if the high bit is 0. The result is stored in vector 41*4882a593Smuzhiyun * register x. 42*4882a593Smuzhiyun */ 43*4882a593Smuzhiyunstatic inline void MASK(int x, int y) 44*4882a593Smuzhiyun{ 45*4882a593Smuzhiyun asm volatile ("VESRAVB %0,%1,24" : : "i" (x), "i" (y)); 46*4882a593Smuzhiyun} 47*4882a593Smuzhiyun 48*4882a593Smuzhiyunstatic inline void AND(int x, int y, int z) 49*4882a593Smuzhiyun{ 50*4882a593Smuzhiyun asm volatile ("VN %0,%1,%2" : : "i" (x), "i" (y), "i" (z)); 51*4882a593Smuzhiyun} 52*4882a593Smuzhiyun 53*4882a593Smuzhiyunstatic inline void XOR(int x, int y, int z) 54*4882a593Smuzhiyun{ 55*4882a593Smuzhiyun asm volatile ("VX %0,%1,%2" : : "i" (x), "i" (y), "i" (z)); 56*4882a593Smuzhiyun} 57*4882a593Smuzhiyun 58*4882a593Smuzhiyunstatic inline void LOAD_DATA(int x, u8 *ptr) 59*4882a593Smuzhiyun{ 60*4882a593Smuzhiyun typedef struct { u8 _[16 * $#]; } addrtype; 61*4882a593Smuzhiyun register addrtype *__ptr asm("1") = (addrtype *) ptr; 62*4882a593Smuzhiyun 63*4882a593Smuzhiyun asm volatile ("VLM %2,%3,0,%1" 64*4882a593Smuzhiyun : : "m" (*__ptr), "a" (__ptr), "i" (x), 65*4882a593Smuzhiyun "i" (x + $# - 1)); 66*4882a593Smuzhiyun} 67*4882a593Smuzhiyun 68*4882a593Smuzhiyunstatic inline void STORE_DATA(int x, u8 *ptr) 69*4882a593Smuzhiyun{ 70*4882a593Smuzhiyun typedef struct { u8 _[16 * $#]; } addrtype; 71*4882a593Smuzhiyun register addrtype *__ptr asm("1") = (addrtype *) ptr; 72*4882a593Smuzhiyun 73*4882a593Smuzhiyun asm volatile ("VSTM %2,%3,0,1" 74*4882a593Smuzhiyun : "=m" (*__ptr) : "a" (__ptr), "i" (x), 75*4882a593Smuzhiyun "i" (x + $# - 1)); 76*4882a593Smuzhiyun} 77*4882a593Smuzhiyun 78*4882a593Smuzhiyunstatic inline void COPY_VEC(int x, int y) 79*4882a593Smuzhiyun{ 80*4882a593Smuzhiyun asm volatile ("VLR %0,%1" : : "i" (x), "i" (y)); 81*4882a593Smuzhiyun} 82*4882a593Smuzhiyun 83*4882a593Smuzhiyunstatic void raid6_s390vx$#_gen_syndrome(int disks, size_t bytes, void **ptrs) 84*4882a593Smuzhiyun{ 85*4882a593Smuzhiyun struct kernel_fpu vxstate; 86*4882a593Smuzhiyun u8 **dptr, *p, *q; 87*4882a593Smuzhiyun int d, z, z0; 88*4882a593Smuzhiyun 89*4882a593Smuzhiyun kernel_fpu_begin(&vxstate, KERNEL_VXR); 90*4882a593Smuzhiyun LOAD_CONST(); 91*4882a593Smuzhiyun 92*4882a593Smuzhiyun dptr = (u8 **) ptrs; 93*4882a593Smuzhiyun z0 = disks - 3; /* Highest data disk */ 94*4882a593Smuzhiyun p = dptr[z0 + 1]; /* XOR parity */ 95*4882a593Smuzhiyun q = dptr[z0 + 2]; /* RS syndrome */ 96*4882a593Smuzhiyun 97*4882a593Smuzhiyun for (d = 0; d < bytes; d += $#*NSIZE) { 98*4882a593Smuzhiyun LOAD_DATA(0,&dptr[z0][d]); 99*4882a593Smuzhiyun COPY_VEC(8+$$,0+$$); 100*4882a593Smuzhiyun for (z = z0 - 1; z >= 0; z--) { 101*4882a593Smuzhiyun MASK(16+$$,8+$$); 102*4882a593Smuzhiyun AND(16+$$,16+$$,25); 103*4882a593Smuzhiyun SHLBYTE(8+$$,8+$$); 104*4882a593Smuzhiyun XOR(8+$$,8+$$,16+$$); 105*4882a593Smuzhiyun LOAD_DATA(16,&dptr[z][d]); 106*4882a593Smuzhiyun XOR(0+$$,0+$$,16+$$); 107*4882a593Smuzhiyun XOR(8+$$,8+$$,16+$$); 108*4882a593Smuzhiyun } 109*4882a593Smuzhiyun STORE_DATA(0,&p[d]); 110*4882a593Smuzhiyun STORE_DATA(8,&q[d]); 111*4882a593Smuzhiyun } 112*4882a593Smuzhiyun kernel_fpu_end(&vxstate, KERNEL_VXR); 113*4882a593Smuzhiyun} 114*4882a593Smuzhiyun 115*4882a593Smuzhiyunstatic void raid6_s390vx$#_xor_syndrome(int disks, int start, int stop, 116*4882a593Smuzhiyun size_t bytes, void **ptrs) 117*4882a593Smuzhiyun{ 118*4882a593Smuzhiyun struct kernel_fpu vxstate; 119*4882a593Smuzhiyun u8 **dptr, *p, *q; 120*4882a593Smuzhiyun int d, z, z0; 121*4882a593Smuzhiyun 122*4882a593Smuzhiyun dptr = (u8 **) ptrs; 123*4882a593Smuzhiyun z0 = stop; /* P/Q right side optimization */ 124*4882a593Smuzhiyun p = dptr[disks - 2]; /* XOR parity */ 125*4882a593Smuzhiyun q = dptr[disks - 1]; /* RS syndrome */ 126*4882a593Smuzhiyun 127*4882a593Smuzhiyun kernel_fpu_begin(&vxstate, KERNEL_VXR); 128*4882a593Smuzhiyun LOAD_CONST(); 129*4882a593Smuzhiyun 130*4882a593Smuzhiyun for (d = 0; d < bytes; d += $#*NSIZE) { 131*4882a593Smuzhiyun /* P/Q data pages */ 132*4882a593Smuzhiyun LOAD_DATA(0,&dptr[z0][d]); 133*4882a593Smuzhiyun COPY_VEC(8+$$,0+$$); 134*4882a593Smuzhiyun for (z = z0 - 1; z >= start; z--) { 135*4882a593Smuzhiyun MASK(16+$$,8+$$); 136*4882a593Smuzhiyun AND(16+$$,16+$$,25); 137*4882a593Smuzhiyun SHLBYTE(8+$$,8+$$); 138*4882a593Smuzhiyun XOR(8+$$,8+$$,16+$$); 139*4882a593Smuzhiyun LOAD_DATA(16,&dptr[z][d]); 140*4882a593Smuzhiyun XOR(0+$$,0+$$,16+$$); 141*4882a593Smuzhiyun XOR(8+$$,8+$$,16+$$); 142*4882a593Smuzhiyun } 143*4882a593Smuzhiyun /* P/Q left side optimization */ 144*4882a593Smuzhiyun for (z = start - 1; z >= 0; z--) { 145*4882a593Smuzhiyun MASK(16+$$,8+$$); 146*4882a593Smuzhiyun AND(16+$$,16+$$,25); 147*4882a593Smuzhiyun SHLBYTE(8+$$,8+$$); 148*4882a593Smuzhiyun XOR(8+$$,8+$$,16+$$); 149*4882a593Smuzhiyun } 150*4882a593Smuzhiyun LOAD_DATA(16,&p[d]); 151*4882a593Smuzhiyun XOR(16+$$,16+$$,0+$$); 152*4882a593Smuzhiyun STORE_DATA(16,&p[d]); 153*4882a593Smuzhiyun LOAD_DATA(16,&q[d]); 154*4882a593Smuzhiyun XOR(16+$$,16+$$,8+$$); 155*4882a593Smuzhiyun STORE_DATA(16,&q[d]); 156*4882a593Smuzhiyun } 157*4882a593Smuzhiyun kernel_fpu_end(&vxstate, KERNEL_VXR); 158*4882a593Smuzhiyun} 159*4882a593Smuzhiyun 160*4882a593Smuzhiyunstatic int raid6_s390vx$#_valid(void) 161*4882a593Smuzhiyun{ 162*4882a593Smuzhiyun return MACHINE_HAS_VX; 163*4882a593Smuzhiyun} 164*4882a593Smuzhiyun 165*4882a593Smuzhiyunconst struct raid6_calls raid6_s390vx$# = { 166*4882a593Smuzhiyun raid6_s390vx$#_gen_syndrome, 167*4882a593Smuzhiyun raid6_s390vx$#_xor_syndrome, 168*4882a593Smuzhiyun raid6_s390vx$#_valid, 169*4882a593Smuzhiyun "vx128x$#", 170*4882a593Smuzhiyun 1 171*4882a593Smuzhiyun}; 172