xref: /OK3568_Linux_fs/kernel/lib/raid6/s390vx.uc (revision 4882a59341e53eb6f0b4789bf948001014eff981)
1*4882a593Smuzhiyun// SPDX-License-Identifier: GPL-2.0
2*4882a593Smuzhiyun/*
3*4882a593Smuzhiyun * raid6_vx$#.c
4*4882a593Smuzhiyun *
5*4882a593Smuzhiyun * $#-way unrolled RAID6 gen/xor functions for s390
6*4882a593Smuzhiyun * based on the vector facility
7*4882a593Smuzhiyun *
8*4882a593Smuzhiyun * Copyright IBM Corp. 2016
9*4882a593Smuzhiyun * Author(s): Martin Schwidefsky <schwidefsky@de.ibm.com>
10*4882a593Smuzhiyun *
11*4882a593Smuzhiyun * This file is postprocessed using unroll.awk.
12*4882a593Smuzhiyun */
13*4882a593Smuzhiyun
14*4882a593Smuzhiyun#include <linux/raid/pq.h>
15*4882a593Smuzhiyun#include <asm/fpu/api.h>
16*4882a593Smuzhiyun
17*4882a593Smuzhiyunasm(".include \"asm/vx-insn.h\"\n");
18*4882a593Smuzhiyun
19*4882a593Smuzhiyun#define NSIZE 16
20*4882a593Smuzhiyun
/*
 * Preload the two constant vector registers used throughout the
 * syndrome loops:
 *   v24 = 0x07 in every byte - per-byte shift count consumed by MASK()
 *   v25 = 0x1d in every byte - the RAID6 GF(2^8) reduction constant
 *         (low byte of the generator polynomial), ANDed in after MASK()
 */
static inline void LOAD_CONST(void)
{
	asm volatile("VREPIB %v24,7");
	asm volatile("VREPIB %v25,0x1d");
}
26*4882a593Smuzhiyun
/*
 * The SHLBYTE() operation shifts each of the 16 bytes in
 * vector register y left by 1 bit and stores the result in
 * vector register x.  Implemented as a bytewise self-add
 * (VAB: x = y + y), which cannot carry between bytes.
 */
static inline void SHLBYTE(int x, int y)
{
	asm volatile ("VAB %0,%1,%1" : : "i" (x), "i" (y));
}
36*4882a593Smuzhiyun
/*
 * For each of the 16 bytes in the vector register y the MASK()
 * operation returns 0xFF if the high bit of the byte is 1,
 * or 0x00 if the high bit is 0. The result is stored in vector
 * register x.  VESRAVB shifts every byte right arithmetically by
 * the count held in the matching byte of v24 (= 7, see
 * LOAD_CONST()), replicating the sign bit across the byte.
 */
static inline void MASK(int x, int y)
{
	asm volatile ("VESRAVB	%0,%1,24" : : "i" (x), "i" (y));
}
47*4882a593Smuzhiyun
/* Bytewise AND of vector registers: x = y & z (VN). */
static inline void AND(int x, int y, int z)
{
	asm volatile ("VN %0,%1,%2" : : "i" (x), "i" (y), "i" (z));
}
52*4882a593Smuzhiyun
/* Bytewise XOR of vector registers: x = y ^ z (VX). */
static inline void XOR(int x, int y, int z)
{
	asm volatile ("VX %0,%1,%2" : : "i" (x), "i" (y), "i" (z));
}
57*4882a593Smuzhiyun
/*
 * Load $# consecutive 16-byte chunks from ptr into vector registers
 * x .. x+$#-1 with a single VLM.  __ptr is pinned to GPR 1 so the
 * base register is known; the "m" input operand covers the whole
 * 16*$# byte region so the compiler knows it is read by the asm.
 */
static inline void LOAD_DATA(int x, u8 *ptr)
{
	typedef struct { u8 _[16 * $#]; } addrtype;
	register addrtype *__ptr asm("1") = (addrtype *) ptr;

	asm volatile ("VLM %2,%3,0,%1"
		      : : "m" (*__ptr), "a" (__ptr), "i" (x),
			  "i" (x + $# - 1));
}
67*4882a593Smuzhiyun
/*
 * Store vector registers x .. x+$#-1 to the 16*$# bytes at ptr with
 * a single VSTM.  The base register is written as the literal "1"
 * in the template, which matches __ptr being pinned to GPR 1 via
 * asm("1"); the "=m" output operand marks the region as written.
 */
static inline void STORE_DATA(int x, u8 *ptr)
{
	typedef struct { u8 _[16 * $#]; } addrtype;
	register addrtype *__ptr asm("1") = (addrtype *) ptr;

	asm volatile ("VSTM %2,%3,0,1"
		      : "=m" (*__ptr) : "a" (__ptr), "i" (x),
			"i" (x + $# - 1));
}
77*4882a593Smuzhiyun
/* Copy vector register y into vector register x (VLR). */
static inline void COPY_VEC(int x, int y)
{
	asm volatile ("VLR %0,%1" : : "i" (x), "i" (y));
}
82*4882a593Smuzhiyun
/*
 * Compute both RAID6 syndromes (P and Q) over all data disks.
 *
 * Per iteration $#*16 bytes are processed: v0..v$#-1 accumulate P
 * (plain XOR of all data blocks) and v8..v8+$#-1 accumulate Q via
 * Horner's rule - before each new data block is folded in, the
 * running Q is multiplied by g=2 in GF(2^8): shift every byte left
 * one bit (SHLBYTE) and XOR in 0x1d wherever the byte's top bit was
 * set (MASK + AND with v25).  v16.. serve as scratch registers.
 */
static void raid6_s390vx$#_gen_syndrome(int disks, size_t bytes, void **ptrs)
{
	struct kernel_fpu vxstate;
	u8 **dptr, *p, *q;
	int d, z, z0;

	/* Save the caller's vector state and enable in-kernel VX use. */
	kernel_fpu_begin(&vxstate, KERNEL_VXR);
	LOAD_CONST();

	dptr = (u8 **) ptrs;
	z0 = disks - 3;		/* Highest data disk */
	p = dptr[z0 + 1];	/* XOR parity */
	q = dptr[z0 + 2];	/* RS syndrome */

	for (d = 0; d < bytes; d += $#*NSIZE) {
		/* Seed both P and Q with the highest data disk. */
		LOAD_DATA(0,&dptr[z0][d]);
		COPY_VEC(8+$$,0+$$);
		for (z = z0 - 1; z >= 0; z--) {
			/* Q *= 2 in GF(2^8) ... */
			MASK(16+$$,8+$$);
			AND(16+$$,16+$$,25);
			SHLBYTE(8+$$,8+$$);
			XOR(8+$$,8+$$,16+$$);
			/* ... then fold the next data block into P and Q. */
			LOAD_DATA(16,&dptr[z][d]);
			XOR(0+$$,0+$$,16+$$);
			XOR(8+$$,8+$$,16+$$);
		}
		STORE_DATA(0,&p[d]);
		STORE_DATA(8,&q[d]);
	}
	kernel_fpu_end(&vxstate, KERNEL_VXR);
}
114*4882a593Smuzhiyun
/*
 * Incrementally update P and Q for a write covering only data disks
 * start..stop (the remaining disks are assumed unchanged, so their
 * contribution already present in P/Q must be preserved).
 *
 * Right side (z0 down to start): same Horner loop as gen_syndrome,
 * building the partial P (v0..) and Q (v8..) of the changed disks.
 * Left side (start-1 down to 0): no data is read - the partial Q is
 * merely multiplied by 2 per remaining disk position so its GF(2^8)
 * weight lines up with the stored syndrome.  Finally the partial
 * results are XORed into the on-disk P and Q blocks.
 */
static void raid6_s390vx$#_xor_syndrome(int disks, int start, int stop,
					size_t bytes, void **ptrs)
{
	struct kernel_fpu vxstate;
	u8 **dptr, *p, *q;
	int d, z, z0;

	dptr = (u8 **) ptrs;
	z0 = stop;		/* P/Q right side optimization */
	p = dptr[disks - 2];	/* XOR parity */
	q = dptr[disks - 1];	/* RS syndrome */

	kernel_fpu_begin(&vxstate, KERNEL_VXR);
	LOAD_CONST();

	for (d = 0; d < bytes; d += $#*NSIZE) {
		/* P/Q data pages */
		LOAD_DATA(0,&dptr[z0][d]);
		COPY_VEC(8+$$,0+$$);
		for (z = z0 - 1; z >= start; z--) {
			/* Q *= 2 in GF(2^8), then fold in disk z. */
			MASK(16+$$,8+$$);
			AND(16+$$,16+$$,25);
			SHLBYTE(8+$$,8+$$);
			XOR(8+$$,8+$$,16+$$);
			LOAD_DATA(16,&dptr[z][d]);
			XOR(0+$$,0+$$,16+$$);
			XOR(8+$$,8+$$,16+$$);
		}
		/* P/Q left side optimization: only scale Q, no data loads. */
		for (z = start - 1; z >= 0; z--) {
			MASK(16+$$,8+$$);
			AND(16+$$,16+$$,25);
			SHLBYTE(8+$$,8+$$);
			XOR(8+$$,8+$$,16+$$);
		}
		/* Merge the partial syndromes into the stored P and Q. */
		LOAD_DATA(16,&p[d]);
		XOR(16+$$,16+$$,0+$$);
		STORE_DATA(16,&p[d]);
		LOAD_DATA(16,&q[d]);
		XOR(16+$$,16+$$,8+$$);
		STORE_DATA(16,&q[d]);
	}
	kernel_fpu_end(&vxstate, KERNEL_VXR);
}
159*4882a593Smuzhiyun
/*
 * Report to the raid6 core whether this implementation is usable:
 * nonzero iff the machine provides the vector facility.
 */
static int raid6_s390vx$#_valid(void)
{
	return MACHINE_HAS_VX;
}
164*4882a593Smuzhiyun
165*4882a593Smuzhiyunconst struct raid6_calls raid6_s390vx$# = {
166*4882a593Smuzhiyun	raid6_s390vx$#_gen_syndrome,
167*4882a593Smuzhiyun	raid6_s390vx$#_xor_syndrome,
168*4882a593Smuzhiyun	raid6_s390vx$#_valid,
169*4882a593Smuzhiyun	"vx128x$#",
170*4882a593Smuzhiyun	1
171*4882a593Smuzhiyun};
172