xref: /OK3568_Linux_fs/kernel/arch/powerpc/lib/xor_vmx.c (revision 4882a59341e53eb6f0b4789bf948001014eff981)
1*4882a593Smuzhiyun // SPDX-License-Identifier: GPL-2.0-or-later
2*4882a593Smuzhiyun /*
3*4882a593Smuzhiyun  *
4*4882a593Smuzhiyun  * Copyright (C) IBM Corporation, 2012
5*4882a593Smuzhiyun  *
6*4882a593Smuzhiyun  * Author: Anton Blanchard <anton@au.ibm.com>
7*4882a593Smuzhiyun  */
8*4882a593Smuzhiyun 
9*4882a593Smuzhiyun /*
10*4882a593Smuzhiyun  * Sparse (as at v0.5.0) gets very, very confused by this file.
11*4882a593Smuzhiyun  * Make it a bit simpler for it.
12*4882a593Smuzhiyun  */
13*4882a593Smuzhiyun #if !defined(__CHECKER__)
14*4882a593Smuzhiyun #include <altivec.h>
15*4882a593Smuzhiyun #else
16*4882a593Smuzhiyun #define vec_xor(a, b) a ^ b
17*4882a593Smuzhiyun #define vector __attribute__((vector_size(16)))
18*4882a593Smuzhiyun #endif
19*4882a593Smuzhiyun 
20*4882a593Smuzhiyun #include "xor_vmx.h"
21*4882a593Smuzhiyun 
22*4882a593Smuzhiyun typedef vector signed char unative_t;
23*4882a593Smuzhiyun 
24*4882a593Smuzhiyun #define DEFINE(V)				\
25*4882a593Smuzhiyun 	unative_t *V = (unative_t *)V##_in;	\
26*4882a593Smuzhiyun 	unative_t V##_0, V##_1, V##_2, V##_3
27*4882a593Smuzhiyun 
/*
 * LOAD(buf) - read the four vectors at buf[0..3] into the temporaries
 * buf##_0..buf##_3. Does not advance buf.
 */
#define LOAD(buf)				\
	do {					\
		buf##_0 = buf[0];		\
		buf##_1 = buf[1];		\
		buf##_2 = buf[2];		\
		buf##_3 = buf[3];		\
	} while (0)
35*4882a593Smuzhiyun 
/*
 * STORE(buf) - write the temporaries buf##_0..buf##_3 back to the four
 * vectors at buf[0..3]. Does not advance buf.
 */
#define STORE(buf)				\
	do {					\
		buf[0] = buf##_0;		\
		buf[1] = buf##_1;		\
		buf[2] = buf##_2;		\
		buf[3] = buf##_3;		\
	} while (0)
43*4882a593Smuzhiyun 
/*
 * XOR(dst, src) - fold src's four temporaries into dst's, lane by lane:
 * dst##_k = dst##_k ^ src##_k for k = 0..3. Only the temporaries are
 * touched; memory is not written until a later STORE(dst).
 */
#define XOR(dst, src)					\
	do {						\
		dst##_0 = vec_xor(dst##_0, src##_0);	\
		dst##_1 = vec_xor(dst##_1, src##_1);	\
		dst##_2 = vec_xor(dst##_2, src##_2);	\
		dst##_3 = vec_xor(dst##_3, src##_3);	\
	} while (0)
51*4882a593Smuzhiyun 
__xor_altivec_2(unsigned long bytes,unsigned long * v1_in,unsigned long * v2_in)52*4882a593Smuzhiyun void __xor_altivec_2(unsigned long bytes, unsigned long *v1_in,
53*4882a593Smuzhiyun 		     unsigned long *v2_in)
54*4882a593Smuzhiyun {
55*4882a593Smuzhiyun 	DEFINE(v1);
56*4882a593Smuzhiyun 	DEFINE(v2);
57*4882a593Smuzhiyun 	unsigned long lines = bytes / (sizeof(unative_t)) / 4;
58*4882a593Smuzhiyun 
59*4882a593Smuzhiyun 	do {
60*4882a593Smuzhiyun 		LOAD(v1);
61*4882a593Smuzhiyun 		LOAD(v2);
62*4882a593Smuzhiyun 		XOR(v1, v2);
63*4882a593Smuzhiyun 		STORE(v1);
64*4882a593Smuzhiyun 
65*4882a593Smuzhiyun 		v1 += 4;
66*4882a593Smuzhiyun 		v2 += 4;
67*4882a593Smuzhiyun 	} while (--lines > 0);
68*4882a593Smuzhiyun }
69*4882a593Smuzhiyun 
__xor_altivec_3(unsigned long bytes,unsigned long * v1_in,unsigned long * v2_in,unsigned long * v3_in)70*4882a593Smuzhiyun void __xor_altivec_3(unsigned long bytes, unsigned long *v1_in,
71*4882a593Smuzhiyun 		     unsigned long *v2_in, unsigned long *v3_in)
72*4882a593Smuzhiyun {
73*4882a593Smuzhiyun 	DEFINE(v1);
74*4882a593Smuzhiyun 	DEFINE(v2);
75*4882a593Smuzhiyun 	DEFINE(v3);
76*4882a593Smuzhiyun 	unsigned long lines = bytes / (sizeof(unative_t)) / 4;
77*4882a593Smuzhiyun 
78*4882a593Smuzhiyun 	do {
79*4882a593Smuzhiyun 		LOAD(v1);
80*4882a593Smuzhiyun 		LOAD(v2);
81*4882a593Smuzhiyun 		LOAD(v3);
82*4882a593Smuzhiyun 		XOR(v1, v2);
83*4882a593Smuzhiyun 		XOR(v1, v3);
84*4882a593Smuzhiyun 		STORE(v1);
85*4882a593Smuzhiyun 
86*4882a593Smuzhiyun 		v1 += 4;
87*4882a593Smuzhiyun 		v2 += 4;
88*4882a593Smuzhiyun 		v3 += 4;
89*4882a593Smuzhiyun 	} while (--lines > 0);
90*4882a593Smuzhiyun }
91*4882a593Smuzhiyun 
__xor_altivec_4(unsigned long bytes,unsigned long * v1_in,unsigned long * v2_in,unsigned long * v3_in,unsigned long * v4_in)92*4882a593Smuzhiyun void __xor_altivec_4(unsigned long bytes, unsigned long *v1_in,
93*4882a593Smuzhiyun 		     unsigned long *v2_in, unsigned long *v3_in,
94*4882a593Smuzhiyun 		     unsigned long *v4_in)
95*4882a593Smuzhiyun {
96*4882a593Smuzhiyun 	DEFINE(v1);
97*4882a593Smuzhiyun 	DEFINE(v2);
98*4882a593Smuzhiyun 	DEFINE(v3);
99*4882a593Smuzhiyun 	DEFINE(v4);
100*4882a593Smuzhiyun 	unsigned long lines = bytes / (sizeof(unative_t)) / 4;
101*4882a593Smuzhiyun 
102*4882a593Smuzhiyun 	do {
103*4882a593Smuzhiyun 		LOAD(v1);
104*4882a593Smuzhiyun 		LOAD(v2);
105*4882a593Smuzhiyun 		LOAD(v3);
106*4882a593Smuzhiyun 		LOAD(v4);
107*4882a593Smuzhiyun 		XOR(v1, v2);
108*4882a593Smuzhiyun 		XOR(v3, v4);
109*4882a593Smuzhiyun 		XOR(v1, v3);
110*4882a593Smuzhiyun 		STORE(v1);
111*4882a593Smuzhiyun 
112*4882a593Smuzhiyun 		v1 += 4;
113*4882a593Smuzhiyun 		v2 += 4;
114*4882a593Smuzhiyun 		v3 += 4;
115*4882a593Smuzhiyun 		v4 += 4;
116*4882a593Smuzhiyun 	} while (--lines > 0);
117*4882a593Smuzhiyun }
118*4882a593Smuzhiyun 
__xor_altivec_5(unsigned long bytes,unsigned long * v1_in,unsigned long * v2_in,unsigned long * v3_in,unsigned long * v4_in,unsigned long * v5_in)119*4882a593Smuzhiyun void __xor_altivec_5(unsigned long bytes, unsigned long *v1_in,
120*4882a593Smuzhiyun 		     unsigned long *v2_in, unsigned long *v3_in,
121*4882a593Smuzhiyun 		     unsigned long *v4_in, unsigned long *v5_in)
122*4882a593Smuzhiyun {
123*4882a593Smuzhiyun 	DEFINE(v1);
124*4882a593Smuzhiyun 	DEFINE(v2);
125*4882a593Smuzhiyun 	DEFINE(v3);
126*4882a593Smuzhiyun 	DEFINE(v4);
127*4882a593Smuzhiyun 	DEFINE(v5);
128*4882a593Smuzhiyun 	unsigned long lines = bytes / (sizeof(unative_t)) / 4;
129*4882a593Smuzhiyun 
130*4882a593Smuzhiyun 	do {
131*4882a593Smuzhiyun 		LOAD(v1);
132*4882a593Smuzhiyun 		LOAD(v2);
133*4882a593Smuzhiyun 		LOAD(v3);
134*4882a593Smuzhiyun 		LOAD(v4);
135*4882a593Smuzhiyun 		LOAD(v5);
136*4882a593Smuzhiyun 		XOR(v1, v2);
137*4882a593Smuzhiyun 		XOR(v3, v4);
138*4882a593Smuzhiyun 		XOR(v1, v5);
139*4882a593Smuzhiyun 		XOR(v1, v3);
140*4882a593Smuzhiyun 		STORE(v1);
141*4882a593Smuzhiyun 
142*4882a593Smuzhiyun 		v1 += 4;
143*4882a593Smuzhiyun 		v2 += 4;
144*4882a593Smuzhiyun 		v3 += 4;
145*4882a593Smuzhiyun 		v4 += 4;
146*4882a593Smuzhiyun 		v5 += 4;
147*4882a593Smuzhiyun 	} while (--lines > 0);
148*4882a593Smuzhiyun }
149