// SPDX-License-Identifier: GPL-2.0-or-later
/*
 *
 * Copyright (C) IBM Corporation, 2012
 *
 * Author: Anton Blanchard <anton@au.ibm.com>
 */
8*4882a593Smuzhiyun
/*
 * Sparse (as of v0.5.0) gets very, very confused by this file.
 * Make it a bit simpler for it: when __CHECKER__ is defined, skip
 * <altivec.h> and provide minimal stand-ins for the two names this
 * file needs from it.
 */
#if !defined(__CHECKER__)
#include <altivec.h>
#else
/* Fully parenthesised so the expansion is safe inside larger expressions. */
#define vec_xor(a, b) ((a) ^ (b))
#define vector __attribute__((vector_size(16)))
#endif
19*4882a593Smuzhiyun
20*4882a593Smuzhiyun #include "xor_vmx.h"
21*4882a593Smuzhiyun
22*4882a593Smuzhiyun typedef vector signed char unative_t;
23*4882a593Smuzhiyun
/*
 * DEFINE(V) - declare the working set for one buffer.
 *
 * Expects a pointer named V##_in to be in scope.  Declares V as that
 * pointer reinterpreted as unative_t *, plus four unative_t temporaries
 * V##_0 .. V##_3.  The expansion is a pair of declarations, so it must be
 * used where declarations are allowed and be followed by a semicolon.
 */
#define DEFINE(V)				\
	unative_t *V = (unative_t *)V##_in;	\
	unative_t V##_0, V##_1, V##_2, V##_3
27*4882a593Smuzhiyun
/*
 * LOAD(V) - copy V[0] .. V[3] into the temporaries V##_0 .. V##_3.
 *
 * Wrapped in do { } while (0) so it behaves as a single statement.
 */
#define LOAD(V)			\
	do {			\
		V##_0 = V[0];	\
		V##_1 = V[1];	\
		V##_2 = V[2];	\
		V##_3 = V[3];	\
	} while (0)
35*4882a593Smuzhiyun
/*
 * STORE(V) - write the temporaries V##_0 .. V##_3 back to V[0] .. V[3].
 *
 * Wrapped in do { } while (0) so it behaves as a single statement.
 */
#define STORE(V)		\
	do {			\
		V[0] = V##_0;	\
		V[1] = V##_1;	\
		V[2] = V##_2;	\
		V[3] = V##_3;	\
	} while (0)
43*4882a593Smuzhiyun
/*
 * XOR(V1, V2) - XOR the four temporaries of V2 into those of V1.
 *
 * Only V1##_0 .. V1##_3 are modified; nothing is written to memory until
 * a later STORE().  Wrapped in do { } while (0) so it behaves as a single
 * statement.
 */
#define XOR(V1, V2)					\
	do {						\
		V1##_0 = vec_xor(V1##_0, V2##_0);	\
		V1##_1 = vec_xor(V1##_1, V2##_1);	\
		V1##_2 = vec_xor(V1##_2, V2##_2);	\
		V1##_3 = vec_xor(V1##_3, V2##_3);	\
	} while (0)
51*4882a593Smuzhiyun
__xor_altivec_2(unsigned long bytes,unsigned long * v1_in,unsigned long * v2_in)52*4882a593Smuzhiyun void __xor_altivec_2(unsigned long bytes, unsigned long *v1_in,
53*4882a593Smuzhiyun unsigned long *v2_in)
54*4882a593Smuzhiyun {
55*4882a593Smuzhiyun DEFINE(v1);
56*4882a593Smuzhiyun DEFINE(v2);
57*4882a593Smuzhiyun unsigned long lines = bytes / (sizeof(unative_t)) / 4;
58*4882a593Smuzhiyun
59*4882a593Smuzhiyun do {
60*4882a593Smuzhiyun LOAD(v1);
61*4882a593Smuzhiyun LOAD(v2);
62*4882a593Smuzhiyun XOR(v1, v2);
63*4882a593Smuzhiyun STORE(v1);
64*4882a593Smuzhiyun
65*4882a593Smuzhiyun v1 += 4;
66*4882a593Smuzhiyun v2 += 4;
67*4882a593Smuzhiyun } while (--lines > 0);
68*4882a593Smuzhiyun }
69*4882a593Smuzhiyun
__xor_altivec_3(unsigned long bytes,unsigned long * v1_in,unsigned long * v2_in,unsigned long * v3_in)70*4882a593Smuzhiyun void __xor_altivec_3(unsigned long bytes, unsigned long *v1_in,
71*4882a593Smuzhiyun unsigned long *v2_in, unsigned long *v3_in)
72*4882a593Smuzhiyun {
73*4882a593Smuzhiyun DEFINE(v1);
74*4882a593Smuzhiyun DEFINE(v2);
75*4882a593Smuzhiyun DEFINE(v3);
76*4882a593Smuzhiyun unsigned long lines = bytes / (sizeof(unative_t)) / 4;
77*4882a593Smuzhiyun
78*4882a593Smuzhiyun do {
79*4882a593Smuzhiyun LOAD(v1);
80*4882a593Smuzhiyun LOAD(v2);
81*4882a593Smuzhiyun LOAD(v3);
82*4882a593Smuzhiyun XOR(v1, v2);
83*4882a593Smuzhiyun XOR(v1, v3);
84*4882a593Smuzhiyun STORE(v1);
85*4882a593Smuzhiyun
86*4882a593Smuzhiyun v1 += 4;
87*4882a593Smuzhiyun v2 += 4;
88*4882a593Smuzhiyun v3 += 4;
89*4882a593Smuzhiyun } while (--lines > 0);
90*4882a593Smuzhiyun }
91*4882a593Smuzhiyun
__xor_altivec_4(unsigned long bytes,unsigned long * v1_in,unsigned long * v2_in,unsigned long * v3_in,unsigned long * v4_in)92*4882a593Smuzhiyun void __xor_altivec_4(unsigned long bytes, unsigned long *v1_in,
93*4882a593Smuzhiyun unsigned long *v2_in, unsigned long *v3_in,
94*4882a593Smuzhiyun unsigned long *v4_in)
95*4882a593Smuzhiyun {
96*4882a593Smuzhiyun DEFINE(v1);
97*4882a593Smuzhiyun DEFINE(v2);
98*4882a593Smuzhiyun DEFINE(v3);
99*4882a593Smuzhiyun DEFINE(v4);
100*4882a593Smuzhiyun unsigned long lines = bytes / (sizeof(unative_t)) / 4;
101*4882a593Smuzhiyun
102*4882a593Smuzhiyun do {
103*4882a593Smuzhiyun LOAD(v1);
104*4882a593Smuzhiyun LOAD(v2);
105*4882a593Smuzhiyun LOAD(v3);
106*4882a593Smuzhiyun LOAD(v4);
107*4882a593Smuzhiyun XOR(v1, v2);
108*4882a593Smuzhiyun XOR(v3, v4);
109*4882a593Smuzhiyun XOR(v1, v3);
110*4882a593Smuzhiyun STORE(v1);
111*4882a593Smuzhiyun
112*4882a593Smuzhiyun v1 += 4;
113*4882a593Smuzhiyun v2 += 4;
114*4882a593Smuzhiyun v3 += 4;
115*4882a593Smuzhiyun v4 += 4;
116*4882a593Smuzhiyun } while (--lines > 0);
117*4882a593Smuzhiyun }
118*4882a593Smuzhiyun
__xor_altivec_5(unsigned long bytes,unsigned long * v1_in,unsigned long * v2_in,unsigned long * v3_in,unsigned long * v4_in,unsigned long * v5_in)119*4882a593Smuzhiyun void __xor_altivec_5(unsigned long bytes, unsigned long *v1_in,
120*4882a593Smuzhiyun unsigned long *v2_in, unsigned long *v3_in,
121*4882a593Smuzhiyun unsigned long *v4_in, unsigned long *v5_in)
122*4882a593Smuzhiyun {
123*4882a593Smuzhiyun DEFINE(v1);
124*4882a593Smuzhiyun DEFINE(v2);
125*4882a593Smuzhiyun DEFINE(v3);
126*4882a593Smuzhiyun DEFINE(v4);
127*4882a593Smuzhiyun DEFINE(v5);
128*4882a593Smuzhiyun unsigned long lines = bytes / (sizeof(unative_t)) / 4;
129*4882a593Smuzhiyun
130*4882a593Smuzhiyun do {
131*4882a593Smuzhiyun LOAD(v1);
132*4882a593Smuzhiyun LOAD(v2);
133*4882a593Smuzhiyun LOAD(v3);
134*4882a593Smuzhiyun LOAD(v4);
135*4882a593Smuzhiyun LOAD(v5);
136*4882a593Smuzhiyun XOR(v1, v2);
137*4882a593Smuzhiyun XOR(v3, v4);
138*4882a593Smuzhiyun XOR(v1, v5);
139*4882a593Smuzhiyun XOR(v1, v3);
140*4882a593Smuzhiyun STORE(v1);
141*4882a593Smuzhiyun
142*4882a593Smuzhiyun v1 += 4;
143*4882a593Smuzhiyun v2 += 4;
144*4882a593Smuzhiyun v3 += 4;
145*4882a593Smuzhiyun v4 += 4;
146*4882a593Smuzhiyun v5 += 4;
147*4882a593Smuzhiyun } while (--lines > 0);
148*4882a593Smuzhiyun }
149