// SPDX-License-Identifier: GPL-2.0-only
/*
 * Copyright 2017, Gustavo Romero, IBM Corp.
 *
 * Check if thread endianness is flipped inadvertently to BE on trap
 * caught in TM whilst MSR.FP and MSR.VEC are zero (i.e. just after
 * load_fp and load_vec overflowed).
 *
 * The issue can be checked on LE machines simply by zeroing load_fp
 * and load_vec and then causing a trap in TM. Since the endianness
 * changes to BE on return from the signal handler, 'nop' is
 * treated as an illegal instruction in the following sequence:
 *	tbegin.
 *	beq 1f
 *	trap
 *	tend.
 * 1:	nop
 *
 * However, although the issue is also present on BE machines, it's a
 * bit trickier to check it on BE machines because MSR.LE bit is set
 * to zero which determines a BE endianness that is the native
 * endianness on BE machines, so nothing notably critical happens,
 * i.e. no illegal instruction is observed immediately after returning
 * from the signal handler (as it happens on LE machines). Thus to test
 * it on BE machines LE endianness is forced after a first trap and then
 * the endianness is verified on subsequent traps to determine if the
 * endianness "flipped back" to the native endianness (BE).
 */
29*4882a593Smuzhiyun
#define _GNU_SOURCE
#include <errno.h>
#include <error.h>
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
#include <htmintrin.h>
#include <inttypes.h>
#include <pthread.h>
#include <sched.h>
#include <signal.h>
#include <stdbool.h>

#include "tm.h"
#include "utils.h"
44*4882a593Smuzhiyun
/*
 * Report an error together with the source file and line where it was
 * detected. The first argument of error_at_line() is non-zero, so the
 * process terminates after the message is printed.
 */
#define pr_error(error_code, format, ...) \
	error_at_line(1, error_code, __FILE__, __LINE__, format, ##__VA_ARGS__)

/* Mask used to extract the LE bit from the MSR image saved in a ucontext. */
#define MSR_LE 1UL
/* Value of the masked MSR.LE bit when the thread runs little-endian. */
#define LE 1UL

/* Thread handles: ping() runs the actual test, pong() forces context switches. */
pthread_t t0_ping;
pthread_t t1_pong;

/*
 * Set from the SIGUSR1 handler and polled by pong(). An object shared
 * between a signal handler and the interrupted code has to be a
 * volatile sig_atomic_t, otherwise the access is not guaranteed to be
 * well defined nor to be re-read on every loop iteration.
 */
volatile sig_atomic_t exit_from_pong;

/* Number of SIGTRAPs handled so far; reset by ping(), bumped by the handler. */
int trap_event;
/* Non-zero when the machine's native byte order is little-endian. */
int le;

/* Test verdict written by ping(): true when endianness did not flip. */
bool success;
60*4882a593Smuzhiyun
trap_signal_handler(int signo,siginfo_t * si,void * uc)61*4882a593Smuzhiyun void trap_signal_handler(int signo, siginfo_t *si, void *uc)
62*4882a593Smuzhiyun {
63*4882a593Smuzhiyun ucontext_t *ucp = uc;
64*4882a593Smuzhiyun uint64_t thread_endianness;
65*4882a593Smuzhiyun
66*4882a593Smuzhiyun /* Get thread endianness: extract bit LE from MSR */
67*4882a593Smuzhiyun thread_endianness = MSR_LE & ucp->uc_mcontext.gp_regs[PT_MSR];
68*4882a593Smuzhiyun
69*4882a593Smuzhiyun /***
70*4882a593Smuzhiyun * Little-Endian Machine
71*4882a593Smuzhiyun */
72*4882a593Smuzhiyun
73*4882a593Smuzhiyun if (le) {
74*4882a593Smuzhiyun /* First trap event */
75*4882a593Smuzhiyun if (trap_event == 0) {
76*4882a593Smuzhiyun /* Do nothing. Since it is returning from this trap
77*4882a593Smuzhiyun * event that endianness is flipped by the bug, so just
78*4882a593Smuzhiyun * let the process return from the signal handler and
79*4882a593Smuzhiyun * check on the second trap event if endianness is
80*4882a593Smuzhiyun * flipped or not.
81*4882a593Smuzhiyun */
82*4882a593Smuzhiyun }
83*4882a593Smuzhiyun /* Second trap event */
84*4882a593Smuzhiyun else if (trap_event == 1) {
85*4882a593Smuzhiyun /*
86*4882a593Smuzhiyun * Since trap was caught in TM on first trap event, if
87*4882a593Smuzhiyun * endianness was still LE (not flipped inadvertently)
88*4882a593Smuzhiyun * after returning from the signal handler instruction
89*4882a593Smuzhiyun * (1) is executed (basically a 'nop'), as it's located
90*4882a593Smuzhiyun * at address of tbegin. +4 (rollback addr). As (1) on
91*4882a593Smuzhiyun * LE endianness does in effect nothing, instruction (2)
92*4882a593Smuzhiyun * is then executed again as 'trap', generating a second
93*4882a593Smuzhiyun * trap event (note that in that case 'trap' is caught
94*4882a593Smuzhiyun * not in transacional mode). On te other hand, if after
95*4882a593Smuzhiyun * the return from the signal handler the endianness in-
96*4882a593Smuzhiyun * advertently flipped, instruction (1) is tread as a
97*4882a593Smuzhiyun * branch instruction, i.e. b .+8, hence instruction (3)
98*4882a593Smuzhiyun * and (4) are executed (tbegin.; trap;) and we get sim-
99*4882a593Smuzhiyun * ilaly on the trap signal handler, but now in TM mode.
100*4882a593Smuzhiyun * Either way, it's now possible to check the MSR LE bit
101*4882a593Smuzhiyun * once in the trap handler to verify if endianness was
102*4882a593Smuzhiyun * flipped or not after the return from the second trap
103*4882a593Smuzhiyun * event. If endianness is flipped, the bug is present.
104*4882a593Smuzhiyun * Finally, getting a trap in TM mode or not is just
105*4882a593Smuzhiyun * worth noting because it affects the math to determine
106*4882a593Smuzhiyun * the offset added to the NIP on return: the NIP for a
107*4882a593Smuzhiyun * trap caught in TM is the rollback address, i.e. the
108*4882a593Smuzhiyun * next instruction after 'tbegin.', whilst the NIP for
109*4882a593Smuzhiyun * a trap caught in non-transactional mode is the very
110*4882a593Smuzhiyun * same address of the 'trap' instruction that generated
111*4882a593Smuzhiyun * the trap event.
112*4882a593Smuzhiyun */
113*4882a593Smuzhiyun
114*4882a593Smuzhiyun if (thread_endianness == LE) {
115*4882a593Smuzhiyun /* Go to 'success', i.e. instruction (6) */
116*4882a593Smuzhiyun ucp->uc_mcontext.gp_regs[PT_NIP] += 16;
117*4882a593Smuzhiyun } else {
118*4882a593Smuzhiyun /*
119*4882a593Smuzhiyun * Thread endianness is BE, so it flipped
120*4882a593Smuzhiyun * inadvertently. Thus we flip back to LE and
121*4882a593Smuzhiyun * set NIP to go to 'failure', instruction (5).
122*4882a593Smuzhiyun */
123*4882a593Smuzhiyun ucp->uc_mcontext.gp_regs[PT_MSR] |= 1UL;
124*4882a593Smuzhiyun ucp->uc_mcontext.gp_regs[PT_NIP] += 4;
125*4882a593Smuzhiyun }
126*4882a593Smuzhiyun }
127*4882a593Smuzhiyun }
128*4882a593Smuzhiyun
129*4882a593Smuzhiyun /***
130*4882a593Smuzhiyun * Big-Endian Machine
131*4882a593Smuzhiyun */
132*4882a593Smuzhiyun
133*4882a593Smuzhiyun else {
134*4882a593Smuzhiyun /* First trap event */
135*4882a593Smuzhiyun if (trap_event == 0) {
136*4882a593Smuzhiyun /*
137*4882a593Smuzhiyun * Force thread endianness to be LE. Instructions (1),
138*4882a593Smuzhiyun * (3), and (4) will be executed, generating a second
139*4882a593Smuzhiyun * trap in TM mode.
140*4882a593Smuzhiyun */
141*4882a593Smuzhiyun ucp->uc_mcontext.gp_regs[PT_MSR] |= 1UL;
142*4882a593Smuzhiyun }
143*4882a593Smuzhiyun /* Second trap event */
144*4882a593Smuzhiyun else if (trap_event == 1) {
145*4882a593Smuzhiyun /*
146*4882a593Smuzhiyun * Do nothing. If bug is present on return from this
147*4882a593Smuzhiyun * second trap event endianness will flip back "automat-
148*4882a593Smuzhiyun * ically" to BE, otherwise thread endianness will
149*4882a593Smuzhiyun * continue to be LE, just as it was set above.
150*4882a593Smuzhiyun */
151*4882a593Smuzhiyun }
152*4882a593Smuzhiyun /* A third trap event */
153*4882a593Smuzhiyun else {
154*4882a593Smuzhiyun /*
155*4882a593Smuzhiyun * Once here it means that after returning from the sec-
156*4882a593Smuzhiyun * ond trap event instruction (4) (trap) was executed
157*4882a593Smuzhiyun * as LE, generating a third trap event. In that case
158*4882a593Smuzhiyun * endianness is still LE as set on return from the
159*4882a593Smuzhiyun * first trap event, hence no bug. Otherwise, bug
160*4882a593Smuzhiyun * flipped back to BE on return from the second trap
161*4882a593Smuzhiyun * event and instruction (4) was executed as 'tdi' (so
162*4882a593Smuzhiyun * basically a 'nop') and branch to 'failure' in
163*4882a593Smuzhiyun * instruction (5) was taken to indicate failure and we
164*4882a593Smuzhiyun * never get here.
165*4882a593Smuzhiyun */
166*4882a593Smuzhiyun
167*4882a593Smuzhiyun /*
168*4882a593Smuzhiyun * Flip back to BE and go to instruction (6), i.e. go to
169*4882a593Smuzhiyun * 'success'.
170*4882a593Smuzhiyun */
171*4882a593Smuzhiyun ucp->uc_mcontext.gp_regs[PT_MSR] &= ~1UL;
172*4882a593Smuzhiyun ucp->uc_mcontext.gp_regs[PT_NIP] += 8;
173*4882a593Smuzhiyun }
174*4882a593Smuzhiyun }
175*4882a593Smuzhiyun
176*4882a593Smuzhiyun trap_event++;
177*4882a593Smuzhiyun }
178*4882a593Smuzhiyun
usr1_signal_handler(int signo,siginfo_t * si,void * not_used)179*4882a593Smuzhiyun void usr1_signal_handler(int signo, siginfo_t *si, void *not_used)
180*4882a593Smuzhiyun {
181*4882a593Smuzhiyun /* Got a USR1 signal from ping(), so just tell pong() to exit */
182*4882a593Smuzhiyun exit_from_pong = 1;
183*4882a593Smuzhiyun }
184*4882a593Smuzhiyun
ping(void * not_used)185*4882a593Smuzhiyun void *ping(void *not_used)
186*4882a593Smuzhiyun {
187*4882a593Smuzhiyun uint64_t i;
188*4882a593Smuzhiyun
189*4882a593Smuzhiyun trap_event = 0;
190*4882a593Smuzhiyun
191*4882a593Smuzhiyun /*
192*4882a593Smuzhiyun * Wait an amount of context switches so load_fp and load_vec overflows
193*4882a593Smuzhiyun * and MSR_[FP|VEC|V] is 0.
194*4882a593Smuzhiyun */
195*4882a593Smuzhiyun for (i = 0; i < 1024*1024*512; i++)
196*4882a593Smuzhiyun ;
197*4882a593Smuzhiyun
198*4882a593Smuzhiyun asm goto(
199*4882a593Smuzhiyun /*
200*4882a593Smuzhiyun * [NA] means "Native Endianness", i.e. it tells how a
201*4882a593Smuzhiyun * instruction is executed on machine's native endianness (in
202*4882a593Smuzhiyun * other words, native endianness matches kernel endianness).
203*4882a593Smuzhiyun * [OP] means "Opposite Endianness", i.e. on a BE machine, it
204*4882a593Smuzhiyun * tells how a instruction is executed as a LE instruction; con-
205*4882a593Smuzhiyun * versely, on a LE machine, it tells how a instruction is
206*4882a593Smuzhiyun * executed as a BE instruction. When [NA] is omitted, it means
207*4882a593Smuzhiyun * that the native interpretation of a given instruction is not
208*4882a593Smuzhiyun * relevant for the test. Likewise when [OP] is omitted.
209*4882a593Smuzhiyun */
210*4882a593Smuzhiyun
211*4882a593Smuzhiyun " tbegin. ;" /* (0) tbegin. [NA] */
212*4882a593Smuzhiyun " tdi 0, 0, 0x48;" /* (1) nop [NA]; b (3) [OP] */
213*4882a593Smuzhiyun " trap ;" /* (2) trap [NA] */
214*4882a593Smuzhiyun ".long 0x1D05007C;" /* (3) tbegin. [OP] */
215*4882a593Smuzhiyun ".long 0x0800E07F;" /* (4) trap [OP]; nop [NA] */
216*4882a593Smuzhiyun " b %l[failure] ;" /* (5) b [NA]; MSR.LE flipped (bug) */
217*4882a593Smuzhiyun " b %l[success] ;" /* (6) b [NA]; MSR.LE did not flip (ok)*/
218*4882a593Smuzhiyun
219*4882a593Smuzhiyun : : : : failure, success);
220*4882a593Smuzhiyun
221*4882a593Smuzhiyun failure:
222*4882a593Smuzhiyun success = false;
223*4882a593Smuzhiyun goto exit_from_ping;
224*4882a593Smuzhiyun
225*4882a593Smuzhiyun success:
226*4882a593Smuzhiyun success = true;
227*4882a593Smuzhiyun
228*4882a593Smuzhiyun exit_from_ping:
229*4882a593Smuzhiyun /* Tell pong() to exit before leaving */
230*4882a593Smuzhiyun pthread_kill(t1_pong, SIGUSR1);
231*4882a593Smuzhiyun return NULL;
232*4882a593Smuzhiyun }
233*4882a593Smuzhiyun
pong(void * not_used)234*4882a593Smuzhiyun void *pong(void *not_used)
235*4882a593Smuzhiyun {
236*4882a593Smuzhiyun while (!exit_from_pong)
237*4882a593Smuzhiyun /*
238*4882a593Smuzhiyun * Induce context switches on ping() thread
239*4882a593Smuzhiyun * until ping() finishes its job and signs
240*4882a593Smuzhiyun * to exit from this loop.
241*4882a593Smuzhiyun */
242*4882a593Smuzhiyun sched_yield();
243*4882a593Smuzhiyun
244*4882a593Smuzhiyun return NULL;
245*4882a593Smuzhiyun }
246*4882a593Smuzhiyun
tm_trap_test(void)247*4882a593Smuzhiyun int tm_trap_test(void)
248*4882a593Smuzhiyun {
249*4882a593Smuzhiyun uint16_t k = 1;
250*4882a593Smuzhiyun int cpu, rc;
251*4882a593Smuzhiyun
252*4882a593Smuzhiyun pthread_attr_t attr;
253*4882a593Smuzhiyun cpu_set_t cpuset;
254*4882a593Smuzhiyun
255*4882a593Smuzhiyun struct sigaction trap_sa;
256*4882a593Smuzhiyun
257*4882a593Smuzhiyun SKIP_IF(!have_htm());
258*4882a593Smuzhiyun
259*4882a593Smuzhiyun trap_sa.sa_flags = SA_SIGINFO;
260*4882a593Smuzhiyun trap_sa.sa_sigaction = trap_signal_handler;
261*4882a593Smuzhiyun sigaction(SIGTRAP, &trap_sa, NULL);
262*4882a593Smuzhiyun
263*4882a593Smuzhiyun struct sigaction usr1_sa;
264*4882a593Smuzhiyun
265*4882a593Smuzhiyun usr1_sa.sa_flags = SA_SIGINFO;
266*4882a593Smuzhiyun usr1_sa.sa_sigaction = usr1_signal_handler;
267*4882a593Smuzhiyun sigaction(SIGUSR1, &usr1_sa, NULL);
268*4882a593Smuzhiyun
269*4882a593Smuzhiyun cpu = pick_online_cpu();
270*4882a593Smuzhiyun FAIL_IF(cpu < 0);
271*4882a593Smuzhiyun
272*4882a593Smuzhiyun // Set only one CPU in the mask. Both threads will be bound to that CPU.
273*4882a593Smuzhiyun CPU_ZERO(&cpuset);
274*4882a593Smuzhiyun CPU_SET(cpu, &cpuset);
275*4882a593Smuzhiyun
276*4882a593Smuzhiyun /* Init pthread attribute */
277*4882a593Smuzhiyun rc = pthread_attr_init(&attr);
278*4882a593Smuzhiyun if (rc)
279*4882a593Smuzhiyun pr_error(rc, "pthread_attr_init()");
280*4882a593Smuzhiyun
281*4882a593Smuzhiyun /*
282*4882a593Smuzhiyun * Bind thread ping() and pong() both to CPU 0 so they ping-pong and
283*4882a593Smuzhiyun * speed up context switches on ping() thread, speeding up the load_fp
284*4882a593Smuzhiyun * and load_vec overflow.
285*4882a593Smuzhiyun */
286*4882a593Smuzhiyun rc = pthread_attr_setaffinity_np(&attr, sizeof(cpu_set_t), &cpuset);
287*4882a593Smuzhiyun if (rc)
288*4882a593Smuzhiyun pr_error(rc, "pthread_attr_setaffinity()");
289*4882a593Smuzhiyun
290*4882a593Smuzhiyun /* Figure out the machine endianness */
291*4882a593Smuzhiyun le = (int) *(uint8_t *)&k;
292*4882a593Smuzhiyun
293*4882a593Smuzhiyun printf("%s machine detected. Checking if endianness flips %s",
294*4882a593Smuzhiyun le ? "Little-Endian" : "Big-Endian",
295*4882a593Smuzhiyun "inadvertently on trap in TM... ");
296*4882a593Smuzhiyun
297*4882a593Smuzhiyun rc = fflush(0);
298*4882a593Smuzhiyun if (rc)
299*4882a593Smuzhiyun pr_error(rc, "fflush()");
300*4882a593Smuzhiyun
301*4882a593Smuzhiyun /* Launch ping() */
302*4882a593Smuzhiyun rc = pthread_create(&t0_ping, &attr, ping, NULL);
303*4882a593Smuzhiyun if (rc)
304*4882a593Smuzhiyun pr_error(rc, "pthread_create()");
305*4882a593Smuzhiyun
306*4882a593Smuzhiyun exit_from_pong = 0;
307*4882a593Smuzhiyun
308*4882a593Smuzhiyun /* Launch pong() */
309*4882a593Smuzhiyun rc = pthread_create(&t1_pong, &attr, pong, NULL);
310*4882a593Smuzhiyun if (rc)
311*4882a593Smuzhiyun pr_error(rc, "pthread_create()");
312*4882a593Smuzhiyun
313*4882a593Smuzhiyun rc = pthread_join(t0_ping, NULL);
314*4882a593Smuzhiyun if (rc)
315*4882a593Smuzhiyun pr_error(rc, "pthread_join()");
316*4882a593Smuzhiyun
317*4882a593Smuzhiyun rc = pthread_join(t1_pong, NULL);
318*4882a593Smuzhiyun if (rc)
319*4882a593Smuzhiyun pr_error(rc, "pthread_join()");
320*4882a593Smuzhiyun
321*4882a593Smuzhiyun if (success) {
322*4882a593Smuzhiyun printf("no.\n"); /* no, endianness did not flip inadvertently */
323*4882a593Smuzhiyun return EXIT_SUCCESS;
324*4882a593Smuzhiyun }
325*4882a593Smuzhiyun
326*4882a593Smuzhiyun printf("yes!\n"); /* yes, endianness did flip inadvertently */
327*4882a593Smuzhiyun return EXIT_FAILURE;
328*4882a593Smuzhiyun }
329*4882a593Smuzhiyun
main(int argc,char ** argv)330*4882a593Smuzhiyun int main(int argc, char **argv)
331*4882a593Smuzhiyun {
332*4882a593Smuzhiyun return test_harness(tm_trap_test, "tm_trap_test");
333*4882a593Smuzhiyun }
334