/* SPDX-License-Identifier: GPL-2.0-only */
/*
 * Copyright (C) 2016 Red Hat, Inc.
 * Author: Michael S. Tsirkin <mst@redhat.com>
 *
 * Common macros and functions for ring benchmarking.
 */
#ifndef MAIN_H
#define MAIN_H

#include <stdbool.h>
#include <stdlib.h>	/* _Exit() in the generic wait_cycles() fallback */
#include <assert.h>	/* assert() in the generic cpu_relax() fallback */

/* Fall back to the plain GCC attribute if libc headers don't define this. */
#ifndef __always_inline
#define __always_inline inline __attribute__((always_inline))
#endif

/* Extra test parameter, provided by main.c. */
extern int param;

/* When set, model the cost of a VM exit/entry around each notification. */
extern bool do_exit;

#if defined(__x86_64__) || defined(__i386__)
#include <x86intrin.h>

/* Busy-wait for the given number of TSC cycles. */
static inline void wait_cycles(unsigned long long cycles)
{
	unsigned long long t;

	t = __rdtsc();
	while (__rdtsc() - t < cycles) {}
}

/* Rough cost of a VM exit/entry, in TSC cycles. */
#define VMEXIT_CYCLES 500
#define VMENTRY_CYCLES 500

#elif defined(__s390x__)
static inline void wait_cycles(unsigned long long cycles)
{
	/* brctg decrements %0 and branches until it reaches zero. */
	asm volatile("0: brctg %0,0b" : "+d" (cycles));
}

/* tweak me */
#define VMEXIT_CYCLES 200
#define VMENTRY_CYCLES 200

#else
/* Unsupported architecture: fail loudly rather than report bogus numbers. */
static inline void wait_cycles(unsigned long long cycles)
{
	_Exit(5);
}
#define VMEXIT_CYCLES 0
#define VMENTRY_CYCLES 0
#endif

/* Charge the cost of a guest->host transition, when enabled. */
static inline void vmexit(void)
{
	if (!do_exit)
		return;

	wait_cycles(VMEXIT_CYCLES);
}

/* Charge the cost of a host->guest transition, when enabled. */
static inline void vmentry(void)
{
	if (!do_exit)
		return;

	wait_cycles(VMENTRY_CYCLES);
}
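
/*
 * One plausible use (an assumption for illustration, not mandated by this
 * header): a ring implementation can model notification overhead by
 * bracketing the actual notification with these helpers:
 *
 *	vmexit();		// guest -> host transition cost
 *	kick();			// deliver the notification
 *	vmentry();		// host -> guest transition cost
 */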

/* implemented by ring */
void alloc_ring(void);
/* guest side */
int add_inbuf(unsigned, void *, void *);
void *get_buf(unsigned *, void **);
void disable_call(void);
bool used_empty(void);
bool enable_call(void);
void kick_available(void);
/* host side */
void disable_kick(void);
bool avail_empty(void);
bool enable_kick(void);
bool use_buf(unsigned *, void **);
void call_used(void);
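
/*
 * Hypothetical guest-side consumption loop (for illustration only; the
 * real loop lives in main.c, and buf/len/bufp are made-up locals):
 *
 *	for (;;) {
 *		if (used_empty()) {
 *			if (enable_call())	// true: still empty, safe to sleep
 *				wait_for_call();
 *			disable_call();
 *			continue;
 *		}
 *		buf = get_buf(&len, &bufp);
 *	}
 */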

/* implemented by main */
extern bool do_sleep;
void kick(void);
void wait_for_kick(void);
void call(void);
void wait_for_call(void);

extern unsigned ring_size;

/* Compiler barrier - similar to what Linux uses */
#define barrier() asm volatile("" ::: "memory")

/* Is there a portable way to do this? */
#if defined(__x86_64__) || defined(__i386__)
/* "rep; nop" encodes the PAUSE instruction: a polite spin-wait hint. */
#define cpu_relax() asm ("rep; nop" ::: "memory")
#elif defined(__s390x__)
#define cpu_relax() barrier()
#else
#define cpu_relax() assert(0)
#endif

extern bool do_relax;

static inline void busy_wait(void)
{
	if (do_relax)
		cpu_relax();
	else
		/* prevent compiler from removing busy loops */
		barrier();
}
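
/*
 * Typical spin loop (illustrative): poll a ring predicate, yielding the
 * pipeline to a sibling hyperthread when do_relax is set:
 *
 *	while (used_empty())
 *		busy_wait();
 */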

#if defined(__x86_64__)
/*
 * A locked memory operation is a full barrier and is cheaper than mfence;
 * the -132(%rsp) slot stays below the 128-byte red zone, so the dummy
 * write cannot clobber live data.
 */
#define smp_mb()     asm volatile("lock; addl $0,-132(%%rsp)" ::: "memory", "cc")
#elif defined(__i386__)
/* i386 has no %rsp (and no red zone); poke the top of the stack instead. */
#define smp_mb()     asm volatile("lock; addl $0,(%%esp)" ::: "memory", "cc")
#else
/*
 * Not using __ATOMIC_SEQ_CST since gcc docs say they are only synchronized
 * with other __ATOMIC_SEQ_CST calls.
 */
#define smp_mb() __sync_synchronize()
#endif
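
/*
 * Illustrative store-buffering pattern (names are hypothetical; both
 * sides run the same code with roles swapped). smp_mb() keeps the load
 * from being reordered before the preceding store:
 *
 *	WRITE_ONCE(my_flag, 1);
 *	smp_mb();
 *	if (!READ_ONCE(peer_flag))
 *		enter_critical_section();	// hypothetical helper
 */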

/*
 * This abuses the atomic builtins for thread fences, and
 * adds a compiler barrier.
 */
#define smp_release() do { \
    barrier(); \
    __atomic_thread_fence(__ATOMIC_RELEASE); \
} while (0)

#define smp_acquire() do { \
    __atomic_thread_fence(__ATOMIC_ACQUIRE); \
    barrier(); \
} while (0)
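
/*
 * Release/acquire pairing (illustrative; data/ready are made-up names).
 * Producer:
 *
 *	data = 42;
 *	smp_release();
 *	WRITE_ONCE(ready, 1);
 *
 * Consumer:
 *
 *	if (READ_ONCE(ready)) {
 *		smp_acquire();
 *		assert(data == 42);	// guaranteed to observe the store
 *	}
 */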

/*
 * x86 and s390x never reorder stores against other stores, so a compiler
 * barrier is all smp_wmb() needs there.
 */
#if defined(__i386__) || defined(__x86_64__) || defined(__s390x__)
#define smp_wmb() barrier()
#else
#define smp_wmb() smp_release()
#endif

/*
 * Only Alpha can reorder even data-dependent loads, so everywhere else
 * this is a no-op.
 */
#ifdef __alpha__
#define smp_read_barrier_depends() smp_acquire()
#else
#define smp_read_barrier_depends() do {} while (0)
#endif
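
/*
 * Illustrative dependent-load case (global_ptr is a made-up name): on
 * Alpha the barrier above is what makes the dereference safe after the
 * pointer is read; READ_ONCE() below already includes it:
 *
 *	p = READ_ONCE(global_ptr);
 *	x = p->field;
 */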

/* Read @size bytes from @p with a single, untorn access where possible. */
static __always_inline
void __read_once_size(const volatile void *p, void *res, int size)
{
	switch (size) {
	case 1: *(unsigned char *)res = *(volatile unsigned char *)p; break;
	case 2: *(unsigned short *)res = *(volatile unsigned short *)p; break;
	case 4: *(unsigned int *)res = *(volatile unsigned int *)p; break;
	case 8: *(unsigned long long *)res = *(volatile unsigned long long *)p; break;
	default:
		barrier();
		__builtin_memcpy((void *)res, (const void *)p, size);
		barrier();
	}
}

/* Write @size bytes to @p with a single, untorn access where possible. */
static __always_inline void __write_once_size(volatile void *p, void *res, int size)
{
	switch (size) {
	case 1: *(volatile unsigned char *)p = *(unsigned char *)res; break;
	case 2: *(volatile unsigned short *)p = *(unsigned short *)res; break;
	case 4: *(volatile unsigned int *)p = *(unsigned int *)res; break;
	case 8: *(volatile unsigned long long *)p = *(unsigned long long *)res; break;
	default:
		barrier();
		__builtin_memcpy((void *)p, (const void *)res, size);
		barrier();
	}
}

#define READ_ONCE(x) \
({									\
	union { typeof(x) __val; char __c[1]; } __u;			\
	__read_once_size(&(x), __u.__c, sizeof(x));			\
	smp_read_barrier_depends(); /* Enforce dependency ordering from x */ \
	__u.__val;							\
})

#define WRITE_ONCE(x, val) \
({									\
	union { typeof(x) __val; char __c[1]; } __u =			\
		{ .__val = (typeof(x)) (val) };				\
	__write_once_size(&(x), __u.__c, sizeof(x));			\
	__u.__val;							\
})
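
/*
 * Usage sketch (counter and limit are made-up names): READ_ONCE() and
 * WRITE_ONCE() force one untorn access and keep the compiler from
 * caching or re-reading the value across the loop:
 *
 *	while (READ_ONCE(counter) < limit)
 *		busy_wait();
 *	WRITE_ONCE(counter, 0);
 */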

#endif /* MAIN_H */