/* /OK3568_Linux_fs/kernel/tools/virtio/ringtest/ring.c */
// SPDX-License-Identifier: GPL-2.0-only
/*
 * Copyright (C) 2016 Red Hat, Inc.
 * Author: Michael S. Tsirkin <mst@redhat.com>
 *
 * Simple descriptor-based ring. virtio 0.9 compatible event index is used for
 * signalling, unconditionally.
 */
#define _GNU_SOURCE
#include "main.h"
#include <stdlib.h>
#include <stdio.h>
#include <string.h>

/* Next - Where next entry will be written.
 * Prev - "Next" value when event triggered previously.
 * Event - Peer requested event after writing this entry.
 */
static inline bool need_event(unsigned short event,
			      unsigned short next,
			      unsigned short prev)
{
	return (unsigned short)(next - event - 1) < (unsigned short)(next - prev);
}
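
/* Worked example of the wrap-safe check above: with prev = 2 and next = 5,
 * entries 2, 3 and 4 have been written since the last notification.
 * For event = 3, (unsigned short)(next - event - 1) = 1 is below
 * (unsigned short)(next - prev) = 3, so the peer must be notified.
 * For event = 7, next - event - 1 wraps to 65533, the comparison fails,
 * and no notification is sent.  The unsigned subtraction keeps the test
 * correct even when the indexes wrap past 65535.
 */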

/* Design:
 * Guest adds descriptors with unique index values and DESC_HW in flags.
 * Host overwrites used descriptors with correct len, index, and DESC_HW clear.
 * Flags are always set last.
 */
#define DESC_HW 0x1

struct desc {
	unsigned short flags;
	unsigned short index;
	unsigned len;
	unsigned long long addr;
};
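
/* Lifecycle of a single slot, as implemented below: the guest fills in
 * addr, len and index, then publishes the entry by setting
 * flags = DESC_HW (add_inbuf).  The host polls for DESC_HW, consumes the
 * buffer, rewrites len and hands the slot back by clearing flags
 * (use_buf).  The guest then sees DESC_HW clear and reclaims the slot
 * (get_buf).  Because flags is always written last, a set or clear
 * DESC_HW bit guarantees the rest of the descriptor is already valid.
 */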

/* how much padding is needed to avoid false cache sharing */
#define HOST_GUEST_PADDING 0x80

/* Mostly read */
struct event {
	unsigned short kick_index;
	unsigned char reserved0[HOST_GUEST_PADDING - 2];
	unsigned short call_index;
	unsigned char reserved1[HOST_GUEST_PADDING - 2];
};
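
/* kick_index and call_index each occupy a HOST_GUEST_PADDING (0x80) byte
 * region so the two indexes never share a cache line.  A compile-time
 * check along these lines would make the intent explicit (a sketch, not
 * part of the original file):
 *
 *	_Static_assert(sizeof(struct event) == 2 * HOST_GUEST_PADDING,
 *		       "event indexes must sit in separate cache lines");
 */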

struct data {
	void *buf; /* descriptor is writeable, we can't get buf from there */
	void *data;
} *data;

struct desc *ring;
struct event *event;

struct guest {
	unsigned avail_idx;
	unsigned last_used_idx;
	unsigned num_free;
	unsigned kicked_avail_idx;
	unsigned char reserved[HOST_GUEST_PADDING - 12];
} guest;

struct host {
	/* we do not need to track last avail index
	 * unless we have more than one in flight.
	 */
	unsigned used_idx;
	unsigned called_used_idx;
	unsigned char reserved[HOST_GUEST_PADDING - 4];
} host;

/* implemented by ring */
void alloc_ring(void)
{
	int ret;
	int i;

	ret = posix_memalign((void **)&ring, 0x1000, ring_size * sizeof *ring);
	if (ret) {
		perror("Unable to allocate ring buffer.\n");
		exit(3);
	}
	event = calloc(1, sizeof(*event));
	if (!event) {
		perror("Unable to allocate event buffer.\n");
		exit(3);
	}
	guest.avail_idx = 0;
	guest.kicked_avail_idx = -1;
	guest.last_used_idx = 0;
	host.used_idx = 0;
	host.called_used_idx = -1;
	for (i = 0; i < ring_size; ++i) {
		struct desc desc = {
			.index = i,
		};
		ring[i] = desc;
	}
	guest.num_free = ring_size;
	data = calloc(ring_size, sizeof(*data));
	if (!data) {
		perror("Unable to allocate data buffer.\n");
		exit(3);
	}
}

/* guest side */
int add_inbuf(unsigned len, void *buf, void *datap)
{
	unsigned head, index;

	if (!guest.num_free)
		return -1;

	guest.num_free--;
	head = (ring_size - 1) & (guest.avail_idx++);

	/* Start with a write. On MESI architectures this helps
	 * avoid a shared state with consumer that is polling this descriptor.
	 */
	ring[head].addr = (unsigned long)(void *)buf;
	ring[head].len = len;
	/* read below might bypass write above. That is OK because it's just an
	 * optimization. If this happens, we will get the cache line in a
	 * shared state which is unfortunate, but probably not worth it to
	 * add an explicit full barrier to avoid this.
	 */
	barrier();
	index = ring[head].index;
	data[index].buf = buf;
	data[index].data = datap;
	/* Barrier A (for pairing) */
	smp_release();
	ring[head].flags = DESC_HW;

	return 0;
}

void *get_buf(unsigned *lenp, void **bufp)
{
	unsigned head = (ring_size - 1) & guest.last_used_idx;
	unsigned index;
	void *datap;

	if (ring[head].flags & DESC_HW)
		return NULL;
	/* Barrier B (for pairing) */
	smp_acquire();
	*lenp = ring[head].len;
	index = ring[head].index & (ring_size - 1);
	datap = data[index].data;
	*bufp = data[index].buf;
	data[index].buf = NULL;
	data[index].data = NULL;
	guest.num_free++;
	guest.last_used_idx++;
	return datap;
}

bool used_empty()
{
	unsigned head = (ring_size - 1) & guest.last_used_idx;

	return (ring[head].flags & DESC_HW);
}

void disable_call()
{
	/* Doing nothing to disable calls might cause
	 * extra interrupts, but reduces the number of cache misses.
	 */
}

bool enable_call()
{
	event->call_index = guest.last_used_idx;
	/* Flush call index write */
	/* Barrier D (for pairing) */
	smp_mb();
	return used_empty();
}

void kick_available(void)
{
	bool need;

	/* Flush in previous flags write */
	/* Barrier C (for pairing) */
	smp_mb();
	need = need_event(event->kick_index,
			   guest.avail_idx,
			   guest.kicked_avail_idx);

	guest.kicked_avail_idx = guest.avail_idx;
	if (need)
		kick();
}
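
/* A minimal sketch (not part of the original file) of how the guest-side
 * API above fits together: produce buffers until the ring is full, notify
 * the host at most once per batch, then reap completions by polling.
 * The buffer source and completion handling are placeholders.
 *
 *	void guest_cycle_sketch(void *buf, unsigned len)
 *	{
 *		unsigned got_len;
 *		void *got_buf;
 *
 *		while (add_inbuf(len, buf, buf) == 0)
 *			;			// fill until num_free hits 0
 *		kick_available();		// one kick covers the whole batch
 *		while (get_buf(&got_len, &got_buf))
 *			;			// process got_buf / got_len here
 *	}
 */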

/* host side */
void disable_kick()
{
	/* Doing nothing to disable kicks might cause
	 * extra interrupts, but reduces the number of cache misses.
	 */
}

bool enable_kick()
{
	event->kick_index = host.used_idx;
	/* Barrier C (for pairing) */
	smp_mb();
	return avail_empty();
}

bool avail_empty()
{
	unsigned head = (ring_size - 1) & host.used_idx;

	return !(ring[head].flags & DESC_HW);
}

bool use_buf(unsigned *lenp, void **bufp)
{
	unsigned head = (ring_size - 1) & host.used_idx;

	if (!(ring[head].flags & DESC_HW))
		return false;

	/* make sure length read below is not speculated */
	/* Barrier A (for pairing) */
	smp_acquire();

	/* simple in-order completion: we don't need
	 * to touch index at all. This also means we
	 * can just modify the descriptor in-place.
	 */
	ring[head].len--;
	/* Make sure len is valid before flags.
	 * Note: alternative is to write len and flags in one access -
	 * possible on 64 bit architectures but wmb is free on Intel anyway
	 * so I have no way to test whether it's a gain.
	 */
	/* Barrier B (for pairing) */
	smp_release();
	ring[head].flags = 0;
	host.used_idx++;
	return true;
}

void call_used(void)
{
	bool need;

	/* Flush in previous flags write */
	/* Barrier D (for pairing) */
	smp_mb();

	need = need_event(event->call_index,
			host.used_idx,
			host.called_used_idx);

	host.called_used_idx = host.used_idx;

	if (need)
		call();
}
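
/* A minimal sketch (not part of the original file) of the host-side
 * consumer loop built from the API above: drain every available
 * descriptor, then issue at most one completion notification per batch.
 * The barrier labels pair across the two sides: smp_release() in
 * add_inbuf (A) with smp_acquire() in use_buf (A), smp_release() in
 * use_buf (B) with smp_acquire() in get_buf (B), and the smp_mb()
 * pairs C (kick_available/enable_kick) and D (call_used/enable_call)
 * order the event-index writes against the flag re-checks.
 *
 *	void host_cycle_sketch(void)
 *	{
 *		unsigned len;
 *		void *buf;
 *
 *		while (use_buf(&len, &buf))
 *			;			// consume everything available
 *		call_used();			// at most one call per batch
 *	}
 */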