1*4882a593Smuzhiyun /******************************************************************************
2*4882a593Smuzhiyun * evtchn.c
3*4882a593Smuzhiyun *
4*4882a593Smuzhiyun * Driver for receiving and demuxing event-channel signals.
5*4882a593Smuzhiyun *
6*4882a593Smuzhiyun * Copyright (c) 2004-2005, K A Fraser
7*4882a593Smuzhiyun * Multi-process extensions Copyright (c) 2004, Steven Smith
8*4882a593Smuzhiyun *
9*4882a593Smuzhiyun * This program is free software; you can redistribute it and/or
10*4882a593Smuzhiyun * modify it under the terms of the GNU General Public License version 2
11*4882a593Smuzhiyun * as published by the Free Software Foundation; or, when distributed
12*4882a593Smuzhiyun * separately from the Linux kernel or incorporated into other
13*4882a593Smuzhiyun * software packages, subject to the following license:
14*4882a593Smuzhiyun *
15*4882a593Smuzhiyun * Permission is hereby granted, free of charge, to any person obtaining a copy
16*4882a593Smuzhiyun * of this source file (the "Software"), to deal in the Software without
17*4882a593Smuzhiyun * restriction, including without limitation the rights to use, copy, modify,
18*4882a593Smuzhiyun * merge, publish, distribute, sublicense, and/or sell copies of the Software,
19*4882a593Smuzhiyun * and to permit persons to whom the Software is furnished to do so, subject to
20*4882a593Smuzhiyun * the following conditions:
21*4882a593Smuzhiyun *
22*4882a593Smuzhiyun * The above copyright notice and this permission notice shall be included in
23*4882a593Smuzhiyun * all copies or substantial portions of the Software.
24*4882a593Smuzhiyun *
25*4882a593Smuzhiyun * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
26*4882a593Smuzhiyun * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
27*4882a593Smuzhiyun * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
28*4882a593Smuzhiyun * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
29*4882a593Smuzhiyun * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
30*4882a593Smuzhiyun * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
31*4882a593Smuzhiyun * IN THE SOFTWARE.
32*4882a593Smuzhiyun */
33*4882a593Smuzhiyun
34*4882a593Smuzhiyun #define pr_fmt(fmt) "xen:" KBUILD_MODNAME ": " fmt
35*4882a593Smuzhiyun
36*4882a593Smuzhiyun #include <linux/module.h>
37*4882a593Smuzhiyun #include <linux/kernel.h>
38*4882a593Smuzhiyun #include <linux/sched.h>
39*4882a593Smuzhiyun #include <linux/slab.h>
40*4882a593Smuzhiyun #include <linux/string.h>
41*4882a593Smuzhiyun #include <linux/errno.h>
42*4882a593Smuzhiyun #include <linux/fs.h>
43*4882a593Smuzhiyun #include <linux/miscdevice.h>
44*4882a593Smuzhiyun #include <linux/major.h>
45*4882a593Smuzhiyun #include <linux/proc_fs.h>
46*4882a593Smuzhiyun #include <linux/stat.h>
47*4882a593Smuzhiyun #include <linux/poll.h>
48*4882a593Smuzhiyun #include <linux/irq.h>
49*4882a593Smuzhiyun #include <linux/init.h>
50*4882a593Smuzhiyun #include <linux/mutex.h>
51*4882a593Smuzhiyun #include <linux/cpu.h>
52*4882a593Smuzhiyun #include <linux/mm.h>
53*4882a593Smuzhiyun #include <linux/vmalloc.h>
54*4882a593Smuzhiyun
55*4882a593Smuzhiyun #include <xen/xen.h>
56*4882a593Smuzhiyun #include <xen/events.h>
57*4882a593Smuzhiyun #include <xen/evtchn.h>
58*4882a593Smuzhiyun #include <xen/xen-ops.h>
59*4882a593Smuzhiyun #include <asm/xen/hypervisor.h>
60*4882a593Smuzhiyun
61*4882a593Smuzhiyun struct per_user_data {
62*4882a593Smuzhiyun struct mutex bind_mutex; /* serialize bind/unbind operations */
63*4882a593Smuzhiyun struct rb_root evtchns;
64*4882a593Smuzhiyun unsigned int nr_evtchns;
65*4882a593Smuzhiyun
66*4882a593Smuzhiyun /* Notification ring, accessed via /dev/xen/evtchn. */
67*4882a593Smuzhiyun unsigned int ring_size;
68*4882a593Smuzhiyun evtchn_port_t *ring;
69*4882a593Smuzhiyun unsigned int ring_cons, ring_prod, ring_overflow;
70*4882a593Smuzhiyun struct mutex ring_cons_mutex; /* protect against concurrent readers */
71*4882a593Smuzhiyun spinlock_t ring_prod_lock; /* product against concurrent interrupts */
72*4882a593Smuzhiyun
73*4882a593Smuzhiyun /* Processes wait on this queue when ring is empty. */
74*4882a593Smuzhiyun wait_queue_head_t evtchn_wait;
75*4882a593Smuzhiyun struct fasync_struct *evtchn_async_queue;
76*4882a593Smuzhiyun const char *name;
77*4882a593Smuzhiyun
78*4882a593Smuzhiyun domid_t restrict_domid;
79*4882a593Smuzhiyun };
80*4882a593Smuzhiyun
81*4882a593Smuzhiyun #define UNRESTRICTED_DOMID ((domid_t)-1)
82*4882a593Smuzhiyun
83*4882a593Smuzhiyun struct user_evtchn {
84*4882a593Smuzhiyun struct rb_node node;
85*4882a593Smuzhiyun struct per_user_data *user;
86*4882a593Smuzhiyun evtchn_port_t port;
87*4882a593Smuzhiyun bool enabled;
88*4882a593Smuzhiyun };
89*4882a593Smuzhiyun
/* Release a notification ring that was allocated with kvmalloc_array(). */
static void evtchn_free_ring(evtchn_port_t *ring)
{
	kvfree(ring);
}
94*4882a593Smuzhiyun
evtchn_ring_offset(struct per_user_data * u,unsigned int idx)95*4882a593Smuzhiyun static unsigned int evtchn_ring_offset(struct per_user_data *u,
96*4882a593Smuzhiyun unsigned int idx)
97*4882a593Smuzhiyun {
98*4882a593Smuzhiyun return idx & (u->ring_size - 1);
99*4882a593Smuzhiyun }
100*4882a593Smuzhiyun
evtchn_ring_entry(struct per_user_data * u,unsigned int idx)101*4882a593Smuzhiyun static evtchn_port_t *evtchn_ring_entry(struct per_user_data *u,
102*4882a593Smuzhiyun unsigned int idx)
103*4882a593Smuzhiyun {
104*4882a593Smuzhiyun return u->ring + evtchn_ring_offset(u, idx);
105*4882a593Smuzhiyun }
106*4882a593Smuzhiyun
add_evtchn(struct per_user_data * u,struct user_evtchn * evtchn)107*4882a593Smuzhiyun static int add_evtchn(struct per_user_data *u, struct user_evtchn *evtchn)
108*4882a593Smuzhiyun {
109*4882a593Smuzhiyun struct rb_node **new = &(u->evtchns.rb_node), *parent = NULL;
110*4882a593Smuzhiyun
111*4882a593Smuzhiyun u->nr_evtchns++;
112*4882a593Smuzhiyun
113*4882a593Smuzhiyun while (*new) {
114*4882a593Smuzhiyun struct user_evtchn *this;
115*4882a593Smuzhiyun
116*4882a593Smuzhiyun this = rb_entry(*new, struct user_evtchn, node);
117*4882a593Smuzhiyun
118*4882a593Smuzhiyun parent = *new;
119*4882a593Smuzhiyun if (this->port < evtchn->port)
120*4882a593Smuzhiyun new = &((*new)->rb_left);
121*4882a593Smuzhiyun else if (this->port > evtchn->port)
122*4882a593Smuzhiyun new = &((*new)->rb_right);
123*4882a593Smuzhiyun else
124*4882a593Smuzhiyun return -EEXIST;
125*4882a593Smuzhiyun }
126*4882a593Smuzhiyun
127*4882a593Smuzhiyun /* Add new node and rebalance tree. */
128*4882a593Smuzhiyun rb_link_node(&evtchn->node, parent, new);
129*4882a593Smuzhiyun rb_insert_color(&evtchn->node, &u->evtchns);
130*4882a593Smuzhiyun
131*4882a593Smuzhiyun return 0;
132*4882a593Smuzhiyun }
133*4882a593Smuzhiyun
del_evtchn(struct per_user_data * u,struct user_evtchn * evtchn)134*4882a593Smuzhiyun static void del_evtchn(struct per_user_data *u, struct user_evtchn *evtchn)
135*4882a593Smuzhiyun {
136*4882a593Smuzhiyun u->nr_evtchns--;
137*4882a593Smuzhiyun rb_erase(&evtchn->node, &u->evtchns);
138*4882a593Smuzhiyun kfree(evtchn);
139*4882a593Smuzhiyun }
140*4882a593Smuzhiyun
/*
 * Look up the channel bound to @port in @u's tree.
 *
 * Returns the matching entry, or NULL if @port is not bound via this handle.
 */
static struct user_evtchn *find_evtchn(struct per_user_data *u,
				       evtchn_port_t port)
{
	struct rb_node *pos = u->evtchns.rb_node;

	while (pos) {
		struct user_evtchn *cur;

		cur = rb_entry(pos, struct user_evtchn, node);
		if (cur->port == port)
			return cur;

		/* Mirrors the insertion order used by add_evtchn(). */
		pos = (cur->port < port) ? pos->rb_left : pos->rb_right;
	}

	return NULL;
}
160*4882a593Smuzhiyun
evtchn_interrupt(int irq,void * data)161*4882a593Smuzhiyun static irqreturn_t evtchn_interrupt(int irq, void *data)
162*4882a593Smuzhiyun {
163*4882a593Smuzhiyun struct user_evtchn *evtchn = data;
164*4882a593Smuzhiyun struct per_user_data *u = evtchn->user;
165*4882a593Smuzhiyun
166*4882a593Smuzhiyun WARN(!evtchn->enabled,
167*4882a593Smuzhiyun "Interrupt for port %u, but apparently not enabled; per-user %p\n",
168*4882a593Smuzhiyun evtchn->port, u);
169*4882a593Smuzhiyun
170*4882a593Smuzhiyun evtchn->enabled = false;
171*4882a593Smuzhiyun
172*4882a593Smuzhiyun spin_lock(&u->ring_prod_lock);
173*4882a593Smuzhiyun
174*4882a593Smuzhiyun if ((u->ring_prod - u->ring_cons) < u->ring_size) {
175*4882a593Smuzhiyun *evtchn_ring_entry(u, u->ring_prod) = evtchn->port;
176*4882a593Smuzhiyun wmb(); /* Ensure ring contents visible */
177*4882a593Smuzhiyun if (u->ring_cons == u->ring_prod++) {
178*4882a593Smuzhiyun wake_up_interruptible(&u->evtchn_wait);
179*4882a593Smuzhiyun kill_fasync(&u->evtchn_async_queue,
180*4882a593Smuzhiyun SIGIO, POLL_IN);
181*4882a593Smuzhiyun }
182*4882a593Smuzhiyun } else
183*4882a593Smuzhiyun u->ring_overflow = 1;
184*4882a593Smuzhiyun
185*4882a593Smuzhiyun spin_unlock(&u->ring_prod_lock);
186*4882a593Smuzhiyun
187*4882a593Smuzhiyun return IRQ_HANDLED;
188*4882a593Smuzhiyun }
189*4882a593Smuzhiyun
evtchn_read(struct file * file,char __user * buf,size_t count,loff_t * ppos)190*4882a593Smuzhiyun static ssize_t evtchn_read(struct file *file, char __user *buf,
191*4882a593Smuzhiyun size_t count, loff_t *ppos)
192*4882a593Smuzhiyun {
193*4882a593Smuzhiyun int rc;
194*4882a593Smuzhiyun unsigned int c, p, bytes1 = 0, bytes2 = 0;
195*4882a593Smuzhiyun struct per_user_data *u = file->private_data;
196*4882a593Smuzhiyun
197*4882a593Smuzhiyun /* Whole number of ports. */
198*4882a593Smuzhiyun count &= ~(sizeof(evtchn_port_t)-1);
199*4882a593Smuzhiyun
200*4882a593Smuzhiyun if (count == 0)
201*4882a593Smuzhiyun return 0;
202*4882a593Smuzhiyun
203*4882a593Smuzhiyun if (count > PAGE_SIZE)
204*4882a593Smuzhiyun count = PAGE_SIZE;
205*4882a593Smuzhiyun
206*4882a593Smuzhiyun for (;;) {
207*4882a593Smuzhiyun mutex_lock(&u->ring_cons_mutex);
208*4882a593Smuzhiyun
209*4882a593Smuzhiyun rc = -EFBIG;
210*4882a593Smuzhiyun if (u->ring_overflow)
211*4882a593Smuzhiyun goto unlock_out;
212*4882a593Smuzhiyun
213*4882a593Smuzhiyun c = u->ring_cons;
214*4882a593Smuzhiyun p = u->ring_prod;
215*4882a593Smuzhiyun if (c != p)
216*4882a593Smuzhiyun break;
217*4882a593Smuzhiyun
218*4882a593Smuzhiyun mutex_unlock(&u->ring_cons_mutex);
219*4882a593Smuzhiyun
220*4882a593Smuzhiyun if (file->f_flags & O_NONBLOCK)
221*4882a593Smuzhiyun return -EAGAIN;
222*4882a593Smuzhiyun
223*4882a593Smuzhiyun rc = wait_event_interruptible(u->evtchn_wait,
224*4882a593Smuzhiyun u->ring_cons != u->ring_prod);
225*4882a593Smuzhiyun if (rc)
226*4882a593Smuzhiyun return rc;
227*4882a593Smuzhiyun }
228*4882a593Smuzhiyun
229*4882a593Smuzhiyun /* Byte lengths of two chunks. Chunk split (if any) is at ring wrap. */
230*4882a593Smuzhiyun if (((c ^ p) & u->ring_size) != 0) {
231*4882a593Smuzhiyun bytes1 = (u->ring_size - evtchn_ring_offset(u, c)) *
232*4882a593Smuzhiyun sizeof(evtchn_port_t);
233*4882a593Smuzhiyun bytes2 = evtchn_ring_offset(u, p) * sizeof(evtchn_port_t);
234*4882a593Smuzhiyun } else {
235*4882a593Smuzhiyun bytes1 = (p - c) * sizeof(evtchn_port_t);
236*4882a593Smuzhiyun bytes2 = 0;
237*4882a593Smuzhiyun }
238*4882a593Smuzhiyun
239*4882a593Smuzhiyun /* Truncate chunks according to caller's maximum byte count. */
240*4882a593Smuzhiyun if (bytes1 > count) {
241*4882a593Smuzhiyun bytes1 = count;
242*4882a593Smuzhiyun bytes2 = 0;
243*4882a593Smuzhiyun } else if ((bytes1 + bytes2) > count) {
244*4882a593Smuzhiyun bytes2 = count - bytes1;
245*4882a593Smuzhiyun }
246*4882a593Smuzhiyun
247*4882a593Smuzhiyun rc = -EFAULT;
248*4882a593Smuzhiyun rmb(); /* Ensure that we see the port before we copy it. */
249*4882a593Smuzhiyun if (copy_to_user(buf, evtchn_ring_entry(u, c), bytes1) ||
250*4882a593Smuzhiyun ((bytes2 != 0) &&
251*4882a593Smuzhiyun copy_to_user(&buf[bytes1], &u->ring[0], bytes2)))
252*4882a593Smuzhiyun goto unlock_out;
253*4882a593Smuzhiyun
254*4882a593Smuzhiyun u->ring_cons += (bytes1 + bytes2) / sizeof(evtchn_port_t);
255*4882a593Smuzhiyun rc = bytes1 + bytes2;
256*4882a593Smuzhiyun
257*4882a593Smuzhiyun unlock_out:
258*4882a593Smuzhiyun mutex_unlock(&u->ring_cons_mutex);
259*4882a593Smuzhiyun return rc;
260*4882a593Smuzhiyun }
261*4882a593Smuzhiyun
evtchn_write(struct file * file,const char __user * buf,size_t count,loff_t * ppos)262*4882a593Smuzhiyun static ssize_t evtchn_write(struct file *file, const char __user *buf,
263*4882a593Smuzhiyun size_t count, loff_t *ppos)
264*4882a593Smuzhiyun {
265*4882a593Smuzhiyun int rc, i;
266*4882a593Smuzhiyun evtchn_port_t *kbuf = (evtchn_port_t *)__get_free_page(GFP_KERNEL);
267*4882a593Smuzhiyun struct per_user_data *u = file->private_data;
268*4882a593Smuzhiyun
269*4882a593Smuzhiyun if (kbuf == NULL)
270*4882a593Smuzhiyun return -ENOMEM;
271*4882a593Smuzhiyun
272*4882a593Smuzhiyun /* Whole number of ports. */
273*4882a593Smuzhiyun count &= ~(sizeof(evtchn_port_t)-1);
274*4882a593Smuzhiyun
275*4882a593Smuzhiyun rc = 0;
276*4882a593Smuzhiyun if (count == 0)
277*4882a593Smuzhiyun goto out;
278*4882a593Smuzhiyun
279*4882a593Smuzhiyun if (count > PAGE_SIZE)
280*4882a593Smuzhiyun count = PAGE_SIZE;
281*4882a593Smuzhiyun
282*4882a593Smuzhiyun rc = -EFAULT;
283*4882a593Smuzhiyun if (copy_from_user(kbuf, buf, count) != 0)
284*4882a593Smuzhiyun goto out;
285*4882a593Smuzhiyun
286*4882a593Smuzhiyun mutex_lock(&u->bind_mutex);
287*4882a593Smuzhiyun
288*4882a593Smuzhiyun for (i = 0; i < (count/sizeof(evtchn_port_t)); i++) {
289*4882a593Smuzhiyun evtchn_port_t port = kbuf[i];
290*4882a593Smuzhiyun struct user_evtchn *evtchn;
291*4882a593Smuzhiyun
292*4882a593Smuzhiyun evtchn = find_evtchn(u, port);
293*4882a593Smuzhiyun if (evtchn && !evtchn->enabled) {
294*4882a593Smuzhiyun evtchn->enabled = true;
295*4882a593Smuzhiyun xen_irq_lateeoi(irq_from_evtchn(port), 0);
296*4882a593Smuzhiyun }
297*4882a593Smuzhiyun }
298*4882a593Smuzhiyun
299*4882a593Smuzhiyun mutex_unlock(&u->bind_mutex);
300*4882a593Smuzhiyun
301*4882a593Smuzhiyun rc = count;
302*4882a593Smuzhiyun
303*4882a593Smuzhiyun out:
304*4882a593Smuzhiyun free_page((unsigned long)kbuf);
305*4882a593Smuzhiyun return rc;
306*4882a593Smuzhiyun }
307*4882a593Smuzhiyun
evtchn_resize_ring(struct per_user_data * u)308*4882a593Smuzhiyun static int evtchn_resize_ring(struct per_user_data *u)
309*4882a593Smuzhiyun {
310*4882a593Smuzhiyun unsigned int new_size;
311*4882a593Smuzhiyun evtchn_port_t *new_ring, *old_ring;
312*4882a593Smuzhiyun
313*4882a593Smuzhiyun /*
314*4882a593Smuzhiyun * Ensure the ring is large enough to capture all possible
315*4882a593Smuzhiyun * events. i.e., one free slot for each bound event.
316*4882a593Smuzhiyun */
317*4882a593Smuzhiyun if (u->nr_evtchns <= u->ring_size)
318*4882a593Smuzhiyun return 0;
319*4882a593Smuzhiyun
320*4882a593Smuzhiyun if (u->ring_size == 0)
321*4882a593Smuzhiyun new_size = 64;
322*4882a593Smuzhiyun else
323*4882a593Smuzhiyun new_size = 2 * u->ring_size;
324*4882a593Smuzhiyun
325*4882a593Smuzhiyun new_ring = kvmalloc_array(new_size, sizeof(*new_ring), GFP_KERNEL);
326*4882a593Smuzhiyun if (!new_ring)
327*4882a593Smuzhiyun return -ENOMEM;
328*4882a593Smuzhiyun
329*4882a593Smuzhiyun old_ring = u->ring;
330*4882a593Smuzhiyun
331*4882a593Smuzhiyun /*
332*4882a593Smuzhiyun * Access to the ring contents is serialized by either the
333*4882a593Smuzhiyun * prod /or/ cons lock so take both when resizing.
334*4882a593Smuzhiyun */
335*4882a593Smuzhiyun mutex_lock(&u->ring_cons_mutex);
336*4882a593Smuzhiyun spin_lock_irq(&u->ring_prod_lock);
337*4882a593Smuzhiyun
338*4882a593Smuzhiyun /*
339*4882a593Smuzhiyun * Copy the old ring contents to the new ring.
340*4882a593Smuzhiyun *
341*4882a593Smuzhiyun * To take care of wrapping, a full ring, and the new index
342*4882a593Smuzhiyun * pointing into the second half, simply copy the old contents
343*4882a593Smuzhiyun * twice.
344*4882a593Smuzhiyun *
345*4882a593Smuzhiyun * +---------+ +------------------+
346*4882a593Smuzhiyun * |34567 12| -> |34567 1234567 12|
347*4882a593Smuzhiyun * +-----p-c-+ +-------c------p---+
348*4882a593Smuzhiyun */
349*4882a593Smuzhiyun memcpy(new_ring, old_ring, u->ring_size * sizeof(*u->ring));
350*4882a593Smuzhiyun memcpy(new_ring + u->ring_size, old_ring,
351*4882a593Smuzhiyun u->ring_size * sizeof(*u->ring));
352*4882a593Smuzhiyun
353*4882a593Smuzhiyun u->ring = new_ring;
354*4882a593Smuzhiyun u->ring_size = new_size;
355*4882a593Smuzhiyun
356*4882a593Smuzhiyun spin_unlock_irq(&u->ring_prod_lock);
357*4882a593Smuzhiyun mutex_unlock(&u->ring_cons_mutex);
358*4882a593Smuzhiyun
359*4882a593Smuzhiyun evtchn_free_ring(old_ring);
360*4882a593Smuzhiyun
361*4882a593Smuzhiyun return 0;
362*4882a593Smuzhiyun }
363*4882a593Smuzhiyun
/*
 * Bind an already-allocated event channel @port to the file handle @u.
 *
 * Allocates the tracking entry, inserts it into @u's tree, makes sure the
 * notification ring has room for it, installs evtchn_interrupt() as the
 * late-EOI handler and finally marks the port refcounted.
 *
 * If insertion, ring resizing or handler binding fails, the port is closed
 * in the hypervisor and the tracking entry is freed.
 *
 * Returns the result of evtchn_make_refcounted() on success, otherwise a
 * negative errno (-ENOMEM, -EEXIST, or the failure from binding the handler).
 */
static int evtchn_bind_to_user(struct per_user_data *u, evtchn_port_t port)
{
	struct user_evtchn *evtchn;
	struct evtchn_close close;
	bool linked = false;
	int rc = 0;

	/*
	 * Ports are never reused, so every caller should pass in a
	 * unique port.
	 *
	 * (Locking not necessary because we haven't registered the
	 * interrupt handler yet, and our caller has already
	 * serialized bind operations.)
	 */

	evtchn = kzalloc(sizeof(*evtchn), GFP_KERNEL);
	if (!evtchn)
		return -ENOMEM;

	evtchn->user = u;
	evtchn->port = port;
	evtchn->enabled = true; /* start enabled */

	rc = add_evtchn(u, evtchn);
	if (rc < 0)
		goto err;
	linked = true;

	rc = evtchn_resize_ring(u);
	if (rc < 0)
		goto err;

	rc = bind_evtchn_to_irqhandler_lateeoi(port, evtchn_interrupt, 0,
					       u->name, evtchn);
	if (rc < 0)
		goto err;

	rc = evtchn_make_refcounted(port);
	return rc;

err:
	/* bind failed, should close the port now */
	close.port = port;
	if (HYPERVISOR_event_channel_op(EVTCHNOP_close, &close) != 0)
		BUG();

	if (linked) {
		del_evtchn(u, evtchn);
	} else {
		/*
		 * add_evtchn() rejected the node, so it was never linked
		 * into the tree. Calling rb_erase() on it would clobber the
		 * tree root, so just undo the count that add_evtchn() took
		 * before failing and free the entry.
		 */
		u->nr_evtchns--;
		kfree(evtchn);
	}
	return rc;
}
411*4882a593Smuzhiyun
evtchn_unbind_from_user(struct per_user_data * u,struct user_evtchn * evtchn)412*4882a593Smuzhiyun static void evtchn_unbind_from_user(struct per_user_data *u,
413*4882a593Smuzhiyun struct user_evtchn *evtchn)
414*4882a593Smuzhiyun {
415*4882a593Smuzhiyun int irq = irq_from_evtchn(evtchn->port);
416*4882a593Smuzhiyun
417*4882a593Smuzhiyun BUG_ON(irq < 0);
418*4882a593Smuzhiyun
419*4882a593Smuzhiyun unbind_from_irqhandler(irq, evtchn);
420*4882a593Smuzhiyun
421*4882a593Smuzhiyun del_evtchn(u, evtchn);
422*4882a593Smuzhiyun }
423*4882a593Smuzhiyun
424*4882a593Smuzhiyun static DEFINE_PER_CPU(int, bind_last_selected_cpu);
425*4882a593Smuzhiyun
evtchn_bind_interdom_next_vcpu(evtchn_port_t evtchn)426*4882a593Smuzhiyun static void evtchn_bind_interdom_next_vcpu(evtchn_port_t evtchn)
427*4882a593Smuzhiyun {
428*4882a593Smuzhiyun unsigned int selected_cpu, irq;
429*4882a593Smuzhiyun struct irq_desc *desc;
430*4882a593Smuzhiyun unsigned long flags;
431*4882a593Smuzhiyun
432*4882a593Smuzhiyun irq = irq_from_evtchn(evtchn);
433*4882a593Smuzhiyun desc = irq_to_desc(irq);
434*4882a593Smuzhiyun
435*4882a593Smuzhiyun if (!desc)
436*4882a593Smuzhiyun return;
437*4882a593Smuzhiyun
438*4882a593Smuzhiyun raw_spin_lock_irqsave(&desc->lock, flags);
439*4882a593Smuzhiyun selected_cpu = this_cpu_read(bind_last_selected_cpu);
440*4882a593Smuzhiyun selected_cpu = cpumask_next_and(selected_cpu,
441*4882a593Smuzhiyun desc->irq_common_data.affinity, cpu_online_mask);
442*4882a593Smuzhiyun
443*4882a593Smuzhiyun if (unlikely(selected_cpu >= nr_cpu_ids))
444*4882a593Smuzhiyun selected_cpu = cpumask_first_and(desc->irq_common_data.affinity,
445*4882a593Smuzhiyun cpu_online_mask);
446*4882a593Smuzhiyun
447*4882a593Smuzhiyun this_cpu_write(bind_last_selected_cpu, selected_cpu);
448*4882a593Smuzhiyun
449*4882a593Smuzhiyun /* unmask expects irqs to be disabled */
450*4882a593Smuzhiyun xen_set_affinity_evtchn(desc, selected_cpu);
451*4882a593Smuzhiyun raw_spin_unlock_irqrestore(&desc->lock, flags);
452*4882a593Smuzhiyun }
453*4882a593Smuzhiyun
evtchn_ioctl(struct file * file,unsigned int cmd,unsigned long arg)454*4882a593Smuzhiyun static long evtchn_ioctl(struct file *file,
455*4882a593Smuzhiyun unsigned int cmd, unsigned long arg)
456*4882a593Smuzhiyun {
457*4882a593Smuzhiyun int rc;
458*4882a593Smuzhiyun struct per_user_data *u = file->private_data;
459*4882a593Smuzhiyun void __user *uarg = (void __user *) arg;
460*4882a593Smuzhiyun
461*4882a593Smuzhiyun /* Prevent bind from racing with unbind */
462*4882a593Smuzhiyun mutex_lock(&u->bind_mutex);
463*4882a593Smuzhiyun
464*4882a593Smuzhiyun switch (cmd) {
465*4882a593Smuzhiyun case IOCTL_EVTCHN_BIND_VIRQ: {
466*4882a593Smuzhiyun struct ioctl_evtchn_bind_virq bind;
467*4882a593Smuzhiyun struct evtchn_bind_virq bind_virq;
468*4882a593Smuzhiyun
469*4882a593Smuzhiyun rc = -EACCES;
470*4882a593Smuzhiyun if (u->restrict_domid != UNRESTRICTED_DOMID)
471*4882a593Smuzhiyun break;
472*4882a593Smuzhiyun
473*4882a593Smuzhiyun rc = -EFAULT;
474*4882a593Smuzhiyun if (copy_from_user(&bind, uarg, sizeof(bind)))
475*4882a593Smuzhiyun break;
476*4882a593Smuzhiyun
477*4882a593Smuzhiyun bind_virq.virq = bind.virq;
478*4882a593Smuzhiyun bind_virq.vcpu = xen_vcpu_nr(0);
479*4882a593Smuzhiyun rc = HYPERVISOR_event_channel_op(EVTCHNOP_bind_virq,
480*4882a593Smuzhiyun &bind_virq);
481*4882a593Smuzhiyun if (rc != 0)
482*4882a593Smuzhiyun break;
483*4882a593Smuzhiyun
484*4882a593Smuzhiyun rc = evtchn_bind_to_user(u, bind_virq.port);
485*4882a593Smuzhiyun if (rc == 0)
486*4882a593Smuzhiyun rc = bind_virq.port;
487*4882a593Smuzhiyun break;
488*4882a593Smuzhiyun }
489*4882a593Smuzhiyun
490*4882a593Smuzhiyun case IOCTL_EVTCHN_BIND_INTERDOMAIN: {
491*4882a593Smuzhiyun struct ioctl_evtchn_bind_interdomain bind;
492*4882a593Smuzhiyun struct evtchn_bind_interdomain bind_interdomain;
493*4882a593Smuzhiyun
494*4882a593Smuzhiyun rc = -EFAULT;
495*4882a593Smuzhiyun if (copy_from_user(&bind, uarg, sizeof(bind)))
496*4882a593Smuzhiyun break;
497*4882a593Smuzhiyun
498*4882a593Smuzhiyun rc = -EACCES;
499*4882a593Smuzhiyun if (u->restrict_domid != UNRESTRICTED_DOMID &&
500*4882a593Smuzhiyun u->restrict_domid != bind.remote_domain)
501*4882a593Smuzhiyun break;
502*4882a593Smuzhiyun
503*4882a593Smuzhiyun bind_interdomain.remote_dom = bind.remote_domain;
504*4882a593Smuzhiyun bind_interdomain.remote_port = bind.remote_port;
505*4882a593Smuzhiyun rc = HYPERVISOR_event_channel_op(EVTCHNOP_bind_interdomain,
506*4882a593Smuzhiyun &bind_interdomain);
507*4882a593Smuzhiyun if (rc != 0)
508*4882a593Smuzhiyun break;
509*4882a593Smuzhiyun
510*4882a593Smuzhiyun rc = evtchn_bind_to_user(u, bind_interdomain.local_port);
511*4882a593Smuzhiyun if (rc == 0) {
512*4882a593Smuzhiyun rc = bind_interdomain.local_port;
513*4882a593Smuzhiyun evtchn_bind_interdom_next_vcpu(rc);
514*4882a593Smuzhiyun }
515*4882a593Smuzhiyun break;
516*4882a593Smuzhiyun }
517*4882a593Smuzhiyun
518*4882a593Smuzhiyun case IOCTL_EVTCHN_BIND_UNBOUND_PORT: {
519*4882a593Smuzhiyun struct ioctl_evtchn_bind_unbound_port bind;
520*4882a593Smuzhiyun struct evtchn_alloc_unbound alloc_unbound;
521*4882a593Smuzhiyun
522*4882a593Smuzhiyun rc = -EACCES;
523*4882a593Smuzhiyun if (u->restrict_domid != UNRESTRICTED_DOMID)
524*4882a593Smuzhiyun break;
525*4882a593Smuzhiyun
526*4882a593Smuzhiyun rc = -EFAULT;
527*4882a593Smuzhiyun if (copy_from_user(&bind, uarg, sizeof(bind)))
528*4882a593Smuzhiyun break;
529*4882a593Smuzhiyun
530*4882a593Smuzhiyun alloc_unbound.dom = DOMID_SELF;
531*4882a593Smuzhiyun alloc_unbound.remote_dom = bind.remote_domain;
532*4882a593Smuzhiyun rc = HYPERVISOR_event_channel_op(EVTCHNOP_alloc_unbound,
533*4882a593Smuzhiyun &alloc_unbound);
534*4882a593Smuzhiyun if (rc != 0)
535*4882a593Smuzhiyun break;
536*4882a593Smuzhiyun
537*4882a593Smuzhiyun rc = evtchn_bind_to_user(u, alloc_unbound.port);
538*4882a593Smuzhiyun if (rc == 0)
539*4882a593Smuzhiyun rc = alloc_unbound.port;
540*4882a593Smuzhiyun break;
541*4882a593Smuzhiyun }
542*4882a593Smuzhiyun
543*4882a593Smuzhiyun case IOCTL_EVTCHN_UNBIND: {
544*4882a593Smuzhiyun struct ioctl_evtchn_unbind unbind;
545*4882a593Smuzhiyun struct user_evtchn *evtchn;
546*4882a593Smuzhiyun
547*4882a593Smuzhiyun rc = -EFAULT;
548*4882a593Smuzhiyun if (copy_from_user(&unbind, uarg, sizeof(unbind)))
549*4882a593Smuzhiyun break;
550*4882a593Smuzhiyun
551*4882a593Smuzhiyun rc = -EINVAL;
552*4882a593Smuzhiyun if (unbind.port >= xen_evtchn_nr_channels())
553*4882a593Smuzhiyun break;
554*4882a593Smuzhiyun
555*4882a593Smuzhiyun rc = -ENOTCONN;
556*4882a593Smuzhiyun evtchn = find_evtchn(u, unbind.port);
557*4882a593Smuzhiyun if (!evtchn)
558*4882a593Smuzhiyun break;
559*4882a593Smuzhiyun
560*4882a593Smuzhiyun disable_irq(irq_from_evtchn(unbind.port));
561*4882a593Smuzhiyun evtchn_unbind_from_user(u, evtchn);
562*4882a593Smuzhiyun rc = 0;
563*4882a593Smuzhiyun break;
564*4882a593Smuzhiyun }
565*4882a593Smuzhiyun
566*4882a593Smuzhiyun case IOCTL_EVTCHN_NOTIFY: {
567*4882a593Smuzhiyun struct ioctl_evtchn_notify notify;
568*4882a593Smuzhiyun struct user_evtchn *evtchn;
569*4882a593Smuzhiyun
570*4882a593Smuzhiyun rc = -EFAULT;
571*4882a593Smuzhiyun if (copy_from_user(¬ify, uarg, sizeof(notify)))
572*4882a593Smuzhiyun break;
573*4882a593Smuzhiyun
574*4882a593Smuzhiyun rc = -ENOTCONN;
575*4882a593Smuzhiyun evtchn = find_evtchn(u, notify.port);
576*4882a593Smuzhiyun if (evtchn) {
577*4882a593Smuzhiyun notify_remote_via_evtchn(notify.port);
578*4882a593Smuzhiyun rc = 0;
579*4882a593Smuzhiyun }
580*4882a593Smuzhiyun break;
581*4882a593Smuzhiyun }
582*4882a593Smuzhiyun
583*4882a593Smuzhiyun case IOCTL_EVTCHN_RESET: {
584*4882a593Smuzhiyun /* Initialise the ring to empty. Clear errors. */
585*4882a593Smuzhiyun mutex_lock(&u->ring_cons_mutex);
586*4882a593Smuzhiyun spin_lock_irq(&u->ring_prod_lock);
587*4882a593Smuzhiyun u->ring_cons = u->ring_prod = u->ring_overflow = 0;
588*4882a593Smuzhiyun spin_unlock_irq(&u->ring_prod_lock);
589*4882a593Smuzhiyun mutex_unlock(&u->ring_cons_mutex);
590*4882a593Smuzhiyun rc = 0;
591*4882a593Smuzhiyun break;
592*4882a593Smuzhiyun }
593*4882a593Smuzhiyun
594*4882a593Smuzhiyun case IOCTL_EVTCHN_RESTRICT_DOMID: {
595*4882a593Smuzhiyun struct ioctl_evtchn_restrict_domid ierd;
596*4882a593Smuzhiyun
597*4882a593Smuzhiyun rc = -EACCES;
598*4882a593Smuzhiyun if (u->restrict_domid != UNRESTRICTED_DOMID)
599*4882a593Smuzhiyun break;
600*4882a593Smuzhiyun
601*4882a593Smuzhiyun rc = -EFAULT;
602*4882a593Smuzhiyun if (copy_from_user(&ierd, uarg, sizeof(ierd)))
603*4882a593Smuzhiyun break;
604*4882a593Smuzhiyun
605*4882a593Smuzhiyun rc = -EINVAL;
606*4882a593Smuzhiyun if (ierd.domid == 0 || ierd.domid >= DOMID_FIRST_RESERVED)
607*4882a593Smuzhiyun break;
608*4882a593Smuzhiyun
609*4882a593Smuzhiyun u->restrict_domid = ierd.domid;
610*4882a593Smuzhiyun rc = 0;
611*4882a593Smuzhiyun
612*4882a593Smuzhiyun break;
613*4882a593Smuzhiyun }
614*4882a593Smuzhiyun
615*4882a593Smuzhiyun default:
616*4882a593Smuzhiyun rc = -ENOSYS;
617*4882a593Smuzhiyun break;
618*4882a593Smuzhiyun }
619*4882a593Smuzhiyun mutex_unlock(&u->bind_mutex);
620*4882a593Smuzhiyun
621*4882a593Smuzhiyun return rc;
622*4882a593Smuzhiyun }
623*4882a593Smuzhiyun
evtchn_poll(struct file * file,poll_table * wait)624*4882a593Smuzhiyun static __poll_t evtchn_poll(struct file *file, poll_table *wait)
625*4882a593Smuzhiyun {
626*4882a593Smuzhiyun __poll_t mask = EPOLLOUT | EPOLLWRNORM;
627*4882a593Smuzhiyun struct per_user_data *u = file->private_data;
628*4882a593Smuzhiyun
629*4882a593Smuzhiyun poll_wait(file, &u->evtchn_wait, wait);
630*4882a593Smuzhiyun if (u->ring_cons != u->ring_prod)
631*4882a593Smuzhiyun mask |= EPOLLIN | EPOLLRDNORM;
632*4882a593Smuzhiyun if (u->ring_overflow)
633*4882a593Smuzhiyun mask = EPOLLERR;
634*4882a593Smuzhiyun return mask;
635*4882a593Smuzhiyun }
636*4882a593Smuzhiyun
evtchn_fasync(int fd,struct file * filp,int on)637*4882a593Smuzhiyun static int evtchn_fasync(int fd, struct file *filp, int on)
638*4882a593Smuzhiyun {
639*4882a593Smuzhiyun struct per_user_data *u = filp->private_data;
640*4882a593Smuzhiyun return fasync_helper(fd, filp, on, &u->evtchn_async_queue);
641*4882a593Smuzhiyun }
642*4882a593Smuzhiyun
evtchn_open(struct inode * inode,struct file * filp)643*4882a593Smuzhiyun static int evtchn_open(struct inode *inode, struct file *filp)
644*4882a593Smuzhiyun {
645*4882a593Smuzhiyun struct per_user_data *u;
646*4882a593Smuzhiyun
647*4882a593Smuzhiyun u = kzalloc(sizeof(*u), GFP_KERNEL);
648*4882a593Smuzhiyun if (u == NULL)
649*4882a593Smuzhiyun return -ENOMEM;
650*4882a593Smuzhiyun
651*4882a593Smuzhiyun u->name = kasprintf(GFP_KERNEL, "evtchn:%s", current->comm);
652*4882a593Smuzhiyun if (u->name == NULL) {
653*4882a593Smuzhiyun kfree(u);
654*4882a593Smuzhiyun return -ENOMEM;
655*4882a593Smuzhiyun }
656*4882a593Smuzhiyun
657*4882a593Smuzhiyun init_waitqueue_head(&u->evtchn_wait);
658*4882a593Smuzhiyun
659*4882a593Smuzhiyun mutex_init(&u->bind_mutex);
660*4882a593Smuzhiyun mutex_init(&u->ring_cons_mutex);
661*4882a593Smuzhiyun spin_lock_init(&u->ring_prod_lock);
662*4882a593Smuzhiyun
663*4882a593Smuzhiyun u->restrict_domid = UNRESTRICTED_DOMID;
664*4882a593Smuzhiyun
665*4882a593Smuzhiyun filp->private_data = u;
666*4882a593Smuzhiyun
667*4882a593Smuzhiyun return stream_open(inode, filp);
668*4882a593Smuzhiyun }
669*4882a593Smuzhiyun
evtchn_release(struct inode * inode,struct file * filp)670*4882a593Smuzhiyun static int evtchn_release(struct inode *inode, struct file *filp)
671*4882a593Smuzhiyun {
672*4882a593Smuzhiyun struct per_user_data *u = filp->private_data;
673*4882a593Smuzhiyun struct rb_node *node;
674*4882a593Smuzhiyun
675*4882a593Smuzhiyun while ((node = u->evtchns.rb_node)) {
676*4882a593Smuzhiyun struct user_evtchn *evtchn;
677*4882a593Smuzhiyun
678*4882a593Smuzhiyun evtchn = rb_entry(node, struct user_evtchn, node);
679*4882a593Smuzhiyun disable_irq(irq_from_evtchn(evtchn->port));
680*4882a593Smuzhiyun evtchn_unbind_from_user(u, evtchn);
681*4882a593Smuzhiyun }
682*4882a593Smuzhiyun
683*4882a593Smuzhiyun evtchn_free_ring(u->ring);
684*4882a593Smuzhiyun kfree(u->name);
685*4882a593Smuzhiyun kfree(u);
686*4882a593Smuzhiyun
687*4882a593Smuzhiyun return 0;
688*4882a593Smuzhiyun }
689*4882a593Smuzhiyun
690*4882a593Smuzhiyun static const struct file_operations evtchn_fops = {
691*4882a593Smuzhiyun .owner = THIS_MODULE,
692*4882a593Smuzhiyun .read = evtchn_read,
693*4882a593Smuzhiyun .write = evtchn_write,
694*4882a593Smuzhiyun .unlocked_ioctl = evtchn_ioctl,
695*4882a593Smuzhiyun .poll = evtchn_poll,
696*4882a593Smuzhiyun .fasync = evtchn_fasync,
697*4882a593Smuzhiyun .open = evtchn_open,
698*4882a593Smuzhiyun .release = evtchn_release,
699*4882a593Smuzhiyun .llseek = no_llseek,
700*4882a593Smuzhiyun };
701*4882a593Smuzhiyun
702*4882a593Smuzhiyun static struct miscdevice evtchn_miscdev = {
703*4882a593Smuzhiyun .minor = MISC_DYNAMIC_MINOR,
704*4882a593Smuzhiyun .name = "xen/evtchn",
705*4882a593Smuzhiyun .fops = &evtchn_fops,
706*4882a593Smuzhiyun };
evtchn_init(void)707*4882a593Smuzhiyun static int __init evtchn_init(void)
708*4882a593Smuzhiyun {
709*4882a593Smuzhiyun int err;
710*4882a593Smuzhiyun
711*4882a593Smuzhiyun if (!xen_domain())
712*4882a593Smuzhiyun return -ENODEV;
713*4882a593Smuzhiyun
714*4882a593Smuzhiyun /* Create '/dev/xen/evtchn'. */
715*4882a593Smuzhiyun err = misc_register(&evtchn_miscdev);
716*4882a593Smuzhiyun if (err != 0) {
717*4882a593Smuzhiyun pr_err("Could not register /dev/xen/evtchn\n");
718*4882a593Smuzhiyun return err;
719*4882a593Smuzhiyun }
720*4882a593Smuzhiyun
721*4882a593Smuzhiyun pr_info("Event-channel device installed\n");
722*4882a593Smuzhiyun
723*4882a593Smuzhiyun return 0;
724*4882a593Smuzhiyun }
725*4882a593Smuzhiyun
evtchn_cleanup(void)726*4882a593Smuzhiyun static void __exit evtchn_cleanup(void)
727*4882a593Smuzhiyun {
728*4882a593Smuzhiyun misc_deregister(&evtchn_miscdev);
729*4882a593Smuzhiyun }
730*4882a593Smuzhiyun
731*4882a593Smuzhiyun module_init(evtchn_init);
732*4882a593Smuzhiyun module_exit(evtchn_cleanup);
733*4882a593Smuzhiyun
734*4882a593Smuzhiyun MODULE_LICENSE("GPL");
735