xref: /OK3568_Linux_fs/kernel/virt/kvm/eventfd.c (revision 4882a59341e53eb6f0b4789bf948001014eff981)
// SPDX-License-Identifier: GPL-2.0-only
/*
 * kvm eventfd support - use eventfd objects to signal various KVM events
 *
 * Copyright 2009 Novell.  All Rights Reserved.
 * Copyright 2010 Red Hat, Inc. and/or its affiliates.
 *
 * Author:
 *	Gregory Haskins <ghaskins@novell.com>
 */

#include <linux/kvm_host.h>
#include <linux/kvm.h>
#include <linux/kvm_irqfd.h>
#include <linux/workqueue.h>
#include <linux/syscalls.h>
#include <linux/wait.h>
#include <linux/poll.h>
#include <linux/file.h>
#include <linux/list.h>
#include <linux/eventfd.h>
#include <linux/kernel.h>
#include <linux/srcu.h>
#include <linux/slab.h>
#include <linux/seqlock.h>
#include <linux/irqbypass.h>
#include <trace/events/kvm.h>

#include <kvm/iodev.h>

#ifdef CONFIG_HAVE_KVM_IRQFD

static struct workqueue_struct *irqfd_cleanup_wq;

bool __attribute__((weak))
kvm_arch_irqfd_allowed(struct kvm *kvm, struct kvm_irqfd *args)
{
	return true;
}

static void
irqfd_inject(struct work_struct *work)
{
	struct kvm_kernel_irqfd *irqfd =
		container_of(work, struct kvm_kernel_irqfd, inject);
	struct kvm *kvm = irqfd->kvm;

	if (!irqfd->resampler) {
		kvm_set_irq(kvm, KVM_USERSPACE_IRQ_SOURCE_ID, irqfd->gsi, 1,
				false);
		kvm_set_irq(kvm, KVM_USERSPACE_IRQ_SOURCE_ID, irqfd->gsi, 0,
				false);
	} else
		kvm_set_irq(kvm, KVM_IRQFD_RESAMPLE_IRQ_SOURCE_ID,
			    irqfd->gsi, 1, false);
}

/*
 * Since resampler irqfds share an IRQ source ID, we de-assert once
 * then notify all of the resampler irqfds using this GSI.  We can't
 * do multiple de-asserts or we risk racing with incoming re-asserts.
 */
static void
irqfd_resampler_ack(struct kvm_irq_ack_notifier *kian)
{
	struct kvm_kernel_irqfd_resampler *resampler;
	struct kvm *kvm;
	struct kvm_kernel_irqfd *irqfd;
	int idx;

	resampler = container_of(kian,
			struct kvm_kernel_irqfd_resampler, notifier);
	kvm = resampler->kvm;

	kvm_set_irq(kvm, KVM_IRQFD_RESAMPLE_IRQ_SOURCE_ID,
		    resampler->notifier.gsi, 0, false);

	idx = srcu_read_lock(&kvm->irq_srcu);

	list_for_each_entry_rcu(irqfd, &resampler->list, resampler_link)
		eventfd_signal(irqfd->resamplefd, 1);

	srcu_read_unlock(&kvm->irq_srcu, idx);
}

static void
irqfd_resampler_shutdown(struct kvm_kernel_irqfd *irqfd)
{
	struct kvm_kernel_irqfd_resampler *resampler = irqfd->resampler;
	struct kvm *kvm = resampler->kvm;

	mutex_lock(&kvm->irqfds.resampler_lock);

	list_del_rcu(&irqfd->resampler_link);
	synchronize_srcu(&kvm->irq_srcu);

	if (list_empty(&resampler->list)) {
		list_del(&resampler->link);
		kvm_unregister_irq_ack_notifier(kvm, &resampler->notifier);
		kvm_set_irq(kvm, KVM_IRQFD_RESAMPLE_IRQ_SOURCE_ID,
			    resampler->notifier.gsi, 0, false);
		kfree(resampler);
	}

	mutex_unlock(&kvm->irqfds.resampler_lock);
}

/*
 * Race-free decouple logic (ordering is critical)
 */
static void
irqfd_shutdown(struct work_struct *work)
{
	struct kvm_kernel_irqfd *irqfd =
		container_of(work, struct kvm_kernel_irqfd, shutdown);
	struct kvm *kvm = irqfd->kvm;
	u64 cnt;

	/* Make sure irqfd has been initialized in assign path. */
	synchronize_srcu(&kvm->irq_srcu);

	/*
	 * Synchronize with the wait-queue and unhook ourselves to prevent
	 * further events.
	 */
	eventfd_ctx_remove_wait_queue(irqfd->eventfd, &irqfd->wait, &cnt);

	/*
	 * We know no new events will be scheduled at this point, so block
	 * until all previously outstanding events have completed
	 */
	flush_work(&irqfd->inject);

	if (irqfd->resampler) {
		irqfd_resampler_shutdown(irqfd);
		eventfd_ctx_put(irqfd->resamplefd);
	}

	/*
	 * It is now safe to release the object's resources
	 */
#ifdef CONFIG_HAVE_KVM_IRQ_BYPASS
	irq_bypass_unregister_consumer(&irqfd->consumer);
#endif
	eventfd_ctx_put(irqfd->eventfd);
	kfree(irqfd);
}


/* assumes kvm->irqfds.lock is held */
static bool
irqfd_is_active(struct kvm_kernel_irqfd *irqfd)
{
	return list_empty(&irqfd->list) ? false : true;
}

/*
 * Mark the irqfd as inactive and schedule it for removal
 *
 * assumes kvm->irqfds.lock is held
 */
static void
irqfd_deactivate(struct kvm_kernel_irqfd *irqfd)
{
	BUG_ON(!irqfd_is_active(irqfd));

	list_del_init(&irqfd->list);

	queue_work(irqfd_cleanup_wq, &irqfd->shutdown);
}

int __attribute__((weak)) kvm_arch_set_irq_inatomic(
				struct kvm_kernel_irq_routing_entry *irq,
				struct kvm *kvm, int irq_source_id,
				int level,
				bool line_status)
{
	return -EWOULDBLOCK;
}

/*
 * Called with wqh->lock held and interrupts disabled
 */
static int
irqfd_wakeup(wait_queue_entry_t *wait, unsigned mode, int sync, void *key)
{
	struct kvm_kernel_irqfd *irqfd =
		container_of(wait, struct kvm_kernel_irqfd, wait);
	__poll_t flags = key_to_poll(key);
	struct kvm_kernel_irq_routing_entry irq;
	struct kvm *kvm = irqfd->kvm;
	unsigned seq;
	int idx;

	if (flags & EPOLLIN) {
		idx = srcu_read_lock(&kvm->irq_srcu);
		do {
			seq = read_seqcount_begin(&irqfd->irq_entry_sc);
			irq = irqfd->irq_entry;
		} while (read_seqcount_retry(&irqfd->irq_entry_sc, seq));
		/* An event has been signaled, inject an interrupt */
		if (kvm_arch_set_irq_inatomic(&irq, kvm,
					      KVM_USERSPACE_IRQ_SOURCE_ID, 1,
					      false) == -EWOULDBLOCK)
			schedule_work(&irqfd->inject);
		srcu_read_unlock(&kvm->irq_srcu, idx);
	}

	if (flags & EPOLLHUP) {
		/* The eventfd is closing, detach from KVM */
		unsigned long iflags;

		spin_lock_irqsave(&kvm->irqfds.lock, iflags);

		/*
		 * We must check if someone deactivated the irqfd before
		 * we could acquire the irqfds.lock since the item is
		 * deactivated from the KVM side before it is unhooked from
		 * the wait-queue.  If it is already deactivated, we can
		 * simply return knowing the other side will cleanup for us.
		 * We cannot race against the irqfd going away since the
		 * other side is required to acquire wqh->lock, which we hold
		 */
		if (irqfd_is_active(irqfd))
			irqfd_deactivate(irqfd);

		spin_unlock_irqrestore(&kvm->irqfds.lock, iflags);
	}

	return 0;
}

static void
irqfd_ptable_queue_proc(struct file *file, wait_queue_head_t *wqh,
			poll_table *pt)
{
	struct kvm_kernel_irqfd *irqfd =
		container_of(pt, struct kvm_kernel_irqfd, pt);
	add_wait_queue(wqh, &irqfd->wait);
}

/* Must be called under irqfds.lock */
static void irqfd_update(struct kvm *kvm, struct kvm_kernel_irqfd *irqfd)
{
	struct kvm_kernel_irq_routing_entry *e;
	struct kvm_kernel_irq_routing_entry entries[KVM_NR_IRQCHIPS];
	int n_entries;

	n_entries = kvm_irq_map_gsi(kvm, entries, irqfd->gsi);

	write_seqcount_begin(&irqfd->irq_entry_sc);

	e = entries;
	if (n_entries == 1)
		irqfd->irq_entry = *e;
	else
		irqfd->irq_entry.type = 0;

	write_seqcount_end(&irqfd->irq_entry_sc);
}

#ifdef CONFIG_HAVE_KVM_IRQ_BYPASS
void __attribute__((weak)) kvm_arch_irq_bypass_stop(
				struct irq_bypass_consumer *cons)
{
}

void __attribute__((weak)) kvm_arch_irq_bypass_start(
				struct irq_bypass_consumer *cons)
{
}

int  __attribute__((weak)) kvm_arch_update_irqfd_routing(
				struct kvm *kvm, unsigned int host_irq,
				uint32_t guest_irq, bool set)
{
	return 0;
}
#endif

static int
kvm_irqfd_assign(struct kvm *kvm, struct kvm_irqfd *args)
{
	struct kvm_kernel_irqfd *irqfd, *tmp;
	struct fd f;
	struct eventfd_ctx *eventfd = NULL, *resamplefd = NULL;
	int ret;
	__poll_t events;
	int idx;

	if (!kvm_arch_intc_initialized(kvm))
		return -EAGAIN;

	if (!kvm_arch_irqfd_allowed(kvm, args))
		return -EINVAL;

	irqfd = kzalloc(sizeof(*irqfd), GFP_KERNEL_ACCOUNT);
	if (!irqfd)
		return -ENOMEM;

	irqfd->kvm = kvm;
	irqfd->gsi = args->gsi;
	INIT_LIST_HEAD(&irqfd->list);
	INIT_WORK(&irqfd->inject, irqfd_inject);
	INIT_WORK(&irqfd->shutdown, irqfd_shutdown);
	seqcount_spinlock_init(&irqfd->irq_entry_sc, &kvm->irqfds.lock);

	f = fdget(args->fd);
	if (!f.file) {
		ret = -EBADF;
		goto out;
	}

	eventfd = eventfd_ctx_fileget(f.file);
	if (IS_ERR(eventfd)) {
		ret = PTR_ERR(eventfd);
		goto fail;
	}

	irqfd->eventfd = eventfd;

	if (args->flags & KVM_IRQFD_FLAG_RESAMPLE) {
		struct kvm_kernel_irqfd_resampler *resampler;

		resamplefd = eventfd_ctx_fdget(args->resamplefd);
		if (IS_ERR(resamplefd)) {
			ret = PTR_ERR(resamplefd);
			goto fail;
		}

		irqfd->resamplefd = resamplefd;
		INIT_LIST_HEAD(&irqfd->resampler_link);

		mutex_lock(&kvm->irqfds.resampler_lock);

		list_for_each_entry(resampler,
				    &kvm->irqfds.resampler_list, link) {
			if (resampler->notifier.gsi == irqfd->gsi) {
				irqfd->resampler = resampler;
				break;
			}
		}

		if (!irqfd->resampler) {
			resampler = kzalloc(sizeof(*resampler),
					    GFP_KERNEL_ACCOUNT);
			if (!resampler) {
				ret = -ENOMEM;
				mutex_unlock(&kvm->irqfds.resampler_lock);
				goto fail;
			}

			resampler->kvm = kvm;
			INIT_LIST_HEAD(&resampler->list);
			resampler->notifier.gsi = irqfd->gsi;
			resampler->notifier.irq_acked = irqfd_resampler_ack;
			INIT_LIST_HEAD(&resampler->link);

			list_add(&resampler->link, &kvm->irqfds.resampler_list);
			kvm_register_irq_ack_notifier(kvm,
						      &resampler->notifier);
			irqfd->resampler = resampler;
		}

		list_add_rcu(&irqfd->resampler_link, &irqfd->resampler->list);
		synchronize_srcu(&kvm->irq_srcu);

		mutex_unlock(&kvm->irqfds.resampler_lock);
	}

	/*
	 * Install our own custom wake-up handling so we are notified via
	 * a callback whenever someone signals the underlying eventfd
	 */
	init_waitqueue_func_entry(&irqfd->wait, irqfd_wakeup);
	init_poll_funcptr(&irqfd->pt, irqfd_ptable_queue_proc);

	spin_lock_irq(&kvm->irqfds.lock);

	ret = 0;
	list_for_each_entry(tmp, &kvm->irqfds.items, list) {
		if (irqfd->eventfd != tmp->eventfd)
			continue;
		/* This fd is used for another irq already. */
		ret = -EBUSY;
		spin_unlock_irq(&kvm->irqfds.lock);
		goto fail;
	}

	idx = srcu_read_lock(&kvm->irq_srcu);
	irqfd_update(kvm, irqfd);

	list_add_tail(&irqfd->list, &kvm->irqfds.items);

	spin_unlock_irq(&kvm->irqfds.lock);

	/*
	 * Check if there was an event already pending on the eventfd
	 * before we registered, and trigger it as if we didn't miss it.
	 */
	events = vfs_poll(f.file, &irqfd->pt);

	if (events & EPOLLIN)
		schedule_work(&irqfd->inject);

#ifdef CONFIG_HAVE_KVM_IRQ_BYPASS
	if (kvm_arch_has_irq_bypass()) {
		irqfd->consumer.token = (void *)irqfd->eventfd;
		irqfd->consumer.add_producer = kvm_arch_irq_bypass_add_producer;
		irqfd->consumer.del_producer = kvm_arch_irq_bypass_del_producer;
		irqfd->consumer.stop = kvm_arch_irq_bypass_stop;
		irqfd->consumer.start = kvm_arch_irq_bypass_start;
		ret = irq_bypass_register_consumer(&irqfd->consumer);
		if (ret)
			pr_info("irq bypass consumer (token %p) registration fails: %d\n",
				irqfd->consumer.token, ret);
	}
#endif

	srcu_read_unlock(&kvm->irq_srcu, idx);

	/*
	 * do not drop the file until the irqfd is fully initialized, otherwise
	 * we might race against the EPOLLHUP
	 */
	fdput(f);
	return 0;

fail:
	if (irqfd->resampler)
		irqfd_resampler_shutdown(irqfd);

	if (resamplefd && !IS_ERR(resamplefd))
		eventfd_ctx_put(resamplefd);

	if (eventfd && !IS_ERR(eventfd))
		eventfd_ctx_put(eventfd);

	fdput(f);

out:
	kfree(irqfd);
	return ret;
}

bool kvm_irq_has_notifier(struct kvm *kvm, unsigned irqchip, unsigned pin)
{
	struct kvm_irq_ack_notifier *kian;
	int gsi, idx;

	idx = srcu_read_lock(&kvm->irq_srcu);
	gsi = kvm_irq_map_chip_pin(kvm, irqchip, pin);
	if (gsi != -1)
		hlist_for_each_entry_srcu(kian, &kvm->irq_ack_notifier_list,
					  link, srcu_read_lock_held(&kvm->irq_srcu))
			if (kian->gsi == gsi) {
				srcu_read_unlock(&kvm->irq_srcu, idx);
				return true;
			}

	srcu_read_unlock(&kvm->irq_srcu, idx);

	return false;
}
EXPORT_SYMBOL_GPL(kvm_irq_has_notifier);

void kvm_notify_acked_gsi(struct kvm *kvm, int gsi)
{
	struct kvm_irq_ack_notifier *kian;

	hlist_for_each_entry_srcu(kian, &kvm->irq_ack_notifier_list,
				  link, srcu_read_lock_held(&kvm->irq_srcu))
		if (kian->gsi == gsi)
			kian->irq_acked(kian);
}

void kvm_notify_acked_irq(struct kvm *kvm, unsigned irqchip, unsigned pin)
{
	int gsi, idx;

	trace_kvm_ack_irq(irqchip, pin);

	idx = srcu_read_lock(&kvm->irq_srcu);
	gsi = kvm_irq_map_chip_pin(kvm, irqchip, pin);
	if (gsi != -1)
		kvm_notify_acked_gsi(kvm, gsi);
	srcu_read_unlock(&kvm->irq_srcu, idx);
}

void kvm_register_irq_ack_notifier(struct kvm *kvm,
				   struct kvm_irq_ack_notifier *kian)
{
	mutex_lock(&kvm->irq_lock);
	hlist_add_head_rcu(&kian->link, &kvm->irq_ack_notifier_list);
	mutex_unlock(&kvm->irq_lock);
	kvm_arch_post_irq_ack_notifier_list_update(kvm);
}

void kvm_unregister_irq_ack_notifier(struct kvm *kvm,
				    struct kvm_irq_ack_notifier *kian)
{
	mutex_lock(&kvm->irq_lock);
	hlist_del_init_rcu(&kian->link);
	mutex_unlock(&kvm->irq_lock);
	synchronize_srcu(&kvm->irq_srcu);
	kvm_arch_post_irq_ack_notifier_list_update(kvm);
}
#endif

void
kvm_eventfd_init(struct kvm *kvm)
{
#ifdef CONFIG_HAVE_KVM_IRQFD
	spin_lock_init(&kvm->irqfds.lock);
	INIT_LIST_HEAD(&kvm->irqfds.items);
	INIT_LIST_HEAD(&kvm->irqfds.resampler_list);
	mutex_init(&kvm->irqfds.resampler_lock);
#endif
	INIT_LIST_HEAD(&kvm->ioeventfds);
}

#ifdef CONFIG_HAVE_KVM_IRQFD
/*
 * shutdown any irqfd's that match fd+gsi
 */
static int
kvm_irqfd_deassign(struct kvm *kvm, struct kvm_irqfd *args)
{
	struct kvm_kernel_irqfd *irqfd, *tmp;
	struct eventfd_ctx *eventfd;

	eventfd = eventfd_ctx_fdget(args->fd);
	if (IS_ERR(eventfd))
		return PTR_ERR(eventfd);

	spin_lock_irq(&kvm->irqfds.lock);

	list_for_each_entry_safe(irqfd, tmp, &kvm->irqfds.items, list) {
		if (irqfd->eventfd == eventfd && irqfd->gsi == args->gsi) {
			/*
			 * This clearing of irq_entry.type is needed for when
			 * another thread calls kvm_irq_routing_update before
			 * we flush workqueue below (we synchronize with
			 * kvm_irq_routing_update using irqfds.lock).
			 */
			write_seqcount_begin(&irqfd->irq_entry_sc);
			irqfd->irq_entry.type = 0;
			write_seqcount_end(&irqfd->irq_entry_sc);
			irqfd_deactivate(irqfd);
		}
	}

	spin_unlock_irq(&kvm->irqfds.lock);
	eventfd_ctx_put(eventfd);

	/*
	 * Block until we know all outstanding shutdown jobs have completed
	 * so that we guarantee there will not be any more interrupts on this
	 * gsi once this deassign function returns.
	 */
	flush_workqueue(irqfd_cleanup_wq);

	return 0;
}

int
kvm_irqfd(struct kvm *kvm, struct kvm_irqfd *args)
{
	if (args->flags & ~(KVM_IRQFD_FLAG_DEASSIGN | KVM_IRQFD_FLAG_RESAMPLE))
		return -EINVAL;

	if (args->flags & KVM_IRQFD_FLAG_DEASSIGN)
		return kvm_irqfd_deassign(kvm, args);

	return kvm_irqfd_assign(kvm, args);
}
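
/*
 * Example (not part of this file): a minimal userspace sketch of how a VMM
 * typically drives KVM_IRQFD.  The vm_fd, the GSI number and the missing
 * error handling are illustrative assumptions only.
 *
 *	int efd = eventfd(0, EFD_CLOEXEC);
 *	struct kvm_irqfd irqfd = {
 *		.fd  = efd,	// eventfd to hook up to the guest irqchip
 *		.gsi = 5,	// guest interrupt line to inject
 *	};
 *	ioctl(vm_fd, KVM_IRQFD, &irqfd);	// installs irqfd_wakeup() above
 *
 *	uint64_t one = 1;
 *	write(efd, &one, sizeof(one));	// signalling the eventfd injects GSI 5
 *
 * With KVM_IRQFD_FLAG_RESAMPLE and a second eventfd in .resamplefd, the GSI
 * is handled as level-triggered: the resample eventfd is signalled on guest
 * EOI (see irqfd_resampler_ack() above) so userspace can re-assert if needed.
 */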

/*
 * This function is called as the kvm VM fd is being released. Shutdown all
 * irqfds that still remain open
 */
void
kvm_irqfd_release(struct kvm *kvm)
{
	struct kvm_kernel_irqfd *irqfd, *tmp;

	spin_lock_irq(&kvm->irqfds.lock);

	list_for_each_entry_safe(irqfd, tmp, &kvm->irqfds.items, list)
		irqfd_deactivate(irqfd);

	spin_unlock_irq(&kvm->irqfds.lock);

	/*
	 * Block until we know all outstanding shutdown jobs have completed
	 * since we do not take a kvm* reference.
	 */
	flush_workqueue(irqfd_cleanup_wq);

}

/*
 * Take note of a change in irq routing.
 * Caller must invoke synchronize_srcu(&kvm->irq_srcu) afterwards.
 */
void kvm_irq_routing_update(struct kvm *kvm)
{
	struct kvm_kernel_irqfd *irqfd;

	spin_lock_irq(&kvm->irqfds.lock);

	list_for_each_entry(irqfd, &kvm->irqfds.items, list) {
		irqfd_update(kvm, irqfd);

#ifdef CONFIG_HAVE_KVM_IRQ_BYPASS
		if (irqfd->producer) {
			int ret = kvm_arch_update_irqfd_routing(
					irqfd->kvm, irqfd->producer->irq,
					irqfd->gsi, 1);
			WARN_ON(ret);
		}
#endif
	}

	spin_unlock_irq(&kvm->irqfds.lock);
}

/*
 * create a host-wide workqueue for issuing deferred shutdown requests
 * aggregated from all vm* instances. We need our own isolated
 * queue to ease flushing work items when a VM exits.
 */
int kvm_irqfd_init(void)
{
	irqfd_cleanup_wq = alloc_workqueue("kvm-irqfd-cleanup", 0, 0);
	if (!irqfd_cleanup_wq)
		return -ENOMEM;

	return 0;
}

void kvm_irqfd_exit(void)
{
	destroy_workqueue(irqfd_cleanup_wq);
}
#endif

/*
 * --------------------------------------------------------------------
 * ioeventfd: translate a PIO/MMIO memory write to an eventfd signal.
 *
 * userspace can register a PIO/MMIO address with an eventfd for receiving
 * notification when the memory has been touched.
 * --------------------------------------------------------------------
 */

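/*
 * Example (not part of this file): a minimal userspace sketch of how a VMM
 * typically registers an ioeventfd.  The vm_fd, the doorbell address and the
 * datamatch value are illustrative assumptions only.
 *
 *	int efd = eventfd(0, EFD_CLOEXEC);
 *	struct kvm_ioeventfd ioev = {
 *		.addr      = 0xc050,	// guest PIO port to watch
 *		.len       = 2,		// only 2-byte writes are considered
 *		.fd        = efd,
 *		.flags     = KVM_IOEVENTFD_FLAG_PIO |
 *			     KVM_IOEVENTFD_FLAG_DATAMATCH,
 *		.datamatch = 0x1,	// only writes of value 1 match
 *	};
 *	ioctl(vm_fd, KVM_IOEVENTFD, &ioev);
 *
 * A matching guest write is then absorbed by ioeventfd_write() below and
 * signals efd instead of exiting to userspace; without DATAMATCH the entry
 * is a wildcard, and len == 0 matches any access size at that address.
 */
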
struct _ioeventfd {
	struct list_head     list;
	u64                  addr;
	int                  length;
	struct eventfd_ctx  *eventfd;
	u64                  datamatch;
	struct kvm_io_device dev;
	u8                   bus_idx;
	bool                 wildcard;
};

static inline struct _ioeventfd *
to_ioeventfd(struct kvm_io_device *dev)
{
	return container_of(dev, struct _ioeventfd, dev);
}

static void
ioeventfd_release(struct _ioeventfd *p)
{
	eventfd_ctx_put(p->eventfd);
	list_del(&p->list);
	kfree(p);
}

static bool
ioeventfd_in_range(struct _ioeventfd *p, gpa_t addr, int len, const void *val)
{
	u64 _val;

	if (addr != p->addr)
		/* address must be precise for a hit */
		return false;

	if (!p->length)
		/* length = 0 means only look at the address, so always a hit */
		return true;

	if (len != p->length)
		/* address-range must be precise for a hit */
		return false;

	if (p->wildcard)
		/* all else equal, wildcard is always a hit */
		return true;

	/* otherwise, we have to actually compare the data */

	BUG_ON(!IS_ALIGNED((unsigned long)val, len));

	switch (len) {
	case 1:
		_val = *(u8 *)val;
		break;
	case 2:
		_val = *(u16 *)val;
		break;
	case 4:
		_val = *(u32 *)val;
		break;
	case 8:
		_val = *(u64 *)val;
		break;
	default:
		return false;
	}

	return _val == p->datamatch;
}

/* MMIO/PIO writes trigger an event if the addr/val match */
static int
ioeventfd_write(struct kvm_vcpu *vcpu, struct kvm_io_device *this, gpa_t addr,
		int len, const void *val)
{
	struct _ioeventfd *p = to_ioeventfd(this);

	if (!ioeventfd_in_range(p, addr, len, val))
		return -EOPNOTSUPP;

	eventfd_signal(p->eventfd, 1);
	return 0;
}

/*
 * This function is called as KVM is completely shutting down.  We do not
 * need to worry about locking just nuke anything we have as quickly as possible
 */
static void
ioeventfd_destructor(struct kvm_io_device *this)
{
	struct _ioeventfd *p = to_ioeventfd(this);

	ioeventfd_release(p);
}

static const struct kvm_io_device_ops ioeventfd_ops = {
	.write      = ioeventfd_write,
	.destructor = ioeventfd_destructor,
};

/* assumes kvm->slots_lock held */
static bool
ioeventfd_check_collision(struct kvm *kvm, struct _ioeventfd *p)
{
	struct _ioeventfd *_p;

	list_for_each_entry(_p, &kvm->ioeventfds, list)
		if (_p->bus_idx == p->bus_idx &&
		    _p->addr == p->addr &&
		    (!_p->length || !p->length ||
		     (_p->length == p->length &&
		      (_p->wildcard || p->wildcard ||
		       _p->datamatch == p->datamatch))))
			return true;

	return false;
}

static enum kvm_bus ioeventfd_bus_from_flags(__u32 flags)
{
	if (flags & KVM_IOEVENTFD_FLAG_PIO)
		return KVM_PIO_BUS;
	if (flags & KVM_IOEVENTFD_FLAG_VIRTIO_CCW_NOTIFY)
		return KVM_VIRTIO_CCW_NOTIFY_BUS;
	return KVM_MMIO_BUS;
}

static int kvm_assign_ioeventfd_idx(struct kvm *kvm,
				enum kvm_bus bus_idx,
				struct kvm_ioeventfd *args)
{

	struct eventfd_ctx *eventfd;
	struct _ioeventfd *p;
	int ret;

	eventfd = eventfd_ctx_fdget(args->fd);
	if (IS_ERR(eventfd))
		return PTR_ERR(eventfd);

	p = kzalloc(sizeof(*p), GFP_KERNEL_ACCOUNT);
	if (!p) {
		ret = -ENOMEM;
		goto fail;
	}

	INIT_LIST_HEAD(&p->list);
	p->addr    = args->addr;
	p->bus_idx = bus_idx;
	p->length  = args->len;
	p->eventfd = eventfd;

	/* The datamatch feature is optional, otherwise this is a wildcard */
	if (args->flags & KVM_IOEVENTFD_FLAG_DATAMATCH)
		p->datamatch = args->datamatch;
	else
		p->wildcard = true;

	mutex_lock(&kvm->slots_lock);

	/* Verify that there isn't a match already */
	if (ioeventfd_check_collision(kvm, p)) {
		ret = -EEXIST;
		goto unlock_fail;
	}

	kvm_iodevice_init(&p->dev, &ioeventfd_ops);

	ret = kvm_io_bus_register_dev(kvm, bus_idx, p->addr, p->length,
				      &p->dev);
	if (ret < 0)
		goto unlock_fail;

	kvm_get_bus(kvm, bus_idx)->ioeventfd_count++;
	list_add_tail(&p->list, &kvm->ioeventfds);

	mutex_unlock(&kvm->slots_lock);

	return 0;

unlock_fail:
	mutex_unlock(&kvm->slots_lock);

fail:
	kfree(p);
	eventfd_ctx_put(eventfd);

	return ret;
}

static int
kvm_deassign_ioeventfd_idx(struct kvm *kvm, enum kvm_bus bus_idx,
			   struct kvm_ioeventfd *args)
{
	struct _ioeventfd        *p, *tmp;
	struct eventfd_ctx       *eventfd;
	struct kvm_io_bus	 *bus;
	int                       ret = -ENOENT;
	bool                      wildcard;

	eventfd = eventfd_ctx_fdget(args->fd);
	if (IS_ERR(eventfd))
		return PTR_ERR(eventfd);

	wildcard = !(args->flags & KVM_IOEVENTFD_FLAG_DATAMATCH);

	mutex_lock(&kvm->slots_lock);

	list_for_each_entry_safe(p, tmp, &kvm->ioeventfds, list) {

		if (p->bus_idx != bus_idx ||
		    p->eventfd != eventfd  ||
		    p->addr != args->addr  ||
		    p->length != args->len ||
		    p->wildcard != wildcard)
			continue;

		if (!p->wildcard && p->datamatch != args->datamatch)
			continue;

		kvm_io_bus_unregister_dev(kvm, bus_idx, &p->dev);
		bus = kvm_get_bus(kvm, bus_idx);
		if (bus)
			bus->ioeventfd_count--;
		ioeventfd_release(p);
		ret = 0;
		break;
	}

	mutex_unlock(&kvm->slots_lock);

	eventfd_ctx_put(eventfd);

	return ret;
}

static int kvm_deassign_ioeventfd(struct kvm *kvm, struct kvm_ioeventfd *args)
{
	enum kvm_bus bus_idx = ioeventfd_bus_from_flags(args->flags);
	int ret = kvm_deassign_ioeventfd_idx(kvm, bus_idx, args);

	if (!args->len && bus_idx == KVM_MMIO_BUS)
		kvm_deassign_ioeventfd_idx(kvm, KVM_FAST_MMIO_BUS, args);

	return ret;
}

static int
kvm_assign_ioeventfd(struct kvm *kvm, struct kvm_ioeventfd *args)
{
	enum kvm_bus              bus_idx;
	int ret;

	bus_idx = ioeventfd_bus_from_flags(args->flags);
	/* must be natural-word sized, or 0 to ignore length */
	switch (args->len) {
	case 0:
	case 1:
	case 2:
	case 4:
	case 8:
		break;
	default:
		return -EINVAL;
	}

	/* check for range overflow */
	if (args->addr + args->len < args->addr)
		return -EINVAL;

	/* check for extra flags that we don't understand */
	if (args->flags & ~KVM_IOEVENTFD_VALID_FLAG_MASK)
		return -EINVAL;

	/* ioeventfd with no length can't be combined with DATAMATCH */
	if (!args->len && (args->flags & KVM_IOEVENTFD_FLAG_DATAMATCH))
		return -EINVAL;

	ret = kvm_assign_ioeventfd_idx(kvm, bus_idx, args);
	if (ret)
		goto fail;

	/* When length is ignored, MMIO is also put on a separate bus, for
	 * faster lookups.
	 */
	if (!args->len && bus_idx == KVM_MMIO_BUS) {
		ret = kvm_assign_ioeventfd_idx(kvm, KVM_FAST_MMIO_BUS, args);
		if (ret < 0)
			goto fast_fail;
	}

	return 0;

fast_fail:
	kvm_deassign_ioeventfd_idx(kvm, bus_idx, args);
fail:
	return ret;
}

int
kvm_ioeventfd(struct kvm *kvm, struct kvm_ioeventfd *args)
{
	if (args->flags & KVM_IOEVENTFD_FLAG_DEASSIGN)
		return kvm_deassign_ioeventfd(kvm, args);

	return kvm_assign_ioeventfd(kvm, args);
}