// SPDX-License-Identifier: GPL-2.0-only
/*
 * Kernel-based Virtual Machine driver for Linux
 *
 * AMD SVM support
 *
 * Copyright (C) 2006 Qumranet, Inc.
 * Copyright 2010 Red Hat, Inc. and/or its affiliates.
 *
 * Authors:
 *   Yaniv Kamay  <yaniv@qumranet.com>
 *   Avi Kivity   <avi@qumranet.com>
 */

#define pr_fmt(fmt) "SVM: " fmt

#include <linux/kvm_types.h>
#include <linux/hashtable.h>
#include <linux/amd-iommu.h>
#include <linux/kvm_host.h>

#include <asm/irq_remapping.h>

#include "trace.h"
#include "lapic.h"
#include "x86.h"
#include "irq.h"
#include "svm.h"

/* enable / disable AVIC */
int avic;
#ifdef CONFIG_X86_LOCAL_APIC
module_param(avic, int, S_IRUGO);
#endif

#define SVM_AVIC_DOORBELL	0xc001011b

#define AVIC_HPA_MASK	~((0xFFFULL << 52) | 0xFFF)
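/*
 * AVIC_HPA_MASK keeps the 4KB-aligned host-physical-address bits [51:12]
 * and clears bits [63:52] and [11:0].  For example, a backing page at
 * HPA 0x123456000 passes through the mask unchanged, while any stray
 * low or high bits would be stripped before the address is programmed
 * into the VMCB below.
 */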

/*
 * 0xff is broadcast, so the max index allowed for physical APIC ID
 * table is 0xfe.  APIC IDs above 0xff are reserved.
 */
#define AVIC_MAX_PHYSICAL_ID_COUNT	255

#define AVIC_UNACCEL_ACCESS_WRITE_MASK		1
#define AVIC_UNACCEL_ACCESS_OFFSET_MASK		0xFF0
#define AVIC_UNACCEL_ACCESS_VECTOR_MASK		0xFFFFFFFF

/* AVIC GATAG is encoded using VM and VCPU IDs */
#define AVIC_VCPU_ID_BITS		8
#define AVIC_VCPU_ID_MASK		((1 << AVIC_VCPU_ID_BITS) - 1)

#define AVIC_VM_ID_BITS			24
#define AVIC_VM_ID_NR			(1 << AVIC_VM_ID_BITS)
#define AVIC_VM_ID_MASK			((1 << AVIC_VM_ID_BITS) - 1)

#define AVIC_GATAG(x, y)		(((x & AVIC_VM_ID_MASK) << AVIC_VCPU_ID_BITS) | \
					 (y & AVIC_VCPU_ID_MASK))
#define AVIC_GATAG_TO_VMID(x)		((x >> AVIC_VCPU_ID_BITS) & AVIC_VM_ID_MASK)
#define AVIC_GATAG_TO_VCPUID(x)		(x & AVIC_VCPU_ID_MASK)
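
/*
 * Worked example of the GATAG encoding above: for vm_id 0xabc and
 * vcpu_id 0x12, AVIC_GATAG(0xabc, 0x12) yields (0xabc << 8) | 0x12 =
 * 0xabc12; AVIC_GATAG_TO_VMID(0xabc12) recovers 0xabc and
 * AVIC_GATAG_TO_VCPUID(0xabc12) recovers 0x12.
 */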

/* Note:
 * This hash table is used to map VM_ID to a struct kvm_svm when
 * handling AMD IOMMU GALOG notifications, so that the right vCPU
 * can be scheduled in.
 */
#define SVM_VM_DATA_HASH_BITS	8
static DEFINE_HASHTABLE(svm_vm_data_hash, SVM_VM_DATA_HASH_BITS);
static u32 next_vm_id = 0;
static bool next_vm_id_wrapped = 0;
static DEFINE_SPINLOCK(svm_vm_data_hash_lock);

/*
 * This is a wrapper of struct amd_iommu_ir_data.
 */
struct amd_svm_iommu_ir {
	struct list_head node;	/* Used by SVM for per-vcpu ir_list */
	void *data;		/* Storing pointer to struct amd_ir_data */
};

enum avic_ipi_failure_cause {
	AVIC_IPI_FAILURE_INVALID_INT_TYPE,
	AVIC_IPI_FAILURE_TARGET_NOT_RUNNING,
	AVIC_IPI_FAILURE_INVALID_TARGET,
	AVIC_IPI_FAILURE_INVALID_BACKING_PAGE,
};

/* Note:
 * This function is called from the IOMMU driver to notify
 * SVM to schedule in a particular vCPU of a particular VM.
 */
int avic_ga_log_notifier(u32 ga_tag)
{
	unsigned long flags;
	struct kvm_svm *kvm_svm;
	struct kvm_vcpu *vcpu = NULL;
	u32 vm_id = AVIC_GATAG_TO_VMID(ga_tag);
	u32 vcpu_id = AVIC_GATAG_TO_VCPUID(ga_tag);

	pr_debug("SVM: %s: vm_id=%#x, vcpu_id=%#x\n", __func__, vm_id, vcpu_id);
	trace_kvm_avic_ga_log(vm_id, vcpu_id);

	spin_lock_irqsave(&svm_vm_data_hash_lock, flags);
	hash_for_each_possible(svm_vm_data_hash, kvm_svm, hnode, vm_id) {
		if (kvm_svm->avic_vm_id != vm_id)
			continue;
		vcpu = kvm_get_vcpu_by_id(&kvm_svm->kvm, vcpu_id);
		break;
	}
	spin_unlock_irqrestore(&svm_vm_data_hash_lock, flags);

	/* Note:
	 * At this point, the IOMMU should have already set the pending
	 * bit in the vAPIC backing page. So, we just need to schedule
	 * in the vcpu.
	 */
	if (vcpu)
		kvm_vcpu_wake_up(vcpu);

	return 0;
}
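
/*
 * The notifier above is expected to be registered with the AMD IOMMU
 * driver (amd_iommu_register_ga_log_notifier()) from the SVM setup code,
 * so GA-log entries written while a target vCPU is not running end up
 * waking that vCPU here.
 */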

void avic_vm_destroy(struct kvm *kvm)
{
	unsigned long flags;
	struct kvm_svm *kvm_svm = to_kvm_svm(kvm);

	if (!avic)
		return;

	if (kvm_svm->avic_logical_id_table_page)
		__free_page(kvm_svm->avic_logical_id_table_page);
	if (kvm_svm->avic_physical_id_table_page)
		__free_page(kvm_svm->avic_physical_id_table_page);

	spin_lock_irqsave(&svm_vm_data_hash_lock, flags);
	hash_del(&kvm_svm->hnode);
	spin_unlock_irqrestore(&svm_vm_data_hash_lock, flags);
}

int avic_vm_init(struct kvm *kvm)
{
	unsigned long flags;
	int err = -ENOMEM;
	struct kvm_svm *kvm_svm = to_kvm_svm(kvm);
	struct kvm_svm *k2;
	struct page *p_page;
	struct page *l_page;
	u32 vm_id;

	if (!avic)
		return 0;

	/* Allocating physical APIC ID table (4KB) */
	p_page = alloc_page(GFP_KERNEL_ACCOUNT | __GFP_ZERO);
	if (!p_page)
		goto free_avic;

	kvm_svm->avic_physical_id_table_page = p_page;

	/* Allocating logical APIC ID table (4KB) */
	l_page = alloc_page(GFP_KERNEL_ACCOUNT | __GFP_ZERO);
	if (!l_page)
		goto free_avic;

	kvm_svm->avic_logical_id_table_page = l_page;

	spin_lock_irqsave(&svm_vm_data_hash_lock, flags);
 again:
	vm_id = next_vm_id = (next_vm_id + 1) & AVIC_VM_ID_MASK;
	if (vm_id == 0) { /* id is 1-based, zero is not okay */
		next_vm_id_wrapped = 1;
		goto again;
	}
	/* Is it still in use? Only possible if wrapped at least once */
	if (next_vm_id_wrapped) {
		hash_for_each_possible(svm_vm_data_hash, k2, hnode, vm_id) {
			if (k2->avic_vm_id == vm_id)
				goto again;
		}
	}
	kvm_svm->avic_vm_id = vm_id;
	hash_add(svm_vm_data_hash, &kvm_svm->hnode, kvm_svm->avic_vm_id);
	spin_unlock_irqrestore(&svm_vm_data_hash_lock, flags);

	return 0;

free_avic:
	avic_vm_destroy(kvm);
	return err;
}

void avic_init_vmcb(struct vcpu_svm *svm)
{
	struct vmcb *vmcb = svm->vmcb;
	struct kvm_svm *kvm_svm = to_kvm_svm(svm->vcpu.kvm);
	phys_addr_t bpa = __sme_set(page_to_phys(svm->avic_backing_page));
	phys_addr_t lpa = __sme_set(page_to_phys(kvm_svm->avic_logical_id_table_page));
	phys_addr_t ppa = __sme_set(page_to_phys(kvm_svm->avic_physical_id_table_page));

	vmcb->control.avic_backing_page = bpa & AVIC_HPA_MASK;
	vmcb->control.avic_logical_id = lpa & AVIC_HPA_MASK;
	vmcb->control.avic_physical_id = ppa & AVIC_HPA_MASK;
	vmcb->control.avic_physical_id |= AVIC_MAX_PHYSICAL_ID_COUNT;
	if (kvm_apicv_activated(svm->vcpu.kvm))
		vmcb->control.int_ctl |= AVIC_ENABLE_MASK;
	else
		vmcb->control.int_ctl &= ~AVIC_ENABLE_MASK;
}
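
/*
 * Layout note for avic_physical_id as programmed above: the page-aligned
 * bits selected by AVIC_HPA_MASK hold the physical APIC ID table address,
 * while the low byte (ORed in as AVIC_MAX_PHYSICAL_ID_COUNT) holds the
 * largest APIC ID index the hardware may look up in that table.
 */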

static u64 *avic_get_physical_id_entry(struct kvm_vcpu *vcpu,
				       unsigned int index)
{
	u64 *avic_physical_id_table;
	struct kvm_svm *kvm_svm = to_kvm_svm(vcpu->kvm);

	if (index >= AVIC_MAX_PHYSICAL_ID_COUNT)
		return NULL;

	avic_physical_id_table = page_address(kvm_svm->avic_physical_id_table_page);

	return &avic_physical_id_table[index];
}
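
/*
 * The physical APIC ID table is a single 4KB page of 8-byte entries,
 * i.e. 512 slots, but only indices 0..AVIC_MAX_PHYSICAL_ID_COUNT-1
 * (0..254) are ever handed out by the helper above; the rest of the
 * page stays zeroed (invalid).
 */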

/**
 * Note:
 * AVIC hardware walks the nested page table to check permissions,
 * but does not use the SPA address specified in the leaf page
 * table entry since it uses the address in the AVIC_BACKING_PAGE
 * pointer field of the VMCB. Therefore, we set up the
 * APIC_ACCESS_PAGE_PRIVATE_MEMSLOT (4KB) here.
 */
static int avic_update_access_page(struct kvm *kvm, bool activate)
{
	int ret = 0;

	mutex_lock(&kvm->slots_lock);
	/*
	 * During kvm_destroy_vm(), kvm_pit_set_reinject() could trigger
	 * an APICv mode change, which updates the
	 * APIC_ACCESS_PAGE_PRIVATE_MEMSLOT memory region. So, we need to
	 * ensure that kvm->mm == current->mm.
	 */
	if ((kvm->arch.apic_access_page_done == activate) ||
	    (kvm->mm != current->mm))
		goto out;

	ret = __x86_set_memory_region(kvm,
				      APIC_ACCESS_PAGE_PRIVATE_MEMSLOT,
				      APIC_DEFAULT_PHYS_BASE,
				      activate ? PAGE_SIZE : 0);
	if (ret)
		goto out;

	kvm->arch.apic_access_page_done = activate;
out:
	mutex_unlock(&kvm->slots_lock);
	return ret;
}

static int avic_init_backing_page(struct kvm_vcpu *vcpu)
{
	u64 *entry, new_entry;
	int id = vcpu->vcpu_id;
	struct vcpu_svm *svm = to_svm(vcpu);

	if (id >= AVIC_MAX_PHYSICAL_ID_COUNT)
		return -EINVAL;

	if (!svm->vcpu.arch.apic->regs)
		return -EINVAL;

	if (kvm_apicv_activated(vcpu->kvm)) {
		int ret;

		ret = avic_update_access_page(vcpu->kvm, true);
		if (ret)
			return ret;
	}

	svm->avic_backing_page = virt_to_page(svm->vcpu.arch.apic->regs);

	/* Setting AVIC backing page address in the physical APIC ID table */
	entry = avic_get_physical_id_entry(vcpu, id);
	if (!entry)
		return -EINVAL;

	new_entry = __sme_set((page_to_phys(svm->avic_backing_page) &
			       AVIC_PHYSICAL_ID_ENTRY_BACKING_PAGE_MASK) |
			      AVIC_PHYSICAL_ID_ENTRY_VALID_MASK);
	WRITE_ONCE(*entry, new_entry);

	svm->avic_physical_id_cache = entry;

	return 0;
}

int avic_incomplete_ipi_interception(struct vcpu_svm *svm)
{
	u32 icrh = svm->vmcb->control.exit_info_1 >> 32;
	u32 icrl = svm->vmcb->control.exit_info_1;
	u32 id = svm->vmcb->control.exit_info_2 >> 32;
	u32 index = svm->vmcb->control.exit_info_2 & 0xFF;
	struct kvm_lapic *apic = svm->vcpu.arch.apic;

	trace_kvm_avic_incomplete_ipi(svm->vcpu.vcpu_id, icrh, icrl, id, index);

	switch (id) {
	case AVIC_IPI_FAILURE_INVALID_INT_TYPE:
		/*
		 * AVIC hardware handles the generation of
		 * IPIs when the specified Message Type is Fixed
		 * (also known as fixed delivery mode) and
		 * the Trigger Mode is edge-triggered. The hardware
		 * also supports self and broadcast delivery modes
		 * specified via the Destination Shorthand (DSH)
		 * field of the ICRL. Logical and physical APIC ID
		 * formats are supported. All other IPI types cause
		 * a #VMEXIT, which needs to be emulated.
		 */
		kvm_lapic_reg_write(apic, APIC_ICR2, icrh);
		kvm_lapic_reg_write(apic, APIC_ICR, icrl);
		break;
	case AVIC_IPI_FAILURE_TARGET_NOT_RUNNING: {
		int i;
		struct kvm_vcpu *vcpu;
		struct kvm *kvm = svm->vcpu.kvm;
		struct kvm_lapic *apic = svm->vcpu.arch.apic;

		/*
		 * At this point, we expect that the AVIC HW has already
		 * set the appropriate IRR bits on the valid target
		 * vcpus. So, we just need to kick the appropriate vcpu.
		 */
		kvm_for_each_vcpu(i, vcpu, kvm) {
			bool m = kvm_apic_match_dest(vcpu, apic,
						     icrl & APIC_SHORT_MASK,
						     GET_APIC_DEST_FIELD(icrh),
						     icrl & APIC_DEST_MASK);

			if (m && !avic_vcpu_is_running(vcpu))
				kvm_vcpu_wake_up(vcpu);
		}
		break;
	}
	case AVIC_IPI_FAILURE_INVALID_TARGET:
		break;
	case AVIC_IPI_FAILURE_INVALID_BACKING_PAGE:
		WARN_ONCE(1, "Invalid backing page\n");
		break;
	default:
		pr_err("Unknown IPI interception\n");
	}

	return 1;
}

static u32 *avic_get_logical_id_entry(struct kvm_vcpu *vcpu, u32 ldr, bool flat)
{
	struct kvm_svm *kvm_svm = to_kvm_svm(vcpu->kvm);
	int index;
	u32 *logical_apic_id_table;
	int dlid = GET_APIC_LOGICAL_ID(ldr);

	if (!dlid)
		return NULL;

	if (flat) { /* flat */
		index = ffs(dlid) - 1;
		if (index > 7)
			return NULL;
	} else { /* cluster */
		int cluster = (dlid & 0xf0) >> 4;
		int apic = ffs(dlid & 0x0f) - 1;

		if ((apic < 0) || (apic > 7) ||
		    (cluster >= 0xf))
			return NULL;
		index = (cluster << 2) + apic;
	}

	logical_apic_id_table = (u32 *) page_address(kvm_svm->avic_logical_id_table_page);

	return &logical_apic_id_table[index];
}
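
/*
 * Index calculation examples for the helper above: in flat mode a
 * logical ID of 0x20 sets bit 5, so the entry index is 5; in cluster
 * mode a logical ID of 0x42 means cluster 4, APIC bit 1, so the index
 * is (4 << 2) + 1 = 17.
 */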

static int avic_ldr_write(struct kvm_vcpu *vcpu, u8 g_physical_id, u32 ldr)
{
	bool flat;
	u32 *entry, new_entry;

	flat = kvm_lapic_get_reg(vcpu->arch.apic, APIC_DFR) == APIC_DFR_FLAT;
	entry = avic_get_logical_id_entry(vcpu, ldr, flat);
	if (!entry)
		return -EINVAL;

	new_entry = READ_ONCE(*entry);
	new_entry &= ~AVIC_LOGICAL_ID_ENTRY_GUEST_PHYSICAL_ID_MASK;
	new_entry |= (g_physical_id & AVIC_LOGICAL_ID_ENTRY_GUEST_PHYSICAL_ID_MASK);
	new_entry |= AVIC_LOGICAL_ID_ENTRY_VALID_MASK;
	WRITE_ONCE(*entry, new_entry);

	return 0;
}

static void avic_invalidate_logical_id_entry(struct kvm_vcpu *vcpu)
{
	struct vcpu_svm *svm = to_svm(vcpu);
	bool flat = svm->dfr_reg == APIC_DFR_FLAT;
	u32 *entry = avic_get_logical_id_entry(vcpu, svm->ldr_reg, flat);

	if (entry)
		clear_bit(AVIC_LOGICAL_ID_ENTRY_VALID_BIT, (unsigned long *)entry);
}

static int avic_handle_ldr_update(struct kvm_vcpu *vcpu)
{
	int ret = 0;
	struct vcpu_svm *svm = to_svm(vcpu);
	u32 ldr = kvm_lapic_get_reg(vcpu->arch.apic, APIC_LDR);
	u32 id = kvm_xapic_id(vcpu->arch.apic);

	if (ldr == svm->ldr_reg)
		return 0;

	avic_invalidate_logical_id_entry(vcpu);

	if (ldr)
		ret = avic_ldr_write(vcpu, id, ldr);

	if (!ret)
		svm->ldr_reg = ldr;

	return ret;
}

static int avic_handle_apic_id_update(struct kvm_vcpu *vcpu)
{
	u64 *old, *new;
	struct vcpu_svm *svm = to_svm(vcpu);
	u32 id = kvm_xapic_id(vcpu->arch.apic);

	if (vcpu->vcpu_id == id)
		return 0;

	old = avic_get_physical_id_entry(vcpu, vcpu->vcpu_id);
	new = avic_get_physical_id_entry(vcpu, id);
	if (!new || !old)
		return 1;

	/* We need to move the physical_id_entry to the new offset */
	*new = *old;
	*old = 0ULL;
	to_svm(vcpu)->avic_physical_id_cache = new;

	/*
	 * Also update the guest physical APIC ID in the logical
	 * APIC ID table entry if the LDR has already been set up.
	 */
	if (svm->ldr_reg)
		avic_handle_ldr_update(vcpu);

	return 0;
}

static void avic_handle_dfr_update(struct kvm_vcpu *vcpu)
{
	struct vcpu_svm *svm = to_svm(vcpu);
	u32 dfr = kvm_lapic_get_reg(vcpu->arch.apic, APIC_DFR);

	if (svm->dfr_reg == dfr)
		return;

	avic_invalidate_logical_id_entry(vcpu);
	svm->dfr_reg = dfr;
}

static int avic_unaccel_trap_write(struct vcpu_svm *svm)
{
	struct kvm_lapic *apic = svm->vcpu.arch.apic;
	u32 offset = svm->vmcb->control.exit_info_1 &
		     AVIC_UNACCEL_ACCESS_OFFSET_MASK;

	switch (offset) {
	case APIC_ID:
		if (avic_handle_apic_id_update(&svm->vcpu))
			return 0;
		break;
	case APIC_LDR:
		if (avic_handle_ldr_update(&svm->vcpu))
			return 0;
		break;
	case APIC_DFR:
		avic_handle_dfr_update(&svm->vcpu);
		break;
	default:
		break;
	}

	kvm_lapic_reg_write(apic, offset, kvm_lapic_get_reg(apic, offset));

	return 1;
}

static bool is_avic_unaccelerated_access_trap(u32 offset)
{
	bool ret = false;

	switch (offset) {
	case APIC_ID:
	case APIC_EOI:
	case APIC_RRR:
	case APIC_LDR:
	case APIC_DFR:
	case APIC_SPIV:
	case APIC_ESR:
	case APIC_ICR:
	case APIC_LVTT:
	case APIC_LVTTHMR:
	case APIC_LVTPC:
	case APIC_LVT0:
	case APIC_LVT1:
	case APIC_LVTERR:
	case APIC_TMICT:
	case APIC_TDCR:
		ret = true;
		break;
	default:
		break;
	}
	return ret;
}
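
/*
 * For the registers listed above the unaccelerated access exits as a
 * trap: the guest's write has already landed in the vAPIC backing page,
 * so avic_unaccel_trap_write() only has to replay the value that is
 * already there via kvm_lapic_reg_write() (plus bookkeeping for APIC_ID,
 * APIC_LDR and APIC_DFR). Accesses to any other offset exit before they
 * complete and are handled as faults by instruction emulation below.
 */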

int avic_unaccelerated_access_interception(struct vcpu_svm *svm)
{
	int ret = 0;
	u32 offset = svm->vmcb->control.exit_info_1 &
		     AVIC_UNACCEL_ACCESS_OFFSET_MASK;
	u32 vector = svm->vmcb->control.exit_info_2 &
		     AVIC_UNACCEL_ACCESS_VECTOR_MASK;
	bool write = (svm->vmcb->control.exit_info_1 >> 32) &
		     AVIC_UNACCEL_ACCESS_WRITE_MASK;
	bool trap = is_avic_unaccelerated_access_trap(offset);

	trace_kvm_avic_unaccelerated_access(svm->vcpu.vcpu_id, offset,
					    trap, write, vector);
	if (trap) {
		/* Handling Trap */
		WARN_ONCE(!write, "svm: Handling trap read.\n");
		ret = avic_unaccel_trap_write(svm);
	} else {
		/* Handling Fault */
		ret = kvm_emulate_instruction(&svm->vcpu, 0);
	}

	return ret;
}

int avic_init_vcpu(struct vcpu_svm *svm)
{
	int ret;
	struct kvm_vcpu *vcpu = &svm->vcpu;

	if (!avic || !irqchip_in_kernel(vcpu->kvm))
		return 0;

	ret = avic_init_backing_page(&svm->vcpu);
	if (ret)
		return ret;

	INIT_LIST_HEAD(&svm->ir_list);
	spin_lock_init(&svm->ir_list_lock);
	svm->dfr_reg = APIC_DFR_FLAT;

	return ret;
}

void avic_post_state_restore(struct kvm_vcpu *vcpu)
{
	if (avic_handle_apic_id_update(vcpu) != 0)
		return;
	avic_handle_dfr_update(vcpu);
	avic_handle_ldr_update(vcpu);
}

void svm_toggle_avic_for_irq_window(struct kvm_vcpu *vcpu, bool activate)
{
	if (!avic || !lapic_in_kernel(vcpu))
		return;

	srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
	kvm_request_apicv_update(vcpu->kvm, activate,
				 APICV_INHIBIT_REASON_IRQWIN);
	vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
}

void svm_set_virtual_apic_mode(struct kvm_vcpu *vcpu)
{
	return;
}

void svm_hwapic_irr_update(struct kvm_vcpu *vcpu, int max_irr)
{
}

void svm_hwapic_isr_update(struct kvm_vcpu *vcpu, int max_isr)
{
}

static int svm_set_pi_irte_mode(struct kvm_vcpu *vcpu, bool activate)
{
	int ret = 0;
	unsigned long flags;
	struct amd_svm_iommu_ir *ir;
	struct vcpu_svm *svm = to_svm(vcpu);

	if (!kvm_arch_has_assigned_device(vcpu->kvm))
		return 0;

	/*
	 * Here, we go through the per-vcpu ir_list to update all existing
	 * interrupt remapping table entries targeting this vcpu.
	 */
	spin_lock_irqsave(&svm->ir_list_lock, flags);

	if (list_empty(&svm->ir_list))
		goto out;

	list_for_each_entry(ir, &svm->ir_list, node) {
		if (activate)
			ret = amd_iommu_activate_guest_mode(ir->data);
		else
			ret = amd_iommu_deactivate_guest_mode(ir->data);
		if (ret)
			break;
	}
out:
	spin_unlock_irqrestore(&svm->ir_list_lock, flags);
	return ret;
}

void svm_refresh_apicv_exec_ctrl(struct kvm_vcpu *vcpu)
{
	struct vcpu_svm *svm = to_svm(vcpu);
	struct vmcb *vmcb = svm->vmcb;
	bool activated = kvm_vcpu_apicv_active(vcpu);

	if (!avic)
		return;

	if (activated) {
		/**
		 * During temporary AVIC deactivation, the guest could update
		 * the APIC ID, DFR and LDR registers, which would not be
		 * trapped by avic_unaccelerated_access_interception(). In this
		 * case, we need to check and update the AVIC logical APIC ID
		 * table accordingly before re-activating.
		 */
		avic_post_state_restore(vcpu);
		vmcb->control.int_ctl |= AVIC_ENABLE_MASK;
	} else {
		vmcb->control.int_ctl &= ~AVIC_ENABLE_MASK;
	}
	vmcb_mark_dirty(vmcb, VMCB_AVIC);

	svm_set_pi_irte_mode(vcpu, activated);
}

void svm_load_eoi_exitmap(struct kvm_vcpu *vcpu, u64 *eoi_exit_bitmap)
{
	return;
}

int svm_deliver_avic_intr(struct kvm_vcpu *vcpu, int vec)
{
	if (!vcpu->arch.apicv_active)
		return -1;

	kvm_lapic_set_irr(vec, vcpu->arch.apic);
	smp_mb__after_atomic();

	if (avic_vcpu_is_running(vcpu)) {
		int cpuid = vcpu->cpu;

		if (cpuid != get_cpu())
			wrmsrl(SVM_AVIC_DOORBELL, kvm_cpu_get_apicid(cpuid));
		put_cpu();
	} else
		kvm_vcpu_wake_up(vcpu);

	return 0;
}
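
/*
 * Delivery path sketch for svm_deliver_avic_intr(): the vector is first
 * set in the vIRR of the vAPIC backing page; then, if the target vCPU is
 * currently running on another CPU, writing that CPU's APIC ID to the
 * SVM_AVIC_DOORBELL MSR tells the remote core to evaluate the newly
 * pending interrupt. Otherwise the vCPU is simply woken up and picks up
 * the vIRR bit on its next VMRUN.
 */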

bool svm_dy_apicv_has_pending_interrupt(struct kvm_vcpu *vcpu)
{
	return false;
}

static void svm_ir_list_del(struct vcpu_svm *svm, struct amd_iommu_pi_data *pi)
{
	unsigned long flags;
	struct amd_svm_iommu_ir *cur;

	spin_lock_irqsave(&svm->ir_list_lock, flags);
	list_for_each_entry(cur, &svm->ir_list, node) {
		if (cur->data != pi->ir_data)
			continue;
		list_del(&cur->node);
		kfree(cur);
		break;
	}
	spin_unlock_irqrestore(&svm->ir_list_lock, flags);
}

static int svm_ir_list_add(struct vcpu_svm *svm, struct amd_iommu_pi_data *pi)
{
	int ret = 0;
	unsigned long flags;
	struct amd_svm_iommu_ir *ir;

	/**
	 * In some cases, the existing IRTE is updated and re-set,
	 * so we need to check here if it has already been added
	 * to the ir_list.
	 */
	if (pi->ir_data && (pi->prev_ga_tag != 0)) {
		struct kvm *kvm = svm->vcpu.kvm;
		u32 vcpu_id = AVIC_GATAG_TO_VCPUID(pi->prev_ga_tag);
		struct kvm_vcpu *prev_vcpu = kvm_get_vcpu_by_id(kvm, vcpu_id);
		struct vcpu_svm *prev_svm;

		if (!prev_vcpu) {
			ret = -EINVAL;
			goto out;
		}

		prev_svm = to_svm(prev_vcpu);
		svm_ir_list_del(prev_svm, pi);
	}

	/**
	 * Allocate a new amd_svm_iommu_ir, which will be added to
	 * the per-vcpu ir_list.
	 */
	ir = kzalloc(sizeof(struct amd_svm_iommu_ir), GFP_KERNEL_ACCOUNT);
	if (!ir) {
		ret = -ENOMEM;
		goto out;
	}
	ir->data = pi->ir_data;

	spin_lock_irqsave(&svm->ir_list_lock, flags);
	list_add(&ir->node, &svm->ir_list);
	spin_unlock_irqrestore(&svm->ir_list_lock, flags);
out:
	return ret;
}

/**
 * Note:
 * The HW cannot support posting multicast/broadcast
 * interrupts to a vCPU. So, we still use legacy interrupt
 * remapping for these kinds of interrupts.
 *
 * For lowest-priority interrupts, we only support
 * those with a single CPU as the destination, e.g. the user
 * configures the interrupt via /proc/irq or uses
 * irqbalance to make the interrupt single-CPU.
 */
static int
get_pi_vcpu_info(struct kvm *kvm, struct kvm_kernel_irq_routing_entry *e,
		 struct vcpu_data *vcpu_info, struct vcpu_svm **svm)
{
	struct kvm_lapic_irq irq;
	struct kvm_vcpu *vcpu = NULL;

	kvm_set_msi_irq(kvm, e, &irq);

	if (!kvm_intr_is_single_vcpu(kvm, &irq, &vcpu) ||
	    !kvm_irq_is_postable(&irq)) {
		pr_debug("SVM: %s: use legacy intr remap mode for irq %u\n",
			 __func__, irq.vector);
		return -1;
	}

	pr_debug("SVM: %s: use GA mode for irq %u\n", __func__,
		 irq.vector);
	*svm = to_svm(vcpu);
	vcpu_info->pi_desc_addr = __sme_set(page_to_phys((*svm)->avic_backing_page));
	vcpu_info->vector = irq.vector;

	return 0;
}

/*
 * svm_update_pi_irte - set IRTE for Posted-Interrupts
 *
 * @kvm: kvm
 * @host_irq: host irq of the interrupt
 * @guest_irq: gsi of the interrupt
 * @set: set or unset PI
 * returns 0 on success, < 0 on failure
 */
int svm_update_pi_irte(struct kvm *kvm, unsigned int host_irq,
		       uint32_t guest_irq, bool set)
{
	struct kvm_kernel_irq_routing_entry *e;
	struct kvm_irq_routing_table *irq_rt;
	int idx, ret = 0;

	if (!kvm_arch_has_assigned_device(kvm) ||
	    !irq_remapping_cap(IRQ_POSTING_CAP))
		return 0;

	pr_debug("SVM: %s: host_irq=%#x, guest_irq=%#x, set=%#x\n",
		 __func__, host_irq, guest_irq, set);

	idx = srcu_read_lock(&kvm->irq_srcu);
	irq_rt = srcu_dereference(kvm->irq_routing, &kvm->irq_srcu);

	if (guest_irq >= irq_rt->nr_rt_entries ||
	    hlist_empty(&irq_rt->map[guest_irq])) {
		pr_warn_once("no route for guest_irq %u/%u (broken user space?)\n",
			     guest_irq, irq_rt->nr_rt_entries);
		goto out;
	}

	hlist_for_each_entry(e, &irq_rt->map[guest_irq], link) {
		struct vcpu_data vcpu_info;
		struct vcpu_svm *svm = NULL;

		if (e->type != KVM_IRQ_ROUTING_MSI)
			continue;

		/**
		 * Here, we fall back to legacy mode in the following cases:
		 * 1. When the interrupt cannot be targeted to a specific vcpu.
		 * 2. When unsetting the posted interrupt.
		 * 3. When APIC virtualization is disabled for the vcpu.
		 * 4. When the IRQ has an incompatible delivery mode (SMI, INIT, etc.).
		 */
		if (!get_pi_vcpu_info(kvm, e, &vcpu_info, &svm) && set &&
		    kvm_vcpu_apicv_active(&svm->vcpu)) {
			struct amd_iommu_pi_data pi;

			/* Try to enable guest_mode in IRTE */
			pi.base = __sme_set(page_to_phys(svm->avic_backing_page) &
					    AVIC_HPA_MASK);
			pi.ga_tag = AVIC_GATAG(to_kvm_svm(kvm)->avic_vm_id,
					       svm->vcpu.vcpu_id);
			pi.is_guest_mode = true;
			pi.vcpu_data = &vcpu_info;
			ret = irq_set_vcpu_affinity(host_irq, &pi);

			/**
			 * Here, we have successfully set up vcpu affinity in
			 * IOMMU guest mode. Now, we need to store the posted
			 * interrupt information in a per-vcpu ir_list so that
			 * we can reference it directly when we update vcpu
			 * scheduling information in the IOMMU irte.
			 */
			if (!ret && pi.is_guest_mode)
				svm_ir_list_add(svm, &pi);
		} else {
			/* Use legacy mode in IRTE */
			struct amd_iommu_pi_data pi;

			/**
			 * Here, pi is used to:
			 * - Tell IOMMU to use legacy mode for this interrupt.
			 * - Retrieve ga_tag of prior interrupt remapping data.
			 */
			pi.prev_ga_tag = 0;
			pi.is_guest_mode = false;
			ret = irq_set_vcpu_affinity(host_irq, &pi);

			/**
			 * Check if the posted interrupt was previously
			 * set up in guest mode by checking if the ga_tag
			 * was cached. If so, we need to clean up the per-vcpu
			 * ir_list.
			 */
			if (!ret && pi.prev_ga_tag) {
				int id = AVIC_GATAG_TO_VCPUID(pi.prev_ga_tag);
				struct kvm_vcpu *vcpu;

				vcpu = kvm_get_vcpu_by_id(kvm, id);
				if (vcpu)
					svm_ir_list_del(to_svm(vcpu), &pi);
			}
		}

		if (!ret && svm) {
			trace_kvm_pi_irte_update(host_irq, svm->vcpu.vcpu_id,
						 e->gsi, vcpu_info.vector,
						 vcpu_info.pi_desc_addr, set);
		}

		if (ret < 0) {
			pr_err("%s: failed to update PI IRTE\n", __func__);
			goto out;
		}
	}

	ret = 0;
out:
	srcu_read_unlock(&kvm->irq_srcu, idx);
	return ret;
}
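
/*
 * svm_update_pi_irte() is typically reached through KVM's irqfd/IRQ
 * bypass plumbing when the MSI routing of an assigned device changes,
 * so each routing entry ends up either posted directly into the target
 * vCPU's vAPIC (guest mode) or falling back to legacy remapping.
 */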

bool svm_check_apicv_inhibit_reasons(ulong bit)
{
	ulong supported = BIT(APICV_INHIBIT_REASON_DISABLE) |
			  BIT(APICV_INHIBIT_REASON_HYPERV) |
			  BIT(APICV_INHIBIT_REASON_NESTED) |
			  BIT(APICV_INHIBIT_REASON_IRQWIN) |
			  BIT(APICV_INHIBIT_REASON_PIT_REINJ) |
			  BIT(APICV_INHIBIT_REASON_X2APIC);

	return supported & BIT(bit);
}

void svm_pre_update_apicv_exec_ctrl(struct kvm *kvm, bool activate)
{
	avic_update_access_page(kvm, activate);
}

static inline int
avic_update_iommu_vcpu_affinity(struct kvm_vcpu *vcpu, int cpu, bool r)
{
	int ret = 0;
	unsigned long flags;
	struct amd_svm_iommu_ir *ir;
	struct vcpu_svm *svm = to_svm(vcpu);

	if (!kvm_arch_has_assigned_device(vcpu->kvm))
		return 0;

	/*
	 * Here, we go through the per-vcpu ir_list to update all existing
	 * interrupt remapping table entries targeting this vcpu.
	 */
	spin_lock_irqsave(&svm->ir_list_lock, flags);

	if (list_empty(&svm->ir_list))
		goto out;

	list_for_each_entry(ir, &svm->ir_list, node) {
		ret = amd_iommu_update_ga(cpu, r, ir->data);
		if (ret)
			break;
	}
out:
	spin_unlock_irqrestore(&svm->ir_list_lock, flags);
	return ret;
}

void avic_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
{
	u64 entry;
	/* ID = 0xff (broadcast), ID > 0xff (reserved) */
	int h_physical_id = kvm_cpu_get_apicid(cpu);
	struct vcpu_svm *svm = to_svm(vcpu);

	if (!kvm_vcpu_apicv_active(vcpu))
		return;

	/*
	 * Since the host physical APIC id is 8 bits,
	 * we can support host APIC IDs up to 255.
	 */
	if (WARN_ON(h_physical_id > AVIC_PHYSICAL_ID_ENTRY_HOST_PHYSICAL_ID_MASK))
		return;

	entry = READ_ONCE(*(svm->avic_physical_id_cache));
	WARN_ON(entry & AVIC_PHYSICAL_ID_ENTRY_IS_RUNNING_MASK);

	entry &= ~AVIC_PHYSICAL_ID_ENTRY_HOST_PHYSICAL_ID_MASK;
	entry |= (h_physical_id & AVIC_PHYSICAL_ID_ENTRY_HOST_PHYSICAL_ID_MASK);

	entry &= ~AVIC_PHYSICAL_ID_ENTRY_IS_RUNNING_MASK;
	if (svm->avic_is_running)
		entry |= AVIC_PHYSICAL_ID_ENTRY_IS_RUNNING_MASK;

	WRITE_ONCE(*(svm->avic_physical_id_cache), entry);
	avic_update_iommu_vcpu_affinity(vcpu, h_physical_id,
					svm->avic_is_running);
}

void avic_vcpu_put(struct kvm_vcpu *vcpu)
{
	u64 entry;
	struct vcpu_svm *svm = to_svm(vcpu);

	if (!kvm_vcpu_apicv_active(vcpu))
		return;

	entry = READ_ONCE(*(svm->avic_physical_id_cache));
	if (entry & AVIC_PHYSICAL_ID_ENTRY_IS_RUNNING_MASK)
		avic_update_iommu_vcpu_affinity(vcpu, -1, 0);

	entry &= ~AVIC_PHYSICAL_ID_ENTRY_IS_RUNNING_MASK;
	WRITE_ONCE(*(svm->avic_physical_id_cache), entry);
}
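
/*
 * The IS_RUNNING bit toggled in avic_vcpu_load()/avic_vcpu_put() is what
 * lets senders pick a delivery path: while it is set, IPIs and IOMMU
 * posted interrupts can be signalled with a doorbell to the host CPU
 * recorded in the entry; once it is cleared, interrupts are only recorded
 * in the vIRR and the sender (or the IOMMU GA log, via
 * avic_ga_log_notifier()) has to wake the vCPU instead.
 */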

/**
 * This function is called during VCPU halt/unhalt.
 */
static void avic_set_running(struct kvm_vcpu *vcpu, bool is_run)
{
	struct vcpu_svm *svm = to_svm(vcpu);

	svm->avic_is_running = is_run;
	if (is_run)
		avic_vcpu_load(vcpu, vcpu->cpu);
	else
		avic_vcpu_put(vcpu);
}

void svm_vcpu_blocking(struct kvm_vcpu *vcpu)
{
	avic_set_running(vcpu, false);
}

void svm_vcpu_unblocking(struct kvm_vcpu *vcpu)
{
	if (kvm_check_request(KVM_REQ_APICV_UPDATE, vcpu))
		kvm_vcpu_update_apicv(vcpu);
	avic_set_running(vcpu, true);
}