xref: /OK3568_Linux_fs/kernel/arch/arm64/kvm/arm.c (revision 4882a59341e53eb6f0b4789bf948001014eff981)
1*4882a593Smuzhiyun // SPDX-License-Identifier: GPL-2.0-only
2*4882a593Smuzhiyun /*
3*4882a593Smuzhiyun  * Copyright (C) 2012 - Virtual Open Systems and Columbia University
4*4882a593Smuzhiyun  * Author: Christoffer Dall <c.dall@virtualopensystems.com>
5*4882a593Smuzhiyun  */
6*4882a593Smuzhiyun 
7*4882a593Smuzhiyun #include <linux/bug.h>
8*4882a593Smuzhiyun #include <linux/cpu_pm.h>
9*4882a593Smuzhiyun #include <linux/errno.h>
10*4882a593Smuzhiyun #include <linux/err.h>
11*4882a593Smuzhiyun #include <linux/kvm_host.h>
12*4882a593Smuzhiyun #include <linux/list.h>
13*4882a593Smuzhiyun #include <linux/module.h>
14*4882a593Smuzhiyun #include <linux/vmalloc.h>
15*4882a593Smuzhiyun #include <linux/fs.h>
16*4882a593Smuzhiyun #include <linux/mman.h>
17*4882a593Smuzhiyun #include <linux/sched.h>
18*4882a593Smuzhiyun #include <linux/kmemleak.h>
19*4882a593Smuzhiyun #include <linux/kvm.h>
20*4882a593Smuzhiyun #include <linux/kvm_irqfd.h>
21*4882a593Smuzhiyun #include <linux/irqbypass.h>
22*4882a593Smuzhiyun #include <linux/sched/stat.h>
23*4882a593Smuzhiyun #include <linux/psci.h>
24*4882a593Smuzhiyun #include <trace/events/kvm.h>
25*4882a593Smuzhiyun 
26*4882a593Smuzhiyun #define CREATE_TRACE_POINTS
27*4882a593Smuzhiyun #include "trace_arm.h"
28*4882a593Smuzhiyun 
29*4882a593Smuzhiyun #include <linux/uaccess.h>
30*4882a593Smuzhiyun #include <asm/ptrace.h>
31*4882a593Smuzhiyun #include <asm/mman.h>
32*4882a593Smuzhiyun #include <asm/tlbflush.h>
33*4882a593Smuzhiyun #include <asm/cacheflush.h>
34*4882a593Smuzhiyun #include <asm/cpufeature.h>
35*4882a593Smuzhiyun #include <asm/virt.h>
36*4882a593Smuzhiyun #include <asm/kvm_arm.h>
37*4882a593Smuzhiyun #include <asm/kvm_asm.h>
38*4882a593Smuzhiyun #include <asm/kvm_mmu.h>
39*4882a593Smuzhiyun #include <asm/kvm_emulate.h>
40*4882a593Smuzhiyun #include <asm/sections.h>
41*4882a593Smuzhiyun 
42*4882a593Smuzhiyun #include <kvm/arm_hypercalls.h>
43*4882a593Smuzhiyun #include <kvm/arm_pmu.h>
44*4882a593Smuzhiyun #include <kvm/arm_psci.h>
45*4882a593Smuzhiyun 
46*4882a593Smuzhiyun #ifdef REQUIRES_VIRT
47*4882a593Smuzhiyun __asm__(".arch_extension	virt");
48*4882a593Smuzhiyun #endif
49*4882a593Smuzhiyun 
50*4882a593Smuzhiyun static enum kvm_mode kvm_mode = KVM_MODE_DEFAULT;
51*4882a593Smuzhiyun DEFINE_STATIC_KEY_FALSE(kvm_protected_mode_initialized);
52*4882a593Smuzhiyun 
53*4882a593Smuzhiyun DECLARE_KVM_HYP_PER_CPU(unsigned long, kvm_hyp_vector);
54*4882a593Smuzhiyun 
55*4882a593Smuzhiyun static DEFINE_PER_CPU(unsigned long, kvm_arm_hyp_stack_page);
56*4882a593Smuzhiyun unsigned long kvm_arm_hyp_percpu_base[NR_CPUS];
57*4882a593Smuzhiyun DECLARE_KVM_NVHE_PER_CPU(struct kvm_nvhe_init_params, kvm_init_params);
58*4882a593Smuzhiyun 
59*4882a593Smuzhiyun /* The VMID used in the VTTBR */
60*4882a593Smuzhiyun static atomic64_t kvm_vmid_gen = ATOMIC64_INIT(1);
61*4882a593Smuzhiyun static u32 kvm_next_vmid;
62*4882a593Smuzhiyun static DEFINE_SPINLOCK(kvm_vmid_lock);
63*4882a593Smuzhiyun 
64*4882a593Smuzhiyun static bool vgic_present;
65*4882a593Smuzhiyun 
66*4882a593Smuzhiyun static DEFINE_PER_CPU(unsigned char, kvm_arm_hardware_enabled);
67*4882a593Smuzhiyun DEFINE_STATIC_KEY_FALSE(userspace_irqchip_in_use);
68*4882a593Smuzhiyun 
69*4882a593Smuzhiyun int kvm_arch_vcpu_should_kick(struct kvm_vcpu *vcpu)
70*4882a593Smuzhiyun {
71*4882a593Smuzhiyun 	return kvm_vcpu_exiting_guest_mode(vcpu) == IN_GUEST_MODE;
72*4882a593Smuzhiyun }
73*4882a593Smuzhiyun 
74*4882a593Smuzhiyun int kvm_arch_hardware_setup(void *opaque)
75*4882a593Smuzhiyun {
76*4882a593Smuzhiyun 	return 0;
77*4882a593Smuzhiyun }
78*4882a593Smuzhiyun 
79*4882a593Smuzhiyun int kvm_arch_check_processor_compat(void *opaque)
80*4882a593Smuzhiyun {
81*4882a593Smuzhiyun 	return 0;
82*4882a593Smuzhiyun }
83*4882a593Smuzhiyun 
84*4882a593Smuzhiyun int kvm_vm_ioctl_enable_cap(struct kvm *kvm,
85*4882a593Smuzhiyun 			    struct kvm_enable_cap *cap)
86*4882a593Smuzhiyun {
87*4882a593Smuzhiyun 	int r;
88*4882a593Smuzhiyun 
89*4882a593Smuzhiyun 	if (cap->flags)
90*4882a593Smuzhiyun 		return -EINVAL;
91*4882a593Smuzhiyun 
92*4882a593Smuzhiyun 	switch (cap->cap) {
93*4882a593Smuzhiyun 	case KVM_CAP_ARM_NISV_TO_USER:
94*4882a593Smuzhiyun 		r = 0;
95*4882a593Smuzhiyun 		kvm->arch.return_nisv_io_abort_to_user = true;
96*4882a593Smuzhiyun 		break;
97*4882a593Smuzhiyun 	default:
98*4882a593Smuzhiyun 		r = -EINVAL;
99*4882a593Smuzhiyun 		break;
100*4882a593Smuzhiyun 	}
101*4882a593Smuzhiyun 
102*4882a593Smuzhiyun 	return r;
103*4882a593Smuzhiyun }
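/*
 * Illustrative userspace sketch (standard KVM API from <linux/kvm.h>, not
 * part of this file) for the capability handled above:
 *
 *	struct kvm_enable_cap cap = {
 *		.cap = KVM_CAP_ARM_NISV_TO_USER,
 *	};
 *
 *	if (ioctl(vm_fd, KVM_ENABLE_CAP, &cap) < 0)
 *		perror("KVM_ENABLE_CAP");
 *
 * Any non-zero cap.flags is rejected with -EINVAL before the switch above
 * is reached.
 */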
104*4882a593Smuzhiyun 
105*4882a593Smuzhiyun static int kvm_arm_default_max_vcpus(void)
106*4882a593Smuzhiyun {
107*4882a593Smuzhiyun 	return vgic_present ? kvm_vgic_get_max_vcpus() : KVM_MAX_VCPUS;
108*4882a593Smuzhiyun }
109*4882a593Smuzhiyun 
110*4882a593Smuzhiyun static void set_default_spectre(struct kvm *kvm)
111*4882a593Smuzhiyun {
112*4882a593Smuzhiyun 	/*
113*4882a593Smuzhiyun 	 * The default is to expose CSV2 == 1 if the HW isn't affected.
114*4882a593Smuzhiyun 	 * Although this is a per-CPU feature, we make it global because
115*4882a593Smuzhiyun 	 * asymmetric systems are just a nuisance.
116*4882a593Smuzhiyun 	 *
117*4882a593Smuzhiyun 	 * Userspace can override this as long as it doesn't promise
118*4882a593Smuzhiyun 	 * the impossible.
119*4882a593Smuzhiyun 	 */
120*4882a593Smuzhiyun 	if (arm64_get_spectre_v2_state() == SPECTRE_UNAFFECTED)
121*4882a593Smuzhiyun 		kvm->arch.pfr0_csv2 = 1;
122*4882a593Smuzhiyun 	if (arm64_get_meltdown_state() == SPECTRE_UNAFFECTED)
123*4882a593Smuzhiyun 		kvm->arch.pfr0_csv3 = 1;
124*4882a593Smuzhiyun }
125*4882a593Smuzhiyun 
126*4882a593Smuzhiyun /**
127*4882a593Smuzhiyun  * kvm_arch_init_vm - initializes a VM data structure
128*4882a593Smuzhiyun  * @kvm:	pointer to the KVM struct
129*4882a593Smuzhiyun  */
130*4882a593Smuzhiyun int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
131*4882a593Smuzhiyun {
132*4882a593Smuzhiyun 	int ret;
133*4882a593Smuzhiyun 
134*4882a593Smuzhiyun 	ret = kvm_arm_setup_stage2(kvm, type);
135*4882a593Smuzhiyun 	if (ret)
136*4882a593Smuzhiyun 		return ret;
137*4882a593Smuzhiyun 
138*4882a593Smuzhiyun 	ret = kvm_init_stage2_mmu(kvm, &kvm->arch.mmu);
139*4882a593Smuzhiyun 	if (ret)
140*4882a593Smuzhiyun 		return ret;
141*4882a593Smuzhiyun 
142*4882a593Smuzhiyun 	ret = create_hyp_mappings(kvm, kvm + 1, PAGE_HYP);
143*4882a593Smuzhiyun 	if (ret)
144*4882a593Smuzhiyun 		goto out_free_stage2_pgd;
145*4882a593Smuzhiyun 
146*4882a593Smuzhiyun 	kvm_vgic_early_init(kvm);
147*4882a593Smuzhiyun 
148*4882a593Smuzhiyun 	/* The maximum number of VCPUs is limited by the host's GIC model */
149*4882a593Smuzhiyun 	kvm->arch.max_vcpus = kvm_arm_default_max_vcpus();
150*4882a593Smuzhiyun 
151*4882a593Smuzhiyun 	set_default_spectre(kvm);
152*4882a593Smuzhiyun 
153*4882a593Smuzhiyun 	return ret;
154*4882a593Smuzhiyun out_free_stage2_pgd:
155*4882a593Smuzhiyun 	kvm_free_stage2_pgd(&kvm->arch.mmu);
156*4882a593Smuzhiyun 	return ret;
157*4882a593Smuzhiyun }
158*4882a593Smuzhiyun 
159*4882a593Smuzhiyun vm_fault_t kvm_arch_vcpu_fault(struct kvm_vcpu *vcpu, struct vm_fault *vmf)
160*4882a593Smuzhiyun {
161*4882a593Smuzhiyun 	return VM_FAULT_SIGBUS;
162*4882a593Smuzhiyun }
163*4882a593Smuzhiyun 
164*4882a593Smuzhiyun 
165*4882a593Smuzhiyun /**
166*4882a593Smuzhiyun  * kvm_arch_destroy_vm - destroy the VM data structure
167*4882a593Smuzhiyun  * @kvm:	pointer to the KVM struct
168*4882a593Smuzhiyun  */
169*4882a593Smuzhiyun void kvm_arch_destroy_vm(struct kvm *kvm)
170*4882a593Smuzhiyun {
171*4882a593Smuzhiyun 	int i;
172*4882a593Smuzhiyun 
173*4882a593Smuzhiyun 	bitmap_free(kvm->arch.pmu_filter);
174*4882a593Smuzhiyun 
175*4882a593Smuzhiyun 	kvm_vgic_destroy(kvm);
176*4882a593Smuzhiyun 
177*4882a593Smuzhiyun 	for (i = 0; i < KVM_MAX_VCPUS; ++i) {
178*4882a593Smuzhiyun 		if (kvm->vcpus[i]) {
179*4882a593Smuzhiyun 			kvm_vcpu_destroy(kvm->vcpus[i]);
180*4882a593Smuzhiyun 			kvm->vcpus[i] = NULL;
181*4882a593Smuzhiyun 		}
182*4882a593Smuzhiyun 	}
183*4882a593Smuzhiyun 	atomic_set(&kvm->online_vcpus, 0);
184*4882a593Smuzhiyun }
185*4882a593Smuzhiyun 
186*4882a593Smuzhiyun int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
187*4882a593Smuzhiyun {
188*4882a593Smuzhiyun 	int r;
189*4882a593Smuzhiyun 	switch (ext) {
190*4882a593Smuzhiyun 	case KVM_CAP_IRQCHIP:
191*4882a593Smuzhiyun 		r = vgic_present;
192*4882a593Smuzhiyun 		break;
193*4882a593Smuzhiyun 	case KVM_CAP_IOEVENTFD:
194*4882a593Smuzhiyun 	case KVM_CAP_DEVICE_CTRL:
195*4882a593Smuzhiyun 	case KVM_CAP_USER_MEMORY:
196*4882a593Smuzhiyun 	case KVM_CAP_SYNC_MMU:
197*4882a593Smuzhiyun 	case KVM_CAP_DESTROY_MEMORY_REGION_WORKS:
198*4882a593Smuzhiyun 	case KVM_CAP_ONE_REG:
199*4882a593Smuzhiyun 	case KVM_CAP_ARM_PSCI:
200*4882a593Smuzhiyun 	case KVM_CAP_ARM_PSCI_0_2:
201*4882a593Smuzhiyun 	case KVM_CAP_READONLY_MEM:
202*4882a593Smuzhiyun 	case KVM_CAP_MP_STATE:
203*4882a593Smuzhiyun 	case KVM_CAP_IMMEDIATE_EXIT:
204*4882a593Smuzhiyun 	case KVM_CAP_VCPU_EVENTS:
205*4882a593Smuzhiyun 	case KVM_CAP_ARM_IRQ_LINE_LAYOUT_2:
206*4882a593Smuzhiyun 	case KVM_CAP_ARM_NISV_TO_USER:
207*4882a593Smuzhiyun 	case KVM_CAP_ARM_INJECT_EXT_DABT:
208*4882a593Smuzhiyun 	case KVM_CAP_SET_GUEST_DEBUG:
209*4882a593Smuzhiyun 	case KVM_CAP_VCPU_ATTRIBUTES:
210*4882a593Smuzhiyun 		r = 1;
211*4882a593Smuzhiyun 		break;
212*4882a593Smuzhiyun 	case KVM_CAP_ARM_SET_DEVICE_ADDR:
213*4882a593Smuzhiyun 		r = 1;
214*4882a593Smuzhiyun 		break;
215*4882a593Smuzhiyun 	case KVM_CAP_NR_VCPUS:
216*4882a593Smuzhiyun 		r = num_online_cpus();
217*4882a593Smuzhiyun 		break;
218*4882a593Smuzhiyun 	case KVM_CAP_MAX_VCPUS:
219*4882a593Smuzhiyun 	case KVM_CAP_MAX_VCPU_ID:
220*4882a593Smuzhiyun 		if (kvm)
221*4882a593Smuzhiyun 			r = kvm->arch.max_vcpus;
222*4882a593Smuzhiyun 		else
223*4882a593Smuzhiyun 			r = kvm_arm_default_max_vcpus();
224*4882a593Smuzhiyun 		break;
225*4882a593Smuzhiyun 	case KVM_CAP_MSI_DEVID:
226*4882a593Smuzhiyun 		if (!kvm)
227*4882a593Smuzhiyun 			r = -EINVAL;
228*4882a593Smuzhiyun 		else
229*4882a593Smuzhiyun 			r = kvm->arch.vgic.msis_require_devid;
230*4882a593Smuzhiyun 		break;
231*4882a593Smuzhiyun 	case KVM_CAP_ARM_USER_IRQ:
232*4882a593Smuzhiyun 		/*
233*4882a593Smuzhiyun 		 * 1: EL1_VTIMER, EL1_PTIMER, and PMU.
234*4882a593Smuzhiyun 		 * (bump this number if adding more devices)
235*4882a593Smuzhiyun 		 */
236*4882a593Smuzhiyun 		r = 1;
237*4882a593Smuzhiyun 		break;
238*4882a593Smuzhiyun 	case KVM_CAP_STEAL_TIME:
239*4882a593Smuzhiyun 		r = kvm_arm_pvtime_supported();
240*4882a593Smuzhiyun 		break;
241*4882a593Smuzhiyun 	case KVM_CAP_ARM_EL1_32BIT:
242*4882a593Smuzhiyun 		r = cpus_have_const_cap(ARM64_HAS_32BIT_EL1);
243*4882a593Smuzhiyun 		break;
244*4882a593Smuzhiyun 	case KVM_CAP_GUEST_DEBUG_HW_BPS:
245*4882a593Smuzhiyun 		r = get_num_brps();
246*4882a593Smuzhiyun 		break;
247*4882a593Smuzhiyun 	case KVM_CAP_GUEST_DEBUG_HW_WPS:
248*4882a593Smuzhiyun 		r = get_num_wrps();
249*4882a593Smuzhiyun 		break;
250*4882a593Smuzhiyun 	case KVM_CAP_ARM_PMU_V3:
251*4882a593Smuzhiyun 		r = kvm_arm_support_pmu_v3();
252*4882a593Smuzhiyun 		break;
253*4882a593Smuzhiyun 	case KVM_CAP_ARM_INJECT_SERROR_ESR:
254*4882a593Smuzhiyun 		r = cpus_have_const_cap(ARM64_HAS_RAS_EXTN);
255*4882a593Smuzhiyun 		break;
256*4882a593Smuzhiyun 	case KVM_CAP_ARM_VM_IPA_SIZE:
257*4882a593Smuzhiyun 		r = get_kvm_ipa_limit();
258*4882a593Smuzhiyun 		break;
259*4882a593Smuzhiyun 	case KVM_CAP_ARM_SVE:
260*4882a593Smuzhiyun 		r = system_supports_sve();
261*4882a593Smuzhiyun 		break;
262*4882a593Smuzhiyun 	case KVM_CAP_ARM_PTRAUTH_ADDRESS:
263*4882a593Smuzhiyun 	case KVM_CAP_ARM_PTRAUTH_GENERIC:
264*4882a593Smuzhiyun 		r = system_has_full_ptr_auth();
265*4882a593Smuzhiyun 		break;
266*4882a593Smuzhiyun 	default:
267*4882a593Smuzhiyun 		r = 0;
268*4882a593Smuzhiyun 	}
269*4882a593Smuzhiyun 
270*4882a593Smuzhiyun 	return r;
271*4882a593Smuzhiyun }
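/*
 * Illustrative userspace sketch (standard KVM API, not part of this file):
 * the values computed above are queried with KVM_CHECK_EXTENSION, e.g.
 *
 *	int ipa_bits = ioctl(vm_fd, KVM_CHECK_EXTENSION, KVM_CAP_ARM_VM_IPA_SIZE);
 *
 * A return of 0 means the extension is absent; positive values carry
 * capability-specific meaning (here, the supported IPA limit in bits).
 */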
272*4882a593Smuzhiyun 
273*4882a593Smuzhiyun long kvm_arch_dev_ioctl(struct file *filp,
274*4882a593Smuzhiyun 			unsigned int ioctl, unsigned long arg)
275*4882a593Smuzhiyun {
276*4882a593Smuzhiyun 	return -EINVAL;
277*4882a593Smuzhiyun }
278*4882a593Smuzhiyun 
279*4882a593Smuzhiyun struct kvm *kvm_arch_alloc_vm(void)
280*4882a593Smuzhiyun {
281*4882a593Smuzhiyun 	if (!has_vhe())
282*4882a593Smuzhiyun 		return kzalloc(sizeof(struct kvm), GFP_KERNEL);
283*4882a593Smuzhiyun 
284*4882a593Smuzhiyun 	return vzalloc(sizeof(struct kvm));
285*4882a593Smuzhiyun }
286*4882a593Smuzhiyun 
287*4882a593Smuzhiyun void kvm_arch_free_vm(struct kvm *kvm)
288*4882a593Smuzhiyun {
289*4882a593Smuzhiyun 	if (!has_vhe())
290*4882a593Smuzhiyun 		kfree(kvm);
291*4882a593Smuzhiyun 	else
292*4882a593Smuzhiyun 		vfree(kvm);
293*4882a593Smuzhiyun }
294*4882a593Smuzhiyun 
295*4882a593Smuzhiyun int kvm_arch_vcpu_precreate(struct kvm *kvm, unsigned int id)
296*4882a593Smuzhiyun {
297*4882a593Smuzhiyun 	if (irqchip_in_kernel(kvm) && vgic_initialized(kvm))
298*4882a593Smuzhiyun 		return -EBUSY;
299*4882a593Smuzhiyun 
300*4882a593Smuzhiyun 	if (id >= kvm->arch.max_vcpus)
301*4882a593Smuzhiyun 		return -EINVAL;
302*4882a593Smuzhiyun 
303*4882a593Smuzhiyun 	return 0;
304*4882a593Smuzhiyun }
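/*
 * Illustrative userspace sketch (standard KVM API, not part of this file):
 * vcpus are created with KVM_CREATE_VCPU, which runs the precreate check
 * above before kvm_arch_vcpu_create(). The ioctl argument is the vcpu id:
 *
 *	int vcpu_fd = ioctl(vm_fd, KVM_CREATE_VCPU, 0);
 *
 * Creation fails with -EBUSY once the in-kernel vgic has been initialized
 * and with -EINVAL for ids at or above kvm->arch.max_vcpus.
 */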
305*4882a593Smuzhiyun 
306*4882a593Smuzhiyun int kvm_arch_vcpu_create(struct kvm_vcpu *vcpu)
307*4882a593Smuzhiyun {
308*4882a593Smuzhiyun 	int err;
309*4882a593Smuzhiyun 
310*4882a593Smuzhiyun 	/* Force users to call KVM_ARM_VCPU_INIT */
311*4882a593Smuzhiyun 	vcpu->arch.target = -1;
312*4882a593Smuzhiyun 	bitmap_zero(vcpu->arch.features, KVM_VCPU_MAX_FEATURES);
313*4882a593Smuzhiyun 
314*4882a593Smuzhiyun 	vcpu->arch.mmu_page_cache.gfp_zero = __GFP_ZERO;
315*4882a593Smuzhiyun 
316*4882a593Smuzhiyun 	/* Set up the timer */
317*4882a593Smuzhiyun 	kvm_timer_vcpu_init(vcpu);
318*4882a593Smuzhiyun 
319*4882a593Smuzhiyun 	kvm_pmu_vcpu_init(vcpu);
320*4882a593Smuzhiyun 
321*4882a593Smuzhiyun 	kvm_arm_reset_debug_ptr(vcpu);
322*4882a593Smuzhiyun 
323*4882a593Smuzhiyun 	kvm_arm_pvtime_vcpu_init(&vcpu->arch);
324*4882a593Smuzhiyun 
325*4882a593Smuzhiyun 	vcpu->arch.hw_mmu = &vcpu->kvm->arch.mmu;
326*4882a593Smuzhiyun 
327*4882a593Smuzhiyun 	err = kvm_vgic_vcpu_init(vcpu);
328*4882a593Smuzhiyun 	if (err)
329*4882a593Smuzhiyun 		return err;
330*4882a593Smuzhiyun 
331*4882a593Smuzhiyun 	return create_hyp_mappings(vcpu, vcpu + 1, PAGE_HYP);
332*4882a593Smuzhiyun }
333*4882a593Smuzhiyun 
334*4882a593Smuzhiyun void kvm_arch_vcpu_postcreate(struct kvm_vcpu *vcpu)
335*4882a593Smuzhiyun {
336*4882a593Smuzhiyun }
337*4882a593Smuzhiyun 
338*4882a593Smuzhiyun void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu)
339*4882a593Smuzhiyun {
340*4882a593Smuzhiyun 	if (vcpu->arch.has_run_once && unlikely(!irqchip_in_kernel(vcpu->kvm)))
341*4882a593Smuzhiyun 		static_branch_dec(&userspace_irqchip_in_use);
342*4882a593Smuzhiyun 
343*4882a593Smuzhiyun 	kvm_mmu_free_memory_cache(&vcpu->arch.mmu_page_cache);
344*4882a593Smuzhiyun 	kvm_timer_vcpu_terminate(vcpu);
345*4882a593Smuzhiyun 	kvm_pmu_vcpu_destroy(vcpu);
346*4882a593Smuzhiyun 
347*4882a593Smuzhiyun 	kvm_arm_vcpu_destroy(vcpu);
348*4882a593Smuzhiyun }
349*4882a593Smuzhiyun 
350*4882a593Smuzhiyun int kvm_cpu_has_pending_timer(struct kvm_vcpu *vcpu)
351*4882a593Smuzhiyun {
352*4882a593Smuzhiyun 	return kvm_timer_is_pending(vcpu);
353*4882a593Smuzhiyun }
354*4882a593Smuzhiyun 
355*4882a593Smuzhiyun void kvm_arch_vcpu_blocking(struct kvm_vcpu *vcpu)
356*4882a593Smuzhiyun {
357*4882a593Smuzhiyun 	/*
358*4882a593Smuzhiyun 	 * If we're about to block (most likely because we've just hit a
359*4882a593Smuzhiyun 	 * WFI), we need to sync back the state of the GIC CPU interface
360*4882a593Smuzhiyun 	 * so that we have the latest PMR and group enables. This ensures
361*4882a593Smuzhiyun 	 * that kvm_arch_vcpu_runnable has up-to-date data to decide
362*4882a593Smuzhiyun 	 * whether we have pending interrupts.
363*4882a593Smuzhiyun 	 *
364*4882a593Smuzhiyun 	 * For the same reason, we want to tell GICv4 that we need
365*4882a593Smuzhiyun 	 * doorbells to be signalled, should an interrupt become pending.
366*4882a593Smuzhiyun 	 */
367*4882a593Smuzhiyun 	preempt_disable();
368*4882a593Smuzhiyun 	kvm_vgic_vmcr_sync(vcpu);
369*4882a593Smuzhiyun 	vgic_v4_put(vcpu, true);
370*4882a593Smuzhiyun 	preempt_enable();
371*4882a593Smuzhiyun }
372*4882a593Smuzhiyun 
373*4882a593Smuzhiyun void kvm_arch_vcpu_unblocking(struct kvm_vcpu *vcpu)
374*4882a593Smuzhiyun {
375*4882a593Smuzhiyun 	preempt_disable();
376*4882a593Smuzhiyun 	vgic_v4_load(vcpu);
377*4882a593Smuzhiyun 	preempt_enable();
378*4882a593Smuzhiyun }
379*4882a593Smuzhiyun 
380*4882a593Smuzhiyun void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
381*4882a593Smuzhiyun {
382*4882a593Smuzhiyun 	struct kvm_s2_mmu *mmu;
383*4882a593Smuzhiyun 	int *last_ran;
384*4882a593Smuzhiyun 
385*4882a593Smuzhiyun 	mmu = vcpu->arch.hw_mmu;
386*4882a593Smuzhiyun 	last_ran = this_cpu_ptr(mmu->last_vcpu_ran);
387*4882a593Smuzhiyun 
388*4882a593Smuzhiyun 	/*
389*4882a593Smuzhiyun 	 * We guarantee that both TLBs and I-cache are private to each
390*4882a593Smuzhiyun 	 * vcpu. If detecting that a vcpu from the same VM has
391*4882a593Smuzhiyun 	 * previously run on the same physical CPU, call into the
392*4882a593Smuzhiyun 	 * hypervisor code to nuke the relevant contexts.
393*4882a593Smuzhiyun 	 *
394*4882a593Smuzhiyun 	 * We might get preempted before the vCPU actually runs, but
395*4882a593Smuzhiyun 	 * over-invalidation doesn't affect correctness.
396*4882a593Smuzhiyun 	 */
397*4882a593Smuzhiyun 	if (*last_ran != vcpu->vcpu_id) {
398*4882a593Smuzhiyun 		kvm_call_hyp(__kvm_flush_cpu_context, mmu);
399*4882a593Smuzhiyun 		*last_ran = vcpu->vcpu_id;
400*4882a593Smuzhiyun 	}
401*4882a593Smuzhiyun 
402*4882a593Smuzhiyun 	vcpu->cpu = cpu;
403*4882a593Smuzhiyun 
404*4882a593Smuzhiyun 	kvm_vgic_load(vcpu);
405*4882a593Smuzhiyun 	kvm_timer_vcpu_load(vcpu);
406*4882a593Smuzhiyun 	if (has_vhe())
407*4882a593Smuzhiyun 		kvm_vcpu_load_sysregs_vhe(vcpu);
408*4882a593Smuzhiyun 	kvm_arch_vcpu_load_fp(vcpu);
409*4882a593Smuzhiyun 	kvm_vcpu_pmu_restore_guest(vcpu);
410*4882a593Smuzhiyun 	if (kvm_arm_is_pvtime_enabled(&vcpu->arch))
411*4882a593Smuzhiyun 		kvm_make_request(KVM_REQ_RECORD_STEAL, vcpu);
412*4882a593Smuzhiyun 
413*4882a593Smuzhiyun 	if (single_task_running())
414*4882a593Smuzhiyun 		vcpu_clear_wfx_traps(vcpu);
415*4882a593Smuzhiyun 	else
416*4882a593Smuzhiyun 		vcpu_set_wfx_traps(vcpu);
417*4882a593Smuzhiyun 
418*4882a593Smuzhiyun 	if (vcpu_has_ptrauth(vcpu))
419*4882a593Smuzhiyun 		vcpu_ptrauth_disable(vcpu);
420*4882a593Smuzhiyun 	kvm_arch_vcpu_load_debug_state_flags(vcpu);
421*4882a593Smuzhiyun }
422*4882a593Smuzhiyun 
423*4882a593Smuzhiyun void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
424*4882a593Smuzhiyun {
425*4882a593Smuzhiyun 	kvm_arch_vcpu_put_debug_state_flags(vcpu);
426*4882a593Smuzhiyun 	kvm_arch_vcpu_put_fp(vcpu);
427*4882a593Smuzhiyun 	if (has_vhe())
428*4882a593Smuzhiyun 		kvm_vcpu_put_sysregs_vhe(vcpu);
429*4882a593Smuzhiyun 	kvm_timer_vcpu_put(vcpu);
430*4882a593Smuzhiyun 	kvm_vgic_put(vcpu);
431*4882a593Smuzhiyun 	kvm_vcpu_pmu_restore_host(vcpu);
432*4882a593Smuzhiyun 
433*4882a593Smuzhiyun 	vcpu->cpu = -1;
434*4882a593Smuzhiyun }
435*4882a593Smuzhiyun 
436*4882a593Smuzhiyun static void vcpu_power_off(struct kvm_vcpu *vcpu)
437*4882a593Smuzhiyun {
438*4882a593Smuzhiyun 	vcpu->arch.power_off = true;
439*4882a593Smuzhiyun 	kvm_make_request(KVM_REQ_SLEEP, vcpu);
440*4882a593Smuzhiyun 	kvm_vcpu_kick(vcpu);
441*4882a593Smuzhiyun }
442*4882a593Smuzhiyun 
443*4882a593Smuzhiyun int kvm_arch_vcpu_ioctl_get_mpstate(struct kvm_vcpu *vcpu,
444*4882a593Smuzhiyun 				    struct kvm_mp_state *mp_state)
445*4882a593Smuzhiyun {
446*4882a593Smuzhiyun 	if (vcpu->arch.power_off)
447*4882a593Smuzhiyun 		mp_state->mp_state = KVM_MP_STATE_STOPPED;
448*4882a593Smuzhiyun 	else
449*4882a593Smuzhiyun 		mp_state->mp_state = KVM_MP_STATE_RUNNABLE;
450*4882a593Smuzhiyun 
451*4882a593Smuzhiyun 	return 0;
452*4882a593Smuzhiyun }
453*4882a593Smuzhiyun 
454*4882a593Smuzhiyun int kvm_arch_vcpu_ioctl_set_mpstate(struct kvm_vcpu *vcpu,
455*4882a593Smuzhiyun 				    struct kvm_mp_state *mp_state)
456*4882a593Smuzhiyun {
457*4882a593Smuzhiyun 	int ret = 0;
458*4882a593Smuzhiyun 
459*4882a593Smuzhiyun 	switch (mp_state->mp_state) {
460*4882a593Smuzhiyun 	case KVM_MP_STATE_RUNNABLE:
461*4882a593Smuzhiyun 		vcpu->arch.power_off = false;
462*4882a593Smuzhiyun 		break;
463*4882a593Smuzhiyun 	case KVM_MP_STATE_STOPPED:
464*4882a593Smuzhiyun 		vcpu_power_off(vcpu);
465*4882a593Smuzhiyun 		break;
466*4882a593Smuzhiyun 	default:
467*4882a593Smuzhiyun 		ret = -EINVAL;
468*4882a593Smuzhiyun 	}
469*4882a593Smuzhiyun 
470*4882a593Smuzhiyun 	return ret;
471*4882a593Smuzhiyun }
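/*
 * Illustrative userspace sketch (standard KVM API, not part of this file):
 *
 *	struct kvm_mp_state mp = { .mp_state = KVM_MP_STATE_STOPPED };
 *
 *	ioctl(vcpu_fd, KVM_SET_MP_STATE, &mp);
 *
 * parks the vcpu through vcpu_power_off() above; writing
 * KVM_MP_STATE_RUNNABLE makes it schedulable again.
 */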
472*4882a593Smuzhiyun 
473*4882a593Smuzhiyun /**
474*4882a593Smuzhiyun  * kvm_arch_vcpu_runnable - determine if the vcpu can be scheduled
475*4882a593Smuzhiyun  * @v:		The VCPU pointer
476*4882a593Smuzhiyun  *
477*4882a593Smuzhiyun  * If the guest CPU is not waiting for interrupts or an interrupt line is
478*4882a593Smuzhiyun  * asserted, the CPU is by definition runnable.
479*4882a593Smuzhiyun  */
480*4882a593Smuzhiyun int kvm_arch_vcpu_runnable(struct kvm_vcpu *v)
481*4882a593Smuzhiyun {
482*4882a593Smuzhiyun 	bool irq_lines = *vcpu_hcr(v) & (HCR_VI | HCR_VF);
483*4882a593Smuzhiyun 	return ((irq_lines || kvm_vgic_vcpu_pending_irq(v))
484*4882a593Smuzhiyun 		&& !v->arch.power_off && !v->arch.pause);
485*4882a593Smuzhiyun }
486*4882a593Smuzhiyun 
487*4882a593Smuzhiyun bool kvm_arch_vcpu_in_kernel(struct kvm_vcpu *vcpu)
488*4882a593Smuzhiyun {
489*4882a593Smuzhiyun 	return vcpu_mode_priv(vcpu);
490*4882a593Smuzhiyun }
491*4882a593Smuzhiyun 
492*4882a593Smuzhiyun /* Just ensure a guest exit from a particular CPU */
493*4882a593Smuzhiyun static void exit_vm_noop(void *info)
494*4882a593Smuzhiyun {
495*4882a593Smuzhiyun }
496*4882a593Smuzhiyun 
497*4882a593Smuzhiyun void force_vm_exit(const cpumask_t *mask)
498*4882a593Smuzhiyun {
499*4882a593Smuzhiyun 	preempt_disable();
500*4882a593Smuzhiyun 	smp_call_function_many(mask, exit_vm_noop, NULL, true);
501*4882a593Smuzhiyun 	preempt_enable();
502*4882a593Smuzhiyun }
503*4882a593Smuzhiyun 
504*4882a593Smuzhiyun /**
505*4882a593Smuzhiyun  * need_new_vmid_gen - check that the VMID is still valid
506*4882a593Smuzhiyun  * @vmid: The VMID to check
507*4882a593Smuzhiyun  *
508*4882a593Smuzhiyun  * return true if there is a new generation of VMIDs being used
509*4882a593Smuzhiyun  *
510*4882a593Smuzhiyun  * The hardware supports a limited set of values with the value zero reserved
511*4882a593Smuzhiyun  * for the host, so we check if an assigned value belongs to a previous
512*4882a593Smuzhiyun  * generation, which requires us to assign a new value. If we're the first to
513*4882a593Smuzhiyun  * use a VMID for the new generation, we must flush necessary caches and TLBs
514*4882a593Smuzhiyun  * on all CPUs.
515*4882a593Smuzhiyun  */
516*4882a593Smuzhiyun static bool need_new_vmid_gen(struct kvm_vmid *vmid)
517*4882a593Smuzhiyun {
518*4882a593Smuzhiyun 	u64 current_vmid_gen = atomic64_read(&kvm_vmid_gen);
519*4882a593Smuzhiyun 	smp_rmb(); /* Orders read of kvm_vmid_gen and kvm->arch.vmid */
520*4882a593Smuzhiyun 	return unlikely(READ_ONCE(vmid->vmid_gen) != current_vmid_gen);
521*4882a593Smuzhiyun }
522*4882a593Smuzhiyun 
523*4882a593Smuzhiyun /**
524*4882a593Smuzhiyun  * update_vmid - Update the vmid with a valid VMID for the current generation
525*4882a593Smuzhiyun  * @vmid: The stage-2 VMID information struct
526*4882a593Smuzhiyun  */
527*4882a593Smuzhiyun static void update_vmid(struct kvm_vmid *vmid)
528*4882a593Smuzhiyun {
529*4882a593Smuzhiyun 	if (!need_new_vmid_gen(vmid))
530*4882a593Smuzhiyun 		return;
531*4882a593Smuzhiyun 
532*4882a593Smuzhiyun 	spin_lock(&kvm_vmid_lock);
533*4882a593Smuzhiyun 
534*4882a593Smuzhiyun 	/*
535*4882a593Smuzhiyun 	 * We need to re-check the vmid_gen here to ensure that if another vcpu
536*4882a593Smuzhiyun 	 * already allocated a valid vmid for this vm, then this vcpu should
537*4882a593Smuzhiyun 	 * use the same vmid.
538*4882a593Smuzhiyun 	 */
539*4882a593Smuzhiyun 	if (!need_new_vmid_gen(vmid)) {
540*4882a593Smuzhiyun 		spin_unlock(&kvm_vmid_lock);
541*4882a593Smuzhiyun 		return;
542*4882a593Smuzhiyun 	}
543*4882a593Smuzhiyun 
544*4882a593Smuzhiyun 	/* First user of a new VMID generation? */
545*4882a593Smuzhiyun 	if (unlikely(kvm_next_vmid == 0)) {
546*4882a593Smuzhiyun 		atomic64_inc(&kvm_vmid_gen);
547*4882a593Smuzhiyun 		kvm_next_vmid = 1;
548*4882a593Smuzhiyun 
549*4882a593Smuzhiyun 		/*
550*4882a593Smuzhiyun 		 * On SMP we know no other CPUs can use this CPU's or each
551*4882a593Smuzhiyun 		 * other's VMID after force_vm_exit returns since the
552*4882a593Smuzhiyun 		 * kvm_vmid_lock blocks them from reentry to the guest.
553*4882a593Smuzhiyun 		 */
554*4882a593Smuzhiyun 		force_vm_exit(cpu_all_mask);
555*4882a593Smuzhiyun 		/*
556*4882a593Smuzhiyun 		 * Now broadcast TLB + ICACHE invalidation over the inner
557*4882a593Smuzhiyun 		 * shareable domain to make sure all data structures are
558*4882a593Smuzhiyun 		 * clean.
559*4882a593Smuzhiyun 		 */
560*4882a593Smuzhiyun 		kvm_call_hyp(__kvm_flush_vm_context);
561*4882a593Smuzhiyun 	}
562*4882a593Smuzhiyun 
563*4882a593Smuzhiyun 	vmid->vmid = kvm_next_vmid;
564*4882a593Smuzhiyun 	kvm_next_vmid++;
565*4882a593Smuzhiyun 	kvm_next_vmid &= (1 << kvm_get_vmid_bits()) - 1;
566*4882a593Smuzhiyun 
567*4882a593Smuzhiyun 	smp_wmb();
568*4882a593Smuzhiyun 	WRITE_ONCE(vmid->vmid_gen, atomic64_read(&kvm_vmid_gen));
569*4882a593Smuzhiyun 
570*4882a593Smuzhiyun 	spin_unlock(&kvm_vmid_lock);
571*4882a593Smuzhiyun }
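/*
 * Worked example of the rollover handled above, assuming 8-bit VMIDs: the
 * mask keeps kvm_next_vmid in [0, 255] and VMID 0 stays reserved for the
 * host, so one generation hands out VMIDs 1..255. The allocation after 255
 * wraps kvm_next_vmid back to 0, which makes the next caller bump
 * kvm_vmid_gen, force every guest to exit and flush the TLBs before any
 * VMID is reused.
 */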
572*4882a593Smuzhiyun 
573*4882a593Smuzhiyun static int kvm_vcpu_first_run_init(struct kvm_vcpu *vcpu)
574*4882a593Smuzhiyun {
575*4882a593Smuzhiyun 	struct kvm *kvm = vcpu->kvm;
576*4882a593Smuzhiyun 	int ret = 0;
577*4882a593Smuzhiyun 
578*4882a593Smuzhiyun 	if (likely(vcpu->arch.has_run_once))
579*4882a593Smuzhiyun 		return 0;
580*4882a593Smuzhiyun 
581*4882a593Smuzhiyun 	if (!kvm_arm_vcpu_is_finalized(vcpu))
582*4882a593Smuzhiyun 		return -EPERM;
583*4882a593Smuzhiyun 
584*4882a593Smuzhiyun 	vcpu->arch.has_run_once = true;
585*4882a593Smuzhiyun 
586*4882a593Smuzhiyun 	kvm_arm_vcpu_init_debug(vcpu);
587*4882a593Smuzhiyun 
588*4882a593Smuzhiyun 	if (likely(irqchip_in_kernel(kvm))) {
589*4882a593Smuzhiyun 		/*
590*4882a593Smuzhiyun 		 * Map the VGIC hardware resources before running a vcpu the
591*4882a593Smuzhiyun 		 * first time on this VM.
592*4882a593Smuzhiyun 		 */
593*4882a593Smuzhiyun 		ret = kvm_vgic_map_resources(kvm);
594*4882a593Smuzhiyun 		if (ret)
595*4882a593Smuzhiyun 			return ret;
596*4882a593Smuzhiyun 	} else {
597*4882a593Smuzhiyun 		/*
598*4882a593Smuzhiyun 		 * Tell the rest of the code that there are userspace irqchip
599*4882a593Smuzhiyun 		 * VMs in the wild.
600*4882a593Smuzhiyun 		 */
601*4882a593Smuzhiyun 		static_branch_inc(&userspace_irqchip_in_use);
602*4882a593Smuzhiyun 	}
603*4882a593Smuzhiyun 
604*4882a593Smuzhiyun 	ret = kvm_timer_enable(vcpu);
605*4882a593Smuzhiyun 	if (ret)
606*4882a593Smuzhiyun 		return ret;
607*4882a593Smuzhiyun 
608*4882a593Smuzhiyun 	ret = kvm_arm_pmu_v3_enable(vcpu);
609*4882a593Smuzhiyun 
610*4882a593Smuzhiyun 	return ret;
611*4882a593Smuzhiyun }
612*4882a593Smuzhiyun 
613*4882a593Smuzhiyun bool kvm_arch_intc_initialized(struct kvm *kvm)
614*4882a593Smuzhiyun {
615*4882a593Smuzhiyun 	return vgic_initialized(kvm);
616*4882a593Smuzhiyun }
617*4882a593Smuzhiyun 
618*4882a593Smuzhiyun void kvm_arm_halt_guest(struct kvm *kvm)
619*4882a593Smuzhiyun {
620*4882a593Smuzhiyun 	int i;
621*4882a593Smuzhiyun 	struct kvm_vcpu *vcpu;
622*4882a593Smuzhiyun 
623*4882a593Smuzhiyun 	kvm_for_each_vcpu(i, vcpu, kvm)
624*4882a593Smuzhiyun 		vcpu->arch.pause = true;
625*4882a593Smuzhiyun 	kvm_make_all_cpus_request(kvm, KVM_REQ_SLEEP);
626*4882a593Smuzhiyun }
627*4882a593Smuzhiyun 
628*4882a593Smuzhiyun void kvm_arm_resume_guest(struct kvm *kvm)
629*4882a593Smuzhiyun {
630*4882a593Smuzhiyun 	int i;
631*4882a593Smuzhiyun 	struct kvm_vcpu *vcpu;
632*4882a593Smuzhiyun 
633*4882a593Smuzhiyun 	kvm_for_each_vcpu(i, vcpu, kvm) {
634*4882a593Smuzhiyun 		vcpu->arch.pause = false;
635*4882a593Smuzhiyun 		rcuwait_wake_up(kvm_arch_vcpu_get_wait(vcpu));
636*4882a593Smuzhiyun 	}
637*4882a593Smuzhiyun }
638*4882a593Smuzhiyun 
639*4882a593Smuzhiyun static void vcpu_req_sleep(struct kvm_vcpu *vcpu)
640*4882a593Smuzhiyun {
641*4882a593Smuzhiyun 	struct rcuwait *wait = kvm_arch_vcpu_get_wait(vcpu);
642*4882a593Smuzhiyun 
643*4882a593Smuzhiyun 	rcuwait_wait_event(wait,
644*4882a593Smuzhiyun 			   (!vcpu->arch.power_off) &&(!vcpu->arch.pause),
645*4882a593Smuzhiyun 			   TASK_INTERRUPTIBLE);
646*4882a593Smuzhiyun 
647*4882a593Smuzhiyun 	if (vcpu->arch.power_off || vcpu->arch.pause) {
648*4882a593Smuzhiyun 		/* Awaken to handle a signal, request we sleep again later. */
649*4882a593Smuzhiyun 		kvm_make_request(KVM_REQ_SLEEP, vcpu);
650*4882a593Smuzhiyun 	}
651*4882a593Smuzhiyun 
652*4882a593Smuzhiyun 	/*
653*4882a593Smuzhiyun 	 * Make sure we will observe a potential reset request if we've
654*4882a593Smuzhiyun 	 * observed a change to the power state. Pairs with the smp_wmb() in
655*4882a593Smuzhiyun 	 * kvm_psci_vcpu_on().
656*4882a593Smuzhiyun 	 */
657*4882a593Smuzhiyun 	smp_rmb();
658*4882a593Smuzhiyun }
659*4882a593Smuzhiyun 
660*4882a593Smuzhiyun static int kvm_vcpu_initialized(struct kvm_vcpu *vcpu)
661*4882a593Smuzhiyun {
662*4882a593Smuzhiyun 	return vcpu->arch.target >= 0;
663*4882a593Smuzhiyun }
664*4882a593Smuzhiyun 
665*4882a593Smuzhiyun static void check_vcpu_requests(struct kvm_vcpu *vcpu)
666*4882a593Smuzhiyun {
667*4882a593Smuzhiyun 	if (kvm_request_pending(vcpu)) {
668*4882a593Smuzhiyun 		if (kvm_check_request(KVM_REQ_SLEEP, vcpu))
669*4882a593Smuzhiyun 			vcpu_req_sleep(vcpu);
670*4882a593Smuzhiyun 
671*4882a593Smuzhiyun 		if (kvm_check_request(KVM_REQ_VCPU_RESET, vcpu))
672*4882a593Smuzhiyun 			kvm_reset_vcpu(vcpu);
673*4882a593Smuzhiyun 
674*4882a593Smuzhiyun 		/*
675*4882a593Smuzhiyun 		 * Clear IRQ_PENDING requests that were made to guarantee
676*4882a593Smuzhiyun 		 * that a VCPU sees new virtual interrupts.
677*4882a593Smuzhiyun 		 */
678*4882a593Smuzhiyun 		kvm_check_request(KVM_REQ_IRQ_PENDING, vcpu);
679*4882a593Smuzhiyun 
680*4882a593Smuzhiyun 		if (kvm_check_request(KVM_REQ_RECORD_STEAL, vcpu))
681*4882a593Smuzhiyun 			kvm_update_stolen_time(vcpu);
682*4882a593Smuzhiyun 
683*4882a593Smuzhiyun 		if (kvm_check_request(KVM_REQ_RELOAD_GICv4, vcpu)) {
684*4882a593Smuzhiyun 			/* The distributor enable bits were changed */
685*4882a593Smuzhiyun 			preempt_disable();
686*4882a593Smuzhiyun 			vgic_v4_put(vcpu, false);
687*4882a593Smuzhiyun 			vgic_v4_load(vcpu);
688*4882a593Smuzhiyun 			preempt_enable();
689*4882a593Smuzhiyun 		}
690*4882a593Smuzhiyun 	}
691*4882a593Smuzhiyun }
692*4882a593Smuzhiyun 
693*4882a593Smuzhiyun static bool vcpu_mode_is_bad_32bit(struct kvm_vcpu *vcpu)
694*4882a593Smuzhiyun {
695*4882a593Smuzhiyun 	if (likely(!vcpu_mode_is_32bit(vcpu)))
696*4882a593Smuzhiyun 		return false;
697*4882a593Smuzhiyun 
698*4882a593Smuzhiyun 	return !kvm_supports_32bit_el0();
699*4882a593Smuzhiyun }
700*4882a593Smuzhiyun 
701*4882a593Smuzhiyun /**
702*4882a593Smuzhiyun  * kvm_arch_vcpu_ioctl_run - the main VCPU run function to execute guest code
703*4882a593Smuzhiyun  * @vcpu:	The VCPU pointer
704*4882a593Smuzhiyun  *
705*4882a593Smuzhiyun  * This function is called through the VCPU_RUN ioctl called from user space. It
706*4882a593Smuzhiyun  * will execute VM code in a loop until the time slice for the process is used
707*4882a593Smuzhiyun  * or some emulation is needed from user space in which case the function will
708*4882a593Smuzhiyun  * return with return value 0 and with the kvm_run structure filled in with the
709*4882a593Smuzhiyun  * required data for the requested emulation.
710*4882a593Smuzhiyun  */
711*4882a593Smuzhiyun int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu)
712*4882a593Smuzhiyun {
713*4882a593Smuzhiyun 	struct kvm_run *run = vcpu->run;
714*4882a593Smuzhiyun 	int ret;
715*4882a593Smuzhiyun 
716*4882a593Smuzhiyun 	if (unlikely(!kvm_vcpu_initialized(vcpu)))
717*4882a593Smuzhiyun 		return -ENOEXEC;
718*4882a593Smuzhiyun 
719*4882a593Smuzhiyun 	ret = kvm_vcpu_first_run_init(vcpu);
720*4882a593Smuzhiyun 	if (ret)
721*4882a593Smuzhiyun 		return ret;
722*4882a593Smuzhiyun 
723*4882a593Smuzhiyun 	if (run->exit_reason == KVM_EXIT_MMIO) {
724*4882a593Smuzhiyun 		ret = kvm_handle_mmio_return(vcpu);
725*4882a593Smuzhiyun 		if (ret)
726*4882a593Smuzhiyun 			return ret;
727*4882a593Smuzhiyun 	}
728*4882a593Smuzhiyun 
729*4882a593Smuzhiyun 	if (run->immediate_exit)
730*4882a593Smuzhiyun 		return -EINTR;
731*4882a593Smuzhiyun 
732*4882a593Smuzhiyun 	vcpu_load(vcpu);
733*4882a593Smuzhiyun 
734*4882a593Smuzhiyun 	kvm_sigset_activate(vcpu);
735*4882a593Smuzhiyun 
736*4882a593Smuzhiyun 	ret = 1;
737*4882a593Smuzhiyun 	run->exit_reason = KVM_EXIT_UNKNOWN;
738*4882a593Smuzhiyun 	while (ret > 0) {
739*4882a593Smuzhiyun 		/*
740*4882a593Smuzhiyun 		 * Check conditions before entering the guest
741*4882a593Smuzhiyun 		 */
742*4882a593Smuzhiyun 		cond_resched();
743*4882a593Smuzhiyun 
744*4882a593Smuzhiyun 		update_vmid(&vcpu->arch.hw_mmu->vmid);
745*4882a593Smuzhiyun 
746*4882a593Smuzhiyun 		check_vcpu_requests(vcpu);
747*4882a593Smuzhiyun 
748*4882a593Smuzhiyun 		/*
749*4882a593Smuzhiyun 		 * Preparing the interrupts to be injected also
750*4882a593Smuzhiyun 		 * involves poking the GIC, which must be done in a
751*4882a593Smuzhiyun 		 * non-preemptible context.
752*4882a593Smuzhiyun 		 */
753*4882a593Smuzhiyun 		preempt_disable();
754*4882a593Smuzhiyun 
755*4882a593Smuzhiyun 		kvm_pmu_flush_hwstate(vcpu);
756*4882a593Smuzhiyun 
757*4882a593Smuzhiyun 		local_irq_disable();
758*4882a593Smuzhiyun 
759*4882a593Smuzhiyun 		kvm_vgic_flush_hwstate(vcpu);
760*4882a593Smuzhiyun 
761*4882a593Smuzhiyun 		/*
762*4882a593Smuzhiyun 		 * Exit if we have a signal pending so that we can deliver the
763*4882a593Smuzhiyun 		 * signal to user space.
764*4882a593Smuzhiyun 		 */
765*4882a593Smuzhiyun 		if (signal_pending(current)) {
766*4882a593Smuzhiyun 			ret = -EINTR;
767*4882a593Smuzhiyun 			run->exit_reason = KVM_EXIT_INTR;
768*4882a593Smuzhiyun 		}
769*4882a593Smuzhiyun 
770*4882a593Smuzhiyun 		/*
771*4882a593Smuzhiyun 		 * If we're using a userspace irqchip, then check if we need
772*4882a593Smuzhiyun 		 * to tell a userspace irqchip about timer or PMU level
773*4882a593Smuzhiyun 		 * changes and if so, exit to userspace (the actual level
774*4882a593Smuzhiyun 		 * state gets updated in kvm_timer_update_run and
775*4882a593Smuzhiyun 		 * kvm_pmu_update_run below).
776*4882a593Smuzhiyun 		 */
777*4882a593Smuzhiyun 		if (static_branch_unlikely(&userspace_irqchip_in_use)) {
778*4882a593Smuzhiyun 			if (kvm_timer_should_notify_user(vcpu) ||
779*4882a593Smuzhiyun 			    kvm_pmu_should_notify_user(vcpu)) {
780*4882a593Smuzhiyun 				ret = -EINTR;
781*4882a593Smuzhiyun 				run->exit_reason = KVM_EXIT_INTR;
782*4882a593Smuzhiyun 			}
783*4882a593Smuzhiyun 		}
784*4882a593Smuzhiyun 
785*4882a593Smuzhiyun 		/*
786*4882a593Smuzhiyun 		 * Ensure we set mode to IN_GUEST_MODE after we disable
787*4882a593Smuzhiyun 		 * interrupts and before the final VCPU requests check.
788*4882a593Smuzhiyun 		 * See the comment in kvm_vcpu_exiting_guest_mode() and
789*4882a593Smuzhiyun 		 * Documentation/virt/kvm/vcpu-requests.rst
790*4882a593Smuzhiyun 		 */
791*4882a593Smuzhiyun 		smp_store_mb(vcpu->mode, IN_GUEST_MODE);
792*4882a593Smuzhiyun 
793*4882a593Smuzhiyun 		if (ret <= 0 || need_new_vmid_gen(&vcpu->arch.hw_mmu->vmid) ||
794*4882a593Smuzhiyun 		    kvm_request_pending(vcpu)) {
795*4882a593Smuzhiyun 			vcpu->mode = OUTSIDE_GUEST_MODE;
796*4882a593Smuzhiyun 			isb(); /* Ensure work in x_flush_hwstate is committed */
797*4882a593Smuzhiyun 			kvm_pmu_sync_hwstate(vcpu);
798*4882a593Smuzhiyun 			if (static_branch_unlikely(&userspace_irqchip_in_use))
799*4882a593Smuzhiyun 				kvm_timer_sync_user(vcpu);
800*4882a593Smuzhiyun 			kvm_vgic_sync_hwstate(vcpu);
801*4882a593Smuzhiyun 			local_irq_enable();
802*4882a593Smuzhiyun 			preempt_enable();
803*4882a593Smuzhiyun 			continue;
804*4882a593Smuzhiyun 		}
805*4882a593Smuzhiyun 
806*4882a593Smuzhiyun 		kvm_arm_setup_debug(vcpu);
807*4882a593Smuzhiyun 
808*4882a593Smuzhiyun 		/**************************************************************
809*4882a593Smuzhiyun 		 * Enter the guest
810*4882a593Smuzhiyun 		 */
811*4882a593Smuzhiyun 		trace_kvm_entry(*vcpu_pc(vcpu));
812*4882a593Smuzhiyun 		guest_enter_irqoff();
813*4882a593Smuzhiyun 
814*4882a593Smuzhiyun 		ret = kvm_call_hyp_ret(__kvm_vcpu_run, vcpu);
815*4882a593Smuzhiyun 
816*4882a593Smuzhiyun 		vcpu->mode = OUTSIDE_GUEST_MODE;
817*4882a593Smuzhiyun 		vcpu->stat.exits++;
818*4882a593Smuzhiyun 		/*
819*4882a593Smuzhiyun 		 * Back from guest
820*4882a593Smuzhiyun 		 *************************************************************/
821*4882a593Smuzhiyun 
822*4882a593Smuzhiyun 		kvm_arm_clear_debug(vcpu);
823*4882a593Smuzhiyun 
824*4882a593Smuzhiyun 		/*
825*4882a593Smuzhiyun 		 * We must sync the PMU state before the vgic state so
826*4882a593Smuzhiyun 		 * that the vgic can properly sample the updated state of the
827*4882a593Smuzhiyun 		 * interrupt line.
828*4882a593Smuzhiyun 		 */
829*4882a593Smuzhiyun 		kvm_pmu_sync_hwstate(vcpu);
830*4882a593Smuzhiyun 
831*4882a593Smuzhiyun 		/*
832*4882a593Smuzhiyun 		 * Sync the vgic state before syncing the timer state because
833*4882a593Smuzhiyun 		 * the timer code needs to know if the virtual timer
834*4882a593Smuzhiyun 		 * interrupts are active.
835*4882a593Smuzhiyun 		 */
836*4882a593Smuzhiyun 		kvm_vgic_sync_hwstate(vcpu);
837*4882a593Smuzhiyun 
838*4882a593Smuzhiyun 		/*
839*4882a593Smuzhiyun 		 * Sync the timer hardware state before enabling interrupts as
840*4882a593Smuzhiyun 		 * we don't want vtimer interrupts to race with syncing the
841*4882a593Smuzhiyun 		 * timer virtual interrupt state.
842*4882a593Smuzhiyun 		 */
843*4882a593Smuzhiyun 		if (static_branch_unlikely(&userspace_irqchip_in_use))
844*4882a593Smuzhiyun 			kvm_timer_sync_user(vcpu);
845*4882a593Smuzhiyun 
846*4882a593Smuzhiyun 		kvm_arch_vcpu_ctxsync_fp(vcpu);
847*4882a593Smuzhiyun 
848*4882a593Smuzhiyun 		/*
849*4882a593Smuzhiyun 		 * We may have taken a host interrupt in HYP mode (ie
850*4882a593Smuzhiyun 		 * while executing the guest). This interrupt is still
851*4882a593Smuzhiyun 		 * pending, as we haven't serviced it yet!
852*4882a593Smuzhiyun 		 *
853*4882a593Smuzhiyun 		 * We're now back in SVC mode, with interrupts
854*4882a593Smuzhiyun 		 * disabled.  Enabling the interrupts now will have
855*4882a593Smuzhiyun 		 * the effect of taking the interrupt again, in SVC
856*4882a593Smuzhiyun 		 * mode this time.
857*4882a593Smuzhiyun 		 */
858*4882a593Smuzhiyun 		local_irq_enable();
859*4882a593Smuzhiyun 
860*4882a593Smuzhiyun 		/*
861*4882a593Smuzhiyun 		 * We do local_irq_enable() before calling guest_exit() so
862*4882a593Smuzhiyun 		 * that if a timer interrupt hits while running the guest we
863*4882a593Smuzhiyun 		 * account that tick as being spent in the guest.  We enable
864*4882a593Smuzhiyun 		 * preemption after calling guest_exit() so that if we get
865*4882a593Smuzhiyun 		 * preempted we make sure ticks after that is not counted as
866*4882a593Smuzhiyun 		 * guest time.
867*4882a593Smuzhiyun 		 */
868*4882a593Smuzhiyun 		guest_exit();
869*4882a593Smuzhiyun 		trace_kvm_exit(ret, kvm_vcpu_trap_get_class(vcpu), *vcpu_pc(vcpu));
870*4882a593Smuzhiyun 
871*4882a593Smuzhiyun 		/* Exit types that need handling before we can be preempted */
872*4882a593Smuzhiyun 		handle_exit_early(vcpu, ret);
873*4882a593Smuzhiyun 
874*4882a593Smuzhiyun 		preempt_enable();
875*4882a593Smuzhiyun 
876*4882a593Smuzhiyun 		/*
877*4882a593Smuzhiyun 		 * The ARMv8 architecture doesn't give the hypervisor
878*4882a593Smuzhiyun 		 * a mechanism to prevent a guest from dropping to AArch32 EL0
879*4882a593Smuzhiyun 		 * if implemented by the CPU. If we spot the guest in such
880*4882a593Smuzhiyun 		 * state and that we decided it wasn't supposed to do so (like
881*4882a593Smuzhiyun 		 * with the asymmetric AArch32 case), return to userspace with
882*4882a593Smuzhiyun 		 * a fatal error.
883*4882a593Smuzhiyun 		 */
884*4882a593Smuzhiyun 		if (vcpu_mode_is_bad_32bit(vcpu)) {
885*4882a593Smuzhiyun 			/*
886*4882a593Smuzhiyun 			 * As we have caught the guest red-handed, decide that
887*4882a593Smuzhiyun 			 * it isn't fit for purpose anymore by making the vcpu
888*4882a593Smuzhiyun 			 * invalid. The VMM can try and fix it by issuing  a
889*4882a593Smuzhiyun 			 * KVM_ARM_VCPU_INIT if it really wants to.
890*4882a593Smuzhiyun 			 */
891*4882a593Smuzhiyun 			vcpu->arch.target = -1;
892*4882a593Smuzhiyun 			ret = ARM_EXCEPTION_IL;
893*4882a593Smuzhiyun 		}
894*4882a593Smuzhiyun 
895*4882a593Smuzhiyun 		ret = handle_exit(vcpu, ret);
896*4882a593Smuzhiyun 	}
897*4882a593Smuzhiyun 
898*4882a593Smuzhiyun 	/* Tell userspace about in-kernel device output levels */
899*4882a593Smuzhiyun 	if (unlikely(!irqchip_in_kernel(vcpu->kvm))) {
900*4882a593Smuzhiyun 		kvm_timer_update_run(vcpu);
901*4882a593Smuzhiyun 		kvm_pmu_update_run(vcpu);
902*4882a593Smuzhiyun 	}
903*4882a593Smuzhiyun 
904*4882a593Smuzhiyun 	kvm_sigset_deactivate(vcpu);
905*4882a593Smuzhiyun 
906*4882a593Smuzhiyun 	vcpu_put(vcpu);
907*4882a593Smuzhiyun 	return ret;
908*4882a593Smuzhiyun }
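/*
 * Illustrative userspace sketch (standard KVM API, not part of this file)
 * of the caller side of the run loop above. handle_mmio() stands in for a
 * hypothetical VMM helper:
 *
 *	struct kvm_run *run = mmap(NULL, mmap_size, PROT_READ | PROT_WRITE,
 *				   MAP_SHARED, vcpu_fd, 0);
 *
 *	for (;;) {
 *		if (ioctl(vcpu_fd, KVM_RUN, 0) < 0 && errno != EINTR)
 *			break;
 *		if (run->exit_reason == KVM_EXIT_MMIO)
 *			handle_mmio(run);
 *	}
 *
 * mmap_size comes from KVM_GET_VCPU_MMAP_SIZE on the /dev/kvm fd; the
 * completed MMIO emulation is picked up by kvm_handle_mmio_return() on the
 * next KVM_RUN, as seen at the top of kvm_arch_vcpu_ioctl_run().
 */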
909*4882a593Smuzhiyun 
910*4882a593Smuzhiyun static int vcpu_interrupt_line(struct kvm_vcpu *vcpu, int number, bool level)
911*4882a593Smuzhiyun {
912*4882a593Smuzhiyun 	int bit_index;
913*4882a593Smuzhiyun 	bool set;
914*4882a593Smuzhiyun 	unsigned long *hcr;
915*4882a593Smuzhiyun 
916*4882a593Smuzhiyun 	if (number == KVM_ARM_IRQ_CPU_IRQ)
917*4882a593Smuzhiyun 		bit_index = __ffs(HCR_VI);
918*4882a593Smuzhiyun 	else /* KVM_ARM_IRQ_CPU_FIQ */
919*4882a593Smuzhiyun 		bit_index = __ffs(HCR_VF);
920*4882a593Smuzhiyun 
921*4882a593Smuzhiyun 	hcr = vcpu_hcr(vcpu);
922*4882a593Smuzhiyun 	if (level)
923*4882a593Smuzhiyun 		set = test_and_set_bit(bit_index, hcr);
924*4882a593Smuzhiyun 	else
925*4882a593Smuzhiyun 		set = test_and_clear_bit(bit_index, hcr);
926*4882a593Smuzhiyun 
927*4882a593Smuzhiyun 	/*
928*4882a593Smuzhiyun 	 * If we didn't change anything, no need to wake up or kick other CPUs
929*4882a593Smuzhiyun 	 */
930*4882a593Smuzhiyun 	if (set == level)
931*4882a593Smuzhiyun 		return 0;
932*4882a593Smuzhiyun 
933*4882a593Smuzhiyun 	/*
934*4882a593Smuzhiyun 	 * The vcpu irq_lines field was updated, wake up sleeping VCPUs and
935*4882a593Smuzhiyun 	 * trigger a world-switch round on the running physical CPU to set the
936*4882a593Smuzhiyun 	 * virtual IRQ/FIQ fields in the HCR appropriately.
937*4882a593Smuzhiyun 	 */
938*4882a593Smuzhiyun 	kvm_make_request(KVM_REQ_IRQ_PENDING, vcpu);
939*4882a593Smuzhiyun 	kvm_vcpu_kick(vcpu);
940*4882a593Smuzhiyun 
941*4882a593Smuzhiyun 	return 0;
942*4882a593Smuzhiyun }
943*4882a593Smuzhiyun 
944*4882a593Smuzhiyun int kvm_vm_ioctl_irq_line(struct kvm *kvm, struct kvm_irq_level *irq_level,
945*4882a593Smuzhiyun 			  bool line_status)
946*4882a593Smuzhiyun {
947*4882a593Smuzhiyun 	u32 irq = irq_level->irq;
948*4882a593Smuzhiyun 	unsigned int irq_type, vcpu_idx, irq_num;
949*4882a593Smuzhiyun 	int nrcpus = atomic_read(&kvm->online_vcpus);
950*4882a593Smuzhiyun 	struct kvm_vcpu *vcpu = NULL;
951*4882a593Smuzhiyun 	bool level = irq_level->level;
952*4882a593Smuzhiyun 
953*4882a593Smuzhiyun 	irq_type = (irq >> KVM_ARM_IRQ_TYPE_SHIFT) & KVM_ARM_IRQ_TYPE_MASK;
954*4882a593Smuzhiyun 	vcpu_idx = (irq >> KVM_ARM_IRQ_VCPU_SHIFT) & KVM_ARM_IRQ_VCPU_MASK;
955*4882a593Smuzhiyun 	vcpu_idx += ((irq >> KVM_ARM_IRQ_VCPU2_SHIFT) & KVM_ARM_IRQ_VCPU2_MASK) * (KVM_ARM_IRQ_VCPU_MASK + 1);
956*4882a593Smuzhiyun 	irq_num = (irq >> KVM_ARM_IRQ_NUM_SHIFT) & KVM_ARM_IRQ_NUM_MASK;
957*4882a593Smuzhiyun 
958*4882a593Smuzhiyun 	trace_kvm_irq_line(irq_type, vcpu_idx, irq_num, irq_level->level);
959*4882a593Smuzhiyun 
960*4882a593Smuzhiyun 	switch (irq_type) {
961*4882a593Smuzhiyun 	case KVM_ARM_IRQ_TYPE_CPU:
962*4882a593Smuzhiyun 		if (irqchip_in_kernel(kvm))
963*4882a593Smuzhiyun 			return -ENXIO;
964*4882a593Smuzhiyun 
965*4882a593Smuzhiyun 		if (vcpu_idx >= nrcpus)
966*4882a593Smuzhiyun 			return -EINVAL;
967*4882a593Smuzhiyun 
968*4882a593Smuzhiyun 		vcpu = kvm_get_vcpu(kvm, vcpu_idx);
969*4882a593Smuzhiyun 		if (!vcpu)
970*4882a593Smuzhiyun 			return -EINVAL;
971*4882a593Smuzhiyun 
972*4882a593Smuzhiyun 		if (irq_num > KVM_ARM_IRQ_CPU_FIQ)
973*4882a593Smuzhiyun 			return -EINVAL;
974*4882a593Smuzhiyun 
975*4882a593Smuzhiyun 		return vcpu_interrupt_line(vcpu, irq_num, level);
976*4882a593Smuzhiyun 	case KVM_ARM_IRQ_TYPE_PPI:
977*4882a593Smuzhiyun 		if (!irqchip_in_kernel(kvm))
978*4882a593Smuzhiyun 			return -ENXIO;
979*4882a593Smuzhiyun 
980*4882a593Smuzhiyun 		if (vcpu_idx >= nrcpus)
981*4882a593Smuzhiyun 			return -EINVAL;
982*4882a593Smuzhiyun 
983*4882a593Smuzhiyun 		vcpu = kvm_get_vcpu(kvm, vcpu_idx);
984*4882a593Smuzhiyun 		if (!vcpu)
985*4882a593Smuzhiyun 			return -EINVAL;
986*4882a593Smuzhiyun 
987*4882a593Smuzhiyun 		if (irq_num < VGIC_NR_SGIS || irq_num >= VGIC_NR_PRIVATE_IRQS)
988*4882a593Smuzhiyun 			return -EINVAL;
989*4882a593Smuzhiyun 
990*4882a593Smuzhiyun 		return kvm_vgic_inject_irq(kvm, vcpu->vcpu_id, irq_num, level, NULL);
991*4882a593Smuzhiyun 	case KVM_ARM_IRQ_TYPE_SPI:
992*4882a593Smuzhiyun 		if (!irqchip_in_kernel(kvm))
993*4882a593Smuzhiyun 			return -ENXIO;
994*4882a593Smuzhiyun 
995*4882a593Smuzhiyun 		if (irq_num < VGIC_NR_PRIVATE_IRQS)
996*4882a593Smuzhiyun 			return -EINVAL;
997*4882a593Smuzhiyun 
998*4882a593Smuzhiyun 		return kvm_vgic_inject_irq(kvm, 0, irq_num, level, NULL);
999*4882a593Smuzhiyun 	}
1000*4882a593Smuzhiyun 
1001*4882a593Smuzhiyun 	return -EINVAL;
1002*4882a593Smuzhiyun }
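/*
 * Illustrative userspace sketch (standard KVM API, not part of this file):
 * injecting interrupt 37 (an SPI) as level-high through the in-kernel
 * vgic, using the field layout decoded above:
 *
 *	struct kvm_irq_level irq = {
 *		.irq   = (KVM_ARM_IRQ_TYPE_SPI << KVM_ARM_IRQ_TYPE_SHIFT) |
 *			 (37 << KVM_ARM_IRQ_NUM_SHIFT),
 *		.level = 1,
 *	};
 *
 *	ioctl(vm_fd, KVM_IRQ_LINE, &irq);
 *
 * SPI numbers below VGIC_NR_PRIVATE_IRQS (32) are rejected with -EINVAL.
 */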
1003*4882a593Smuzhiyun 
1004*4882a593Smuzhiyun static int kvm_vcpu_set_target(struct kvm_vcpu *vcpu,
1005*4882a593Smuzhiyun 			       const struct kvm_vcpu_init *init)
1006*4882a593Smuzhiyun {
1007*4882a593Smuzhiyun 	unsigned int i, ret;
1008*4882a593Smuzhiyun 	int phys_target = kvm_target_cpu();
1009*4882a593Smuzhiyun 
1010*4882a593Smuzhiyun 	if (init->target != phys_target)
1011*4882a593Smuzhiyun 		return -EINVAL;
1012*4882a593Smuzhiyun 
1013*4882a593Smuzhiyun 	/*
1014*4882a593Smuzhiyun 	 * Secondary and subsequent calls to KVM_ARM_VCPU_INIT must
1015*4882a593Smuzhiyun 	 * use the same target.
1016*4882a593Smuzhiyun 	 */
1017*4882a593Smuzhiyun 	if (vcpu->arch.target != -1 && vcpu->arch.target != init->target)
1018*4882a593Smuzhiyun 		return -EINVAL;
1019*4882a593Smuzhiyun 
1020*4882a593Smuzhiyun 	/* -ENOENT for unknown features, -EINVAL for invalid combinations. */
1021*4882a593Smuzhiyun 	for (i = 0; i < sizeof(init->features) * 8; i++) {
1022*4882a593Smuzhiyun 		bool set = (init->features[i / 32] & (1 << (i % 32)));
1023*4882a593Smuzhiyun 
1024*4882a593Smuzhiyun 		if (set && i >= KVM_VCPU_MAX_FEATURES)
1025*4882a593Smuzhiyun 			return -ENOENT;
1026*4882a593Smuzhiyun 
1027*4882a593Smuzhiyun 		/*
1028*4882a593Smuzhiyun 		 * Secondary and subsequent calls to KVM_ARM_VCPU_INIT must
1029*4882a593Smuzhiyun 		 * use the same feature set.
1030*4882a593Smuzhiyun 		 */
1031*4882a593Smuzhiyun 		if (vcpu->arch.target != -1 && i < KVM_VCPU_MAX_FEATURES &&
1032*4882a593Smuzhiyun 		    test_bit(i, vcpu->arch.features) != set)
1033*4882a593Smuzhiyun 			return -EINVAL;
1034*4882a593Smuzhiyun 
1035*4882a593Smuzhiyun 		if (set)
1036*4882a593Smuzhiyun 			set_bit(i, vcpu->arch.features);
1037*4882a593Smuzhiyun 	}
1038*4882a593Smuzhiyun 
1039*4882a593Smuzhiyun 	vcpu->arch.target = phys_target;
1040*4882a593Smuzhiyun 
1041*4882a593Smuzhiyun 	/* Now we know what it is, we can reset it. */
1042*4882a593Smuzhiyun 	ret = kvm_reset_vcpu(vcpu);
1043*4882a593Smuzhiyun 	if (ret) {
1044*4882a593Smuzhiyun 		vcpu->arch.target = -1;
1045*4882a593Smuzhiyun 		bitmap_zero(vcpu->arch.features, KVM_VCPU_MAX_FEATURES);
1046*4882a593Smuzhiyun 	}
1047*4882a593Smuzhiyun 
1048*4882a593Smuzhiyun 	return ret;
1049*4882a593Smuzhiyun }
1050*4882a593Smuzhiyun 
1051*4882a593Smuzhiyun static int kvm_arch_vcpu_ioctl_vcpu_init(struct kvm_vcpu *vcpu,
1052*4882a593Smuzhiyun 					 struct kvm_vcpu_init *init)
1053*4882a593Smuzhiyun {
1054*4882a593Smuzhiyun 	int ret;
1055*4882a593Smuzhiyun 
1056*4882a593Smuzhiyun 	ret = kvm_vcpu_set_target(vcpu, init);
1057*4882a593Smuzhiyun 	if (ret)
1058*4882a593Smuzhiyun 		return ret;
1059*4882a593Smuzhiyun 
1060*4882a593Smuzhiyun 	/*
1061*4882a593Smuzhiyun 	 * Ensure a rebooted VM will fault in RAM pages and detect if the
1062*4882a593Smuzhiyun 	 * guest MMU is turned off and flush the caches as needed.
1063*4882a593Smuzhiyun 	 *
1064*4882a593Smuzhiyun 	 * S2FWB enforces all memory accesses to RAM being cacheable,
1065*4882a593Smuzhiyun 	 * ensuring that the data side is always coherent. We still
1066*4882a593Smuzhiyun 	 * need to invalidate the I-cache though, as FWB does *not*
1067*4882a593Smuzhiyun 	 * imply CTR_EL0.DIC.
1068*4882a593Smuzhiyun 	 */
1069*4882a593Smuzhiyun 	if (vcpu->arch.has_run_once) {
1070*4882a593Smuzhiyun 		if (!cpus_have_final_cap(ARM64_HAS_STAGE2_FWB))
1071*4882a593Smuzhiyun 			stage2_unmap_vm(vcpu->kvm);
1072*4882a593Smuzhiyun 		else
1073*4882a593Smuzhiyun 			__flush_icache_all();
1074*4882a593Smuzhiyun 	}
1075*4882a593Smuzhiyun 
1076*4882a593Smuzhiyun 	vcpu_reset_hcr(vcpu);
1077*4882a593Smuzhiyun 
1078*4882a593Smuzhiyun 	/*
1079*4882a593Smuzhiyun 	 * Handle the "start in power-off" case.
1080*4882a593Smuzhiyun 	 */
1081*4882a593Smuzhiyun 	if (test_bit(KVM_ARM_VCPU_POWER_OFF, vcpu->arch.features))
1082*4882a593Smuzhiyun 		vcpu_power_off(vcpu);
1083*4882a593Smuzhiyun 	else
1084*4882a593Smuzhiyun 		vcpu->arch.power_off = false;
1085*4882a593Smuzhiyun 
1086*4882a593Smuzhiyun 	return 0;
1087*4882a593Smuzhiyun }
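/*
 * Illustrative userspace sketch (standard KVM API, not part of this file):
 * the usual way to satisfy the target check in kvm_vcpu_set_target() is to
 * ask the kernel for the preferred target first:
 *
 *	struct kvm_vcpu_init init;
 *
 *	ioctl(vm_fd, KVM_ARM_PREFERRED_TARGET, &init);
 *	init.features[0] |= 1 << KVM_ARM_VCPU_PSCI_0_2;
 *	ioctl(vcpu_fd, KVM_ARM_VCPU_INIT, &init);
 */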
1088*4882a593Smuzhiyun 
1089*4882a593Smuzhiyun static int kvm_arm_vcpu_set_attr(struct kvm_vcpu *vcpu,
1090*4882a593Smuzhiyun 				 struct kvm_device_attr *attr)
1091*4882a593Smuzhiyun {
1092*4882a593Smuzhiyun 	int ret = -ENXIO;
1093*4882a593Smuzhiyun 
1094*4882a593Smuzhiyun 	switch (attr->group) {
1095*4882a593Smuzhiyun 	default:
1096*4882a593Smuzhiyun 		ret = kvm_arm_vcpu_arch_set_attr(vcpu, attr);
1097*4882a593Smuzhiyun 		break;
1098*4882a593Smuzhiyun 	}
1099*4882a593Smuzhiyun 
1100*4882a593Smuzhiyun 	return ret;
1101*4882a593Smuzhiyun }
1102*4882a593Smuzhiyun 
1103*4882a593Smuzhiyun static int kvm_arm_vcpu_get_attr(struct kvm_vcpu *vcpu,
1104*4882a593Smuzhiyun 				 struct kvm_device_attr *attr)
1105*4882a593Smuzhiyun {
1106*4882a593Smuzhiyun 	int ret = -ENXIO;
1107*4882a593Smuzhiyun 
1108*4882a593Smuzhiyun 	switch (attr->group) {
1109*4882a593Smuzhiyun 	default:
1110*4882a593Smuzhiyun 		ret = kvm_arm_vcpu_arch_get_attr(vcpu, attr);
1111*4882a593Smuzhiyun 		break;
1112*4882a593Smuzhiyun 	}
1113*4882a593Smuzhiyun 
1114*4882a593Smuzhiyun 	return ret;
1115*4882a593Smuzhiyun }
1116*4882a593Smuzhiyun 
1117*4882a593Smuzhiyun static int kvm_arm_vcpu_has_attr(struct kvm_vcpu *vcpu,
1118*4882a593Smuzhiyun 				 struct kvm_device_attr *attr)
1119*4882a593Smuzhiyun {
1120*4882a593Smuzhiyun 	int ret = -ENXIO;
1121*4882a593Smuzhiyun 
1122*4882a593Smuzhiyun 	switch (attr->group) {
1123*4882a593Smuzhiyun 	default:
1124*4882a593Smuzhiyun 		ret = kvm_arm_vcpu_arch_has_attr(vcpu, attr);
1125*4882a593Smuzhiyun 		break;
1126*4882a593Smuzhiyun 	}
1127*4882a593Smuzhiyun 
1128*4882a593Smuzhiyun 	return ret;
1129*4882a593Smuzhiyun }
1130*4882a593Smuzhiyun 
1131*4882a593Smuzhiyun static int kvm_arm_vcpu_get_events(struct kvm_vcpu *vcpu,
1132*4882a593Smuzhiyun 				   struct kvm_vcpu_events *events)
1133*4882a593Smuzhiyun {
1134*4882a593Smuzhiyun 	memset(events, 0, sizeof(*events));
1135*4882a593Smuzhiyun 
1136*4882a593Smuzhiyun 	return __kvm_arm_vcpu_get_events(vcpu, events);
1137*4882a593Smuzhiyun }
1138*4882a593Smuzhiyun 
1139*4882a593Smuzhiyun static int kvm_arm_vcpu_set_events(struct kvm_vcpu *vcpu,
1140*4882a593Smuzhiyun 				   struct kvm_vcpu_events *events)
1141*4882a593Smuzhiyun {
1142*4882a593Smuzhiyun 	int i;
1143*4882a593Smuzhiyun 
1144*4882a593Smuzhiyun 	/* check whether the reserved field is zero */
1145*4882a593Smuzhiyun 	for (i = 0; i < ARRAY_SIZE(events->reserved); i++)
1146*4882a593Smuzhiyun 		if (events->reserved[i])
1147*4882a593Smuzhiyun 			return -EINVAL;
1148*4882a593Smuzhiyun 
1149*4882a593Smuzhiyun 	/* check whether the pad field is zero */
1150*4882a593Smuzhiyun 	for (i = 0; i < ARRAY_SIZE(events->exception.pad); i++)
1151*4882a593Smuzhiyun 		if (events->exception.pad[i])
1152*4882a593Smuzhiyun 			return -EINVAL;
1153*4882a593Smuzhiyun 
1154*4882a593Smuzhiyun 	return __kvm_arm_vcpu_set_events(vcpu, events);
1155*4882a593Smuzhiyun }
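/*
 * Illustrative userspace sketch (assumption: the arm64 layout of struct
 * kvm_vcpu_events from <linux/kvm.h>, not part of this file): injecting a
 * virtual SError through the path above.
 *
 *	struct kvm_vcpu_events ev = { 0 };
 *
 *	ev.exception.serror_pending = 1;
 *	ioctl(vcpu_fd, KVM_SET_VCPU_EVENTS, &ev);
 *
 * Non-zero reserved/pad fields are rejected with -EINVAL before the
 * architecture-specific handler runs.
 */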
1156*4882a593Smuzhiyun 
1157*4882a593Smuzhiyun long kvm_arch_vcpu_ioctl(struct file *filp,
1158*4882a593Smuzhiyun 			 unsigned int ioctl, unsigned long arg)
1159*4882a593Smuzhiyun {
1160*4882a593Smuzhiyun 	struct kvm_vcpu *vcpu = filp->private_data;
1161*4882a593Smuzhiyun 	void __user *argp = (void __user *)arg;
1162*4882a593Smuzhiyun 	struct kvm_device_attr attr;
1163*4882a593Smuzhiyun 	long r;
1164*4882a593Smuzhiyun 
1165*4882a593Smuzhiyun 	switch (ioctl) {
1166*4882a593Smuzhiyun 	case KVM_ARM_VCPU_INIT: {
1167*4882a593Smuzhiyun 		struct kvm_vcpu_init init;
1168*4882a593Smuzhiyun 
1169*4882a593Smuzhiyun 		r = -EFAULT;
1170*4882a593Smuzhiyun 		if (copy_from_user(&init, argp, sizeof(init)))
1171*4882a593Smuzhiyun 			break;
1172*4882a593Smuzhiyun 
1173*4882a593Smuzhiyun 		r = kvm_arch_vcpu_ioctl_vcpu_init(vcpu, &init);
1174*4882a593Smuzhiyun 		break;
1175*4882a593Smuzhiyun 	}
1176*4882a593Smuzhiyun 	case KVM_SET_ONE_REG:
1177*4882a593Smuzhiyun 	case KVM_GET_ONE_REG: {
1178*4882a593Smuzhiyun 		struct kvm_one_reg reg;
1179*4882a593Smuzhiyun 
1180*4882a593Smuzhiyun 		r = -ENOEXEC;
1181*4882a593Smuzhiyun 		if (unlikely(!kvm_vcpu_initialized(vcpu)))
1182*4882a593Smuzhiyun 			break;
1183*4882a593Smuzhiyun 
1184*4882a593Smuzhiyun 		r = -EFAULT;
1185*4882a593Smuzhiyun 		if (copy_from_user(&reg, argp, sizeof(reg)))
1186*4882a593Smuzhiyun 			break;
1187*4882a593Smuzhiyun 
1188*4882a593Smuzhiyun 		/*
1189*4882a593Smuzhiyun 		 * We could owe a reset due to PSCI. Handle the pending reset
1190*4882a593Smuzhiyun 		 * here to ensure userspace register accesses are ordered after
1191*4882a593Smuzhiyun 		 * the reset.
1192*4882a593Smuzhiyun 		 */
1193*4882a593Smuzhiyun 		if (kvm_check_request(KVM_REQ_VCPU_RESET, vcpu))
1194*4882a593Smuzhiyun 			kvm_reset_vcpu(vcpu);
1195*4882a593Smuzhiyun 
1196*4882a593Smuzhiyun 		if (ioctl == KVM_SET_ONE_REG)
1197*4882a593Smuzhiyun 			r = kvm_arm_set_reg(vcpu, &reg);
1198*4882a593Smuzhiyun 		else
1199*4882a593Smuzhiyun 			r = kvm_arm_get_reg(vcpu, &reg);
1200*4882a593Smuzhiyun 		break;
1201*4882a593Smuzhiyun 	}
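	/*
	 * KVM_GET_REG_LIST is a two-step protocol, as the code below shows:
	 * userspace passes the number of entries its reg[] buffer can hold in
	 * reg_list.n, the kernel writes back the real register count, and if
	 * the buffer was too small -E2BIG tells userspace to retry with at
	 * least reg_list.n entries.
	 */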
1202*4882a593Smuzhiyun 	case KVM_GET_REG_LIST: {
1203*4882a593Smuzhiyun 		struct kvm_reg_list __user *user_list = argp;
1204*4882a593Smuzhiyun 		struct kvm_reg_list reg_list;
1205*4882a593Smuzhiyun 		unsigned n;
1206*4882a593Smuzhiyun 
1207*4882a593Smuzhiyun 		r = -ENOEXEC;
1208*4882a593Smuzhiyun 		if (unlikely(!kvm_vcpu_initialized(vcpu)))
1209*4882a593Smuzhiyun 			break;
1210*4882a593Smuzhiyun 
1211*4882a593Smuzhiyun 		r = -EPERM;
1212*4882a593Smuzhiyun 		if (!kvm_arm_vcpu_is_finalized(vcpu))
1213*4882a593Smuzhiyun 			break;
1214*4882a593Smuzhiyun 
1215*4882a593Smuzhiyun 		r = -EFAULT;
1216*4882a593Smuzhiyun 		if (copy_from_user(&reg_list, user_list, sizeof(reg_list)))
1217*4882a593Smuzhiyun 			break;
1218*4882a593Smuzhiyun 		n = reg_list.n;
1219*4882a593Smuzhiyun 		reg_list.n = kvm_arm_num_regs(vcpu);
1220*4882a593Smuzhiyun 		if (copy_to_user(user_list, &reg_list, sizeof(reg_list)))
1221*4882a593Smuzhiyun 			break;
1222*4882a593Smuzhiyun 		r = -E2BIG;
1223*4882a593Smuzhiyun 		if (n < reg_list.n)
1224*4882a593Smuzhiyun 			break;
1225*4882a593Smuzhiyun 		r = kvm_arm_copy_reg_indices(vcpu, user_list->reg);
1226*4882a593Smuzhiyun 		break;
1227*4882a593Smuzhiyun 	}
1228*4882a593Smuzhiyun 	case KVM_SET_DEVICE_ATTR: {
1229*4882a593Smuzhiyun 		r = -EFAULT;
1230*4882a593Smuzhiyun 		if (copy_from_user(&attr, argp, sizeof(attr)))
1231*4882a593Smuzhiyun 			break;
1232*4882a593Smuzhiyun 		r = kvm_arm_vcpu_set_attr(vcpu, &attr);
1233*4882a593Smuzhiyun 		break;
1234*4882a593Smuzhiyun 	}
1235*4882a593Smuzhiyun 	case KVM_GET_DEVICE_ATTR: {
1236*4882a593Smuzhiyun 		r = -EFAULT;
1237*4882a593Smuzhiyun 		if (copy_from_user(&attr, argp, sizeof(attr)))
1238*4882a593Smuzhiyun 			break;
1239*4882a593Smuzhiyun 		r = kvm_arm_vcpu_get_attr(vcpu, &attr);
1240*4882a593Smuzhiyun 		break;
1241*4882a593Smuzhiyun 	}
1242*4882a593Smuzhiyun 	case KVM_HAS_DEVICE_ATTR: {
1243*4882a593Smuzhiyun 		r = -EFAULT;
1244*4882a593Smuzhiyun 		if (copy_from_user(&attr, argp, sizeof(attr)))
1245*4882a593Smuzhiyun 			break;
1246*4882a593Smuzhiyun 		r = kvm_arm_vcpu_has_attr(vcpu, &attr);
1247*4882a593Smuzhiyun 		break;
1248*4882a593Smuzhiyun 	}
1249*4882a593Smuzhiyun 	case KVM_GET_VCPU_EVENTS: {
1250*4882a593Smuzhiyun 		struct kvm_vcpu_events events;
1251*4882a593Smuzhiyun 
1252*4882a593Smuzhiyun 		if (kvm_arm_vcpu_get_events(vcpu, &events))
1253*4882a593Smuzhiyun 			return -EINVAL;
1254*4882a593Smuzhiyun 
1255*4882a593Smuzhiyun 		if (copy_to_user(argp, &events, sizeof(events)))
1256*4882a593Smuzhiyun 			return -EFAULT;
1257*4882a593Smuzhiyun 
1258*4882a593Smuzhiyun 		return 0;
1259*4882a593Smuzhiyun 	}
1260*4882a593Smuzhiyun 	case KVM_SET_VCPU_EVENTS: {
1261*4882a593Smuzhiyun 		struct kvm_vcpu_events events;
1262*4882a593Smuzhiyun 
1263*4882a593Smuzhiyun 		if (copy_from_user(&events, argp, sizeof(events)))
1264*4882a593Smuzhiyun 			return -EFAULT;
1265*4882a593Smuzhiyun 
1266*4882a593Smuzhiyun 		return kvm_arm_vcpu_set_events(vcpu, &events);
1267*4882a593Smuzhiyun 	}
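	/*
	 * KVM_ARM_VCPU_FINALIZE takes an int selecting which vcpu feature set
	 * to finalize (e.g. KVM_ARM_VCPU_SVE), and is only valid once the vcpu
	 * has been initialized with KVM_ARM_VCPU_INIT, as checked below.
	 */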
1268*4882a593Smuzhiyun 	case KVM_ARM_VCPU_FINALIZE: {
1269*4882a593Smuzhiyun 		int what;
1270*4882a593Smuzhiyun 
1271*4882a593Smuzhiyun 		if (!kvm_vcpu_initialized(vcpu))
1272*4882a593Smuzhiyun 			return -ENOEXEC;
1273*4882a593Smuzhiyun 
1274*4882a593Smuzhiyun 		if (get_user(what, (const int __user *)argp))
1275*4882a593Smuzhiyun 			return -EFAULT;
1276*4882a593Smuzhiyun 
1277*4882a593Smuzhiyun 		return kvm_arm_vcpu_finalize(vcpu, what);
1278*4882a593Smuzhiyun 	}
1279*4882a593Smuzhiyun 	default:
1280*4882a593Smuzhiyun 		r = -EINVAL;
1281*4882a593Smuzhiyun 	}
1282*4882a593Smuzhiyun 
1283*4882a593Smuzhiyun 	return r;
1284*4882a593Smuzhiyun }
1285*4882a593Smuzhiyun 
kvm_arch_sync_dirty_log(struct kvm * kvm,struct kvm_memory_slot * memslot)1286*4882a593Smuzhiyun void kvm_arch_sync_dirty_log(struct kvm *kvm, struct kvm_memory_slot *memslot)
1287*4882a593Smuzhiyun {
1288*4882a593Smuzhiyun 
1289*4882a593Smuzhiyun }
1290*4882a593Smuzhiyun 
kvm_arch_flush_remote_tlbs_memslot(struct kvm * kvm,struct kvm_memory_slot * memslot)1291*4882a593Smuzhiyun void kvm_arch_flush_remote_tlbs_memslot(struct kvm *kvm,
1292*4882a593Smuzhiyun 					struct kvm_memory_slot *memslot)
1293*4882a593Smuzhiyun {
1294*4882a593Smuzhiyun 	kvm_flush_remote_tlbs(kvm);
1295*4882a593Smuzhiyun }
1296*4882a593Smuzhiyun 
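/*
 * Illustrative userspace encoding for KVM_ARM_SET_DEVICE_ADDR (a sketch, not
 * taken from this file): the id field packs a device id and an address type,
 * so placing the GICv2 distributor would typically use
 *
 *	id = (KVM_ARM_DEVICE_VGIC_V2 << KVM_ARM_DEVICE_ID_SHIFT) |
 *	     (KVM_VGIC_V2_ADDR_TYPE_DIST << KVM_ARM_DEVICE_TYPE_SHIFT);
 *
 * which kvm_vm_ioctl_set_device_addr() below decodes with the corresponding
 * masks and shifts.
 */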
kvm_vm_ioctl_set_device_addr(struct kvm * kvm,struct kvm_arm_device_addr * dev_addr)1297*4882a593Smuzhiyun static int kvm_vm_ioctl_set_device_addr(struct kvm *kvm,
1298*4882a593Smuzhiyun 					struct kvm_arm_device_addr *dev_addr)
1299*4882a593Smuzhiyun {
1300*4882a593Smuzhiyun 	unsigned long dev_id, type;
1301*4882a593Smuzhiyun 
1302*4882a593Smuzhiyun 	dev_id = (dev_addr->id & KVM_ARM_DEVICE_ID_MASK) >>
1303*4882a593Smuzhiyun 		KVM_ARM_DEVICE_ID_SHIFT;
1304*4882a593Smuzhiyun 	type = (dev_addr->id & KVM_ARM_DEVICE_TYPE_MASK) >>
1305*4882a593Smuzhiyun 		KVM_ARM_DEVICE_TYPE_SHIFT;
1306*4882a593Smuzhiyun 
1307*4882a593Smuzhiyun 	switch (dev_id) {
1308*4882a593Smuzhiyun 	case KVM_ARM_DEVICE_VGIC_V2:
1309*4882a593Smuzhiyun 		if (!vgic_present)
1310*4882a593Smuzhiyun 			return -ENXIO;
1311*4882a593Smuzhiyun 		return kvm_vgic_addr(kvm, type, &dev_addr->addr, true);
1312*4882a593Smuzhiyun 	default:
1313*4882a593Smuzhiyun 		return -ENODEV;
1314*4882a593Smuzhiyun 	}
1315*4882a593Smuzhiyun }
1316*4882a593Smuzhiyun 
kvm_arch_vm_ioctl(struct file * filp,unsigned int ioctl,unsigned long arg)1317*4882a593Smuzhiyun long kvm_arch_vm_ioctl(struct file *filp,
1318*4882a593Smuzhiyun 		       unsigned int ioctl, unsigned long arg)
1319*4882a593Smuzhiyun {
1320*4882a593Smuzhiyun 	struct kvm *kvm = filp->private_data;
1321*4882a593Smuzhiyun 	void __user *argp = (void __user *)arg;
1322*4882a593Smuzhiyun 
1323*4882a593Smuzhiyun 	switch (ioctl) {
1324*4882a593Smuzhiyun 	case KVM_CREATE_IRQCHIP: {
1325*4882a593Smuzhiyun 		int ret;
1326*4882a593Smuzhiyun 		if (!vgic_present)
1327*4882a593Smuzhiyun 			return -ENXIO;
1328*4882a593Smuzhiyun 		mutex_lock(&kvm->lock);
1329*4882a593Smuzhiyun 		ret = kvm_vgic_create(kvm, KVM_DEV_TYPE_ARM_VGIC_V2);
1330*4882a593Smuzhiyun 		mutex_unlock(&kvm->lock);
1331*4882a593Smuzhiyun 		return ret;
1332*4882a593Smuzhiyun 	}
1333*4882a593Smuzhiyun 	case KVM_ARM_SET_DEVICE_ADDR: {
1334*4882a593Smuzhiyun 		struct kvm_arm_device_addr dev_addr;
1335*4882a593Smuzhiyun 
1336*4882a593Smuzhiyun 		if (copy_from_user(&dev_addr, argp, sizeof(dev_addr)))
1337*4882a593Smuzhiyun 			return -EFAULT;
1338*4882a593Smuzhiyun 		return kvm_vm_ioctl_set_device_addr(kvm, &dev_addr);
1339*4882a593Smuzhiyun 	}
1340*4882a593Smuzhiyun 	case KVM_ARM_PREFERRED_TARGET: {
1341*4882a593Smuzhiyun 		int err;
1342*4882a593Smuzhiyun 		struct kvm_vcpu_init init;
1343*4882a593Smuzhiyun 
1344*4882a593Smuzhiyun 		err = kvm_vcpu_preferred_target(&init);
1345*4882a593Smuzhiyun 		if (err)
1346*4882a593Smuzhiyun 			return err;
1347*4882a593Smuzhiyun 
1348*4882a593Smuzhiyun 		if (copy_to_user(argp, &init, sizeof(init)))
1349*4882a593Smuzhiyun 			return -EFAULT;
1350*4882a593Smuzhiyun 
1351*4882a593Smuzhiyun 		return 0;
1352*4882a593Smuzhiyun 	}
1353*4882a593Smuzhiyun 	default:
1354*4882a593Smuzhiyun 		return -EINVAL;
1355*4882a593Smuzhiyun 	}
1356*4882a593Smuzhiyun }
1357*4882a593Smuzhiyun 
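/*
 * Size of the nVHE hypervisor's own per-CPU data section (the region between
 * its __per_cpu_start and __per_cpu_end symbols). init_hyp_mode() allocates
 * and populates one copy of this region for each possible CPU.
 */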
nvhe_percpu_size(void)1358*4882a593Smuzhiyun static unsigned long nvhe_percpu_size(void)
1359*4882a593Smuzhiyun {
1360*4882a593Smuzhiyun 	return (unsigned long)CHOOSE_NVHE_SYM(__per_cpu_end) -
1361*4882a593Smuzhiyun 		(unsigned long)CHOOSE_NVHE_SYM(__per_cpu_start);
1362*4882a593Smuzhiyun }
1363*4882a593Smuzhiyun 
nvhe_percpu_order(void)1364*4882a593Smuzhiyun static unsigned long nvhe_percpu_order(void)
1365*4882a593Smuzhiyun {
1366*4882a593Smuzhiyun 	unsigned long size = nvhe_percpu_size();
1367*4882a593Smuzhiyun 
1368*4882a593Smuzhiyun 	return size ? get_order(size) : 0;
1369*4882a593Smuzhiyun }
1370*4882a593Smuzhiyun 
1371*4882a593Smuzhiyun /* A lookup table holding the hypervisor VA for each vector slot */
1372*4882a593Smuzhiyun static void *hyp_spectre_vector_selector[BP_HARDEN_EL2_SLOTS];
1373*4882a593Smuzhiyun 
kvm_init_vector_slot(void * base,enum arm64_hyp_spectre_vector slot)1374*4882a593Smuzhiyun static void kvm_init_vector_slot(void *base, enum arm64_hyp_spectre_vector slot)
1375*4882a593Smuzhiyun {
1376*4882a593Smuzhiyun 	hyp_spectre_vector_selector[slot] = __kvm_vector_slot2addr(base, slot);
1377*4882a593Smuzhiyun }
1378*4882a593Smuzhiyun 
kvm_init_vector_slots(void)1379*4882a593Smuzhiyun static int kvm_init_vector_slots(void)
1380*4882a593Smuzhiyun {
1381*4882a593Smuzhiyun 	int err;
1382*4882a593Smuzhiyun 	void *base;
1383*4882a593Smuzhiyun 
1384*4882a593Smuzhiyun 	base = kern_hyp_va(kvm_ksym_ref(__kvm_hyp_vector));
1385*4882a593Smuzhiyun 	kvm_init_vector_slot(base, HYP_VECTOR_DIRECT);
1386*4882a593Smuzhiyun 
1387*4882a593Smuzhiyun 	base = kern_hyp_va(kvm_ksym_ref(__bp_harden_hyp_vecs));
1388*4882a593Smuzhiyun 	kvm_init_vector_slot(base, HYP_VECTOR_SPECTRE_DIRECT);
1389*4882a593Smuzhiyun 
1390*4882a593Smuzhiyun 	if (kvm_system_needs_idmapped_vectors() && !has_vhe()) {
1391*4882a593Smuzhiyun 		err = create_hyp_exec_mappings(__pa_symbol(__bp_harden_hyp_vecs),
1392*4882a593Smuzhiyun 					       __BP_HARDEN_HYP_VECS_SZ, &base);
1393*4882a593Smuzhiyun 		if (err)
1394*4882a593Smuzhiyun 			return err;
1395*4882a593Smuzhiyun 	}
1396*4882a593Smuzhiyun 
1397*4882a593Smuzhiyun 	kvm_init_vector_slot(base, HYP_VECTOR_INDIRECT);
1398*4882a593Smuzhiyun 	kvm_init_vector_slot(base, HYP_VECTOR_SPECTRE_INDIRECT);
1399*4882a593Smuzhiyun 	return 0;
1400*4882a593Smuzhiyun }
1401*4882a593Smuzhiyun 
cpu_prepare_hyp_mode(int cpu)1402*4882a593Smuzhiyun static void cpu_prepare_hyp_mode(int cpu)
1403*4882a593Smuzhiyun {
1404*4882a593Smuzhiyun 	struct kvm_nvhe_init_params *params = per_cpu_ptr_nvhe_sym(kvm_init_params, cpu);
1405*4882a593Smuzhiyun 	unsigned long tcr;
1406*4882a593Smuzhiyun 
1407*4882a593Smuzhiyun 	/*
1408*4882a593Smuzhiyun 	 * Calculate the raw per-cpu offset without a translation from the
1409*4882a593Smuzhiyun 	 * kernel's mapping to the linear mapping, and store it in tpidr_el2
1410*4882a593Smuzhiyun 	 * so that we can use adr_l to access per-cpu variables in EL2.
1411*4882a593Smuzhiyun 	 * Also drop the KASAN tag which gets in the way...
1412*4882a593Smuzhiyun 	 */
1413*4882a593Smuzhiyun 	params->tpidr_el2 = (unsigned long)kasan_reset_tag(per_cpu_ptr_nvhe_sym(__per_cpu_start, cpu)) -
1414*4882a593Smuzhiyun 			    (unsigned long)kvm_ksym_ref(CHOOSE_NVHE_SYM(__per_cpu_start));
1415*4882a593Smuzhiyun 
1416*4882a593Smuzhiyun 	params->mair_el2 = read_sysreg(mair_el1);
1417*4882a593Smuzhiyun 
1418*4882a593Smuzhiyun 	/*
1419*4882a593Smuzhiyun 	 * The ID map may be configured to use an extended virtual address
1420*4882a593Smuzhiyun 	 * range. This is only the case if system RAM is out of range for the
1421*4882a593Smuzhiyun 	 * currently configured page size and VA_BITS, in which case we will
1422*4882a593Smuzhiyun 	 * also need the extended virtual range for the HYP ID map, or we won't
1423*4882a593Smuzhiyun 	 * be able to enable the EL2 MMU.
1424*4882a593Smuzhiyun 	 *
1425*4882a593Smuzhiyun 	 * However, at EL2, there is only one TTBR register, and we can't switch
1426*4882a593Smuzhiyun 	 * between translation tables *and* update TCR_EL2.T0SZ at the same
1427*4882a593Smuzhiyun 	 * time. Bottom line: we need to use the extended range with *both* our
1428*4882a593Smuzhiyun 	 * translation tables.
1429*4882a593Smuzhiyun 	 *
1430*4882a593Smuzhiyun 	 * So use the same T0SZ value we use for the ID map.
1431*4882a593Smuzhiyun 	 */
1432*4882a593Smuzhiyun 	tcr = (read_sysreg(tcr_el1) & TCR_EL2_MASK) | TCR_EL2_RES1;
1433*4882a593Smuzhiyun 	tcr &= ~TCR_T0SZ_MASK;
1434*4882a593Smuzhiyun 	tcr |= (idmap_t0sz & GENMASK(TCR_TxSZ_WIDTH - 1, 0)) << TCR_T0SZ_OFFSET;
1435*4882a593Smuzhiyun 	params->tcr_el2 = tcr;
1436*4882a593Smuzhiyun 
1437*4882a593Smuzhiyun 	params->stack_hyp_va = kern_hyp_va(per_cpu(kvm_arm_hyp_stack_page, cpu) + PAGE_SIZE);
1438*4882a593Smuzhiyun 	params->pgd_pa = kvm_mmu_get_httbr();
1439*4882a593Smuzhiyun 	if (is_protected_kvm_enabled())
1440*4882a593Smuzhiyun 		params->hcr_el2 = HCR_HOST_NVHE_PROTECTED_FLAGS;
1441*4882a593Smuzhiyun 	else
1442*4882a593Smuzhiyun 		params->hcr_el2 = HCR_HOST_NVHE_FLAGS;
1443*4882a593Smuzhiyun 	params->vttbr = params->vtcr = 0;
1444*4882a593Smuzhiyun 
1445*4882a593Smuzhiyun 	/*
1446*4882a593Smuzhiyun 	 * Flush the init params from the data cache because the struct will
1447*4882a593Smuzhiyun 	 * be read while the MMU is off.
1448*4882a593Smuzhiyun 	 */
1449*4882a593Smuzhiyun 	kvm_flush_dcache_to_poc(params, sizeof(*params));
1450*4882a593Smuzhiyun }
1451*4882a593Smuzhiyun 
hyp_install_host_vector(void)1452*4882a593Smuzhiyun static void hyp_install_host_vector(void)
1453*4882a593Smuzhiyun {
1454*4882a593Smuzhiyun 	struct kvm_nvhe_init_params *params;
1455*4882a593Smuzhiyun 	struct arm_smccc_res res;
1456*4882a593Smuzhiyun 
1457*4882a593Smuzhiyun 	/* Switch from the HYP stub to our own HYP init vector */
1458*4882a593Smuzhiyun 	__hyp_set_vectors(kvm_get_idmap_vector());
1459*4882a593Smuzhiyun 
1460*4882a593Smuzhiyun 	/*
1461*4882a593Smuzhiyun 	 * Call initialization code, and switch to the full-blown HYP code.
1462*4882a593Smuzhiyun 	 * If the cpucaps haven't been finalized yet, something has gone very
1463*4882a593Smuzhiyun 	 * wrong, and hyp will crash and burn when it uses any
1464*4882a593Smuzhiyun 	 * cpus_have_const_cap() wrapper.
1465*4882a593Smuzhiyun 	 */
1466*4882a593Smuzhiyun 	BUG_ON(!system_capabilities_finalized());
1467*4882a593Smuzhiyun 	params = this_cpu_ptr_nvhe_sym(kvm_init_params);
1468*4882a593Smuzhiyun 	arm_smccc_1_1_hvc(KVM_HOST_SMCCC_FUNC(__kvm_hyp_init), virt_to_phys(params), &res);
1469*4882a593Smuzhiyun 	WARN_ON(res.a0 != SMCCC_RET_SUCCESS);
1470*4882a593Smuzhiyun }
1471*4882a593Smuzhiyun 
cpu_init_hyp_mode(void)1472*4882a593Smuzhiyun static void cpu_init_hyp_mode(void)
1473*4882a593Smuzhiyun {
1474*4882a593Smuzhiyun 	hyp_install_host_vector();
1475*4882a593Smuzhiyun 
1476*4882a593Smuzhiyun 	/*
1477*4882a593Smuzhiyun 	 * Disabling SSBD on a non-VHE system requires us to enable SSBS
1478*4882a593Smuzhiyun 	 * at EL2.
1479*4882a593Smuzhiyun 	 */
1480*4882a593Smuzhiyun 	if (this_cpu_has_cap(ARM64_SSBS) &&
1481*4882a593Smuzhiyun 	    arm64_get_spectre_v4_state() == SPECTRE_VULNERABLE) {
1482*4882a593Smuzhiyun 		kvm_call_hyp_nvhe(__kvm_enable_ssbs);
1483*4882a593Smuzhiyun 	}
1484*4882a593Smuzhiyun }
1485*4882a593Smuzhiyun 
cpu_hyp_reset(void)1486*4882a593Smuzhiyun static void cpu_hyp_reset(void)
1487*4882a593Smuzhiyun {
1488*4882a593Smuzhiyun 	if (!is_kernel_in_hyp_mode())
1489*4882a593Smuzhiyun 		__hyp_reset_vectors();
1490*4882a593Smuzhiyun }
1491*4882a593Smuzhiyun 
1492*4882a593Smuzhiyun /*
1493*4882a593Smuzhiyun  * EL2 vectors can be mapped and rerouted in a number of ways,
1494*4882a593Smuzhiyun  * depending on the kernel configuration and CPU present:
1495*4882a593Smuzhiyun  *
1496*4882a593Smuzhiyun  * - If the CPU is affected by Spectre-v2, the hardening sequence is
1497*4882a593Smuzhiyun  *   placed in one of the vector slots, which is executed before jumping
1498*4882a593Smuzhiyun  *   to the real vectors.
1499*4882a593Smuzhiyun  *
1500*4882a593Smuzhiyun  * - If the CPU also has the ARM64_SPECTRE_V3A cap, the slot
1501*4882a593Smuzhiyun  *   containing the hardening sequence is mapped next to the idmap page,
1502*4882a593Smuzhiyun  *   and executed before jumping to the real vectors.
1503*4882a593Smuzhiyun  *
1504*4882a593Smuzhiyun  * - If the CPU only has the ARM64_SPECTRE_V3A cap, then an
1505*4882a593Smuzhiyun  *   empty slot is selected, mapped next to the idmap page, and
1506*4882a593Smuzhiyun  *   executed before jumping to the real vectors.
1507*4882a593Smuzhiyun  *
1508*4882a593Smuzhiyun  * Note that ARM64_SPECTRE_V3A is somewhat incompatible with
1509*4882a593Smuzhiyun  * VHE, as we don't have hypervisor-specific mappings. If the system
1510*4882a593Smuzhiyun  * is VHE and yet selects this capability, it will be ignored.
1511*4882a593Smuzhiyun  */
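/*
 * Roughly, the four slots initialised in kvm_init_vector_slots() cover these
 * cases: HYP_VECTOR_DIRECT (no mitigation, plain __kvm_hyp_vector),
 * HYP_VECTOR_SPECTRE_DIRECT (hardening sequence in the normal hyp mapping),
 * HYP_VECTOR_INDIRECT (no hardening, but mapped next to the idmap page) and
 * HYP_VECTOR_SPECTRE_INDIRECT (hardening sequence, mapped next to the idmap).
 */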
cpu_set_hyp_vector(void)1512*4882a593Smuzhiyun static void cpu_set_hyp_vector(void)
1513*4882a593Smuzhiyun {
1514*4882a593Smuzhiyun 	struct bp_hardening_data *data = this_cpu_ptr(&bp_hardening_data);
1515*4882a593Smuzhiyun 	void *vector = hyp_spectre_vector_selector[data->slot];
1516*4882a593Smuzhiyun 
1517*4882a593Smuzhiyun 	if (!is_protected_kvm_enabled())
1518*4882a593Smuzhiyun 		*this_cpu_ptr_hyp_sym(kvm_hyp_vector) = (unsigned long)vector;
1519*4882a593Smuzhiyun 	else
1520*4882a593Smuzhiyun 		kvm_call_hyp_nvhe(__pkvm_cpu_set_vector, data->slot);
1521*4882a593Smuzhiyun }
1522*4882a593Smuzhiyun 
cpu_hyp_reinit(void)1523*4882a593Smuzhiyun static void cpu_hyp_reinit(void)
1524*4882a593Smuzhiyun {
1525*4882a593Smuzhiyun 	kvm_init_host_cpu_context(&this_cpu_ptr_hyp_sym(kvm_host_data)->host_ctxt);
1526*4882a593Smuzhiyun 
1527*4882a593Smuzhiyun 	cpu_hyp_reset();
1528*4882a593Smuzhiyun 
1529*4882a593Smuzhiyun 	if (is_kernel_in_hyp_mode())
1530*4882a593Smuzhiyun 		kvm_timer_init_vhe();
1531*4882a593Smuzhiyun 	else
1532*4882a593Smuzhiyun 		cpu_init_hyp_mode();
1533*4882a593Smuzhiyun 
1534*4882a593Smuzhiyun 	cpu_set_hyp_vector();
1535*4882a593Smuzhiyun 
1536*4882a593Smuzhiyun 	kvm_arm_init_debug();
1537*4882a593Smuzhiyun 
1538*4882a593Smuzhiyun 	if (vgic_present)
1539*4882a593Smuzhiyun 		kvm_vgic_init_cpu_hardware();
1540*4882a593Smuzhiyun }
1541*4882a593Smuzhiyun 
_kvm_arch_hardware_enable(void * discard)1542*4882a593Smuzhiyun static void _kvm_arch_hardware_enable(void *discard)
1543*4882a593Smuzhiyun {
1544*4882a593Smuzhiyun 	if (!__this_cpu_read(kvm_arm_hardware_enabled)) {
1545*4882a593Smuzhiyun 		cpu_hyp_reinit();
1546*4882a593Smuzhiyun 		__this_cpu_write(kvm_arm_hardware_enabled, 1);
1547*4882a593Smuzhiyun 	}
1548*4882a593Smuzhiyun }
1549*4882a593Smuzhiyun 
kvm_arch_hardware_enable(void)1550*4882a593Smuzhiyun int kvm_arch_hardware_enable(void)
1551*4882a593Smuzhiyun {
1552*4882a593Smuzhiyun 	_kvm_arch_hardware_enable(NULL);
1553*4882a593Smuzhiyun 	return 0;
1554*4882a593Smuzhiyun }
1555*4882a593Smuzhiyun 
_kvm_arch_hardware_disable(void * discard)1556*4882a593Smuzhiyun static void _kvm_arch_hardware_disable(void *discard)
1557*4882a593Smuzhiyun {
1558*4882a593Smuzhiyun 	if (__this_cpu_read(kvm_arm_hardware_enabled)) {
1559*4882a593Smuzhiyun 		cpu_hyp_reset();
1560*4882a593Smuzhiyun 		__this_cpu_write(kvm_arm_hardware_enabled, 0);
1561*4882a593Smuzhiyun 	}
1562*4882a593Smuzhiyun }
1563*4882a593Smuzhiyun 
kvm_arch_hardware_disable(void)1564*4882a593Smuzhiyun void kvm_arch_hardware_disable(void)
1565*4882a593Smuzhiyun {
1566*4882a593Smuzhiyun 	if (!is_protected_kvm_enabled())
1567*4882a593Smuzhiyun 		_kvm_arch_hardware_disable(NULL);
1568*4882a593Smuzhiyun }
1569*4882a593Smuzhiyun 
1570*4882a593Smuzhiyun #ifdef CONFIG_CPU_PM
hyp_init_cpu_pm_notifier(struct notifier_block * self,unsigned long cmd,void * v)1571*4882a593Smuzhiyun static int hyp_init_cpu_pm_notifier(struct notifier_block *self,
1572*4882a593Smuzhiyun 				    unsigned long cmd,
1573*4882a593Smuzhiyun 				    void *v)
1574*4882a593Smuzhiyun {
1575*4882a593Smuzhiyun 	/*
1576*4882a593Smuzhiyun 	 * kvm_arm_hardware_enabled is left with its old value over
1577*4882a593Smuzhiyun 	 * PM_ENTER->PM_EXIT. It is used to indicate PM_EXIT should
1578*4882a593Smuzhiyun 	 * re-enable hyp.
1579*4882a593Smuzhiyun 	 */
1580*4882a593Smuzhiyun 	switch (cmd) {
1581*4882a593Smuzhiyun 	case CPU_PM_ENTER:
1582*4882a593Smuzhiyun 		if (__this_cpu_read(kvm_arm_hardware_enabled))
1583*4882a593Smuzhiyun 			/*
1584*4882a593Smuzhiyun 			 * don't update kvm_arm_hardware_enabled here
1585*4882a593Smuzhiyun 			 * so that the hardware will be re-enabled
1586*4882a593Smuzhiyun 			 * when we resume. See below.
1587*4882a593Smuzhiyun 			 */
1588*4882a593Smuzhiyun 			cpu_hyp_reset();
1589*4882a593Smuzhiyun 
1590*4882a593Smuzhiyun 		return NOTIFY_OK;
1591*4882a593Smuzhiyun 	case CPU_PM_ENTER_FAILED:
1592*4882a593Smuzhiyun 	case CPU_PM_EXIT:
1593*4882a593Smuzhiyun 		if (__this_cpu_read(kvm_arm_hardware_enabled))
1594*4882a593Smuzhiyun 			/* The hardware was enabled before suspend. */
1595*4882a593Smuzhiyun 			cpu_hyp_reinit();
1596*4882a593Smuzhiyun 
1597*4882a593Smuzhiyun 		return NOTIFY_OK;
1598*4882a593Smuzhiyun 
1599*4882a593Smuzhiyun 	default:
1600*4882a593Smuzhiyun 		return NOTIFY_DONE;
1601*4882a593Smuzhiyun 	}
1602*4882a593Smuzhiyun }
1603*4882a593Smuzhiyun 
1604*4882a593Smuzhiyun static struct notifier_block hyp_init_cpu_pm_nb = {
1605*4882a593Smuzhiyun 	.notifier_call = hyp_init_cpu_pm_notifier,
1606*4882a593Smuzhiyun };
1607*4882a593Smuzhiyun 
hyp_cpu_pm_init(void)1608*4882a593Smuzhiyun static void hyp_cpu_pm_init(void)
1609*4882a593Smuzhiyun {
1610*4882a593Smuzhiyun 	if (!is_protected_kvm_enabled())
1611*4882a593Smuzhiyun 		cpu_pm_register_notifier(&hyp_init_cpu_pm_nb);
1612*4882a593Smuzhiyun }
hyp_cpu_pm_exit(void)1613*4882a593Smuzhiyun static void hyp_cpu_pm_exit(void)
1614*4882a593Smuzhiyun {
1615*4882a593Smuzhiyun 	if (!is_protected_kvm_enabled())
1616*4882a593Smuzhiyun 		cpu_pm_unregister_notifier(&hyp_init_cpu_pm_nb);
1617*4882a593Smuzhiyun }
1618*4882a593Smuzhiyun #else
hyp_cpu_pm_init(void)1619*4882a593Smuzhiyun static inline void hyp_cpu_pm_init(void)
1620*4882a593Smuzhiyun {
1621*4882a593Smuzhiyun }
hyp_cpu_pm_exit(void)1622*4882a593Smuzhiyun static inline void hyp_cpu_pm_exit(void)
1623*4882a593Smuzhiyun {
1624*4882a593Smuzhiyun }
1625*4882a593Smuzhiyun #endif
1626*4882a593Smuzhiyun 
init_cpu_logical_map(void)1627*4882a593Smuzhiyun static void init_cpu_logical_map(void)
1628*4882a593Smuzhiyun {
1629*4882a593Smuzhiyun 	unsigned int cpu;
1630*4882a593Smuzhiyun 
1631*4882a593Smuzhiyun 	/*
1632*4882a593Smuzhiyun 	 * Copy the MPIDR <-> logical CPU ID mapping to hyp.
1633*4882a593Smuzhiyun 	 * Only copy the set of online CPUs whose features have been checked
1634*4882a593Smuzhiyun 	 * against the finalized system capabilities. The hypervisor will not
1635*4882a593Smuzhiyun 	 * allow any other CPUs from the `possible` set to boot.
1636*4882a593Smuzhiyun 	 */
1637*4882a593Smuzhiyun 	for_each_online_cpu(cpu)
1638*4882a593Smuzhiyun 		hyp_cpu_logical_map[cpu] = cpu_logical_map(cpu);
1639*4882a593Smuzhiyun }
1640*4882a593Smuzhiyun 
1641*4882a593Smuzhiyun #define init_psci_0_1_impl_state(config, what)	\
1642*4882a593Smuzhiyun 	config.psci_0_1_ ## what ## _implemented = psci_ops.what
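/*
 * For example, init_psci_0_1_impl_state(kvm_host_psci_config, cpu_on)
 * expands to:
 *
 *	kvm_host_psci_config.psci_0_1_cpu_on_implemented = psci_ops.cpu_on;
 *
 * i.e. it records whether the host's PSCI 0.1 implementation provides each
 * optional callback, so the hyp PSCI relay knows which calls it may forward.
 */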
1643*4882a593Smuzhiyun 
init_psci_relay(void)1644*4882a593Smuzhiyun static bool init_psci_relay(void)
1645*4882a593Smuzhiyun {
1646*4882a593Smuzhiyun 	/*
1647*4882a593Smuzhiyun 	 * If PSCI has not been initialized, protected KVM cannot install
1648*4882a593Smuzhiyun 	 * itself on newly booted CPUs.
1649*4882a593Smuzhiyun 	 */
1650*4882a593Smuzhiyun 	if (!psci_ops.get_version) {
1651*4882a593Smuzhiyun 		kvm_err("Cannot initialize protected mode without PSCI\n");
1652*4882a593Smuzhiyun 		return false;
1653*4882a593Smuzhiyun 	}
1654*4882a593Smuzhiyun 
1655*4882a593Smuzhiyun 	kvm_host_psci_config.version = psci_ops.get_version();
1656*4882a593Smuzhiyun 
1657*4882a593Smuzhiyun 	if (kvm_host_psci_config.version == PSCI_VERSION(0, 1)) {
1658*4882a593Smuzhiyun 		kvm_host_psci_config.function_ids_0_1 = get_psci_0_1_function_ids();
1659*4882a593Smuzhiyun 		init_psci_0_1_impl_state(kvm_host_psci_config, cpu_suspend);
1660*4882a593Smuzhiyun 		init_psci_0_1_impl_state(kvm_host_psci_config, cpu_on);
1661*4882a593Smuzhiyun 		init_psci_0_1_impl_state(kvm_host_psci_config, cpu_off);
1662*4882a593Smuzhiyun 		init_psci_0_1_impl_state(kvm_host_psci_config, migrate);
1663*4882a593Smuzhiyun 	}
1664*4882a593Smuzhiyun 	return true;
1665*4882a593Smuzhiyun }
1666*4882a593Smuzhiyun 
init_common_resources(void)1667*4882a593Smuzhiyun static int init_common_resources(void)
1668*4882a593Smuzhiyun {
1669*4882a593Smuzhiyun 	return kvm_set_ipa_limit();
1670*4882a593Smuzhiyun }
1671*4882a593Smuzhiyun 
init_subsystems(void)1672*4882a593Smuzhiyun static int init_subsystems(void)
1673*4882a593Smuzhiyun {
1674*4882a593Smuzhiyun 	int err = 0;
1675*4882a593Smuzhiyun 
1676*4882a593Smuzhiyun 	/*
1677*4882a593Smuzhiyun 	 * Enable hardware so that subsystem initialisation can access EL2.
1678*4882a593Smuzhiyun 	 */
1679*4882a593Smuzhiyun 	on_each_cpu(_kvm_arch_hardware_enable, NULL, 1);
1680*4882a593Smuzhiyun 
1681*4882a593Smuzhiyun 	/*
1682*4882a593Smuzhiyun 	 * Register the CPU low-power (CPU PM) notifier
1683*4882a593Smuzhiyun 	 */
1684*4882a593Smuzhiyun 	hyp_cpu_pm_init();
1685*4882a593Smuzhiyun 
1686*4882a593Smuzhiyun 	/*
1687*4882a593Smuzhiyun 	 * Init HYP view of VGIC
1688*4882a593Smuzhiyun 	 */
1689*4882a593Smuzhiyun 	err = kvm_vgic_hyp_init();
1690*4882a593Smuzhiyun 	switch (err) {
1691*4882a593Smuzhiyun 	case 0:
1692*4882a593Smuzhiyun 		vgic_present = true;
1693*4882a593Smuzhiyun 		break;
1694*4882a593Smuzhiyun 	case -ENODEV:
1695*4882a593Smuzhiyun 	case -ENXIO:
1696*4882a593Smuzhiyun 		vgic_present = false;
1697*4882a593Smuzhiyun 		err = 0;
1698*4882a593Smuzhiyun 		break;
1699*4882a593Smuzhiyun 	default:
1700*4882a593Smuzhiyun 		goto out;
1701*4882a593Smuzhiyun 	}
1702*4882a593Smuzhiyun 
1703*4882a593Smuzhiyun 	/*
1704*4882a593Smuzhiyun 	 * Init HYP architected timer support
1705*4882a593Smuzhiyun 	 */
1706*4882a593Smuzhiyun 	err = kvm_timer_hyp_init(vgic_present);
1707*4882a593Smuzhiyun 	if (err)
1708*4882a593Smuzhiyun 		goto out;
1709*4882a593Smuzhiyun 
1710*4882a593Smuzhiyun 	kvm_perf_init();
1711*4882a593Smuzhiyun 	kvm_sys_reg_table_init();
1712*4882a593Smuzhiyun 
1713*4882a593Smuzhiyun out:
1714*4882a593Smuzhiyun 	if (err || !is_protected_kvm_enabled())
1715*4882a593Smuzhiyun 		on_each_cpu(_kvm_arch_hardware_disable, NULL, 1);
1716*4882a593Smuzhiyun 
1717*4882a593Smuzhiyun 	return err;
1718*4882a593Smuzhiyun }
1719*4882a593Smuzhiyun 
teardown_hyp_mode(void)1720*4882a593Smuzhiyun static void teardown_hyp_mode(void)
1721*4882a593Smuzhiyun {
1722*4882a593Smuzhiyun 	int cpu;
1723*4882a593Smuzhiyun 
1724*4882a593Smuzhiyun 	free_hyp_pgds();
1725*4882a593Smuzhiyun 	for_each_possible_cpu(cpu) {
1726*4882a593Smuzhiyun 		free_page(per_cpu(kvm_arm_hyp_stack_page, cpu));
1727*4882a593Smuzhiyun 		free_pages(kvm_arm_hyp_percpu_base[cpu], nvhe_percpu_order());
1728*4882a593Smuzhiyun 	}
1729*4882a593Smuzhiyun }
1730*4882a593Smuzhiyun 
do_pkvm_init(u32 hyp_va_bits)1731*4882a593Smuzhiyun static int do_pkvm_init(u32 hyp_va_bits)
1732*4882a593Smuzhiyun {
1733*4882a593Smuzhiyun 	void *per_cpu_base = kvm_ksym_ref(kvm_arm_hyp_percpu_base);
1734*4882a593Smuzhiyun 	int ret;
1735*4882a593Smuzhiyun 
1736*4882a593Smuzhiyun 	preempt_disable();
1737*4882a593Smuzhiyun 	hyp_install_host_vector();
1738*4882a593Smuzhiyun 	ret = kvm_call_hyp_nvhe(__pkvm_init, hyp_mem_base, hyp_mem_size,
1739*4882a593Smuzhiyun 				num_possible_cpus(), kern_hyp_va(per_cpu_base),
1740*4882a593Smuzhiyun 				hyp_va_bits);
1741*4882a593Smuzhiyun 	preempt_enable();
1742*4882a593Smuzhiyun 
1743*4882a593Smuzhiyun 	return ret;
1744*4882a593Smuzhiyun }
1745*4882a593Smuzhiyun 
kvm_hyp_init_protection(u32 hyp_va_bits)1746*4882a593Smuzhiyun static int kvm_hyp_init_protection(u32 hyp_va_bits)
1747*4882a593Smuzhiyun {
1748*4882a593Smuzhiyun 	void *addr = phys_to_virt(hyp_mem_base);
1749*4882a593Smuzhiyun 	int ret;
1750*4882a593Smuzhiyun 
1751*4882a593Smuzhiyun 	kvm_nvhe_sym(id_aa64mmfr0_el1_sys_val) = read_sanitised_ftr_reg(SYS_ID_AA64MMFR0_EL1);
1752*4882a593Smuzhiyun 	kvm_nvhe_sym(id_aa64mmfr1_el1_sys_val) = read_sanitised_ftr_reg(SYS_ID_AA64MMFR1_EL1);
1753*4882a593Smuzhiyun 
1754*4882a593Smuzhiyun 	ret = create_hyp_mappings(addr, addr + hyp_mem_size, PAGE_HYP);
1755*4882a593Smuzhiyun 	if (ret)
1756*4882a593Smuzhiyun 		return ret;
1757*4882a593Smuzhiyun 
1758*4882a593Smuzhiyun 	ret = do_pkvm_init(hyp_va_bits);
1759*4882a593Smuzhiyun 	if (ret)
1760*4882a593Smuzhiyun 		return ret;
1761*4882a593Smuzhiyun 
1762*4882a593Smuzhiyun 	free_hyp_pgds();
1763*4882a593Smuzhiyun 
1764*4882a593Smuzhiyun 	return 0;
1765*4882a593Smuzhiyun }
1766*4882a593Smuzhiyun 
1767*4882a593Smuzhiyun /**
1768*4882a593Smuzhiyun  * init_hyp_mode - Initialize Hyp-mode on all online CPUs
1769*4882a593Smuzhiyun  */
init_hyp_mode(void)1770*4882a593Smuzhiyun static int init_hyp_mode(void)
1771*4882a593Smuzhiyun {
1772*4882a593Smuzhiyun 	u32 hyp_va_bits;
1773*4882a593Smuzhiyun 	int cpu;
1774*4882a593Smuzhiyun 	int err = -ENOMEM;
1775*4882a593Smuzhiyun 
1776*4882a593Smuzhiyun 	/*
1777*4882a593Smuzhiyun 	 * The protected Hyp-mode cannot be initialized if the memory pool
1778*4882a593Smuzhiyun 	 * allocation has failed.
1779*4882a593Smuzhiyun 	 */
1780*4882a593Smuzhiyun 	if (is_protected_kvm_enabled() && !hyp_mem_base)
1781*4882a593Smuzhiyun 		goto out_err;
1782*4882a593Smuzhiyun 
1783*4882a593Smuzhiyun 	/*
1784*4882a593Smuzhiyun 	 * Allocate Hyp PGD and setup Hyp identity mapping
1785*4882a593Smuzhiyun 	 */
1786*4882a593Smuzhiyun 	err = kvm_mmu_init(&hyp_va_bits);
1787*4882a593Smuzhiyun 	if (err)
1788*4882a593Smuzhiyun 		goto out_err;
1789*4882a593Smuzhiyun 
1790*4882a593Smuzhiyun 	/*
1791*4882a593Smuzhiyun 	 * Allocate stack pages for Hypervisor-mode
1792*4882a593Smuzhiyun 	 */
1793*4882a593Smuzhiyun 	for_each_possible_cpu(cpu) {
1794*4882a593Smuzhiyun 		unsigned long stack_page;
1795*4882a593Smuzhiyun 
1796*4882a593Smuzhiyun 		stack_page = __get_free_page(GFP_KERNEL);
1797*4882a593Smuzhiyun 		if (!stack_page) {
1798*4882a593Smuzhiyun 			err = -ENOMEM;
1799*4882a593Smuzhiyun 			goto out_err;
1800*4882a593Smuzhiyun 		}
1801*4882a593Smuzhiyun 
1802*4882a593Smuzhiyun 		per_cpu(kvm_arm_hyp_stack_page, cpu) = stack_page;
1803*4882a593Smuzhiyun 	}
1804*4882a593Smuzhiyun 
1805*4882a593Smuzhiyun 	/*
1806*4882a593Smuzhiyun 	 * Allocate and initialize pages for Hypervisor-mode percpu regions.
1807*4882a593Smuzhiyun 	 */
1808*4882a593Smuzhiyun 	for_each_possible_cpu(cpu) {
1809*4882a593Smuzhiyun 		struct page *page;
1810*4882a593Smuzhiyun 		void *page_addr;
1811*4882a593Smuzhiyun 
1812*4882a593Smuzhiyun 		page = alloc_pages(GFP_KERNEL, nvhe_percpu_order());
1813*4882a593Smuzhiyun 		if (!page) {
1814*4882a593Smuzhiyun 			err = -ENOMEM;
1815*4882a593Smuzhiyun 			goto out_err;
1816*4882a593Smuzhiyun 		}
1817*4882a593Smuzhiyun 
1818*4882a593Smuzhiyun 		page_addr = page_address(page);
1819*4882a593Smuzhiyun 		memcpy(page_addr, CHOOSE_NVHE_SYM(__per_cpu_start), nvhe_percpu_size());
1820*4882a593Smuzhiyun 		kvm_arm_hyp_percpu_base[cpu] = (unsigned long)page_addr;
1821*4882a593Smuzhiyun 	}
1822*4882a593Smuzhiyun 
1823*4882a593Smuzhiyun 	/*
1824*4882a593Smuzhiyun 	 * Map the Hyp-code called directly from the host
1825*4882a593Smuzhiyun 	 */
1826*4882a593Smuzhiyun 	err = create_hyp_mappings(kvm_ksym_ref(__hyp_text_start),
1827*4882a593Smuzhiyun 				  kvm_ksym_ref(__hyp_text_end), PAGE_HYP_EXEC);
1828*4882a593Smuzhiyun 	if (err) {
1829*4882a593Smuzhiyun 		kvm_err("Cannot map world-switch code\n");
1830*4882a593Smuzhiyun 		goto out_err;
1831*4882a593Smuzhiyun 	}
1832*4882a593Smuzhiyun 
1833*4882a593Smuzhiyun 	err = create_hyp_mappings(kvm_ksym_ref(__hyp_rodata_start),
1834*4882a593Smuzhiyun 				  kvm_ksym_ref(__hyp_rodata_end), PAGE_HYP_RO);
1835*4882a593Smuzhiyun 	if (err) {
1836*4882a593Smuzhiyun 		kvm_err("Cannot map .hyp.rodata section\n");
1837*4882a593Smuzhiyun 		goto out_err;
1838*4882a593Smuzhiyun 	}
1839*4882a593Smuzhiyun 
1840*4882a593Smuzhiyun 	err = create_hyp_mappings(kvm_ksym_ref(__start_rodata),
1841*4882a593Smuzhiyun 				  kvm_ksym_ref(__end_rodata), PAGE_HYP_RO);
1842*4882a593Smuzhiyun 	if (err) {
1843*4882a593Smuzhiyun 		kvm_err("Cannot map rodata section\n");
1844*4882a593Smuzhiyun 		goto out_err;
1845*4882a593Smuzhiyun 	}
1846*4882a593Smuzhiyun 
1847*4882a593Smuzhiyun 	/*
1848*4882a593Smuzhiyun 	 * .hyp.bss is guaranteed to be placed at the beginning of the .bss
1849*4882a593Smuzhiyun 	 * section thanks to an assertion in the linker script. Map it RW and
1850*4882a593Smuzhiyun 	 * the rest of .bss RO.
1851*4882a593Smuzhiyun 	 */
1852*4882a593Smuzhiyun 	err = create_hyp_mappings(kvm_ksym_ref(__hyp_bss_start),
1853*4882a593Smuzhiyun 				  kvm_ksym_ref(__hyp_bss_end), PAGE_HYP);
1854*4882a593Smuzhiyun 	if (err) {
1855*4882a593Smuzhiyun 		kvm_err("Cannot map hyp bss section: %d\n", err);
1856*4882a593Smuzhiyun 		goto out_err;
1857*4882a593Smuzhiyun 	}
1858*4882a593Smuzhiyun 
1859*4882a593Smuzhiyun 	err = create_hyp_mappings(kvm_ksym_ref(__hyp_bss_end),
1860*4882a593Smuzhiyun 				  kvm_ksym_ref(__bss_stop), PAGE_HYP_RO);
1861*4882a593Smuzhiyun 	if (err) {
1862*4882a593Smuzhiyun 		kvm_err("Cannot map bss section\n");
1863*4882a593Smuzhiyun 		goto out_err;
1864*4882a593Smuzhiyun 	}
1865*4882a593Smuzhiyun 
1866*4882a593Smuzhiyun 	/*
1867*4882a593Smuzhiyun 	 * Map the Hyp stack pages
1868*4882a593Smuzhiyun 	 */
1869*4882a593Smuzhiyun 	for_each_possible_cpu(cpu) {
1870*4882a593Smuzhiyun 		char *stack_page = (char *)per_cpu(kvm_arm_hyp_stack_page, cpu);
1871*4882a593Smuzhiyun 		err = create_hyp_mappings(stack_page, stack_page + PAGE_SIZE,
1872*4882a593Smuzhiyun 					  PAGE_HYP);
1873*4882a593Smuzhiyun 
1874*4882a593Smuzhiyun 		if (err) {
1875*4882a593Smuzhiyun 			kvm_err("Cannot map hyp stack\n");
1876*4882a593Smuzhiyun 			goto out_err;
1877*4882a593Smuzhiyun 		}
1878*4882a593Smuzhiyun 	}
1879*4882a593Smuzhiyun 
1880*4882a593Smuzhiyun 	for_each_possible_cpu(cpu) {
1881*4882a593Smuzhiyun 		char *percpu_begin = (char *)kvm_arm_hyp_percpu_base[cpu];
1882*4882a593Smuzhiyun 		char *percpu_end = percpu_begin + nvhe_percpu_size();
1883*4882a593Smuzhiyun 
1884*4882a593Smuzhiyun 		/* Map Hyp percpu pages */
1885*4882a593Smuzhiyun 		err = create_hyp_mappings(percpu_begin, percpu_end, PAGE_HYP);
1886*4882a593Smuzhiyun 		if (err) {
1887*4882a593Smuzhiyun 			kvm_err("Cannot map hyp percpu region\n");
1888*4882a593Smuzhiyun 			goto out_err;
1889*4882a593Smuzhiyun 		}
1890*4882a593Smuzhiyun 
1891*4882a593Smuzhiyun 		/* Prepare the CPU initialization parameters */
1892*4882a593Smuzhiyun 		cpu_prepare_hyp_mode(cpu);
1893*4882a593Smuzhiyun 	}
1894*4882a593Smuzhiyun 
1895*4882a593Smuzhiyun 	if (is_protected_kvm_enabled()) {
1896*4882a593Smuzhiyun 		init_cpu_logical_map();
1897*4882a593Smuzhiyun 
1898*4882a593Smuzhiyun 		if (!init_psci_relay()) {
1899*4882a593Smuzhiyun 			err = -ENODEV;
1900*4882a593Smuzhiyun 			goto out_err;
1901*4882a593Smuzhiyun 		}
1902*4882a593Smuzhiyun 	}
1903*4882a593Smuzhiyun 
1904*4882a593Smuzhiyun 	if (is_protected_kvm_enabled()) {
1905*4882a593Smuzhiyun 		err = kvm_hyp_init_protection(hyp_va_bits);
1906*4882a593Smuzhiyun 		if (err) {
1907*4882a593Smuzhiyun 			kvm_err("Failed to init hyp memory protection\n");
1908*4882a593Smuzhiyun 			goto out_err;
1909*4882a593Smuzhiyun 		}
1910*4882a593Smuzhiyun 	}
1911*4882a593Smuzhiyun 
1912*4882a593Smuzhiyun 	return 0;
1913*4882a593Smuzhiyun 
1914*4882a593Smuzhiyun out_err:
1915*4882a593Smuzhiyun 	teardown_hyp_mode();
1916*4882a593Smuzhiyun 	kvm_err("error initializing Hyp mode: %d\n", err);
1917*4882a593Smuzhiyun 	return err;
1918*4882a593Smuzhiyun }
1919*4882a593Smuzhiyun 
_kvm_host_prot_finalize(void * discard)1920*4882a593Smuzhiyun void _kvm_host_prot_finalize(void *discard)
1921*4882a593Smuzhiyun {
1922*4882a593Smuzhiyun 	WARN_ON(kvm_call_hyp_nvhe(__pkvm_prot_finalize));
1923*4882a593Smuzhiyun }
1924*4882a593Smuzhiyun 
pkvm_mark_hyp(phys_addr_t start,phys_addr_t end)1925*4882a593Smuzhiyun static inline int pkvm_mark_hyp(phys_addr_t start, phys_addr_t end)
1926*4882a593Smuzhiyun {
1927*4882a593Smuzhiyun 	return kvm_call_hyp_nvhe(__pkvm_mark_hyp, start, end);
1928*4882a593Smuzhiyun }
1929*4882a593Smuzhiyun 
1930*4882a593Smuzhiyun #define pkvm_mark_hyp_section(__section)		\
1931*4882a593Smuzhiyun 	pkvm_mark_hyp(__pa_symbol(__section##_start),	\
1932*4882a593Smuzhiyun 			__pa_symbol(__section##_end))
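/*
 * For example, pkvm_mark_hyp_section(__hyp_text) expands to
 *
 *	pkvm_mark_hyp(__pa_symbol(__hyp_text_start), __pa_symbol(__hyp_text_end))
 *
 * i.e. the whole physical range covered by that hyp section is registered
 * with the hypervisor via the __pkvm_mark_hyp hypercall.
 */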
1933*4882a593Smuzhiyun 
finalize_hyp_mode(void)1934*4882a593Smuzhiyun static int finalize_hyp_mode(void)
1935*4882a593Smuzhiyun {
1936*4882a593Smuzhiyun 	int cpu, ret;
1937*4882a593Smuzhiyun 
1938*4882a593Smuzhiyun 	if (!is_protected_kvm_enabled())
1939*4882a593Smuzhiyun 		return 0;
1940*4882a593Smuzhiyun 
1941*4882a593Smuzhiyun 	ret = pkvm_mark_hyp_section(__hyp_idmap_text);
1942*4882a593Smuzhiyun 	if (ret)
1943*4882a593Smuzhiyun 		return ret;
1944*4882a593Smuzhiyun 
1945*4882a593Smuzhiyun 	ret = pkvm_mark_hyp_section(__hyp_text);
1946*4882a593Smuzhiyun 	if (ret)
1947*4882a593Smuzhiyun 		return ret;
1948*4882a593Smuzhiyun 
1949*4882a593Smuzhiyun 	ret = pkvm_mark_hyp_section(__hyp_rodata);
1950*4882a593Smuzhiyun 	if (ret)
1951*4882a593Smuzhiyun 		return ret;
1952*4882a593Smuzhiyun 
1953*4882a593Smuzhiyun 	/*
1954*4882a593Smuzhiyun 	 * Exclude HYP BSS from kmemleak so that it doesn't get peeked
1955*4882a593Smuzhiyun 	 * at, which would end badly once the section is inaccessible.
1956*4882a593Smuzhiyun 	 * None of other sections should ever be introspected.
1957*4882a593Smuzhiyun 	 */
1958*4882a593Smuzhiyun 	kmemleak_free_part(__hyp_bss_start, __hyp_bss_end - __hyp_bss_start);
1959*4882a593Smuzhiyun 	ret = pkvm_mark_hyp_section(__hyp_bss);
1960*4882a593Smuzhiyun 	if (ret)
1961*4882a593Smuzhiyun 		return ret;
1962*4882a593Smuzhiyun 
1963*4882a593Smuzhiyun 	ret = pkvm_mark_hyp(hyp_mem_base, hyp_mem_base + hyp_mem_size);
1964*4882a593Smuzhiyun 	if (ret)
1965*4882a593Smuzhiyun 		return ret;
1966*4882a593Smuzhiyun 
1967*4882a593Smuzhiyun 	for_each_possible_cpu(cpu) {
1968*4882a593Smuzhiyun 		phys_addr_t start = virt_to_phys((void *)kvm_arm_hyp_percpu_base[cpu]);
1969*4882a593Smuzhiyun 		phys_addr_t end = start + (PAGE_SIZE << nvhe_percpu_order());
1970*4882a593Smuzhiyun 
1971*4882a593Smuzhiyun 		ret = pkvm_mark_hyp(start, end);
1972*4882a593Smuzhiyun 		if (ret)
1973*4882a593Smuzhiyun 			return ret;
1974*4882a593Smuzhiyun 
1975*4882a593Smuzhiyun 		start = virt_to_phys((void *)per_cpu(kvm_arm_hyp_stack_page, cpu));
1976*4882a593Smuzhiyun 		end = start + PAGE_SIZE;
1977*4882a593Smuzhiyun 		ret = pkvm_mark_hyp(start, end);
1978*4882a593Smuzhiyun 		if (ret)
1979*4882a593Smuzhiyun 			return ret;
1980*4882a593Smuzhiyun 	}
1981*4882a593Smuzhiyun 
1982*4882a593Smuzhiyun 	/*
1983*4882a593Smuzhiyun 	 * Flip the static key upfront as that may no longer be possible
1984*4882a593Smuzhiyun 	 * once the host stage 2 is installed.
1985*4882a593Smuzhiyun 	 */
1986*4882a593Smuzhiyun 	static_branch_enable(&kvm_protected_mode_initialized);
1987*4882a593Smuzhiyun 	on_each_cpu(_kvm_host_prot_finalize, NULL, 1);
1988*4882a593Smuzhiyun 
1989*4882a593Smuzhiyun 	return 0;
1990*4882a593Smuzhiyun }
1991*4882a593Smuzhiyun 
check_kvm_target_cpu(void * ret)1992*4882a593Smuzhiyun static void check_kvm_target_cpu(void *ret)
1993*4882a593Smuzhiyun {
1994*4882a593Smuzhiyun 	*(int *)ret = kvm_target_cpu();
1995*4882a593Smuzhiyun }
1996*4882a593Smuzhiyun 
kvm_mpidr_to_vcpu(struct kvm * kvm,unsigned long mpidr)1997*4882a593Smuzhiyun struct kvm_vcpu *kvm_mpidr_to_vcpu(struct kvm *kvm, unsigned long mpidr)
1998*4882a593Smuzhiyun {
1999*4882a593Smuzhiyun 	struct kvm_vcpu *vcpu;
2000*4882a593Smuzhiyun 	int i;
2001*4882a593Smuzhiyun 
2002*4882a593Smuzhiyun 	mpidr &= MPIDR_HWID_BITMASK;
2003*4882a593Smuzhiyun 	kvm_for_each_vcpu(i, vcpu, kvm) {
2004*4882a593Smuzhiyun 		if (mpidr == kvm_vcpu_get_mpidr_aff(vcpu))
2005*4882a593Smuzhiyun 			return vcpu;
2006*4882a593Smuzhiyun 	}
2007*4882a593Smuzhiyun 	return NULL;
2008*4882a593Smuzhiyun }
2009*4882a593Smuzhiyun 
kvm_arch_has_irq_bypass(void)2010*4882a593Smuzhiyun bool kvm_arch_has_irq_bypass(void)
2011*4882a593Smuzhiyun {
2012*4882a593Smuzhiyun 	return true;
2013*4882a593Smuzhiyun }
2014*4882a593Smuzhiyun 
kvm_arch_irq_bypass_add_producer(struct irq_bypass_consumer * cons,struct irq_bypass_producer * prod)2015*4882a593Smuzhiyun int kvm_arch_irq_bypass_add_producer(struct irq_bypass_consumer *cons,
2016*4882a593Smuzhiyun 				      struct irq_bypass_producer *prod)
2017*4882a593Smuzhiyun {
2018*4882a593Smuzhiyun 	struct kvm_kernel_irqfd *irqfd =
2019*4882a593Smuzhiyun 		container_of(cons, struct kvm_kernel_irqfd, consumer);
2020*4882a593Smuzhiyun 
2021*4882a593Smuzhiyun 	return kvm_vgic_v4_set_forwarding(irqfd->kvm, prod->irq,
2022*4882a593Smuzhiyun 					  &irqfd->irq_entry);
2023*4882a593Smuzhiyun }
kvm_arch_irq_bypass_del_producer(struct irq_bypass_consumer * cons,struct irq_bypass_producer * prod)2024*4882a593Smuzhiyun void kvm_arch_irq_bypass_del_producer(struct irq_bypass_consumer *cons,
2025*4882a593Smuzhiyun 				      struct irq_bypass_producer *prod)
2026*4882a593Smuzhiyun {
2027*4882a593Smuzhiyun 	struct kvm_kernel_irqfd *irqfd =
2028*4882a593Smuzhiyun 		container_of(cons, struct kvm_kernel_irqfd, consumer);
2029*4882a593Smuzhiyun 
2030*4882a593Smuzhiyun 	kvm_vgic_v4_unset_forwarding(irqfd->kvm, prod->irq,
2031*4882a593Smuzhiyun 				     &irqfd->irq_entry);
2032*4882a593Smuzhiyun }
2033*4882a593Smuzhiyun 
kvm_arch_irq_bypass_stop(struct irq_bypass_consumer * cons)2034*4882a593Smuzhiyun void kvm_arch_irq_bypass_stop(struct irq_bypass_consumer *cons)
2035*4882a593Smuzhiyun {
2036*4882a593Smuzhiyun 	struct kvm_kernel_irqfd *irqfd =
2037*4882a593Smuzhiyun 		container_of(cons, struct kvm_kernel_irqfd, consumer);
2038*4882a593Smuzhiyun 
2039*4882a593Smuzhiyun 	kvm_arm_halt_guest(irqfd->kvm);
2040*4882a593Smuzhiyun }
2041*4882a593Smuzhiyun 
kvm_arch_irq_bypass_start(struct irq_bypass_consumer * cons)2042*4882a593Smuzhiyun void kvm_arch_irq_bypass_start(struct irq_bypass_consumer *cons)
2043*4882a593Smuzhiyun {
2044*4882a593Smuzhiyun 	struct kvm_kernel_irqfd *irqfd =
2045*4882a593Smuzhiyun 		container_of(cons, struct kvm_kernel_irqfd, consumer);
2046*4882a593Smuzhiyun 
2047*4882a593Smuzhiyun 	kvm_arm_resume_guest(irqfd->kvm);
2048*4882a593Smuzhiyun }
2049*4882a593Smuzhiyun 
2050*4882a593Smuzhiyun /**
2051*4882a593Smuzhiyun  * kvm_arch_init - Initialize Hyp-mode and memory mappings on all CPUs.
2052*4882a593Smuzhiyun  */
kvm_arch_init(void * opaque)2053*4882a593Smuzhiyun int kvm_arch_init(void *opaque)
2054*4882a593Smuzhiyun {
2055*4882a593Smuzhiyun 	int err;
2056*4882a593Smuzhiyun 	int ret, cpu;
2057*4882a593Smuzhiyun 	bool in_hyp_mode;
2058*4882a593Smuzhiyun 
2059*4882a593Smuzhiyun 	if (!is_hyp_mode_available()) {
2060*4882a593Smuzhiyun 		kvm_info("HYP mode not available\n");
2061*4882a593Smuzhiyun 		return -ENODEV;
2062*4882a593Smuzhiyun 	}
2063*4882a593Smuzhiyun 
2064*4882a593Smuzhiyun 	in_hyp_mode = is_kernel_in_hyp_mode();
2065*4882a593Smuzhiyun 
2066*4882a593Smuzhiyun 	if (cpus_have_final_cap(ARM64_WORKAROUND_DEVICE_LOAD_ACQUIRE) ||
2067*4882a593Smuzhiyun 	    cpus_have_final_cap(ARM64_WORKAROUND_1508412))
2068*4882a593Smuzhiyun 		kvm_info("Guests without required CPU erratum workarounds can deadlock system!\n" \
2069*4882a593Smuzhiyun 			 "Only trusted guests should be used on this system.\n");
2070*4882a593Smuzhiyun 
2071*4882a593Smuzhiyun 	for_each_online_cpu(cpu) {
2072*4882a593Smuzhiyun 		smp_call_function_single(cpu, check_kvm_target_cpu, &ret, 1);
2073*4882a593Smuzhiyun 		if (ret < 0) {
2074*4882a593Smuzhiyun 			kvm_err("Error, CPU %d not supported!\n", cpu);
2075*4882a593Smuzhiyun 			return -ENODEV;
2076*4882a593Smuzhiyun 		}
2077*4882a593Smuzhiyun 	}
2078*4882a593Smuzhiyun 
2079*4882a593Smuzhiyun 	err = init_common_resources();
2080*4882a593Smuzhiyun 	if (err)
2081*4882a593Smuzhiyun 		return err;
2082*4882a593Smuzhiyun 
2083*4882a593Smuzhiyun 	err = kvm_arm_init_sve();
2084*4882a593Smuzhiyun 	if (err)
2085*4882a593Smuzhiyun 		return err;
2086*4882a593Smuzhiyun 
2087*4882a593Smuzhiyun 	if (!in_hyp_mode) {
2088*4882a593Smuzhiyun 		err = init_hyp_mode();
2089*4882a593Smuzhiyun 		if (err)
2090*4882a593Smuzhiyun 			goto out_err;
2091*4882a593Smuzhiyun 	}
2092*4882a593Smuzhiyun 
2093*4882a593Smuzhiyun 	err = kvm_init_vector_slots();
2094*4882a593Smuzhiyun 	if (err) {
2095*4882a593Smuzhiyun 		kvm_err("Cannot initialise vector slots\n");
2096*4882a593Smuzhiyun 		goto out_err;
2097*4882a593Smuzhiyun 	}
2098*4882a593Smuzhiyun 
2099*4882a593Smuzhiyun 	err = init_subsystems();
2100*4882a593Smuzhiyun 	if (err)
2101*4882a593Smuzhiyun 		goto out_hyp;
2102*4882a593Smuzhiyun 
2103*4882a593Smuzhiyun 	if (!in_hyp_mode) {
2104*4882a593Smuzhiyun 		err = finalize_hyp_mode();
2105*4882a593Smuzhiyun 		if (err) {
2106*4882a593Smuzhiyun 			kvm_err("Failed to finalize Hyp protection\n");
2107*4882a593Smuzhiyun 			goto out_hyp;
2108*4882a593Smuzhiyun 		}
2109*4882a593Smuzhiyun 	}
2110*4882a593Smuzhiyun 
2111*4882a593Smuzhiyun 	if (is_protected_kvm_enabled()) {
2112*4882a593Smuzhiyun 		kvm_info("Protected nVHE mode initialized successfully\n");
2113*4882a593Smuzhiyun 	} else if (in_hyp_mode) {
2114*4882a593Smuzhiyun 		kvm_info("VHE mode initialized successfully\n");
2115*4882a593Smuzhiyun 	} else {
2116*4882a593Smuzhiyun 		kvm_info("Hyp mode initialized successfully\n");
2117*4882a593Smuzhiyun 	}
2118*4882a593Smuzhiyun 
2119*4882a593Smuzhiyun 	return 0;
2120*4882a593Smuzhiyun 
2121*4882a593Smuzhiyun out_hyp:
2122*4882a593Smuzhiyun 	hyp_cpu_pm_exit();
2123*4882a593Smuzhiyun 	if (!in_hyp_mode)
2124*4882a593Smuzhiyun 		teardown_hyp_mode();
2125*4882a593Smuzhiyun out_err:
2126*4882a593Smuzhiyun 	return err;
2127*4882a593Smuzhiyun }
2128*4882a593Smuzhiyun 
2129*4882a593Smuzhiyun /* NOP: Compiling as a module not supported */
kvm_arch_exit(void)2130*4882a593Smuzhiyun void kvm_arch_exit(void)
2131*4882a593Smuzhiyun {
2132*4882a593Smuzhiyun 	kvm_perf_teardown();
2133*4882a593Smuzhiyun }
2134*4882a593Smuzhiyun 
early_kvm_mode_cfg(char * arg)2135*4882a593Smuzhiyun static int __init early_kvm_mode_cfg(char *arg)
2136*4882a593Smuzhiyun {
2137*4882a593Smuzhiyun 	if (!arg)
2138*4882a593Smuzhiyun 		return -EINVAL;
2139*4882a593Smuzhiyun 
2140*4882a593Smuzhiyun 	if (strcmp(arg, "protected") == 0) {
2141*4882a593Smuzhiyun 		kvm_mode = KVM_MODE_PROTECTED;
2142*4882a593Smuzhiyun 		return 0;
2143*4882a593Smuzhiyun 	}
2144*4882a593Smuzhiyun 
2145*4882a593Smuzhiyun 	if (strcmp(arg, "nvhe") == 0 && !WARN_ON(is_kernel_in_hyp_mode()))
2146*4882a593Smuzhiyun 		return 0;
2147*4882a593Smuzhiyun 
2148*4882a593Smuzhiyun 	return -EINVAL;
2149*4882a593Smuzhiyun }
2150*4882a593Smuzhiyun early_param("kvm-arm.mode", early_kvm_mode_cfg);
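/*
 * Example: booting with kvm-arm.mode=protected on the kernel command line
 * selects KVM_MODE_PROTECTED, while kvm-arm.mode=nvhe is only accepted when
 * the kernel is not already running at EL2 (i.e. not using VHE).
 */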
2151*4882a593Smuzhiyun 
kvm_get_mode(void)2152*4882a593Smuzhiyun enum kvm_mode kvm_get_mode(void)
2153*4882a593Smuzhiyun {
2154*4882a593Smuzhiyun 	return kvm_mode;
2155*4882a593Smuzhiyun }
2156*4882a593Smuzhiyun 
arm_init(void)2157*4882a593Smuzhiyun static int arm_init(void)
2158*4882a593Smuzhiyun {
2159*4882a593Smuzhiyun 	int rc = kvm_init(NULL, sizeof(struct kvm_vcpu), 0, THIS_MODULE);
2160*4882a593Smuzhiyun 	return rc;
2161*4882a593Smuzhiyun }
2162*4882a593Smuzhiyun 
2163*4882a593Smuzhiyun module_init(arm_init);
2164