// SPDX-License-Identifier: GPL-2.0-only
/*
 *
 * Copyright 2010-2011 Paul Mackerras, IBM Corp. <paulus@au1.ibm.com>
 */

#include <linux/types.h>
#include <linux/string.h>
#include <linux/kvm.h>
#include <linux/kvm_host.h>
#include <linux/hugetlb.h>
#include <linux/module.h>
#include <linux/log2.h>
#include <linux/sizes.h>

#include <asm/trace.h>
#include <asm/kvm_ppc.h>
#include <asm/kvm_book3s.h>
#include <asm/book3s/64/mmu-hash.h>
#include <asm/hvcall.h>
#include <asm/synch.h>
#include <asm/ppc-opcode.h>
#include <asm/pte-walk.h>

/* Translate address of a vmalloc'd thing to a linear map address */
static void *real_vmalloc_addr(void *x)
{
	unsigned long addr = (unsigned long) x;
	pte_t *p;
	/*
	 * Assume we don't have huge pages in vmalloc space, so we don't
	 * need to worry about THP collapse/split. This is called only in
	 * real mode with MSR_EE = 0, hence no need for irq_save/restore.
	 */
	p = find_init_mm_pte(addr, NULL);
	if (!p || !pte_present(*p))
		return NULL;
	addr = (pte_pfn(*p) << PAGE_SHIFT) | (addr & ~PAGE_MASK);
	return __va(addr);
}

/* Return 1 if we need to do a global tlbie, 0 if we can use tlbiel */
static int global_invalidates(struct kvm *kvm)
{
	int global;
	int cpu;

	/*
	 * If there is only one vcore, and it's currently running,
	 * as indicated by local_paca->kvm_hstate.kvm_vcpu being set,
	 * we can use tlbiel as long as we mark all other physical
	 * cores as potentially having stale TLB entries for this lpid.
	 * Otherwise, don't use tlbiel.
	 */
	if (kvm->arch.online_vcores == 1 && local_paca->kvm_hstate.kvm_vcpu)
		global = 0;
	else
		global = 1;

	if (!global) {
		/* any other core might now have stale TLB entries... */
		smp_wmb();
		cpumask_setall(&kvm->arch.need_tlb_flush);
		cpu = local_paca->kvm_hstate.kvm_vcore->pcpu;
		/*
		 * On POWER9, threads are independent but the TLB is shared,
		 * so use the bit for the first thread to represent the core.
		 */
		if (cpu_has_feature(CPU_FTR_ARCH_300))
			cpu = cpu_first_tlb_thread_sibling(cpu);
		cpumask_clear_cpu(cpu, &kvm->arch.need_tlb_flush);
	}

	return global;
}

/*
 * Add this HPTE into the chain for the real page.
 * Must be called with the chain locked; it unlocks the chain.
 */
void kvmppc_add_revmap_chain(struct kvm *kvm, struct revmap_entry *rev,
			     unsigned long *rmap, long pte_index, int realmode)
{
	struct revmap_entry *head, *tail;
	unsigned long i;

	if (*rmap & KVMPPC_RMAP_PRESENT) {
		i = *rmap & KVMPPC_RMAP_INDEX;
		head = &kvm->arch.hpt.rev[i];
		if (realmode)
			head = real_vmalloc_addr(head);
		tail = &kvm->arch.hpt.rev[head->back];
		if (realmode)
			tail = real_vmalloc_addr(tail);
		rev->forw = i;
		rev->back = head->back;
		tail->forw = pte_index;
		head->back = pte_index;
	} else {
		rev->forw = rev->back = pte_index;
		*rmap = (*rmap & ~KVMPPC_RMAP_INDEX) |
			pte_index | KVMPPC_RMAP_PRESENT | KVMPPC_RMAP_HPT;
	}
	unlock_rmap(rmap);
}
EXPORT_SYMBOL_GPL(kvmppc_add_revmap_chain);

/* Update the dirty bitmap of a memslot */
void kvmppc_update_dirty_map(const struct kvm_memory_slot *memslot,
			     unsigned long gfn, unsigned long psize)
{
	unsigned long npages;

	if (!psize || !memslot->dirty_bitmap)
		return;
	npages = (psize + PAGE_SIZE - 1) / PAGE_SIZE;
	gfn -= memslot->base_gfn;
	set_dirty_bits_atomic(memslot->dirty_bitmap, gfn, npages);
}
EXPORT_SYMBOL_GPL(kvmppc_update_dirty_map);

static void kvmppc_set_dirty_from_hpte(struct kvm *kvm,
				unsigned long hpte_v, unsigned long hpte_gr)
{
	struct kvm_memory_slot *memslot;
	unsigned long gfn;
	unsigned long psize;

	psize = kvmppc_actual_pgsz(hpte_v, hpte_gr);
	gfn = hpte_rpn(hpte_gr, psize);
	memslot = __gfn_to_memslot(kvm_memslots_raw(kvm), gfn);
	if (memslot && memslot->dirty_bitmap)
		kvmppc_update_dirty_map(memslot, gfn, psize);
}

/* Returns a pointer to the revmap entry for the page mapped by a HPTE */
static unsigned long *revmap_for_hpte(struct kvm *kvm, unsigned long hpte_v,
				      unsigned long hpte_gr,
				      struct kvm_memory_slot **memslotp,
				      unsigned long *gfnp)
{
	struct kvm_memory_slot *memslot;
	unsigned long *rmap;
	unsigned long gfn;

	gfn = hpte_rpn(hpte_gr, kvmppc_actual_pgsz(hpte_v, hpte_gr));
	memslot = __gfn_to_memslot(kvm_memslots_raw(kvm), gfn);
	if (memslotp)
		*memslotp = memslot;
	if (gfnp)
		*gfnp = gfn;
	if (!memslot)
		return NULL;

	rmap = real_vmalloc_addr(&memslot->arch.rmap[gfn - memslot->base_gfn]);
	return rmap;
}

/* Remove this HPTE from the chain for a real page */
static void remove_revmap_chain(struct kvm *kvm, long pte_index,
				struct revmap_entry *rev,
				unsigned long hpte_v, unsigned long hpte_r)
{
	struct revmap_entry *next, *prev;
	unsigned long ptel, head;
	unsigned long *rmap;
	unsigned long rcbits;
	struct kvm_memory_slot *memslot;
	unsigned long gfn;

	rcbits = hpte_r & (HPTE_R_R | HPTE_R_C);
	ptel = rev->guest_rpte |= rcbits;
	rmap = revmap_for_hpte(kvm, hpte_v, ptel, &memslot, &gfn);
	if (!rmap)
		return;
	lock_rmap(rmap);

	head = *rmap & KVMPPC_RMAP_INDEX;
	next = real_vmalloc_addr(&kvm->arch.hpt.rev[rev->forw]);
	prev = real_vmalloc_addr(&kvm->arch.hpt.rev[rev->back]);
	next->back = rev->back;
	prev->forw = rev->forw;
	if (head == pte_index) {
		head = rev->forw;
		if (head == pte_index)
			*rmap &= ~(KVMPPC_RMAP_PRESENT | KVMPPC_RMAP_INDEX);
		else
			*rmap = (*rmap & ~KVMPPC_RMAP_INDEX) | head;
	}
	*rmap |= rcbits << KVMPPC_RMAP_RC_SHIFT;
	if (rcbits & HPTE_R_C)
		kvmppc_update_dirty_map(memslot, gfn,
					kvmppc_actual_pgsz(hpte_v, hpte_r));
	unlock_rmap(rmap);
}

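/*
 * Core of the H_ENTER hypercall: validate the guest HPTE (pteh/ptel),
 * translate the guest real address to a host physical address via the
 * memslot and host page tables, pick (or verify, with H_EXACT) a slot in
 * the HPT group at pte_index, link the entry into the reverse-map chain,
 * and write the HPTE.  Returns H_SUCCESS with the chosen index in
 * *pte_idx_ret, or an H_* error code.
 */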
long kvmppc_do_h_enter(struct kvm *kvm, unsigned long flags,
		       long pte_index, unsigned long pteh, unsigned long ptel,
		       pgd_t *pgdir, bool realmode, unsigned long *pte_idx_ret)
{
	unsigned long i, pa, gpa, gfn, psize;
	unsigned long slot_fn, hva;
	__be64 *hpte;
	struct revmap_entry *rev;
	unsigned long g_ptel;
	struct kvm_memory_slot *memslot;
	unsigned hpage_shift;
	bool is_ci;
	unsigned long *rmap;
	pte_t *ptep;
	unsigned int writing;
	unsigned long mmu_seq;
	unsigned long rcbits;

	if (kvm_is_radix(kvm))
		return H_FUNCTION;
	psize = kvmppc_actual_pgsz(pteh, ptel);
	if (!psize)
		return H_PARAMETER;
	writing = hpte_is_writable(ptel);
	pteh &= ~(HPTE_V_HVLOCK | HPTE_V_ABSENT | HPTE_V_VALID);
	ptel &= ~HPTE_GR_RESERVED;
	g_ptel = ptel;

	/* used later to detect if we might have been invalidated */
	mmu_seq = kvm->mmu_notifier_seq;
	smp_rmb();

	/* Find the memslot (if any) for this address */
	gpa = (ptel & HPTE_R_RPN) & ~(psize - 1);
	gfn = gpa >> PAGE_SHIFT;
	memslot = __gfn_to_memslot(kvm_memslots_raw(kvm), gfn);
	pa = 0;
	is_ci = false;
	rmap = NULL;
	if (!(memslot && !(memslot->flags & KVM_MEMSLOT_INVALID))) {
		/* Emulated MMIO - mark this with key=31 */
		pteh |= HPTE_V_ABSENT;
		ptel |= HPTE_R_KEY_HI | HPTE_R_KEY_LO;
		goto do_insert;
	}

	/* Check if the requested page fits entirely in the memslot. */
	if (!slot_is_aligned(memslot, psize))
		return H_PARAMETER;
	slot_fn = gfn - memslot->base_gfn;
	rmap = &memslot->arch.rmap[slot_fn];

	/* Translate to host virtual address */
	hva = __gfn_to_hva_memslot(memslot, gfn);

	arch_spin_lock(&kvm->mmu_lock.rlock.raw_lock);
	ptep = find_kvm_host_pte(kvm, mmu_seq, hva, &hpage_shift);
	if (ptep) {
		pte_t pte;
		unsigned int host_pte_size;

		if (hpage_shift)
			host_pte_size = 1ul << hpage_shift;
		else
			host_pte_size = PAGE_SIZE;
		/*
		 * The guest page size should always be <= the host page
		 * size, if the host is using hugepages.
		 */
		if (host_pte_size < psize) {
			arch_spin_unlock(&kvm->mmu_lock.rlock.raw_lock);
			return H_PARAMETER;
		}
		pte = kvmppc_read_update_linux_pte(ptep, writing);
		if (pte_present(pte) && !pte_protnone(pte)) {
			if (writing && !__pte_write(pte))
				/* make the actual HPTE be read-only */
				ptel = hpte_make_readonly(ptel);
			is_ci = pte_ci(pte);
			pa = pte_pfn(pte) << PAGE_SHIFT;
			pa |= hva & (host_pte_size - 1);
			pa |= gpa & ~PAGE_MASK;
		}
	}
	arch_spin_unlock(&kvm->mmu_lock.rlock.raw_lock);

	ptel &= HPTE_R_KEY | HPTE_R_PP0 | (psize-1);
	ptel |= pa;

	if (pa)
		pteh |= HPTE_V_VALID;
	else {
		pteh |= HPTE_V_ABSENT;
		ptel &= ~(HPTE_R_KEY_HI | HPTE_R_KEY_LO);
	}

	/* If we had a host pte mapping then check the WIMG bits */
	if (ptep && !hpte_cache_flags_ok(ptel, is_ci)) {
		if (is_ci)
			return H_PARAMETER;
		/*
		 * Allow guest to map emulated device memory as
		 * uncacheable, but actually make it cacheable.
		 */
		ptel &= ~(HPTE_R_W|HPTE_R_I|HPTE_R_G);
		ptel |= HPTE_R_M;
	}

	/* Find and lock the HPTEG slot to use */
do_insert:
	if (pte_index >= kvmppc_hpt_npte(&kvm->arch.hpt))
		return H_PARAMETER;
	if (likely((flags & H_EXACT) == 0)) {
		pte_index &= ~7UL;
		hpte = (__be64 *)(kvm->arch.hpt.virt + (pte_index << 4));
		for (i = 0; i < 8; ++i) {
			if ((be64_to_cpu(*hpte) & HPTE_V_VALID) == 0 &&
			    try_lock_hpte(hpte, HPTE_V_HVLOCK | HPTE_V_VALID |
					  HPTE_V_ABSENT))
				break;
			hpte += 2;
		}
		if (i == 8) {
			/*
			 * Since try_lock_hpte doesn't retry (not even stdcx.
			 * failures), it could be that there is a free slot
			 * but we transiently failed to lock it. Try again,
			 * actually locking each slot and checking it.
			 */
			hpte -= 16;
			for (i = 0; i < 8; ++i) {
				u64 pte;
				while (!try_lock_hpte(hpte, HPTE_V_HVLOCK))
					cpu_relax();
				pte = be64_to_cpu(hpte[0]);
				if (!(pte & (HPTE_V_VALID | HPTE_V_ABSENT)))
					break;
				__unlock_hpte(hpte, pte);
				hpte += 2;
			}
			if (i == 8)
				return H_PTEG_FULL;
		}
		pte_index += i;
	} else {
		hpte = (__be64 *)(kvm->arch.hpt.virt + (pte_index << 4));
		if (!try_lock_hpte(hpte, HPTE_V_HVLOCK | HPTE_V_VALID |
				   HPTE_V_ABSENT)) {
			/* Lock the slot and check again */
			u64 pte;

			while (!try_lock_hpte(hpte, HPTE_V_HVLOCK))
				cpu_relax();
			pte = be64_to_cpu(hpte[0]);
			if (pte & (HPTE_V_VALID | HPTE_V_ABSENT)) {
				__unlock_hpte(hpte, pte);
				return H_PTEG_FULL;
			}
		}
	}

	/* Save away the guest's idea of the second HPTE dword */
	rev = &kvm->arch.hpt.rev[pte_index];
	if (realmode)
		rev = real_vmalloc_addr(rev);
	if (rev) {
		rev->guest_rpte = g_ptel;
		note_hpte_modification(kvm, rev);
	}

	/* Link HPTE into reverse-map chain */
	if (pteh & HPTE_V_VALID) {
		if (realmode)
			rmap = real_vmalloc_addr(rmap);
		lock_rmap(rmap);
		/* Check for pending invalidations under the rmap chain lock */
		if (mmu_notifier_retry(kvm, mmu_seq)) {
			/* inval in progress, write a non-present HPTE */
			pteh |= HPTE_V_ABSENT;
			pteh &= ~HPTE_V_VALID;
			ptel &= ~(HPTE_R_KEY_HI | HPTE_R_KEY_LO);
			unlock_rmap(rmap);
		} else {
			kvmppc_add_revmap_chain(kvm, rev, rmap, pte_index,
						realmode);
			/* Only set R/C in real HPTE if already set in *rmap */
			rcbits = *rmap >> KVMPPC_RMAP_RC_SHIFT;
			ptel &= rcbits | ~(HPTE_R_R | HPTE_R_C);
		}
	}

	/* Convert to new format on P9 */
	if (cpu_has_feature(CPU_FTR_ARCH_300)) {
		ptel = hpte_old_to_new_r(pteh, ptel);
		pteh = hpte_old_to_new_v(pteh);
	}
	hpte[1] = cpu_to_be64(ptel);

	/* Write the first HPTE dword, unlocking the HPTE and making it valid */
	eieio();
	__unlock_hpte(hpte, pteh);
	asm volatile("ptesync" : : : "memory");

	*pte_idx_ret = pte_index;
	return H_SUCCESS;
}
EXPORT_SYMBOL_GPL(kvmppc_do_h_enter);

long kvmppc_h_enter(struct kvm_vcpu *vcpu, unsigned long flags,
		    long pte_index, unsigned long pteh, unsigned long ptel)
{
	return kvmppc_do_h_enter(vcpu->kvm, flags, pte_index, pteh, ptel,
				 vcpu->arch.pgdir, true,
				 &vcpu->arch.regs.gpr[4]);
}

#ifdef __BIG_ENDIAN__
#define LOCK_TOKEN	(*(u32 *)(&get_paca()->lock_token))
#else
#define LOCK_TOKEN	(*(u32 *)(&get_paca()->paca_index))
#endif

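/*
 * An emulated-MMIO HPTE is marked absent with both key bits set (key=31)
 * by kvmppc_do_h_enter() above; this test recognises that encoding.
 */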
static inline int is_mmio_hpte(unsigned long v, unsigned long r)
{
	return ((v & HPTE_V_ABSENT) &&
		(r & (HPTE_R_KEY_HI | HPTE_R_KEY_LO)) ==
		(HPTE_R_KEY_HI | HPTE_R_KEY_LO));
}

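/*
 * Work around POWER9 tlbie errata: when the corresponding CPU feature bits
 * are set, issue an extra radix-format flush and/or repeat the last tlbie
 * (each preceded by a ptesync) so that stale ERAT/TLB state is reliably
 * flushed.
 */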
static inline void fixup_tlbie_lpid(unsigned long rb_value, unsigned long lpid)
{

	if (cpu_has_feature(CPU_FTR_P9_TLBIE_ERAT_BUG)) {
		/* Radix flush for a hash guest */

		unsigned long rb, rs, prs, r, ric;

		rb = PPC_BIT(52); /* IS = 2 */
		rs = 0;		  /* lpid = 0 */
		prs = 0;	  /* partition scoped */
		r = 1;		  /* radix format */
		ric = 0;	  /* RIC_FLUSH_TLB */

		/*
		 * Need the extra ptesync to make sure we don't
		 * re-order the tlbie
		 */
		asm volatile("ptesync": : :"memory");
		asm volatile(PPC_TLBIE_5(%0, %4, %3, %2, %1)
			     : : "r"(rb), "i"(r), "i"(prs),
			       "i"(ric), "r"(rs) : "memory");
	}

	if (cpu_has_feature(CPU_FTR_P9_TLBIE_STQ_BUG)) {
		asm volatile("ptesync": : :"memory");
		asm volatile(PPC_TLBIE_5(%0,%1,0,0,0) : :
			     "r" (rb_value), "r" (lpid));
	}
}

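/*
 * Invalidate a batch of translations: issue tlbie (global, broadcast to all
 * cores) or tlbiel (local to this core) for each RB value in rbvalues[],
 * with the ptesync/eieio/tlbsync sequencing the architecture requires.
 */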
static void do_tlbies(struct kvm *kvm, unsigned long *rbvalues,
		      long npages, int global, bool need_sync)
{
	long i;

	/*
	 * We use the POWER9 5-operand versions of tlbie and tlbiel here.
	 * Since we are using RIC=0 PRS=0 R=0, and P7/P8 tlbiel ignores
	 * the RS field, this is backwards-compatible with P7 and P8.
	 */
	if (global) {
		if (need_sync)
			asm volatile("ptesync" : : : "memory");
		for (i = 0; i < npages; ++i) {
			asm volatile(PPC_TLBIE_5(%0,%1,0,0,0) : :
				     "r" (rbvalues[i]), "r" (kvm->arch.lpid));
		}

		fixup_tlbie_lpid(rbvalues[i - 1], kvm->arch.lpid);
		asm volatile("eieio; tlbsync; ptesync" : : : "memory");
	} else {
		if (need_sync)
			asm volatile("ptesync" : : : "memory");
		for (i = 0; i < npages; ++i) {
			asm volatile(PPC_TLBIEL(%0,%1,0,0,0) : :
				     "r" (rbvalues[i]), "r" (0));
		}
		asm volatile("ptesync" : : : "memory");
	}
}

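/*
 * Core of the H_REMOVE hypercall: find and lock the HPTE at pte_index,
 * check the optional AVPN/ANDCOND conditions, invalidate it (including the
 * TLB entry), detach it from the reverse-map chain, and return the old V
 * and R values to the guest in hpret[].
 */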
long kvmppc_do_h_remove(struct kvm *kvm, unsigned long flags,
			unsigned long pte_index, unsigned long avpn,
			unsigned long *hpret)
{
	__be64 *hpte;
	unsigned long v, r, rb;
	struct revmap_entry *rev;
	u64 pte, orig_pte, pte_r;

	if (kvm_is_radix(kvm))
		return H_FUNCTION;
	if (pte_index >= kvmppc_hpt_npte(&kvm->arch.hpt))
		return H_PARAMETER;
	hpte = (__be64 *)(kvm->arch.hpt.virt + (pte_index << 4));
	while (!try_lock_hpte(hpte, HPTE_V_HVLOCK))
		cpu_relax();
	pte = orig_pte = be64_to_cpu(hpte[0]);
	pte_r = be64_to_cpu(hpte[1]);
	if (cpu_has_feature(CPU_FTR_ARCH_300)) {
		pte = hpte_new_to_old_v(pte, pte_r);
		pte_r = hpte_new_to_old_r(pte_r);
	}
	if ((pte & (HPTE_V_ABSENT | HPTE_V_VALID)) == 0 ||
	    ((flags & H_AVPN) && (pte & ~0x7fUL) != avpn) ||
	    ((flags & H_ANDCOND) && (pte & avpn) != 0)) {
		__unlock_hpte(hpte, orig_pte);
		return H_NOT_FOUND;
	}

	rev = real_vmalloc_addr(&kvm->arch.hpt.rev[pte_index]);
	v = pte & ~HPTE_V_HVLOCK;
	if (v & HPTE_V_VALID) {
		hpte[0] &= ~cpu_to_be64(HPTE_V_VALID);
		rb = compute_tlbie_rb(v, pte_r, pte_index);
		do_tlbies(kvm, &rb, 1, global_invalidates(kvm), true);
		/*
		 * The reference (R) and change (C) bits in a HPT
		 * entry can be set by hardware at any time up until
		 * the HPTE is invalidated and the TLB invalidation
		 * sequence has completed.  This means that when
		 * removing a HPTE, we need to re-read the HPTE after
		 * the invalidation sequence has completed in order to
		 * obtain reliable values of R and C.
		 */
		remove_revmap_chain(kvm, pte_index, rev, v,
				    be64_to_cpu(hpte[1]));
	}
	r = rev->guest_rpte & ~HPTE_GR_RESERVED;
	note_hpte_modification(kvm, rev);
	unlock_hpte(hpte, 0);

	if (is_mmio_hpte(v, pte_r))
		atomic64_inc(&kvm->arch.mmio_update);

	if (v & HPTE_V_ABSENT)
		v = (v & ~HPTE_V_ABSENT) | HPTE_V_VALID;
	hpret[0] = v;
	hpret[1] = r;
	return H_SUCCESS;
}
EXPORT_SYMBOL_GPL(kvmppc_do_h_remove);

long kvmppc_h_remove(struct kvm_vcpu *vcpu, unsigned long flags,
		     unsigned long pte_index, unsigned long avpn)
{
	return kvmppc_do_h_remove(vcpu->kvm, flags, pte_index, avpn,
				  &vcpu->arch.regs.gpr[4]);
}

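/*
 * H_BULK_REMOVE: process up to four remove requests packed into the guest's
 * GPRs (args[]).  Each request is a pair of doublewords: the first holds the
 * flags and PTE index, the second the AVPN/ANDCOND value.  Matching HPTEs
 * are invalidated with one batched TLB flush, and a status code (0x80 done,
 * 0x90 not found, 0xa0 parameter error) is written back into the high byte
 * of the first doubleword of each request.
 */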
long kvmppc_h_bulk_remove(struct kvm_vcpu *vcpu)
{
	struct kvm *kvm = vcpu->kvm;
	unsigned long *args = &vcpu->arch.regs.gpr[4];
	__be64 *hp, *hptes[4];
	unsigned long tlbrb[4];
	long int i, j, k, n, found, indexes[4];
	unsigned long flags, req, pte_index, rcbits;
	int global;
	long int ret = H_SUCCESS;
	struct revmap_entry *rev, *revs[4];
	u64 hp0, hp1;

	if (kvm_is_radix(kvm))
		return H_FUNCTION;
	global = global_invalidates(kvm);
	for (i = 0; i < 4 && ret == H_SUCCESS; ) {
		n = 0;
		for (; i < 4; ++i) {
			j = i * 2;
			pte_index = args[j];
			flags = pte_index >> 56;
			pte_index &= ((1ul << 56) - 1);
			req = flags >> 6;
			flags &= 3;
			if (req == 3) {	/* no more requests */
				i = 4;
				break;
			}
			if (req != 1 || flags == 3 ||
			    pte_index >= kvmppc_hpt_npte(&kvm->arch.hpt)) {
				/* parameter error */
				args[j] = ((0xa0 | flags) << 56) + pte_index;
				ret = H_PARAMETER;
				break;
			}
			hp = (__be64 *) (kvm->arch.hpt.virt + (pte_index << 4));
			/* to avoid deadlock, don't spin except for first */
			if (!try_lock_hpte(hp, HPTE_V_HVLOCK)) {
				if (n)
					break;
				while (!try_lock_hpte(hp, HPTE_V_HVLOCK))
					cpu_relax();
			}
			found = 0;
			hp0 = be64_to_cpu(hp[0]);
			hp1 = be64_to_cpu(hp[1]);
			if (cpu_has_feature(CPU_FTR_ARCH_300)) {
				hp0 = hpte_new_to_old_v(hp0, hp1);
				hp1 = hpte_new_to_old_r(hp1);
			}
			if (hp0 & (HPTE_V_ABSENT | HPTE_V_VALID)) {
				switch (flags & 3) {
				case 0:		/* absolute */
					found = 1;
					break;
				case 1:		/* andcond */
					if (!(hp0 & args[j + 1]))
						found = 1;
					break;
				case 2:		/* AVPN */
					if ((hp0 & ~0x7fUL) == args[j + 1])
						found = 1;
					break;
				}
			}
			if (!found) {
				hp[0] &= ~cpu_to_be64(HPTE_V_HVLOCK);
				args[j] = ((0x90 | flags) << 56) + pte_index;
				continue;
			}

			args[j] = ((0x80 | flags) << 56) + pte_index;
			rev = real_vmalloc_addr(&kvm->arch.hpt.rev[pte_index]);
			note_hpte_modification(kvm, rev);

			if (!(hp0 & HPTE_V_VALID)) {
				/* insert R and C bits from PTE */
				rcbits = rev->guest_rpte & (HPTE_R_R|HPTE_R_C);
				args[j] |= rcbits << (56 - 5);
				hp[0] = 0;
				if (is_mmio_hpte(hp0, hp1))
					atomic64_inc(&kvm->arch.mmio_update);
				continue;
			}

			/* leave it locked */
			hp[0] &= ~cpu_to_be64(HPTE_V_VALID);
			tlbrb[n] = compute_tlbie_rb(hp0, hp1, pte_index);
			indexes[n] = j;
			hptes[n] = hp;
			revs[n] = rev;
			++n;
		}

		if (!n)
			break;

		/* Now that we've collected a batch, do the tlbies */
		do_tlbies(kvm, tlbrb, n, global, true);

		/* Read PTE low words after tlbie to get final R/C values */
		for (k = 0; k < n; ++k) {
			j = indexes[k];
			pte_index = args[j] & ((1ul << 56) - 1);
			hp = hptes[k];
			rev = revs[k];
			remove_revmap_chain(kvm, pte_index, rev,
				be64_to_cpu(hp[0]), be64_to_cpu(hp[1]));
			rcbits = rev->guest_rpte & (HPTE_R_R|HPTE_R_C);
			args[j] |= rcbits << (56 - 5);
			__unlock_hpte(hp, 0);
		}
	}

	return ret;
}

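/*
 * H_PROTECT: update the protection bits (PP, N, key) of an existing HPTE.
 * The guest view in the revmap entry is always updated; the hardware HPTE
 * is only rewritten if it is valid, and is never allowed to go from
 * read-only to writable here; that transition is left to the page-fault
 * path.
 */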
long kvmppc_h_protect(struct kvm_vcpu *vcpu, unsigned long flags,
		      unsigned long pte_index, unsigned long avpn,
		      unsigned long va)
{
	struct kvm *kvm = vcpu->kvm;
	__be64 *hpte;
	struct revmap_entry *rev;
	unsigned long v, r, rb, mask, bits;
	u64 pte_v, pte_r;

	if (kvm_is_radix(kvm))
		return H_FUNCTION;
	if (pte_index >= kvmppc_hpt_npte(&kvm->arch.hpt))
		return H_PARAMETER;

	hpte = (__be64 *)(kvm->arch.hpt.virt + (pte_index << 4));
	while (!try_lock_hpte(hpte, HPTE_V_HVLOCK))
		cpu_relax();
	v = pte_v = be64_to_cpu(hpte[0]);
	if (cpu_has_feature(CPU_FTR_ARCH_300))
		v = hpte_new_to_old_v(v, be64_to_cpu(hpte[1]));
	if ((v & (HPTE_V_ABSENT | HPTE_V_VALID)) == 0 ||
	    ((flags & H_AVPN) && (v & ~0x7fUL) != avpn)) {
		__unlock_hpte(hpte, pte_v);
		return H_NOT_FOUND;
	}

	pte_r = be64_to_cpu(hpte[1]);
	bits = (flags << 55) & HPTE_R_PP0;
	bits |= (flags << 48) & HPTE_R_KEY_HI;
	bits |= flags & (HPTE_R_PP | HPTE_R_N | HPTE_R_KEY_LO);

	/* Update guest view of 2nd HPTE dword */
	mask = HPTE_R_PP0 | HPTE_R_PP | HPTE_R_N |
		HPTE_R_KEY_HI | HPTE_R_KEY_LO;
	rev = real_vmalloc_addr(&kvm->arch.hpt.rev[pte_index]);
	if (rev) {
		r = (rev->guest_rpte & ~mask) | bits;
		rev->guest_rpte = r;
		note_hpte_modification(kvm, rev);
	}

	/* Update HPTE */
	if (v & HPTE_V_VALID) {
		/*
		 * If the page is valid, don't let it transition from
		 * readonly to writable.  If it should be writable, we'll
		 * take a trap and let the page fault code sort it out.
		 */
		r = (pte_r & ~mask) | bits;
		if (hpte_is_writable(r) && !hpte_is_writable(pte_r))
			r = hpte_make_readonly(r);
		/* If the PTE is changing, invalidate it first */
		if (r != pte_r) {
			rb = compute_tlbie_rb(v, r, pte_index);
			hpte[0] = cpu_to_be64((pte_v & ~HPTE_V_VALID) |
					      HPTE_V_ABSENT);
			do_tlbies(kvm, &rb, 1, global_invalidates(kvm), true);
			/* Don't lose R/C bit updates done by hardware */
			r |= be64_to_cpu(hpte[1]) & (HPTE_R_R | HPTE_R_C);
			hpte[1] = cpu_to_be64(r);
		}
	}
	unlock_hpte(hpte, pte_v & ~HPTE_V_HVLOCK);
	asm volatile("ptesync" : : : "memory");
	if (is_mmio_hpte(v, pte_r))
		atomic64_inc(&kvm->arch.mmio_update);

	return H_SUCCESS;
}

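/*
 * H_READ: return the guest view of one HPTE (or a group of four when
 * H_READ_4 is set) in GPRs 4 onwards, combining the guest's second
 * doubleword with the current hardware R and C bits.
 */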
long kvmppc_h_read(struct kvm_vcpu *vcpu, unsigned long flags,
		   unsigned long pte_index)
{
	struct kvm *kvm = vcpu->kvm;
	__be64 *hpte;
	unsigned long v, r;
	int i, n = 1;
	struct revmap_entry *rev = NULL;

	if (kvm_is_radix(kvm))
		return H_FUNCTION;
	if (pte_index >= kvmppc_hpt_npte(&kvm->arch.hpt))
		return H_PARAMETER;
	if (flags & H_READ_4) {
		pte_index &= ~3;
		n = 4;
	}
	rev = real_vmalloc_addr(&kvm->arch.hpt.rev[pte_index]);
	for (i = 0; i < n; ++i, ++pte_index) {
		hpte = (__be64 *)(kvm->arch.hpt.virt + (pte_index << 4));
		v = be64_to_cpu(hpte[0]) & ~HPTE_V_HVLOCK;
		r = be64_to_cpu(hpte[1]);
		if (cpu_has_feature(CPU_FTR_ARCH_300)) {
			v = hpte_new_to_old_v(v, r);
			r = hpte_new_to_old_r(r);
		}
		if (v & HPTE_V_ABSENT) {
			v &= ~HPTE_V_ABSENT;
			v |= HPTE_V_VALID;
		}
		if (v & HPTE_V_VALID) {
			r = rev[i].guest_rpte | (r & (HPTE_R_R | HPTE_R_C));
			r &= ~HPTE_GR_RESERVED;
		}
		vcpu->arch.regs.gpr[4 + i * 2] = v;
		vcpu->arch.regs.gpr[5 + i * 2] = r;
	}
	return H_SUCCESS;
}

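/*
 * H_CLEAR_REF: read and clear the reference (R) bit of an HPTE, returning
 * the guest view of the second doubleword (with the current R/C bits folded
 * in) in GPR 4, and record the reference in the rmap entry for the page.
 */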
long kvmppc_h_clear_ref(struct kvm_vcpu *vcpu, unsigned long flags,
			unsigned long pte_index)
{
	struct kvm *kvm = vcpu->kvm;
	__be64 *hpte;
	unsigned long v, r, gr;
	struct revmap_entry *rev;
	unsigned long *rmap;
	long ret = H_NOT_FOUND;

	if (kvm_is_radix(kvm))
		return H_FUNCTION;
	if (pte_index >= kvmppc_hpt_npte(&kvm->arch.hpt))
		return H_PARAMETER;

	rev = real_vmalloc_addr(&kvm->arch.hpt.rev[pte_index]);
	hpte = (__be64 *)(kvm->arch.hpt.virt + (pte_index << 4));
	while (!try_lock_hpte(hpte, HPTE_V_HVLOCK))
		cpu_relax();
	v = be64_to_cpu(hpte[0]);
	r = be64_to_cpu(hpte[1]);
	if (!(v & (HPTE_V_VALID | HPTE_V_ABSENT)))
		goto out;

	gr = rev->guest_rpte;
	if (rev->guest_rpte & HPTE_R_R) {
		rev->guest_rpte &= ~HPTE_R_R;
		note_hpte_modification(kvm, rev);
	}
	if (v & HPTE_V_VALID) {
		gr |= r & (HPTE_R_R | HPTE_R_C);
		if (r & HPTE_R_R) {
			kvmppc_clear_ref_hpte(kvm, hpte, pte_index);
			rmap = revmap_for_hpte(kvm, v, gr, NULL, NULL);
			if (rmap) {
				lock_rmap(rmap);
				*rmap |= KVMPPC_RMAP_REFERENCED;
				unlock_rmap(rmap);
			}
		}
	}
	vcpu->arch.regs.gpr[4] = gr;
	ret = H_SUCCESS;
out:
	unlock_hpte(hpte, v & ~HPTE_V_HVLOCK);
	return ret;
}

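/*
 * H_CLEAR_MOD: read and clear the change (C) bit of an HPTE.  The entry is
 * made temporarily absent and its TLB entry invalidated so that the C bit
 * is stable before it is cleared; a set C bit is transferred to the
 * memslot's dirty bitmap.
 */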
long kvmppc_h_clear_mod(struct kvm_vcpu *vcpu, unsigned long flags,
			unsigned long pte_index)
{
	struct kvm *kvm = vcpu->kvm;
	__be64 *hpte;
	unsigned long v, r, gr;
	struct revmap_entry *rev;
	long ret = H_NOT_FOUND;

	if (kvm_is_radix(kvm))
		return H_FUNCTION;
	if (pte_index >= kvmppc_hpt_npte(&kvm->arch.hpt))
		return H_PARAMETER;

	rev = real_vmalloc_addr(&kvm->arch.hpt.rev[pte_index]);
	hpte = (__be64 *)(kvm->arch.hpt.virt + (pte_index << 4));
	while (!try_lock_hpte(hpte, HPTE_V_HVLOCK))
		cpu_relax();
	v = be64_to_cpu(hpte[0]);
	r = be64_to_cpu(hpte[1]);
	if (!(v & (HPTE_V_VALID | HPTE_V_ABSENT)))
		goto out;

	gr = rev->guest_rpte;
	if (gr & HPTE_R_C) {
		rev->guest_rpte &= ~HPTE_R_C;
		note_hpte_modification(kvm, rev);
	}
	if (v & HPTE_V_VALID) {
		/* need to make it temporarily absent so C is stable */
		hpte[0] |= cpu_to_be64(HPTE_V_ABSENT);
		kvmppc_invalidate_hpte(kvm, hpte, pte_index);
		r = be64_to_cpu(hpte[1]);
		gr |= r & (HPTE_R_R | HPTE_R_C);
		if (r & HPTE_R_C) {
			hpte[1] = cpu_to_be64(r & ~HPTE_R_C);
			eieio();
			kvmppc_set_dirty_from_hpte(kvm, v, gr);
		}
	}
	vcpu->arch.regs.gpr[4] = gr;
	ret = H_SUCCESS;
out:
	unlock_hpte(hpte, v & ~HPTE_V_HVLOCK);
	return ret;
}

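/*
 * Translate a guest physical address to a host physical address via the
 * memslot and the host page tables.  Called with the kvm->mmu_lock raw lock
 * held; returns H_TOO_HARD if the translation cannot be done here so the
 * caller can fall back to the virtual-mode handler.
 */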
static int kvmppc_get_hpa(struct kvm_vcpu *vcpu, unsigned long mmu_seq,
			  unsigned long gpa, int writing, unsigned long *hpa,
			  struct kvm_memory_slot **memslot_p)
{
	struct kvm *kvm = vcpu->kvm;
	struct kvm_memory_slot *memslot;
	unsigned long gfn, hva, pa, psize = PAGE_SHIFT;
	unsigned int shift;
	pte_t *ptep, pte;

	/* Find the memslot for this address */
	gfn = gpa >> PAGE_SHIFT;
	memslot = __gfn_to_memslot(kvm_memslots_raw(kvm), gfn);
	if (!memslot || (memslot->flags & KVM_MEMSLOT_INVALID))
		return H_PARAMETER;

	/* Translate to host virtual address */
	hva = __gfn_to_hva_memslot(memslot, gfn);

	/* Try to find the host pte for that virtual address */
	ptep = find_kvm_host_pte(kvm, mmu_seq, hva, &shift);
	if (!ptep)
		return H_TOO_HARD;
	pte = kvmppc_read_update_linux_pte(ptep, writing);
	if (!pte_present(pte))
		return H_TOO_HARD;

	/* Convert to a physical address */
	if (shift)
		psize = 1UL << shift;
	pa = pte_pfn(pte) << PAGE_SHIFT;
	pa |= hva & (psize - 1);
	pa |= gpa & ~PAGE_MASK;

	if (hpa)
		*hpa = pa;
	if (memslot_p)
		*memslot_p = memslot;

	return H_SUCCESS;
}

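/*
 * Real-mode helpers for H_PAGE_INIT: zero or copy a 4K page given guest
 * physical addresses, under the MMU lock, using mmu_seq to detect a
 * concurrent MMU-notifier invalidation.
 */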
static long kvmppc_do_h_page_init_zero(struct kvm_vcpu *vcpu,
				       unsigned long dest)
{
	struct kvm_memory_slot *memslot;
	struct kvm *kvm = vcpu->kvm;
	unsigned long pa, mmu_seq;
	long ret = H_SUCCESS;
	int i;

	/* Used later to detect if we might have been invalidated */
	mmu_seq = kvm->mmu_notifier_seq;
	smp_rmb();

	arch_spin_lock(&kvm->mmu_lock.rlock.raw_lock);

	ret = kvmppc_get_hpa(vcpu, mmu_seq, dest, 1, &pa, &memslot);
	if (ret != H_SUCCESS)
		goto out_unlock;

	/* Zero the page */
	for (i = 0; i < SZ_4K; i += L1_CACHE_BYTES, pa += L1_CACHE_BYTES)
		dcbz((void *)pa);
	kvmppc_update_dirty_map(memslot, dest >> PAGE_SHIFT, PAGE_SIZE);

out_unlock:
	arch_spin_unlock(&kvm->mmu_lock.rlock.raw_lock);
	return ret;
}

static long kvmppc_do_h_page_init_copy(struct kvm_vcpu *vcpu,
				       unsigned long dest, unsigned long src)
{
	unsigned long dest_pa, src_pa, mmu_seq;
	struct kvm_memory_slot *dest_memslot;
	struct kvm *kvm = vcpu->kvm;
	long ret = H_SUCCESS;

	/* Used later to detect if we might have been invalidated */
	mmu_seq = kvm->mmu_notifier_seq;
	smp_rmb();

	arch_spin_lock(&kvm->mmu_lock.rlock.raw_lock);
	ret = kvmppc_get_hpa(vcpu, mmu_seq, dest, 1, &dest_pa, &dest_memslot);
	if (ret != H_SUCCESS)
		goto out_unlock;

	ret = kvmppc_get_hpa(vcpu, mmu_seq, src, 0, &src_pa, NULL);
	if (ret != H_SUCCESS)
		goto out_unlock;

	/* Copy the page */
	memcpy((void *)dest_pa, (void *)src_pa, SZ_4K);

	kvmppc_update_dirty_map(dest_memslot, dest >> PAGE_SHIFT, PAGE_SIZE);

out_unlock:
	arch_spin_unlock(&kvm->mmu_lock.rlock.raw_lock);
	return ret;
}

long kvmppc_rm_h_page_init(struct kvm_vcpu *vcpu, unsigned long flags,
			   unsigned long dest, unsigned long src)
{
	struct kvm *kvm = vcpu->kvm;
	u64 pg_mask = SZ_4K - 1;	/* 4K page size */
	long ret = H_SUCCESS;

	/* Don't handle radix mode here, go up to the virtual mode handler */
	if (kvm_is_radix(kvm))
		return H_TOO_HARD;

	/* Check for invalid flags (H_PAGE_SET_LOANED covers all CMO flags) */
	if (flags & ~(H_ICACHE_INVALIDATE | H_ICACHE_SYNCHRONIZE |
		      H_ZERO_PAGE | H_COPY_PAGE | H_PAGE_SET_LOANED))
		return H_PARAMETER;

	/* dest (and src if copy_page flag set) must be page aligned */
	if ((dest & pg_mask) || ((flags & H_COPY_PAGE) && (src & pg_mask)))
		return H_PARAMETER;

	/* zero and/or copy the page as determined by the flags */
	if (flags & H_COPY_PAGE)
		ret = kvmppc_do_h_page_init_copy(vcpu, dest, src);
	else if (flags & H_ZERO_PAGE)
		ret = kvmppc_do_h_page_init_zero(vcpu, dest);

	/* We can ignore the other flags */

	return ret;
}

void kvmppc_invalidate_hpte(struct kvm *kvm, __be64 *hptep,
			unsigned long pte_index)
{
	unsigned long rb;
	u64 hp0, hp1;

	hptep[0] &= ~cpu_to_be64(HPTE_V_VALID);
	hp0 = be64_to_cpu(hptep[0]);
	hp1 = be64_to_cpu(hptep[1]);
	if (cpu_has_feature(CPU_FTR_ARCH_300)) {
		hp0 = hpte_new_to_old_v(hp0, hp1);
		hp1 = hpte_new_to_old_r(hp1);
	}
	rb = compute_tlbie_rb(hp0, hp1, pte_index);
	do_tlbies(kvm, &rb, 1, 1, true);
}
EXPORT_SYMBOL_GPL(kvmppc_invalidate_hpte);

void kvmppc_clear_ref_hpte(struct kvm *kvm, __be64 *hptep,
			unsigned long pte_index)
{
	unsigned long rb;
	unsigned char rbyte;
	u64 hp0, hp1;

	hp0 = be64_to_cpu(hptep[0]);
	hp1 = be64_to_cpu(hptep[1]);
	if (cpu_has_feature(CPU_FTR_ARCH_300)) {
		hp0 = hpte_new_to_old_v(hp0, hp1);
		hp1 = hpte_new_to_old_r(hp1);
	}
	rb = compute_tlbie_rb(hp0, hp1, pte_index);
	rbyte = (be64_to_cpu(hptep[1]) & ~HPTE_R_R) >> 8;
	/* modify only the second-last byte, which contains the ref bit */
	*((char *)hptep + 14) = rbyte;
	do_tlbies(kvm, &rb, 1, 1, false);
}
EXPORT_SYMBOL_GPL(kvmppc_clear_ref_hpte);

static int slb_base_page_shift[4] = {
	24,	/* 16M */
	16,	/* 64k */
	34,	/* 16G */
	20,	/* 1M, unsupported */
};

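/*
 * Per-vcpu cache of recently used emulated-MMIO HPTE translations.  Entries
 * are keyed by effective address, SLB value and the global mmio_update
 * counter, which the HPTE update paths bump whenever an MMIO HPTE changes,
 * implicitly invalidating stale cache entries.
 */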
static struct mmio_hpte_cache_entry *mmio_cache_search(struct kvm_vcpu *vcpu,
	unsigned long eaddr, unsigned long slb_v, long mmio_update)
{
	struct mmio_hpte_cache_entry *entry = NULL;
	unsigned int pshift;
	unsigned int i;

	for (i = 0; i < MMIO_HPTE_CACHE_SIZE; i++) {
		entry = &vcpu->arch.mmio_cache.entry[i];
		if (entry->mmio_update == mmio_update) {
			pshift = entry->slb_base_pshift;
			if ((entry->eaddr >> pshift) == (eaddr >> pshift) &&
			    entry->slb_v == slb_v)
				return entry;
		}
	}
	return NULL;
}

static struct mmio_hpte_cache_entry *
			next_mmio_cache_entry(struct kvm_vcpu *vcpu)
{
	unsigned int index = vcpu->arch.mmio_cache.index;

	vcpu->arch.mmio_cache.index++;
	if (vcpu->arch.mmio_cache.index == MMIO_HPTE_CACHE_SIZE)
		vcpu->arch.mmio_cache.index = 0;

	return &vcpu->arch.mmio_cache.entry[index];
}

/*
 * When called from virtmode, this function should be protected by
 * preempt_disable(); otherwise, holding HPTE_V_HVLOCK while preempted
 * can lead to deadlock.
 */
long kvmppc_hv_find_lock_hpte(struct kvm *kvm, gva_t eaddr, unsigned long slb_v,
			      unsigned long valid)
1097*4882a593Smuzhiyun {
1098*4882a593Smuzhiyun unsigned int i;
1099*4882a593Smuzhiyun unsigned int pshift;
1100*4882a593Smuzhiyun unsigned long somask;
1101*4882a593Smuzhiyun unsigned long vsid, hash;
1102*4882a593Smuzhiyun unsigned long avpn;
1103*4882a593Smuzhiyun __be64 *hpte;
1104*4882a593Smuzhiyun unsigned long mask, val;
1105*4882a593Smuzhiyun unsigned long v, r, orig_v;
1106*4882a593Smuzhiyun
1107*4882a593Smuzhiyun /* Get page shift, work out hash and AVPN etc. */
1108*4882a593Smuzhiyun mask = SLB_VSID_B | HPTE_V_AVPN | HPTE_V_SECONDARY;
1109*4882a593Smuzhiyun val = 0;
1110*4882a593Smuzhiyun pshift = 12;
1111*4882a593Smuzhiyun if (slb_v & SLB_VSID_L) {
1112*4882a593Smuzhiyun mask |= HPTE_V_LARGE;
1113*4882a593Smuzhiyun val |= HPTE_V_LARGE;
1114*4882a593Smuzhiyun pshift = slb_base_page_shift[(slb_v & SLB_VSID_LP) >> 4];
1115*4882a593Smuzhiyun }
1116*4882a593Smuzhiyun if (slb_v & SLB_VSID_B_1T) {
1117*4882a593Smuzhiyun somask = (1UL << 40) - 1;
1118*4882a593Smuzhiyun vsid = (slb_v & ~SLB_VSID_B) >> SLB_VSID_SHIFT_1T;
1119*4882a593Smuzhiyun vsid ^= vsid << 25;
1120*4882a593Smuzhiyun } else {
1121*4882a593Smuzhiyun somask = (1UL << 28) - 1;
1122*4882a593Smuzhiyun vsid = (slb_v & ~SLB_VSID_B) >> SLB_VSID_SHIFT;
1123*4882a593Smuzhiyun }
1124*4882a593Smuzhiyun hash = (vsid ^ ((eaddr & somask) >> pshift)) & kvmppc_hpt_mask(&kvm->arch.hpt);
1125*4882a593Smuzhiyun avpn = slb_v & ~(somask >> 16); /* also includes B */
1126*4882a593Smuzhiyun avpn |= (eaddr & somask) >> 16;
1127*4882a593Smuzhiyun
1128*4882a593Smuzhiyun if (pshift >= 24)
1129*4882a593Smuzhiyun avpn &= ~((1UL << (pshift - 16)) - 1);
1130*4882a593Smuzhiyun else
1131*4882a593Smuzhiyun avpn &= ~0x7fUL;
1132*4882a593Smuzhiyun val |= avpn;
1133*4882a593Smuzhiyun
1134*4882a593Smuzhiyun for (;;) {
1135*4882a593Smuzhiyun hpte = (__be64 *)(kvm->arch.hpt.virt + (hash << 7));
1136*4882a593Smuzhiyun
1137*4882a593Smuzhiyun for (i = 0; i < 16; i += 2) {
1138*4882a593Smuzhiyun /* Read the PTE racily */
1139*4882a593Smuzhiyun v = be64_to_cpu(hpte[i]) & ~HPTE_V_HVLOCK;
			if (cpu_has_feature(CPU_FTR_ARCH_300))
				v = hpte_new_to_old_v(v, be64_to_cpu(hpte[i+1]));

			/* Check valid/absent, hash, segment size and AVPN */
			if (!(v & valid) || (v & mask) != val)
				continue;

			/* Lock the PTE and read it under the lock */
			while (!try_lock_hpte(&hpte[i], HPTE_V_HVLOCK))
				cpu_relax();
			v = orig_v = be64_to_cpu(hpte[i]) & ~HPTE_V_HVLOCK;
			r = be64_to_cpu(hpte[i+1]);
			if (cpu_has_feature(CPU_FTR_ARCH_300)) {
				v = hpte_new_to_old_v(v, r);
				r = hpte_new_to_old_r(r);
			}

			/*
			 * Check the HPTE again, including base page size
			 */
			if ((v & valid) && (v & mask) == val &&
			    kvmppc_hpte_base_page_shift(v, r) == pshift)
				/* Return with the HPTE still locked */
				return (hash << 3) + (i >> 1);

			__unlock_hpte(&hpte[i], orig_v);
		}

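		/*
		 * Nothing in this group.  If this was the primary group,
		 * retry in the secondary group, whose index is the
		 * complement of the primary hash within the HPT mask and
		 * whose entries must have HPTE_V_SECONDARY set; otherwise
		 * give up.
		 */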
		if (val & HPTE_V_SECONDARY)
			break;
		val |= HPTE_V_SECONDARY;
		hash = hash ^ kvmppc_hpt_mask(&kvm->arch.hpt);
	}
	return -1;
}
EXPORT_SYMBOL(kvmppc_hv_find_lock_hpte);

/*
 * Called in real mode to check whether an HPTE not found fault
 * is due to accessing a paged-out page or an emulated MMIO page,
 * or if a protection fault is due to accessing a page that the
 * guest wanted read/write access to but which we made read-only.
 * Returns a possibly modified status (DSISR) value if none of these
 * apply (i.e. the interrupt should be passed to the guest),
 * -1 to pass the fault up to host kernel mode code, -2 to do that
 * and also load the instruction word (for MMIO emulation),
 * or 0 if we should make the guest retry the access.
 */
long kvmppc_hpte_hv_fault(struct kvm_vcpu *vcpu, unsigned long addr,
			  unsigned long slb_v, unsigned int status, bool data)
{
	struct kvm *kvm = vcpu->kvm;
	long int index;
	unsigned long v, r, gr, orig_v;
	__be64 *hpte;
	unsigned long valid;
	struct revmap_entry *rev;
	unsigned long pp, key;
	struct mmio_hpte_cache_entry *cache_entry = NULL;
	long mmio_update = 0;

	/* For protection fault, expect to find a valid HPTE */
	valid = HPTE_V_VALID;
	if (status & DSISR_NOHPTE) {
		valid |= HPTE_V_ABSENT;
		mmio_update = atomic64_read(&kvm->arch.mmio_update);
		cache_entry = mmio_cache_search(vcpu, addr, slb_v, mmio_update);
	}
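	/*
	 * A hit in the per-vcpu MMIO cache gives us the HPTE contents for
	 * a recently used emulated MMIO page without having to search and
	 * lock the HPT again in real mode.
	 */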
	if (cache_entry) {
		index = cache_entry->pte_index;
		v = cache_entry->hpte_v;
		r = cache_entry->hpte_r;
		gr = cache_entry->rpte;
	} else {
		index = kvmppc_hv_find_lock_hpte(kvm, addr, slb_v, valid);
		if (index < 0) {
			if (status & DSISR_NOHPTE)
				return status;	/* there really was no HPTE */
			return 0;	/* for prot fault, HPTE disappeared */
		}
		hpte = (__be64 *)(kvm->arch.hpt.virt + (index << 4));
		v = orig_v = be64_to_cpu(hpte[0]) & ~HPTE_V_HVLOCK;
		r = be64_to_cpu(hpte[1]);
		if (cpu_has_feature(CPU_FTR_ARCH_300)) {
			v = hpte_new_to_old_v(v, r);
			r = hpte_new_to_old_r(r);
		}
		rev = real_vmalloc_addr(&kvm->arch.hpt.rev[index]);
		gr = rev->guest_rpte;

		unlock_hpte(hpte, orig_v);
	}

	/* For not found, if the HPTE is valid by now, retry the instruction */
	if ((status & DSISR_NOHPTE) && (v & HPTE_V_VALID))
		return 0;

	/* Check access permissions to the page */
	pp = gr & (HPTE_R_PP0 | HPTE_R_PP);
	key = (vcpu->arch.shregs.msr & MSR_PR) ? SLB_VSID_KP : SLB_VSID_KS;
	status &= ~DSISR_NOHPTE;	/* DSISR_NOHPTE == SRR1_ISI_NOPT */
	if (!data) {
		if (gr & (HPTE_R_N | HPTE_R_G))
			return status | SRR1_ISI_N_G_OR_CIP;
		if (!hpte_read_permission(pp, slb_v & key))
			return status | SRR1_ISI_PROT;
	} else if (status & DSISR_ISSTORE) {
		/* check write permission */
		if (!hpte_write_permission(pp, slb_v & key))
			return status | DSISR_PROTFAULT;
	} else {
		if (!hpte_read_permission(pp, slb_v & key))
			return status | DSISR_PROTFAULT;
	}

	/* Check storage key, if applicable */
	if (data && (vcpu->arch.shregs.msr & MSR_DR)) {
		unsigned int perm = hpte_get_skey_perm(gr, vcpu->arch.amr);
		if (status & DSISR_ISSTORE)
			perm >>= 1;
		if (perm & 1)
			return status | DSISR_KEYFAULT;
	}

	/* Save HPTE info for virtual-mode handler */
	vcpu->arch.pgfault_addr = addr;
	vcpu->arch.pgfault_index = index;
	vcpu->arch.pgfault_hpte[0] = v;
	vcpu->arch.pgfault_hpte[1] = r;
	vcpu->arch.pgfault_cache = cache_entry;

	/* Check the storage key to see if it is possibly emulated MMIO */
	if ((r & (HPTE_R_KEY_HI | HPTE_R_KEY_LO)) ==
	    (HPTE_R_KEY_HI | HPTE_R_KEY_LO)) {
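		/*
		 * Both storage key bits set is the marker KVM uses for an
		 * emulated MMIO HPTE, so record this translation in the
		 * MMIO cache for subsequent faults on the same address.
		 */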
		if (!cache_entry) {
			unsigned int pshift = 12;
			unsigned int pshift_index;

			if (slb_v & SLB_VSID_L) {
				pshift_index = ((slb_v & SLB_VSID_LP) >> 4);
				pshift = slb_base_page_shift[pshift_index];
			}
			cache_entry = next_mmio_cache_entry(vcpu);
			cache_entry->eaddr = addr;
			cache_entry->slb_base_pshift = pshift;
			cache_entry->pte_index = index;
			cache_entry->hpte_v = v;
			cache_entry->hpte_r = r;
			cache_entry->rpte = gr;
			cache_entry->slb_v = slb_v;
			cache_entry->mmio_update = mmio_update;
		}
		if (data && (vcpu->arch.shregs.msr & MSR_IR))
			return -2;	/* MMIO emulation - load instr word */
	}

	return -1;		/* send fault up to host kernel mode */
}