/*
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License, version 2, as
 * published by the Free Software Foundation.
 *
 * Copyright 2010-2011 Paul Mackerras, IBM Corp. <paulus@au1.ibm.com>
 */
#include <linux/types.h>
#include <linux/string.h>
#include <linux/kvm.h>
#include <linux/kvm_host.h>
#include <linux/hugetlb.h>
#include <linux/module.h>
#include <linux/log2.h>

#include <asm/tlbflush.h>
#include <asm/kvm_ppc.h>
#include <asm/kvm_book3s.h>
#include <asm/mmu-hash64.h>
#include <asm/hvcall.h>
#include <asm/synch.h>
#include <asm/ppc-opcode.h>
/* Translate address of a vmalloc'd thing to a linear map address */
static void *real_vmalloc_addr(void *x)
{
        unsigned long addr = (unsigned long) x;
        pte_t *p;
        /*
         * Assume we don't have huge pages in vmalloc space, so we don't
         * need to worry about THP collapse/split.  Called only in real
         * mode, hence no need for irq_save/restore around the lookup.
         */
        p = __find_linux_pte_or_hugepte(swapper_pg_dir, addr, NULL);
        if (!p || !pte_present(*p))
                return NULL;
        addr = (pte_pfn(*p) << PAGE_SHIFT) | (addr & ~PAGE_MASK);
        return __va(addr);
}
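/*
 * Much of the code in this file runs in real mode, with hardware
 * translation off, so vmalloc'd structures such as kvm->arch.revmap
 * cannot be dereferenced through their usual addresses; they are first
 * remapped through the kernel linear mapping via real_vmalloc_addr().
 */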
/* Return 1 if we need to do a global tlbie, 0 if we can use tlbiel */
static int global_invalidates(struct kvm *kvm, unsigned long flags)
{
        int global;

        /*
         * If there is only one vcore, and it's currently running,
         * as indicated by local_paca->kvm_hstate.kvm_vcpu being set,
         * we can use tlbiel as long as we mark all other physical
         * cores as potentially having stale TLB entries for this lpid.
         * Otherwise, don't use tlbiel.
         */
        if (kvm->arch.online_vcores == 1 && local_paca->kvm_hstate.kvm_vcpu)
                global = 0;
        else
                global = 1;

        if (!global) {
                /* any other core might now have stale TLB entries... */
                smp_wmb();
                cpumask_setall(&kvm->arch.need_tlb_flush);
                cpumask_clear_cpu(local_paca->kvm_hstate.kvm_vcore->pcpu,
                                  &kvm->arch.need_tlb_flush);
        }

        return global;
}
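/*
 * Note on the two invalidation instructions used below: tlbie
 * broadcasts the invalidation to all processors, while tlbiel only
 * affects the local core.  The local form is cheaper, and is safe here
 * because every other core has been flagged in need_tlb_flush and will
 * flush the TLB for this lpid before it next enters the guest.
 */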
/*
 * Add this HPTE into the chain for the real page.
 * Must be called with the chain locked; it unlocks the chain.
 */
void kvmppc_add_revmap_chain(struct kvm *kvm, struct revmap_entry *rev,
                             unsigned long *rmap, long pte_index, int realmode)
{
        struct revmap_entry *head, *tail;
        unsigned long i;

        if (*rmap & KVMPPC_RMAP_PRESENT) {
                i = *rmap & KVMPPC_RMAP_INDEX;
                head = &kvm->arch.revmap[i];
                if (realmode)
                        head = real_vmalloc_addr(head);
                tail = &kvm->arch.revmap[head->back];
                if (realmode)
                        tail = real_vmalloc_addr(tail);
                rev->forw = i;
                rev->back = head->back;
                tail->forw = pte_index;
                head->back = pte_index;
        } else {
                rev->forw = rev->back = pte_index;
                *rmap = (*rmap & ~KVMPPC_RMAP_INDEX) |
                        pte_index | KVMPPC_RMAP_PRESENT;
        }
        unlock_rmap(rmap);
}
EXPORT_SYMBOL_GPL(kvmppc_add_revmap_chain);
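/*
 * Reverse-map layout, as maintained above: each rmap word in a
 * memslot's arch.rmap[] array heads a circular doubly-linked list of
 * HPTE indexes mapping that guest page.  KVMPPC_RMAP_PRESENT marks a
 * non-empty list, KVMPPC_RMAP_INDEX holds the head HPTE index, and the
 * forw/back links live in the kvm->arch.revmap[] entries themselves.
 */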
/* Update the changed page order field of an rmap entry */
void kvmppc_update_rmap_change(unsigned long *rmap, unsigned long psize)
{
        unsigned long order;

        if (!psize)
                return;
        order = ilog2(psize);
        order <<= KVMPPC_RMAP_CHG_SHIFT;
        if (order > (*rmap & KVMPPC_RMAP_CHG_ORDER))
                *rmap = (*rmap & ~KVMPPC_RMAP_CHG_ORDER) | order;
}
EXPORT_SYMBOL_GPL(kvmppc_update_rmap_change);
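/*
 * The CHG_ORDER field therefore records the log2 of the largest page
 * size for which a change (C) bit has been seen on this rmap chain,
 * so that dirty tracking can later cover the full affected range.
 */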
/* Remove this HPTE from the chain for a real page */
static void remove_revmap_chain(struct kvm *kvm, long pte_index,
                                struct revmap_entry *rev,
                                unsigned long hpte_v, unsigned long hpte_r)
{
        struct revmap_entry *next, *prev;
        unsigned long gfn, ptel, head;
        struct kvm_memory_slot *memslot;
        unsigned long *rmap;
        unsigned long rcbits;

        rcbits = hpte_r & (HPTE_R_R | HPTE_R_C);
        ptel = rev->guest_rpte |= rcbits;
        gfn = hpte_rpn(ptel, hpte_page_size(hpte_v, ptel));
        memslot = __gfn_to_memslot(kvm_memslots_raw(kvm), gfn);
        if (!memslot)
                return;

        rmap = real_vmalloc_addr(&memslot->arch.rmap[gfn - memslot->base_gfn]);
        lock_rmap(rmap);

        head = *rmap & KVMPPC_RMAP_INDEX;
        next = real_vmalloc_addr(&kvm->arch.revmap[rev->forw]);
        prev = real_vmalloc_addr(&kvm->arch.revmap[rev->back]);
        next->back = rev->back;
        prev->forw = rev->forw;
        if (head == pte_index) {
                head = rev->forw;
                if (head == pte_index)
                        *rmap &= ~(KVMPPC_RMAP_PRESENT | KVMPPC_RMAP_INDEX);
                else
                        *rmap = (*rmap & ~KVMPPC_RMAP_INDEX) | head;
        }
        *rmap |= rcbits << KVMPPC_RMAP_RC_SHIFT;
        if (rcbits & HPTE_R_C)
                kvmppc_update_rmap_change(rmap, hpte_page_size(hpte_v, hpte_r));
        unlock_rmap(rmap);
}
long kvmppc_do_h_enter(struct kvm *kvm, unsigned long flags,
                       long pte_index, unsigned long pteh, unsigned long ptel,
                       pgd_t *pgdir, bool realmode, unsigned long *pte_idx_ret)
{
        unsigned long i, pa, gpa, gfn, psize;
        unsigned long slot_fn, hva;
        __be64 *hpte;
        struct revmap_entry *rev;
        unsigned long g_ptel;
        struct kvm_memory_slot *memslot;
        unsigned hpage_shift;
        unsigned long is_io;
        unsigned long *rmap;
        pte_t *ptep;
        unsigned int writing;
        unsigned long mmu_seq;
        unsigned long rcbits, irq_flags = 0;

        psize = hpte_page_size(pteh, ptel);
        if (!psize)
                return H_PARAMETER;
        writing = hpte_is_writable(ptel);
        pteh &= ~(HPTE_V_HVLOCK | HPTE_V_ABSENT | HPTE_V_VALID);
        ptel &= ~HPTE_GR_RESERVED;
        g_ptel = ptel;

        /* used later to detect if we might have been invalidated */
        mmu_seq = kvm->mmu_notifier_seq;
        smp_rmb();

        /* Find the memslot (if any) for this address */
        gpa = (ptel & HPTE_R_RPN) & ~(psize - 1);
        gfn = gpa >> PAGE_SHIFT;
        memslot = __gfn_to_memslot(kvm_memslots_raw(kvm), gfn);
        pa = 0;
        is_io = ~0ul;
        rmap = NULL;
        if (!(memslot && !(memslot->flags & KVM_MEMSLOT_INVALID))) {
                /* Emulated MMIO - mark this with key=31 */
                pteh |= HPTE_V_ABSENT;
                ptel |= HPTE_R_KEY_HI | HPTE_R_KEY_LO;
                goto do_insert;
        }

        /* Check if the requested page fits entirely in the memslot. */
        if (!slot_is_aligned(memslot, psize))
                return H_PARAMETER;
        slot_fn = gfn - memslot->base_gfn;
        rmap = &memslot->arch.rmap[slot_fn];

        /* Translate to host virtual address */
        hva = __gfn_to_hva_memslot(memslot, gfn);
        /*
         * If we had a page table change after the lookup, we would
         * retry via mmu_notifier_retry.
         */
        if (realmode)
                ptep = __find_linux_pte_or_hugepte(pgdir, hva, &hpage_shift);
        else {
                local_irq_save(irq_flags);
                ptep = find_linux_pte_or_hugepte(pgdir, hva, &hpage_shift);
        }
        if (ptep) {
                pte_t pte;
                unsigned int host_pte_size;

                if (hpage_shift)
                        host_pte_size = 1ul << hpage_shift;
                else
                        host_pte_size = PAGE_SIZE;
                /*
                 * The guest page size should always be <= the host
                 * page size, if the host is using hugepages.
                 */
                if (host_pte_size < psize) {
                        if (!realmode)
                                local_irq_restore(irq_flags);
                        return H_PARAMETER;
                }
                pte = kvmppc_read_update_linux_pte(ptep, writing);
                if (pte_present(pte) && !pte_protnone(pte)) {
                        if (writing && !pte_write(pte))
                                /* make the actual HPTE be read-only */
                                ptel = hpte_make_readonly(ptel);
                        is_io = hpte_cache_bits(pte_val(pte));
                        pa = pte_pfn(pte) << PAGE_SHIFT;
                        pa |= hva & (host_pte_size - 1);
                        pa |= gpa & ~PAGE_MASK;
                }
        }
        if (!realmode)
                local_irq_restore(irq_flags);

        ptel &= ~(HPTE_R_PP0 - psize);
        ptel |= pa;

        if (pa)
                pteh |= HPTE_V_VALID;
        else
                pteh |= HPTE_V_ABSENT;

        /* Check WIMG bits */
        if (is_io != ~0ul && !hpte_cache_flags_ok(ptel, is_io)) {
                if (is_io)
                        return H_PARAMETER;
                /*
                 * Allow guest to map emulated device memory as
                 * uncacheable, but actually make it cacheable.
                 */
                ptel &= ~(HPTE_R_W|HPTE_R_I|HPTE_R_G);
                ptel |= HPTE_R_M;
        }

        /* Find and lock the HPTEG slot to use */
 do_insert:
        if (pte_index >= kvm->arch.hpt_npte)
                return H_PARAMETER;
        if (likely((flags & H_EXACT) == 0)) {
                pte_index &= ~7UL;
                hpte = (__be64 *)(kvm->arch.hpt_virt + (pte_index << 4));
                for (i = 0; i < 8; ++i) {
                        if ((be64_to_cpu(*hpte) & HPTE_V_VALID) == 0 &&
                            try_lock_hpte(hpte, HPTE_V_HVLOCK | HPTE_V_VALID |
                                          HPTE_V_ABSENT))
                                break;
                        hpte += 2;
                }
                if (i == 8) {
                        /*
                         * Since try_lock_hpte doesn't retry (not even stdcx.
                         * failures), it could be that there is a free slot
                         * but we transiently failed to lock it. Try again,
                         * actually locking each slot and checking it.
                         */
                        hpte -= 16;
                        for (i = 0; i < 8; ++i) {
                                u64 pte;
                                while (!try_lock_hpte(hpte, HPTE_V_HVLOCK))
                                        cpu_relax();
                                pte = be64_to_cpu(hpte[0]);
                                if (!(pte & (HPTE_V_VALID | HPTE_V_ABSENT)))
                                        break;
                                __unlock_hpte(hpte, pte);
                                hpte += 2;
                        }
                        if (i == 8)
                                return H_PTEG_FULL;
                }
                pte_index += i;
        } else {
                hpte = (__be64 *)(kvm->arch.hpt_virt + (pte_index << 4));
                if (!try_lock_hpte(hpte, HPTE_V_HVLOCK | HPTE_V_VALID |
                                   HPTE_V_ABSENT)) {
                        /* Lock the slot and check again */
                        u64 pte;

                        while (!try_lock_hpte(hpte, HPTE_V_HVLOCK))
                                cpu_relax();
                        pte = be64_to_cpu(hpte[0]);
                        if (pte & (HPTE_V_VALID | HPTE_V_ABSENT)) {
                                __unlock_hpte(hpte, pte);
                                return H_PTEG_FULL;
                        }
                }
        }

        /* Save away the guest's idea of the second HPTE dword */
        rev = &kvm->arch.revmap[pte_index];
        if (realmode)
                rev = real_vmalloc_addr(rev);
        if (rev) {
                rev->guest_rpte = g_ptel;
                note_hpte_modification(kvm, rev);
        }

        /* Link HPTE into reverse-map chain */
        if (pteh & HPTE_V_VALID) {
                if (realmode)
                        rmap = real_vmalloc_addr(rmap);
                lock_rmap(rmap);
                /* Check for pending invalidations under the rmap chain lock */
                if (mmu_notifier_retry(kvm, mmu_seq)) {
                        /* inval in progress, write a non-present HPTE */
                        pteh |= HPTE_V_ABSENT;
                        pteh &= ~HPTE_V_VALID;
                        unlock_rmap(rmap);
                } else {
                        kvmppc_add_revmap_chain(kvm, rev, rmap, pte_index,
                                                realmode);
                        /* Only set R/C in real HPTE if already set in *rmap */
                        rcbits = *rmap >> KVMPPC_RMAP_RC_SHIFT;
                        ptel &= rcbits | ~(HPTE_R_R | HPTE_R_C);
                }
        }

        hpte[1] = cpu_to_be64(ptel);

        /* Write the first HPTE dword, unlocking the HPTE and making it valid */
        eieio();
        __unlock_hpte(hpte, pteh);
        asm volatile("ptesync" : : : "memory");

        *pte_idx_ret = pte_index;
        return H_SUCCESS;
}
EXPORT_SYMBOL_GPL(kvmppc_do_h_enter);
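/*
 * kvmppc_do_h_enter() is the common implementation of the PAPR H_ENTER
 * hypercall.  The kvmppc_h_enter() wrapper below invokes it in real
 * mode, with the guest's page table root and with the index of the
 * installed HPTE returned in GPR4; virtual-mode callers pass
 * realmode == false so that the Linux PTE lookup takes the IRQ-safe
 * path.
 */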
long kvmppc_h_enter(struct kvm_vcpu *vcpu, unsigned long flags,
                    long pte_index, unsigned long pteh, unsigned long ptel)
{
        return kvmppc_do_h_enter(vcpu->kvm, flags, pte_index, pteh, ptel,
                                 vcpu->arch.pgdir, true, &vcpu->arch.gpr[4]);
}
#ifdef __BIG_ENDIAN__
#define LOCK_TOKEN      (*(u32 *)(&get_paca()->lock_token))
#else
#define LOCK_TOKEN      (*(u32 *)(&get_paca()->paca_index))
#endif
static inline int try_lock_tlbie(unsigned int *lock)
{
        unsigned int tmp, old;
        unsigned int token = LOCK_TOKEN;

        asm volatile("1:lwarx   %1,0,%2\n"
                     "  cmpwi   cr0,%1,0\n"
                     "  bne     2f\n"
                     "  stwcx.  %3,0,%2\n"
                     "  bne-    1b\n"
                     "  isync\n"
                     "2:"
                     : "=&r" (tmp), "=&r" (old)
                     : "r" (lock), "r" (token)
                     : "cc", "memory");
        return old == 0;
}
static void do_tlbies(struct kvm *kvm, unsigned long *rbvalues,
                      long npages, int global, bool need_sync)
{
        long i;

        if (global) {
                while (!try_lock_tlbie(&kvm->arch.tlbie_lock))
                        cpu_relax();
                if (need_sync)
                        asm volatile("ptesync" : : : "memory");
                for (i = 0; i < npages; ++i)
                        asm volatile(PPC_TLBIE(%1,%0) : :
                                     "r" (rbvalues[i]), "r" (kvm->arch.lpid));
                asm volatile("eieio; tlbsync; ptesync" : : : "memory");
                kvm->arch.tlbie_lock = 0;
        } else {
                if (need_sync)
                        asm volatile("ptesync" : : : "memory");
                for (i = 0; i < npages; ++i)
                        asm volatile("tlbiel %0" : : "r" (rbvalues[i]));
                asm volatile("ptesync" : : : "memory");
        }
}
long kvmppc_do_h_remove(struct kvm *kvm, unsigned long flags,
                        unsigned long pte_index, unsigned long avpn,
                        unsigned long *hpret)
{
        __be64 *hpte;
        unsigned long v, r, rb;
        struct revmap_entry *rev;
        u64 pte;

        if (pte_index >= kvm->arch.hpt_npte)
                return H_PARAMETER;
        hpte = (__be64 *)(kvm->arch.hpt_virt + (pte_index << 4));
        while (!try_lock_hpte(hpte, HPTE_V_HVLOCK))
                cpu_relax();
        pte = be64_to_cpu(hpte[0]);
        if ((pte & (HPTE_V_ABSENT | HPTE_V_VALID)) == 0 ||
            ((flags & H_AVPN) && (pte & ~0x7fUL) != avpn) ||
            ((flags & H_ANDCOND) && (pte & avpn) != 0)) {
                __unlock_hpte(hpte, pte);
                return H_NOT_FOUND;
        }

        rev = real_vmalloc_addr(&kvm->arch.revmap[pte_index]);
        v = pte & ~HPTE_V_HVLOCK;
        if (v & HPTE_V_VALID) {
                hpte[0] &= ~cpu_to_be64(HPTE_V_VALID);
                rb = compute_tlbie_rb(v, be64_to_cpu(hpte[1]), pte_index);
                do_tlbies(kvm, &rb, 1, global_invalidates(kvm, flags), true);
                /*
                 * The reference (R) and change (C) bits in a HPT
                 * entry can be set by hardware at any time up until
                 * the HPTE is invalidated and the TLB invalidation
                 * sequence has completed.  This means that when
                 * removing a HPTE, we need to re-read the HPTE after
                 * the invalidation sequence has completed in order to
                 * obtain reliable values of R and C.
                 */
                remove_revmap_chain(kvm, pte_index, rev, v,
                                    be64_to_cpu(hpte[1]));
        }
        r = rev->guest_rpte & ~HPTE_GR_RESERVED;
        note_hpte_modification(kvm, rev);
        unlock_hpte(hpte, 0);

        hpret[0] = v;
        hpret[1] = r;
        return H_SUCCESS;
}
EXPORT_SYMBOL_GPL(kvmppc_do_h_remove);
long kvmppc_h_remove(struct kvm_vcpu *vcpu, unsigned long flags,
                     unsigned long pte_index, unsigned long avpn)
{
        return kvmppc_do_h_remove(vcpu->kvm, flags, pte_index, avpn,
                                  &vcpu->arch.gpr[4]);
}
long kvmppc_h_bulk_remove(struct kvm_vcpu *vcpu)
{
        struct kvm *kvm = vcpu->kvm;
        unsigned long *args = &vcpu->arch.gpr[4];
        __be64 *hp, *hptes[4];
        unsigned long tlbrb[4];
        long int i, j, k, n, found, indexes[4];
        unsigned long flags, req, pte_index, rcbits;
        int global;
        long int ret = H_SUCCESS;
        struct revmap_entry *rev, *revs[4];
        u64 hp0;

        global = global_invalidates(kvm, 0);
        for (i = 0; i < 4 && ret == H_SUCCESS; ) {
                n = 0;
                for (; i < 4; ++i) {
                        j = i * 2;
                        pte_index = args[j];
                        flags = pte_index >> 56;
                        pte_index &= ((1ul << 56) - 1);
                        req = flags >> 6;
                        flags &= 3;
                        if (req == 3) {         /* no more requests */
                                i = 4;
                                break;
                        }
                        if (req != 1 || flags == 3 ||
                            pte_index >= kvm->arch.hpt_npte) {
                                /* parameter error */
                                args[j] = ((0xa0 | flags) << 56) + pte_index;
                                ret = H_PARAMETER;
                                break;
                        }
                        hp = (__be64 *) (kvm->arch.hpt_virt + (pte_index << 4));
                        /* to avoid deadlock, don't spin except for first */
                        if (!try_lock_hpte(hp, HPTE_V_HVLOCK)) {
                                if (n)
                                        break;
                                while (!try_lock_hpte(hp, HPTE_V_HVLOCK))
                                        cpu_relax();
                        }
                        found = 0;
                        hp0 = be64_to_cpu(hp[0]);
                        if (hp0 & (HPTE_V_ABSENT | HPTE_V_VALID)) {
                                switch (flags & 3) {
                                case 0:         /* absolute */
                                        found = 1;
                                        break;
                                case 1:         /* andcond */
                                        if (!(hp0 & args[j + 1]))
                                                found = 1;
                                        break;
                                case 2:         /* AVPN */
                                        if ((hp0 & ~0x7fUL) == args[j + 1])
                                                found = 1;
                                        break;
                                }
                        }
                        if (!found) {
                                hp[0] &= ~cpu_to_be64(HPTE_V_HVLOCK);
                                args[j] = ((0x90 | flags) << 56) + pte_index;
                                continue;
                        }

                        args[j] = ((0x80 | flags) << 56) + pte_index;
                        rev = real_vmalloc_addr(&kvm->arch.revmap[pte_index]);
                        note_hpte_modification(kvm, rev);

                        if (!(hp0 & HPTE_V_VALID)) {
                                /* insert R and C bits from PTE */
                                rcbits = rev->guest_rpte & (HPTE_R_R|HPTE_R_C);
                                args[j] |= rcbits << (56 - 5);
                                hp[0] = 0;
                                continue;
                        }

                        /* leave it locked */
                        hp[0] &= ~cpu_to_be64(HPTE_V_VALID);
                        tlbrb[n] = compute_tlbie_rb(be64_to_cpu(hp[0]),
                                be64_to_cpu(hp[1]), pte_index);
                        indexes[n] = j;
                        hptes[n] = hp;
                        revs[n] = rev;
                        ++n;
                }

                if (!n)
                        break;

                /* Now that we've collected a batch, do the tlbies */
                do_tlbies(kvm, tlbrb, n, global, true);

                /* Read PTE low words after tlbie to get final R/C values */
                for (k = 0; k < n; ++k) {
                        j = indexes[k];
                        pte_index = args[j] & ((1ul << 56) - 1);
                        hp = hptes[k];
                        rev = revs[k];
                        remove_revmap_chain(kvm, pte_index, rev,
                                be64_to_cpu(hp[0]), be64_to_cpu(hp[1]));
                        rcbits = rev->guest_rpte & (HPTE_R_R|HPTE_R_C);
                        args[j] |= rcbits << (56 - 5);
                        __unlock_hpte(hp, 0);
                }
        }

        return ret;
}
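/*
 * H_BULK_REMOVE processes up to four remove requests per iteration of
 * the outer loop, each encoded as a (control, match-value) pair taken
 * from GPR4-GPR11.  A completion code is written back into the top
 * byte of each control word: 0x80 for success, 0x90 for not-found,
 * 0xa0 for a parameter error.  Batching allows the removals to share a
 * single tlbie/tlbsync sequence in do_tlbies().
 */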
long kvmppc_h_protect(struct kvm_vcpu *vcpu, unsigned long flags,
                      unsigned long pte_index, unsigned long avpn,
                      unsigned long va)
{
        struct kvm *kvm = vcpu->kvm;
        __be64 *hpte;
        struct revmap_entry *rev;
        unsigned long v, r, rb, mask, bits;
        u64 pte;

        if (pte_index >= kvm->arch.hpt_npte)
                return H_PARAMETER;

        hpte = (__be64 *)(kvm->arch.hpt_virt + (pte_index << 4));
        while (!try_lock_hpte(hpte, HPTE_V_HVLOCK))
                cpu_relax();
        pte = be64_to_cpu(hpte[0]);
        if ((pte & (HPTE_V_ABSENT | HPTE_V_VALID)) == 0 ||
            ((flags & H_AVPN) && (pte & ~0x7fUL) != avpn)) {
                __unlock_hpte(hpte, pte);
                return H_NOT_FOUND;
        }

        v = pte;
        bits = (flags << 55) & HPTE_R_PP0;
        bits |= (flags << 48) & HPTE_R_KEY_HI;
        bits |= flags & (HPTE_R_PP | HPTE_R_N | HPTE_R_KEY_LO);

        /* Update guest view of 2nd HPTE dword */
        mask = HPTE_R_PP0 | HPTE_R_PP | HPTE_R_N |
                HPTE_R_KEY_HI | HPTE_R_KEY_LO;
        rev = real_vmalloc_addr(&kvm->arch.revmap[pte_index]);
        if (rev) {
                r = (rev->guest_rpte & ~mask) | bits;
                rev->guest_rpte = r;
                note_hpte_modification(kvm, rev);
        }

        /* Update HPTE */
        if (v & HPTE_V_VALID) {
                /*
                 * If the page is valid, don't let it transition from
                 * readonly to writable.  If it should be writable, we'll
                 * take a trap and let the page fault code sort it out.
                 */
                pte = be64_to_cpu(hpte[1]);
                r = (pte & ~mask) | bits;
                if (hpte_is_writable(r) && !hpte_is_writable(pte))
                        r = hpte_make_readonly(r);
                /* If the PTE is changing, invalidate it first */
                if (r != pte) {
                        rb = compute_tlbie_rb(v, r, pte_index);
                        hpte[0] = cpu_to_be64((v & ~HPTE_V_VALID) |
                                              HPTE_V_ABSENT);
                        do_tlbies(kvm, &rb, 1, global_invalidates(kvm, flags),
                                  true);
                        hpte[1] = cpu_to_be64(r);
                }
        }
        unlock_hpte(hpte, v & ~HPTE_V_HVLOCK);
        asm volatile("ptesync" : : : "memory");
        return H_SUCCESS;
}
long kvmppc_h_read(struct kvm_vcpu *vcpu, unsigned long flags,
                   unsigned long pte_index)
{
        struct kvm *kvm = vcpu->kvm;
        __be64 *hpte;
        unsigned long v, r;
        int i, n = 1;
        struct revmap_entry *rev = NULL;

        if (pte_index >= kvm->arch.hpt_npte)
                return H_PARAMETER;
        if (flags & H_READ_4) {
                pte_index &= ~3;
                n = 4;
        }
        rev = real_vmalloc_addr(&kvm->arch.revmap[pte_index]);
        for (i = 0; i < n; ++i, ++pte_index) {
                hpte = (__be64 *)(kvm->arch.hpt_virt + (pte_index << 4));
                v = be64_to_cpu(hpte[0]) & ~HPTE_V_HVLOCK;
                r = be64_to_cpu(hpte[1]);
                if (v & HPTE_V_ABSENT) {
                        v &= ~HPTE_V_ABSENT;
                        v |= HPTE_V_VALID;
                }
                if (v & HPTE_V_VALID) {
                        r = rev[i].guest_rpte | (r & (HPTE_R_R | HPTE_R_C));
                        r &= ~HPTE_GR_RESERVED;
                }
                vcpu->arch.gpr[4 + i * 2] = v;
                vcpu->arch.gpr[5 + i * 2] = r;
        }
        return H_SUCCESS;
}
void kvmppc_invalidate_hpte(struct kvm *kvm, __be64 *hptep,
                        unsigned long pte_index)
{
        unsigned long rb;

        hptep[0] &= ~cpu_to_be64(HPTE_V_VALID);
        rb = compute_tlbie_rb(be64_to_cpu(hptep[0]), be64_to_cpu(hptep[1]),
                              pte_index);
        do_tlbies(kvm, &rb, 1, 1, true);
}
EXPORT_SYMBOL_GPL(kvmppc_invalidate_hpte);
void kvmppc_clear_ref_hpte(struct kvm *kvm, __be64 *hptep,
                        unsigned long pte_index)
{
        unsigned long rb;
        unsigned char rbyte;

        rb = compute_tlbie_rb(be64_to_cpu(hptep[0]), be64_to_cpu(hptep[1]),
                              pte_index);
        rbyte = (be64_to_cpu(hptep[1]) & ~HPTE_R_R) >> 8;
        /* modify only the second-last byte, which contains the ref bit */
        *((char *)hptep + 14) = rbyte;
        do_tlbies(kvm, &rb, 1, 1, false);
}
EXPORT_SYMBOL_GPL(kvmppc_clear_ref_hpte);
static int slb_base_page_shift[4] = {
        24,     /* 16M */
        16,     /* 64k */
        34,     /* 16G */
        20,     /* 1M, unsupported */
};
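/*
 * This table is indexed by the SLB_VSID_LP bits of an SLB entry and
 * gives the base page shift (log2 of the base page size) for each
 * large-page encoding that can appear in slb_v below.
 */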
/*
 * When called from virtmode, this function must be protected by
 * preempt_disable(), otherwise holding HPTE_V_HVLOCK can trigger a
 * deadlock.
 */
long kvmppc_hv_find_lock_hpte(struct kvm *kvm, gva_t eaddr, unsigned long slb_v,
                              unsigned long valid)
{
        unsigned int i;
        unsigned int pshift;
        unsigned long somask;
        unsigned long vsid, hash;
        unsigned long avpn;
        __be64 *hpte;
        unsigned long mask, val;
        unsigned long v, r;

        /* Get page shift, work out hash and AVPN etc. */
        mask = SLB_VSID_B | HPTE_V_AVPN | HPTE_V_SECONDARY;
        val = 0;
        pshift = 12;
        if (slb_v & SLB_VSID_L) {
                mask |= HPTE_V_LARGE;
                val |= HPTE_V_LARGE;
                pshift = slb_base_page_shift[(slb_v & SLB_VSID_LP) >> 4];
        }
        if (slb_v & SLB_VSID_B_1T) {
                somask = (1UL << 40) - 1;
                vsid = (slb_v & ~SLB_VSID_B) >> SLB_VSID_SHIFT_1T;
                vsid ^= vsid << 25;
        } else {
                somask = (1UL << 28) - 1;
                vsid = (slb_v & ~SLB_VSID_B) >> SLB_VSID_SHIFT;
        }
        hash = (vsid ^ ((eaddr & somask) >> pshift)) & kvm->arch.hpt_mask;
        avpn = slb_v & ~(somask >> 16); /* also includes B */
        avpn |= (eaddr & somask) >> 16;

        if (pshift >= 24)
                avpn &= ~((1UL << (pshift - 16)) - 1);
        else
                avpn &= ~0x7fUL;
        val |= avpn;

        for (;;) {
                hpte = (__be64 *)(kvm->arch.hpt_virt + (hash << 7));

                for (i = 0; i < 16; i += 2) {
                        /* Read the PTE racily */
                        v = be64_to_cpu(hpte[i]) & ~HPTE_V_HVLOCK;

                        /* Check valid/absent, hash, segment size and AVPN */
                        if (!(v & valid) || (v & mask) != val)
                                continue;

                        /* Lock the PTE and read it under the lock */
                        while (!try_lock_hpte(&hpte[i], HPTE_V_HVLOCK))
                                cpu_relax();
                        v = be64_to_cpu(hpte[i]) & ~HPTE_V_HVLOCK;
                        r = be64_to_cpu(hpte[i+1]);

                        /*
                         * Check the HPTE again, including base page size
                         */
                        if ((v & valid) && (v & mask) == val &&
                            hpte_base_page_size(v, r) == (1ul << pshift))
                                /* Return with the HPTE still locked */
                                return (hash << 3) + (i >> 1);

                        __unlock_hpte(&hpte[i], v);
                }

                if (val & HPTE_V_SECONDARY)
                        break;
                val |= HPTE_V_SECONDARY;
                hash = hash ^ kvm->arch.hpt_mask;
        }
        return -1;
}
EXPORT_SYMBOL(kvmppc_hv_find_lock_hpte);
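/*
 * The search above follows the POWER ISA hashed page table scheme: the
 * primary hash is (vsid ^ (segment offset >> page shift)) masked with
 * hpt_mask, and the secondary hash is its complement (hash ^ hpt_mask),
 * with HPTE_V_SECONDARY selecting which of the two PTEGs an entry
 * belongs to.  The value returned, (hash << 3) + (i >> 1), is the
 * global index of the matching HPTE within the hashed page table.
 */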
/*
 * Called in real mode to check whether an HPTE not found fault
 * is due to accessing a paged-out page or an emulated MMIO page,
 * or if a protection fault is due to accessing a page that the
 * guest wanted read/write access to but which we made read-only.
 * Returns a possibly modified status (DSISR) value if the fault was
 * not handled here (i.e. pass the interrupt to the guest),
 * -1 to pass the fault up to host kernel mode code, -2 to do that
 * and also load the instruction word (for MMIO emulation),
 * or 0 if we should make the guest retry the access.
 */
long kvmppc_hpte_hv_fault(struct kvm_vcpu *vcpu, unsigned long addr,
                          unsigned long slb_v, unsigned int status, bool data)
{
        struct kvm *kvm = vcpu->kvm;
        long int index;
        unsigned long v, r, gr;
        __be64 *hpte;
        unsigned long valid;
        struct revmap_entry *rev;
        unsigned long pp, key;

        /* For protection fault, expect to find a valid HPTE */
        valid = HPTE_V_VALID;
        if (status & DSISR_NOHPTE)
                valid |= HPTE_V_ABSENT;

        index = kvmppc_hv_find_lock_hpte(kvm, addr, slb_v, valid);
        if (index < 0) {
                if (status & DSISR_NOHPTE)
                        return status;  /* there really was no HPTE */
                return 0;               /* for prot fault, HPTE disappeared */
        }
        hpte = (__be64 *)(kvm->arch.hpt_virt + (index << 4));
        v = be64_to_cpu(hpte[0]) & ~HPTE_V_HVLOCK;
        r = be64_to_cpu(hpte[1]);
        rev = real_vmalloc_addr(&kvm->arch.revmap[index]);
        gr = rev->guest_rpte;

        unlock_hpte(hpte, v);

        /* For not found, if the HPTE is valid by now, retry the instruction */
        if ((status & DSISR_NOHPTE) && (v & HPTE_V_VALID))
                return 0;

        /* Check access permissions to the page */
        pp = gr & (HPTE_R_PP0 | HPTE_R_PP);
        key = (vcpu->arch.shregs.msr & MSR_PR) ? SLB_VSID_KP : SLB_VSID_KS;
        status &= ~DSISR_NOHPTE;        /* DSISR_NOHPTE == SRR1_ISI_NOPT */
        if (!data) {
                if (gr & (HPTE_R_N | HPTE_R_G))
                        return status | SRR1_ISI_N_OR_G;
                if (!hpte_read_permission(pp, slb_v & key))
                        return status | SRR1_ISI_PROT;
        } else if (status & DSISR_ISSTORE) {
                /* check write permission */
                if (!hpte_write_permission(pp, slb_v & key))
                        return status | DSISR_PROTFAULT;
        } else {
                if (!hpte_read_permission(pp, slb_v & key))
                        return status | DSISR_PROTFAULT;
        }

        /* Check storage key, if applicable */
        if (data && (vcpu->arch.shregs.msr & MSR_DR)) {
                unsigned int perm = hpte_get_skey_perm(gr, vcpu->arch.amr);
                if (status & DSISR_ISSTORE)
                        perm >>= 1;
                if (perm & 1)
                        return status | DSISR_KEYFAULT;
        }

        /* Save HPTE info for virtual-mode handler */
        vcpu->arch.pgfault_addr = addr;
        vcpu->arch.pgfault_index = index;
        vcpu->arch.pgfault_hpte[0] = v;
        vcpu->arch.pgfault_hpte[1] = r;

        /* Check the storage key to see if it is possibly emulated MMIO */
        if (data && (vcpu->arch.shregs.msr & MSR_IR) &&
            (r & (HPTE_R_KEY_HI | HPTE_R_KEY_LO)) ==
             (HPTE_R_KEY_HI | HPTE_R_KEY_LO))
                return -2;      /* MMIO emulation - load instr word */

        return -1;              /* send fault up to host kernel mode */
}