Commit cd758a9b authored by Paul Mackerras

KVM: PPC: Book3S HV: Use __gfn_to_pfn_memslot in HPT page fault handler

This makes the same changes in the page fault handler for HPT guests
that commits 31c8b0d0 ("KVM: PPC: Book3S HV: Use __gfn_to_pfn_memslot()
in page fault handler", 2018-03-01), 71d29f43 ("KVM: PPC: Book3S HV:
Don't use compound_order to determine host mapping size", 2018-09-11)
and 6579804c ("KVM: PPC: Book3S HV: Avoid crash from THP collapse
during radix page fault", 2018-10-04) made for the page fault handler
for radix guests.

In summary, where we used to call get_user_pages_fast() and then do
special handling for VM_PFNMAP vmas, we now call __get_user_pages_fast()
and then __gfn_to_pfn_memslot() if that fails, followed by reading the
Linux PTE to get the host PFN, host page size and mapping attributes.

This also brings in the change from SetPageDirty() to set_page_dirty_lock()
which was done for the radix page fault handler in commit c3856aeb
("KVM: PPC: Book3S HV: Fix handling of large pages in radix page fault
handler", 2018-02-23).
Signed-off-by: Paul Mackerras <paulus@ozlabs.org>
parent 1c482452
...@@ -485,18 +485,18 @@ int kvmppc_book3s_hv_page_fault(struct kvm_run *run, struct kvm_vcpu *vcpu, ...@@ -485,18 +485,18 @@ int kvmppc_book3s_hv_page_fault(struct kvm_run *run, struct kvm_vcpu *vcpu,
__be64 *hptep; __be64 *hptep;
unsigned long mmu_seq, psize, pte_size; unsigned long mmu_seq, psize, pte_size;
unsigned long gpa_base, gfn_base; unsigned long gpa_base, gfn_base;
unsigned long gpa, gfn, hva, pfn; unsigned long gpa, gfn, hva, pfn, hpa;
struct kvm_memory_slot *memslot; struct kvm_memory_slot *memslot;
unsigned long *rmap; unsigned long *rmap;
struct revmap_entry *rev; struct revmap_entry *rev;
struct page *page, *pages[1]; struct page *page;
long index, ret, npages; long index, ret;
bool is_ci; bool is_ci;
unsigned int writing, write_ok; bool writing, write_ok;
struct vm_area_struct *vma; unsigned int shift;
unsigned long rcbits; unsigned long rcbits;
long mmio_update; long mmio_update;
struct mm_struct *mm; pte_t pte, *ptep;
if (kvm_is_radix(kvm)) if (kvm_is_radix(kvm))
return kvmppc_book3s_radix_page_fault(run, vcpu, ea, dsisr); return kvmppc_book3s_radix_page_fault(run, vcpu, ea, dsisr);
...@@ -570,59 +570,62 @@ int kvmppc_book3s_hv_page_fault(struct kvm_run *run, struct kvm_vcpu *vcpu, ...@@ -570,59 +570,62 @@ int kvmppc_book3s_hv_page_fault(struct kvm_run *run, struct kvm_vcpu *vcpu,
smp_rmb(); smp_rmb();
ret = -EFAULT; ret = -EFAULT;
is_ci = false;
pfn = 0;
page = NULL; page = NULL;
mm = kvm->mm;
pte_size = PAGE_SIZE;
writing = (dsisr & DSISR_ISSTORE) != 0; writing = (dsisr & DSISR_ISSTORE) != 0;
/* If writing != 0, then the HPTE must allow writing, if we get here */ /* If writing != 0, then the HPTE must allow writing, if we get here */
write_ok = writing; write_ok = writing;
hva = gfn_to_hva_memslot(memslot, gfn); hva = gfn_to_hva_memslot(memslot, gfn);
npages = get_user_pages_fast(hva, 1, writing ? FOLL_WRITE : 0, pages);
if (npages < 1) { /*
/* Check if it's an I/O mapping */ * Do a fast check first, since __gfn_to_pfn_memslot doesn't
down_read(&mm->mmap_sem); * do it with !atomic && !async, which is how we call it.
vma = find_vma(mm, hva); * We always ask for write permission since the common case
if (vma && vma->vm_start <= hva && hva + psize <= vma->vm_end && * is that the page is writable.
(vma->vm_flags & VM_PFNMAP)) { */
pfn = vma->vm_pgoff + if (__get_user_pages_fast(hva, 1, 1, &page) == 1) {
((hva - vma->vm_start) >> PAGE_SHIFT); write_ok = true;
pte_size = psize;
is_ci = pte_ci(__pte((pgprot_val(vma->vm_page_prot))));
write_ok = vma->vm_flags & VM_WRITE;
}
up_read(&mm->mmap_sem);
if (!pfn)
goto out_put;
} else { } else {
page = pages[0]; /* Call KVM generic code to do the slow-path check */
pfn = page_to_pfn(page); pfn = __gfn_to_pfn_memslot(memslot, gfn, false, NULL,
if (PageHuge(page)) { writing, &write_ok);
page = compound_head(page); if (is_error_noslot_pfn(pfn))
pte_size <<= compound_order(page); return -EFAULT;
} page = NULL;
/* if the guest wants write access, see if that is OK */ if (pfn_valid(pfn)) {
if (!writing && hpte_is_writable(r)) { page = pfn_to_page(pfn);
pte_t *ptep, pte; if (PageReserved(page))
unsigned long flags; page = NULL;
/*
* We need to protect against page table destruction
* hugepage split and collapse.
*/
local_irq_save(flags);
ptep = find_current_mm_pte(mm->pgd, hva, NULL, NULL);
if (ptep) {
pte = kvmppc_read_update_linux_pte(ptep, 1);
if (__pte_write(pte))
write_ok = 1;
}
local_irq_restore(flags);
} }
} }
/*
* Read the PTE from the process' radix tree and use that
* so we get the shift and attribute bits.
*/
local_irq_disable();
ptep = __find_linux_pte(vcpu->arch.pgdir, hva, NULL, &shift);
/*
* If the PTE disappeared temporarily due to a THP
* collapse, just return and let the guest try again.
*/
if (!ptep) {
local_irq_enable();
if (page)
put_page(page);
return RESUME_GUEST;
}
pte = *ptep;
local_irq_enable();
hpa = pte_pfn(pte) << PAGE_SHIFT;
pte_size = PAGE_SIZE;
if (shift)
pte_size = 1ul << shift;
is_ci = pte_ci(pte);
if (psize > pte_size) if (psize > pte_size)
goto out_put; goto out_put;
if (pte_size > psize)
hpa |= hva & (pte_size - psize);
/* Check WIMG vs. the actual page we're accessing */ /* Check WIMG vs. the actual page we're accessing */
if (!hpte_cache_flags_ok(r, is_ci)) { if (!hpte_cache_flags_ok(r, is_ci)) {
...@@ -636,14 +639,13 @@ int kvmppc_book3s_hv_page_fault(struct kvm_run *run, struct kvm_vcpu *vcpu, ...@@ -636,14 +639,13 @@ int kvmppc_book3s_hv_page_fault(struct kvm_run *run, struct kvm_vcpu *vcpu,
} }
/* /*
* Set the HPTE to point to pfn. * Set the HPTE to point to hpa.
* Since the pfn is at PAGE_SIZE granularity, make sure we * Since the hpa is at PAGE_SIZE granularity, make sure we
* don't mask out lower-order bits if psize < PAGE_SIZE. * don't mask out lower-order bits if psize < PAGE_SIZE.
*/ */
if (psize < PAGE_SIZE) if (psize < PAGE_SIZE)
psize = PAGE_SIZE; psize = PAGE_SIZE;
r = (r & HPTE_R_KEY_HI) | (r & ~(HPTE_R_PP0 - psize)) | r = (r & HPTE_R_KEY_HI) | (r & ~(HPTE_R_PP0 - psize)) | hpa;
((pfn << PAGE_SHIFT) & ~(psize - 1));
if (hpte_is_writable(r) && !write_ok) if (hpte_is_writable(r) && !write_ok)
r = hpte_make_readonly(r); r = hpte_make_readonly(r);
ret = RESUME_GUEST; ret = RESUME_GUEST;
...@@ -708,20 +710,13 @@ int kvmppc_book3s_hv_page_fault(struct kvm_run *run, struct kvm_vcpu *vcpu, ...@@ -708,20 +710,13 @@ int kvmppc_book3s_hv_page_fault(struct kvm_run *run, struct kvm_vcpu *vcpu,
asm volatile("ptesync" : : : "memory"); asm volatile("ptesync" : : : "memory");
preempt_enable(); preempt_enable();
if (page && hpte_is_writable(r)) if (page && hpte_is_writable(r))
SetPageDirty(page); set_page_dirty_lock(page);
out_put: out_put:
trace_kvm_page_fault_exit(vcpu, hpte, ret); trace_kvm_page_fault_exit(vcpu, hpte, ret);
if (page) { if (page)
/* put_page(page);
* We drop pages[0] here, not page because page might
* have been set to the head page of a compound, but
* we have to drop the reference on the correct tail
* page to match the get inside gup()
*/
put_page(pages[0]);
}
return ret; return ret;
out_unlock: out_unlock:
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment