Commit 32ab5a5e authored by Marc Zyngier's avatar Marc Zyngier

Merge branch kvm-arm64/mmu/MMIO-block-mapping into kvmarm-master/next

MMIO block mapping support from Keqian Zhu, allowing larger
(and lazy) mappings for devices assigned to guests.

* kvm-arm64/mmu/MMIO-block-mapping:
  KVM: arm64: Try stage2 block mapping for host device MMIO
  KVM: arm64: Remove the creation time's mapping of MMIO regions
parents 32e92b71 2aa53d68
......@@ -822,6 +822,35 @@ transparent_hugepage_adjust(struct kvm_memory_slot *memslot,
return PAGE_SIZE;
}
static int get_vma_page_shift(struct vm_area_struct *vma, unsigned long hva)
{
unsigned long pa;
if (is_vm_hugetlb_page(vma) && !(vma->vm_flags & VM_PFNMAP))
return huge_page_shift(hstate_vma(vma));
if (!(vma->vm_flags & VM_PFNMAP))
return PAGE_SHIFT;
VM_BUG_ON(is_vm_hugetlb_page(vma));
pa = (vma->vm_pgoff << PAGE_SHIFT) + (hva - vma->vm_start);
#ifndef __PAGETABLE_PMD_FOLDED
if ((hva & (PUD_SIZE - 1)) == (pa & (PUD_SIZE - 1)) &&
ALIGN_DOWN(hva, PUD_SIZE) >= vma->vm_start &&
ALIGN(hva, PUD_SIZE) <= vma->vm_end)
return PUD_SHIFT;
#endif
if ((hva & (PMD_SIZE - 1)) == (pa & (PMD_SIZE - 1)) &&
ALIGN_DOWN(hva, PMD_SIZE) >= vma->vm_start &&
ALIGN(hva, PMD_SIZE) <= vma->vm_end)
return PMD_SHIFT;
return PAGE_SHIFT;
}
static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
struct kvm_memory_slot *memslot, unsigned long hva,
unsigned long fault_status)
......@@ -853,7 +882,10 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
return -EFAULT;
}
/* Let's check if we will get back a huge page backed by hugetlbfs */
/*
* Let's check if we will get back a huge page backed by hugetlbfs, or
* get block mapping for device MMIO region.
*/
mmap_read_lock(current->mm);
vma = find_vma_intersection(current->mm, hva, hva + 1);
if (unlikely(!vma)) {
......@@ -862,15 +894,15 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
return -EFAULT;
}
if (is_vm_hugetlb_page(vma))
vma_shift = huge_page_shift(hstate_vma(vma));
else
vma_shift = PAGE_SHIFT;
if (logging_active ||
(vma->vm_flags & VM_PFNMAP)) {
/*
* logging_active is guaranteed to never be true for VM_PFNMAP
* memslots.
*/
if (logging_active) {
force_pte = true;
vma_shift = PAGE_SHIFT;
} else {
vma_shift = get_vma_page_shift(vma, hva);
}
switch (vma_shift) {
......@@ -943,8 +975,17 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
return -EFAULT;
if (kvm_is_device_pfn(pfn)) {
/*
* If the page was identified as device early by looking at
* the VMA flags, vma_pagesize is already representing the
* largest quantity we can map. If instead it was mapped
* via gfn_to_pfn_prot(), vma_pagesize is set to PAGE_SIZE
* and must not be upgraded.
*
* In both cases, we don't let transparent_hugepage_adjust()
* change things at the last minute.
*/
device = true;
force_pte = true;
} else if (logging_active && !write_fault) {
/*
* Only actually map the page as writable if this was a write
......@@ -965,7 +1006,7 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
* If we are not forced to use page mapping, check if we are
* backed by a THP and thus use block mapping if possible.
*/
if (vma_pagesize == PAGE_SIZE && !force_pte)
if (vma_pagesize == PAGE_SIZE && !(force_pte || device))
vma_pagesize = transparent_hugepage_adjust(memslot, hva,
&pfn, &fault_ipa);
if (writable)
......@@ -1346,7 +1387,6 @@ int kvm_arch_prepare_memory_region(struct kvm *kvm,
{
hva_t hva = mem->userspace_addr;
hva_t reg_end = hva + mem->memory_size;
bool writable = !(mem->flags & KVM_MEM_READONLY);
int ret = 0;
if (change != KVM_MR_CREATE && change != KVM_MR_MOVE &&
......@@ -1363,8 +1403,7 @@ int kvm_arch_prepare_memory_region(struct kvm *kvm,
mmap_read_lock(current->mm);
/*
* A memory region could potentially cover multiple VMAs, and any holes
* between them, so iterate over all of them to find out if we can map
* any of them right now.
* between them, so iterate over all of them.
*
* +--------------------------------------------+
* +---------------+----------------+ +----------------+
......@@ -1375,51 +1414,21 @@ int kvm_arch_prepare_memory_region(struct kvm *kvm,
*/
do {
struct vm_area_struct *vma;
hva_t vm_start, vm_end;
vma = find_vma_intersection(current->mm, hva, reg_end);
if (!vma)
break;
/*
* Take the intersection of this VMA with the memory region
*/
vm_start = max(hva, vma->vm_start);
vm_end = min(reg_end, vma->vm_end);
if (vma->vm_flags & VM_PFNMAP) {
gpa_t gpa = mem->guest_phys_addr +
(vm_start - mem->userspace_addr);
phys_addr_t pa;
pa = (phys_addr_t)vma->vm_pgoff << PAGE_SHIFT;
pa += vm_start - vma->vm_start;
/* IO region dirty page logging not allowed */
if (memslot->flags & KVM_MEM_LOG_DIRTY_PAGES) {
ret = -EINVAL;
goto out;
}
ret = kvm_phys_addr_ioremap(kvm, gpa, pa,
vm_end - vm_start,
writable);
if (ret)
break;
}
}
hva = vm_end;
hva = min(reg_end, vma->vm_end);
} while (hva < reg_end);
if (change == KVM_MR_FLAGS_ONLY)
goto out;
spin_lock(&kvm->mmu_lock);
if (ret)
unmap_stage2_range(&kvm->arch.mmu, mem->guest_phys_addr, mem->memory_size);
else if (!cpus_have_final_cap(ARM64_HAS_STAGE2_FWB))
stage2_flush_memslot(kvm, memslot);
spin_unlock(&kvm->mmu_lock);
out:
mmap_read_unlock(current->mm);
return ret;
}
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment