Commit c126d94f authored by Xiao Guangrong, committed by Marcelo Tosatti

KVM: MMU: lazily drop large spte

Currently, kvm zaps the large spte when write-protection is needed, so a
later read can fault on that spte. Instead, we can make the large spte
read-only rather than making it non-present, so that the page fault caused
by a read access is avoided.

The idea is from Avi:
| As I mentioned before, write-protecting a large spte is a good idea,
| since it moves some work from protect-time to fault-time, so it reduces
| jitter.  This removes the need for the return value.

This version fixes the issue reported in 6b73a960. The cause of that issue
was that fast_page_fault() directly set the read-only large spte to writable
but only marked the first page in the dirty bitmap, which means the other
pages were missed. Fix it by allowing only normal sptes (on the
PT_PAGE_TABLE_LEVEL level) to be fast-fixed.
Reviewed-by: Marcelo Tosatti <mtosatti@redhat.com>
Signed-off-by: Xiao Guangrong <xiaoguangrong@linux.vnet.ibm.com>
Signed-off-by: Marcelo Tosatti <mtosatti@redhat.com>
parent 92a476cb
...@@ -1176,8 +1176,7 @@ static void drop_large_spte(struct kvm_vcpu *vcpu, u64 *sptep) ...@@ -1176,8 +1176,7 @@ static void drop_large_spte(struct kvm_vcpu *vcpu, u64 *sptep)
/* /*
* Write-protect on the specified @sptep, @pt_protect indicates whether * Write-protect on the specified @sptep, @pt_protect indicates whether
* spte writ-protection is caused by protecting shadow page table. * spte write-protection is caused by protecting shadow page table.
* @flush indicates whether tlb need be flushed.
* *
* Note: write protection is difference between drity logging and spte * Note: write protection is difference between drity logging and spte
* protection: * protection:
...@@ -1186,10 +1185,9 @@ static void drop_large_spte(struct kvm_vcpu *vcpu, u64 *sptep) ...@@ -1186,10 +1185,9 @@ static void drop_large_spte(struct kvm_vcpu *vcpu, u64 *sptep)
* - for spte protection, the spte can be writable only after unsync-ing * - for spte protection, the spte can be writable only after unsync-ing
* shadow page. * shadow page.
* *
* Return true if the spte is dropped. * Return true if tlb need be flushed.
*/ */
static bool static bool spte_write_protect(struct kvm *kvm, u64 *sptep, bool pt_protect)
spte_write_protect(struct kvm *kvm, u64 *sptep, bool *flush, bool pt_protect)
{ {
u64 spte = *sptep; u64 spte = *sptep;
...@@ -1199,17 +1197,11 @@ spte_write_protect(struct kvm *kvm, u64 *sptep, bool *flush, bool pt_protect) ...@@ -1199,17 +1197,11 @@ spte_write_protect(struct kvm *kvm, u64 *sptep, bool *flush, bool pt_protect)
rmap_printk("rmap_write_protect: spte %p %llx\n", sptep, *sptep); rmap_printk("rmap_write_protect: spte %p %llx\n", sptep, *sptep);
if (__drop_large_spte(kvm, sptep)) {
*flush |= true;
return true;
}
if (pt_protect) if (pt_protect)
spte &= ~SPTE_MMU_WRITEABLE; spte &= ~SPTE_MMU_WRITEABLE;
spte = spte & ~PT_WRITABLE_MASK; spte = spte & ~PT_WRITABLE_MASK;
*flush |= mmu_spte_update(sptep, spte); return mmu_spte_update(sptep, spte);
return false;
} }
static bool __rmap_write_protect(struct kvm *kvm, unsigned long *rmapp, static bool __rmap_write_protect(struct kvm *kvm, unsigned long *rmapp,
...@@ -1221,11 +1213,8 @@ static bool __rmap_write_protect(struct kvm *kvm, unsigned long *rmapp, ...@@ -1221,11 +1213,8 @@ static bool __rmap_write_protect(struct kvm *kvm, unsigned long *rmapp,
for (sptep = rmap_get_first(*rmapp, &iter); sptep;) { for (sptep = rmap_get_first(*rmapp, &iter); sptep;) {
BUG_ON(!(*sptep & PT_PRESENT_MASK)); BUG_ON(!(*sptep & PT_PRESENT_MASK));
if (spte_write_protect(kvm, sptep, &flush, pt_protect)) {
sptep = rmap_get_first(*rmapp, &iter);
continue;
}
flush |= spte_write_protect(kvm, sptep, pt_protect);
sptep = rmap_get_next(&iter); sptep = rmap_get_next(&iter);
} }
...@@ -2876,6 +2865,19 @@ static bool fast_page_fault(struct kvm_vcpu *vcpu, gva_t gva, int level, ...@@ -2876,6 +2865,19 @@ static bool fast_page_fault(struct kvm_vcpu *vcpu, gva_t gva, int level,
if (!spte_is_locklessly_modifiable(spte)) if (!spte_is_locklessly_modifiable(spte))
goto exit; goto exit;
/*
* Do not fix write-permission on the large spte since we only dirty
* the first page into the dirty-bitmap in fast_pf_fix_direct_spte()
* that means other pages are missed if its slot is dirty-logged.
*
* Instead, we let the slow page fault path create a normal spte to
* fix the access.
*
* See the comments in kvm_arch_commit_memory_region().
*/
if (sp->role.level > PT_PAGE_TABLE_LEVEL)
goto exit;
/* /*
* Currently, fast page fault only works for direct mapping since * Currently, fast page fault only works for direct mapping since
* the gfn is not stable for indirect shadow page. * the gfn is not stable for indirect shadow page.
......
...@@ -7315,8 +7315,12 @@ void kvm_arch_commit_memory_region(struct kvm *kvm, ...@@ -7315,8 +7315,12 @@ void kvm_arch_commit_memory_region(struct kvm *kvm,
kvm_mmu_change_mmu_pages(kvm, nr_mmu_pages); kvm_mmu_change_mmu_pages(kvm, nr_mmu_pages);
/* /*
* Write protect all pages for dirty logging. * Write protect all pages for dirty logging.
* Existing largepage mappings are destroyed here and new ones will *
* not be created until the end of the logging. * All the sptes including the large sptes which point to this
* slot are set to readonly. We can not create any new large
* spte on this slot until the end of the logging.
*
* See the comments in fast_page_fault().
*/ */
if ((change != KVM_MR_DELETE) && (mem->flags & KVM_MEM_LOG_DIRTY_PAGES)) if ((change != KVM_MR_DELETE) && (mem->flags & KVM_MEM_LOG_DIRTY_PAGES))
kvm_mmu_slot_remove_write_access(kvm, mem->slot); kvm_mmu_slot_remove_write_access(kvm, mem->slot);
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment