Commit caf6900f authored by Xiao Guangrong, committed by Marcelo Tosatti

KVM: MMU: lazily drop large spte

Currently, kvm zaps the large spte if write protection is needed; a later
read can then fault on that spte. Instead, we can make the large spte
read-only rather than not present, so the page fault caused by read access
is avoided.

The idea is from Avi:
| As I mentioned before, write-protecting a large spte is a good idea,
| since it moves some work from protect-time to fault-time, so it reduces
| jitter.  This removes the need for the return value.
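
To make the idea concrete, here is a minimal, standalone sketch (an editor's illustration, not the kernel code): it models an spte as a plain u64 and contrasts the old "zap on write-protect" behaviour with the new "make it read-only" behaviour. The mask names mirror the kernel's, but the bit positions below are illustrative assumptions, not the real x86 spte layout.

/*
 * Editor's sketch, not kernel code.  Bit positions are illustrative
 * assumptions, not the real x86 spte format.
 */
#include <stdint.h>
#include <stdio.h>

#define PT_PRESENT_MASK		(1ULL << 0)	/* assumed bit: entry is present */
#define PT_WRITABLE_MASK	(1ULL << 1)	/* assumed bit: entry is writable */
#define PT_PAGE_SIZE_MASK	(1ULL << 7)	/* assumed bit: large mapping */

/* Old scheme: a large spte is zapped outright, so even a read faults later. */
static uint64_t protect_by_zapping(uint64_t spte)
{
	if (spte & PT_PAGE_SIZE_MASK)
		return 0;			/* not present: reads fault too */
	return spte & ~PT_WRITABLE_MASK;
}

/* New scheme: clear only the writable bit; reads still hit, and the large
 * spte is dropped later, at fault time (see __direct_map in the diff). */
static uint64_t protect_lazily(uint64_t spte)
{
	return spte & ~PT_WRITABLE_MASK;
}

int main(void)
{
	uint64_t large = PT_PRESENT_MASK | PT_WRITABLE_MASK | PT_PAGE_SIZE_MASK;

	printf("zap:  present after protect = %d\n",
	       !!(protect_by_zapping(large) & PT_PRESENT_MASK));	/* prints 0 */
	printf("lazy: present after protect = %d\n",
	       !!(protect_lazily(large) & PT_PRESENT_MASK));		/* prints 1 */
	return 0;
}

Under the old scheme the protected large spte ends up not present, so the next read faults; under the new one it stays present and read-only, and only a write fault forces it to be dropped.
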
Reviewed-by: Gleb Natapov <gleb@redhat.com>
Signed-off-by: Xiao Guangrong <xiaoguangrong@linux.vnet.ibm.com>
Signed-off-by: Marcelo Tosatti <mtosatti@redhat.com>
parent 5037878e
arch/x86/kvm/mmu.c

@@ -1106,8 +1106,7 @@ static void drop_large_spte(struct kvm_vcpu *vcpu, u64 *sptep)
 
 /*
  * Write-protect on the specified @sptep, @pt_protect indicates whether
- * spte writ-protection is caused by protecting shadow page table.
- * @flush indicates whether tlb need be flushed.
+ * spte write-protection is caused by protecting shadow page table.
  *
  * Note: write protection is difference between drity logging and spte
  * protection:
@@ -1116,10 +1115,9 @@ static void drop_large_spte(struct kvm_vcpu *vcpu, u64 *sptep)
  * - for spte protection, the spte can be writable only after unsync-ing
  * shadow page.
  *
- * Return true if the spte is dropped.
+ * Return true if tlb need be flushed.
  */
-static bool
-spte_write_protect(struct kvm *kvm, u64 *sptep, bool *flush, bool pt_protect)
+static bool spte_write_protect(struct kvm *kvm, u64 *sptep, bool pt_protect)
 {
 	u64 spte = *sptep;
 
@@ -1129,17 +1127,11 @@ spte_write_protect(struct kvm *kvm, u64 *sptep, bool *flush, bool pt_protect)
 	rmap_printk("rmap_write_protect: spte %p %llx\n", sptep, *sptep);
 
-	if (__drop_large_spte(kvm, sptep)) {
-		*flush |= true;
-		return true;
-	}
-
 	if (pt_protect)
 		spte &= ~SPTE_MMU_WRITEABLE;
 	spte = spte & ~PT_WRITABLE_MASK;
 
-	*flush |= mmu_spte_update(sptep, spte);
-	return false;
+	return mmu_spte_update(sptep, spte);
 }
 
 static bool __rmap_write_protect(struct kvm *kvm, unsigned long *rmapp,
@@ -1151,11 +1143,8 @@ static bool __rmap_write_protect(struct kvm *kvm, unsigned long *rmapp,
 	for (sptep = rmap_get_first(*rmapp, &iter); sptep;) {
 		BUG_ON(!(*sptep & PT_PRESENT_MASK));
 
-		if (spte_write_protect(kvm, sptep, &flush, pt_protect)) {
-			sptep = rmap_get_first(*rmapp, &iter);
-			continue;
-		}
-
+		flush |= spte_write_protect(kvm, sptep, pt_protect);
 		sptep = rmap_get_next(&iter);
 	}
 
@@ -2596,6 +2585,8 @@ static int __direct_map(struct kvm_vcpu *vcpu, gpa_t v, int write,
 			break;
 		}
 
+		drop_large_spte(vcpu, iterator.sptep);
+
 		if (!is_shadow_present_pte(*iterator.sptep)) {
 			u64 base_addr = iterator.addr;
...
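
Taken together, the hunks implement the lazy drop: spte_write_protect() now only clears the writable bits and returns whether the TLB needs flushing, so reads through a write-protected large spte still hit. The large spte is removed only at fault time, when __direct_map() calls drop_large_spte() before installing a smaller mapping. As a side effect, __rmap_write_protect() no longer has to restart its rmap walk, since spte_write_protect() can no longer remove entries from the chain mid-iteration.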