Commit 365c8868, authored by Xiao Guangrong, committed by Gleb Natapov

KVM: MMU: reclaim the zapped-obsolete page first

As Marcelo pointed out:
| "(retention of large number of pages while zapping)
| can be fatal, it can lead to OOM and host crash"

We introduce a list, kvm->arch.zapped_obsolete_pages, to link all
the pages which have been deleted from the mmu cache but not yet
actually freed. When page reclaim is needed, we always free these
pages first.

Signed-off-by: Xiao Guangrong <xiaoguangrong@linux.vnet.ibm.com>
Reviewed-by: Marcelo Tosatti <mtosatti@redhat.com>
Signed-off-by: Gleb Natapov <gleb@redhat.com>
parent f34d251d
...@@ -536,6 +536,8 @@ struct kvm_arch { ...@@ -536,6 +536,8 @@ struct kvm_arch {
* Hash table of struct kvm_mmu_page. * Hash table of struct kvm_mmu_page.
*/ */
struct list_head active_mmu_pages; struct list_head active_mmu_pages;
struct list_head zapped_obsolete_pages;
struct list_head assigned_dev_head; struct list_head assigned_dev_head;
struct iommu_domain *iommu_domain; struct iommu_domain *iommu_domain;
int iommu_flags; int iommu_flags;
......
...@@ -4211,7 +4211,6 @@ void kvm_mmu_slot_remove_write_access(struct kvm *kvm, int slot) ...@@ -4211,7 +4211,6 @@ void kvm_mmu_slot_remove_write_access(struct kvm *kvm, int slot)
static void kvm_zap_obsolete_pages(struct kvm *kvm) static void kvm_zap_obsolete_pages(struct kvm *kvm)
{ {
struct kvm_mmu_page *sp, *node; struct kvm_mmu_page *sp, *node;
LIST_HEAD(invalid_list);
int batch = 0; int batch = 0;
restart: restart:
...@@ -4244,7 +4243,8 @@ static void kvm_zap_obsolete_pages(struct kvm *kvm) ...@@ -4244,7 +4243,8 @@ static void kvm_zap_obsolete_pages(struct kvm *kvm)
goto restart; goto restart;
} }
ret = kvm_mmu_prepare_zap_page(kvm, sp, &invalid_list); ret = kvm_mmu_prepare_zap_page(kvm, sp,
&kvm->arch.zapped_obsolete_pages);
batch += ret; batch += ret;
if (ret) if (ret)
...@@ -4255,7 +4255,7 @@ static void kvm_zap_obsolete_pages(struct kvm *kvm) ...@@ -4255,7 +4255,7 @@ static void kvm_zap_obsolete_pages(struct kvm *kvm)
* Should flush tlb before free page tables since lockless-walking * Should flush tlb before free page tables since lockless-walking
* may use the pages. * may use the pages.
*/ */
kvm_mmu_commit_zap_page(kvm, &invalid_list); kvm_mmu_commit_zap_page(kvm, &kvm->arch.zapped_obsolete_pages);
} }
/* /*
...@@ -4306,6 +4306,11 @@ void kvm_mmu_zap_mmio_sptes(struct kvm *kvm) ...@@ -4306,6 +4306,11 @@ void kvm_mmu_zap_mmio_sptes(struct kvm *kvm)
spin_unlock(&kvm->mmu_lock); spin_unlock(&kvm->mmu_lock);
} }
/*
 * Report whether @kvm still has entries on arch.zapped_obsolete_pages,
 * i.e. pages that were removed from the mmu cache but not yet freed.
 *
 * mmu_shrink() calls this once before spin_lock(&kvm->mmu_lock) and once
 * after it; the check goes through list_empty_careful().  The result is
 * wrapped in unlikely() as a branch hint for the callers.
 *
 * Returns true when the list is not empty.
 */
static bool kvm_has_zapped_obsolete_pages(struct kvm *kvm)
{
	struct list_head *zapped = &kvm->arch.zapped_obsolete_pages;

	return unlikely(!list_empty_careful(zapped));
}
static int mmu_shrink(struct shrinker *shrink, struct shrink_control *sc) static int mmu_shrink(struct shrinker *shrink, struct shrink_control *sc)
{ {
struct kvm *kvm; struct kvm *kvm;
...@@ -4334,15 +4339,23 @@ static int mmu_shrink(struct shrinker *shrink, struct shrink_control *sc) ...@@ -4334,15 +4339,23 @@ static int mmu_shrink(struct shrinker *shrink, struct shrink_control *sc)
* want to shrink a VM that only started to populate its MMU * want to shrink a VM that only started to populate its MMU
* anyway. * anyway.
*/ */
if (!kvm->arch.n_used_mmu_pages) if (!kvm->arch.n_used_mmu_pages &&
!kvm_has_zapped_obsolete_pages(kvm))
continue; continue;
idx = srcu_read_lock(&kvm->srcu); idx = srcu_read_lock(&kvm->srcu);
spin_lock(&kvm->mmu_lock); spin_lock(&kvm->mmu_lock);
if (kvm_has_zapped_obsolete_pages(kvm)) {
kvm_mmu_commit_zap_page(kvm,
&kvm->arch.zapped_obsolete_pages);
goto unlock;
}
prepare_zap_oldest_mmu_page(kvm, &invalid_list); prepare_zap_oldest_mmu_page(kvm, &invalid_list);
kvm_mmu_commit_zap_page(kvm, &invalid_list); kvm_mmu_commit_zap_page(kvm, &invalid_list);
unlock:
spin_unlock(&kvm->mmu_lock); spin_unlock(&kvm->mmu_lock);
srcu_read_unlock(&kvm->srcu, idx); srcu_read_unlock(&kvm->srcu, idx);
......
...@@ -6832,6 +6832,7 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type) ...@@ -6832,6 +6832,7 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
return -EINVAL; return -EINVAL;
INIT_LIST_HEAD(&kvm->arch.active_mmu_pages); INIT_LIST_HEAD(&kvm->arch.active_mmu_pages);
INIT_LIST_HEAD(&kvm->arch.zapped_obsolete_pages);
INIT_LIST_HEAD(&kvm->arch.assigned_dev_head); INIT_LIST_HEAD(&kvm->arch.assigned_dev_head);
/* Reserve bit 0 of irq_sources_bitmap for userspace irq source */ /* Reserve bit 0 of irq_sources_bitmap for userspace irq source */
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment