Commit aaee2c94 authored by Marcelo Tosatti, committed by Avi Kivity

KVM: MMU: Switch to mmu spinlock

Convert the synchronization of the shadow handling to a separate mmu_lock
spinlock.

Also guard fetch() by mmap_sem in read-mode to protect against alias
and memslot changes.
Signed-off-by: Marcelo Tosatti <mtosatti@redhat.com>
Signed-off-by: Avi Kivity <avi@qumranet.com>
parent d7824fff
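
The locking rule the patch establishes, summarized: anything that may sleep, such as gfn_to_page() faulting in user memory, runs with mmap_sem held for read, and the shadow page tables are touched only under the new mmu_lock spinlock, nested inside the mmap_sem section so aliases and memslots stay stable across the whole fault. Below is a minimal sketch of that pattern, not the patched code itself: kvm_map_sketch() and update_shadow_sketch() are hypothetical stand-ins for nonpaging_map() and __nonpaging_map()/FNAME(fetch); the lock, field, and helper names are the real ones from the diff.

#include <linux/kvm_host.h>
#include <linux/sched.h>

/* Hypothetical stand-in for __nonpaging_map()/FNAME(fetch): it runs
 * under mmu_lock, so it must not sleep or touch user memory. */
static int update_shadow_sketch(struct kvm_vcpu *vcpu, gva_t va, int write,
				gfn_t gfn, struct page *page)
{
	return 0;
}

static int kvm_map_sketch(struct kvm_vcpu *vcpu, gva_t va, int write,
			  gfn_t gfn)
{
	struct page *page;
	int r;

	/* Resolve gfn -> page before taking the spinlock: gfn_to_page()
	 * may fault in userspace memory and therefore may sleep.  Holding
	 * mmap_sem for read keeps aliases and memslots stable until the
	 * shadow update is done. */
	down_read(&current->mm->mmap_sem);
	page = gfn_to_page(vcpu->kvm, gfn);

	/* Mutate the shadow page tables under mmu_lock only; everything
	 * inside this critical section must be atomic. */
	spin_lock(&vcpu->kvm->mmu_lock);
	r = update_shadow_sketch(vcpu, va, write, gfn, page);
	spin_unlock(&vcpu->kvm->mmu_lock);

	up_read(&current->mm->mmap_sem);

	/* The real shadow walk consumes the page reference via the spte;
	 * the sketch just drops it. */
	kvm_release_page_clean(page);
	return r;
}
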
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -971,16 +971,12 @@ static void nonpaging_new_cr3(struct kvm_vcpu *vcpu)
 {
 }
 
-static int __nonpaging_map(struct kvm_vcpu *vcpu, gva_t v, int write, gfn_t gfn)
+static int __nonpaging_map(struct kvm_vcpu *vcpu, gva_t v, int write,
+			   gfn_t gfn, struct page *page)
 {
 	int level = PT32E_ROOT_LEVEL;
 	hpa_t table_addr = vcpu->arch.mmu.root_hpa;
 	int pt_write = 0;
-	struct page *page;
-
-	down_read(&current->mm->mmap_sem);
-	page = gfn_to_page(vcpu->kvm, gfn);
-	up_read(&current->mm->mmap_sem);
 
 	for (; ; level--) {
 		u32 index = PT64_INDEX(v, level);
@@ -1022,9 +1018,17 @@ static int nonpaging_map(struct kvm_vcpu *vcpu, gva_t v, int write, gfn_t gfn)
 {
 	int r;
+	struct page *page;
+
+	down_read(&current->mm->mmap_sem);
+	page = gfn_to_page(vcpu->kvm, gfn);
+
+	spin_lock(&vcpu->kvm->mmu_lock);
+	r = __nonpaging_map(vcpu, v, write, gfn, page);
+	spin_unlock(&vcpu->kvm->mmu_lock);
+
+	up_read(&current->mm->mmap_sem);
 
-	mutex_lock(&vcpu->kvm->lock);
-	r = __nonpaging_map(vcpu, v, write, gfn);
-	mutex_unlock(&vcpu->kvm->lock);
 	return r;
 }
@@ -1045,7 +1049,7 @@ static void mmu_free_roots(struct kvm_vcpu *vcpu)
 
 	if (!VALID_PAGE(vcpu->arch.mmu.root_hpa))
 		return;
-	mutex_lock(&vcpu->kvm->lock);
+	spin_lock(&vcpu->kvm->mmu_lock);
 #ifdef CONFIG_X86_64
 	if (vcpu->arch.mmu.shadow_root_level == PT64_ROOT_LEVEL) {
 		hpa_t root = vcpu->arch.mmu.root_hpa;
@@ -1053,7 +1057,7 @@ static void mmu_free_roots(struct kvm_vcpu *vcpu)
 		sp = page_header(root);
 		--sp->root_count;
 		vcpu->arch.mmu.root_hpa = INVALID_PAGE;
-		mutex_unlock(&vcpu->kvm->lock);
+		spin_unlock(&vcpu->kvm->mmu_lock);
 		return;
 	}
 #endif
@@ -1067,7 +1071,7 @@ static void mmu_free_roots(struct kvm_vcpu *vcpu)
 		}
 		vcpu->arch.mmu.pae_root[i] = INVALID_PAGE;
 	}
-	mutex_unlock(&vcpu->kvm->lock);
+	spin_unlock(&vcpu->kvm->mmu_lock);
 	vcpu->arch.mmu.root_hpa = INVALID_PAGE;
 }
@@ -1270,9 +1274,9 @@ int kvm_mmu_load(struct kvm_vcpu *vcpu)
 	r = mmu_topup_memory_caches(vcpu);
 	if (r)
 		goto out;
-	mutex_lock(&vcpu->kvm->lock);
+	spin_lock(&vcpu->kvm->mmu_lock);
 	mmu_alloc_roots(vcpu);
-	mutex_unlock(&vcpu->kvm->lock);
+	spin_unlock(&vcpu->kvm->mmu_lock);
 	kvm_x86_ops->set_cr3(vcpu, vcpu->arch.mmu.root_hpa);
 	kvm_mmu_flush_tlb(vcpu);
 out:
@@ -1408,7 +1412,7 @@ void kvm_mmu_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa,
 
 	pgprintk("%s: gpa %llx bytes %d\n", __FUNCTION__, gpa, bytes);
 	mmu_guess_page_from_pte_write(vcpu, gpa, new, bytes);
-	mutex_lock(&vcpu->kvm->lock);
+	spin_lock(&vcpu->kvm->mmu_lock);
 	++vcpu->kvm->stat.mmu_pte_write;
 	kvm_mmu_audit(vcpu, "pre pte write");
 	if (gfn == vcpu->arch.last_pt_write_gfn
@@ -1477,7 +1481,7 @@ void kvm_mmu_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa,
 		}
 	}
 	kvm_mmu_audit(vcpu, "post pte write");
-	mutex_unlock(&vcpu->kvm->lock);
+	spin_unlock(&vcpu->kvm->mmu_lock);
 	if (vcpu->arch.update_pte.page) {
 		kvm_release_page_clean(vcpu->arch.update_pte.page);
 		vcpu->arch.update_pte.page = NULL;
@@ -1493,15 +1497,15 @@ int kvm_mmu_unprotect_page_virt(struct kvm_vcpu *vcpu, gva_t gva)
 
 	gpa = vcpu->arch.mmu.gva_to_gpa(vcpu, gva);
 	up_read(&current->mm->mmap_sem);
-	mutex_lock(&vcpu->kvm->lock);
+	spin_lock(&vcpu->kvm->mmu_lock);
 	r = kvm_mmu_unprotect_page(vcpu->kvm, gpa >> PAGE_SHIFT);
-	mutex_unlock(&vcpu->kvm->lock);
+	spin_unlock(&vcpu->kvm->mmu_lock);
 	return r;
 }
 
 void __kvm_mmu_free_some_pages(struct kvm_vcpu *vcpu)
 {
-	mutex_lock(&vcpu->kvm->lock);
+	spin_lock(&vcpu->kvm->mmu_lock);
 	while (vcpu->kvm->arch.n_free_mmu_pages < KVM_REFILL_PAGES) {
 		struct kvm_mmu_page *sp;
 
@@ -1510,7 +1514,7 @@ void __kvm_mmu_free_some_pages(struct kvm_vcpu *vcpu)
 		kvm_mmu_zap_page(vcpu->kvm, sp);
 		++vcpu->kvm->stat.mmu_recycled;
 	}
-	mutex_unlock(&vcpu->kvm->lock);
+	spin_unlock(&vcpu->kvm->mmu_lock);
 }
 
 int kvm_mmu_page_fault(struct kvm_vcpu *vcpu, gva_t cr2, u32 error_code)
@@ -1642,10 +1646,10 @@ void kvm_mmu_zap_all(struct kvm *kvm)
 {
 	struct kvm_mmu_page *sp, *node;
 
-	mutex_lock(&kvm->lock);
+	spin_lock(&kvm->mmu_lock);
 	list_for_each_entry_safe(sp, node, &kvm->arch.active_mmu_pages, link)
 		kvm_mmu_zap_page(kvm, sp);
-	mutex_unlock(&kvm->lock);
+	spin_unlock(&kvm->mmu_lock);
 
 	kvm_flush_remote_tlbs(kvm);
 }

--- a/arch/x86/kvm/paging_tmpl.h
+++ b/arch/x86/kvm/paging_tmpl.h
@@ -387,7 +387,6 @@ static int FNAME(page_fault)(struct kvm_vcpu *vcpu, gva_t addr,
 	 */
 	r = FNAME(walk_addr)(&walker, vcpu, addr, write_fault, user_fault,
 			     fetch_fault);
-	up_read(&current->mm->mmap_sem);
 
 	/*
 	 * The page is not mapped by the guest.  Let the guest handle it.
@@ -396,12 +395,13 @@ static int FNAME(page_fault)(struct kvm_vcpu *vcpu, gva_t addr,
 		pgprintk("%s: guest page fault\n", __FUNCTION__);
 		inject_page_fault(vcpu, addr, walker.error_code);
 		vcpu->arch.last_pt_write_count = 0; /* reset fork detector */
+		up_read(&current->mm->mmap_sem);
 		return 0;
 	}
 
 	page = gfn_to_page(vcpu->kvm, walker.gfn);
 
-	mutex_lock(&vcpu->kvm->lock);
+	spin_lock(&vcpu->kvm->mmu_lock);
 	shadow_pte = FNAME(fetch)(vcpu, addr, &walker, user_fault, write_fault,
 				  &write_pt, page);
 	pgprintk("%s: shadow pte %p %llx ptwrite %d\n", __FUNCTION__,
@@ -414,13 +414,15 @@ static int FNAME(page_fault)(struct kvm_vcpu *vcpu, gva_t addr,
 	 * mmio: emulate if accessible, otherwise its a guest fault.
 	 */
 	if (shadow_pte && is_io_pte(*shadow_pte)) {
-		mutex_unlock(&vcpu->kvm->lock);
+		spin_unlock(&vcpu->kvm->mmu_lock);
+		up_read(&current->mm->mmap_sem);
 		return 1;
 	}
 
 	++vcpu->stat.pf_fixed;
 	kvm_mmu_audit(vcpu, "post page fault (fixed)");
-	mutex_unlock(&vcpu->kvm->lock);
+	spin_unlock(&vcpu->kvm->mmu_lock);
+	up_read(&current->mm->mmap_sem);
 
 	return write_pt;
 }

--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -1477,7 +1477,6 @@ static int alloc_apic_access_page(struct kvm *kvm)
 	struct kvm_userspace_memory_region kvm_userspace_mem;
 	int r = 0;
 
-	mutex_lock(&kvm->lock);
 	down_write(&current->mm->mmap_sem);
 	if (kvm->arch.apic_access_page)
 		goto out;
@@ -1491,7 +1490,6 @@ static int alloc_apic_access_page(struct kvm *kvm)
 	kvm->arch.apic_access_page = gfn_to_page(kvm, 0xfee00);
 out:
 	up_write(&current->mm->mmap_sem);
-	mutex_unlock(&kvm->lock);
 	return r;
 }

--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -104,7 +104,8 @@ struct kvm_memory_slot {
 };
 
 struct kvm {
-	struct mutex lock; /* protects everything except vcpus */
+	struct mutex lock; /* protects the vcpus array and APIC accesses */
+	spinlock_t mmu_lock;
 	struct mm_struct *mm; /* userspace tied to this vm */
 	int nmemslots;
 	struct kvm_memory_slot memslots[KVM_MEMORY_SLOTS +

--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -165,6 +165,7 @@ static struct kvm *kvm_create_vm(void)
 
 	kvm->mm = current->mm;
 	atomic_inc(&kvm->mm->mm_count);
+	spin_lock_init(&kvm->mmu_lock);
 	kvm_io_bus_init(&kvm->pio_bus);
 	mutex_init(&kvm->lock);
 	kvm_io_bus_init(&kvm->mmio_bus);
@@ -552,9 +553,7 @@ int kvm_read_guest_atomic(struct kvm *kvm, gpa_t gpa, void *data,
 	addr = gfn_to_hva(kvm, gfn);
 	if (kvm_is_error_hva(addr))
 		return -EFAULT;
-	pagefault_disable();
 	r = __copy_from_user_inatomic(data, (void __user *)addr + offset, len);
-	pagefault_enable();
 	if (r)
 		return -EFAULT;
 	return 0;