Commit d048c098 authored by Radim Krčmář's avatar Radim Krčmář Committed by Paolo Bonzini

KVM: nVMX: fix msr bitmaps to prevent L2 from accessing L0 x2APIC

msr bitmap can be used to avoid a VM exit (interception) on guest MSR
accesses.  In some configurations of VMX controls, the guest can even
directly access host's x2APIC MSRs.  See SDM 29.5 VIRTUALIZING MSR-BASED
APIC ACCESSES.

L2 could read all L0's x2APIC MSRs and write TPR, EOI, and SELF_IPI.
To do so, L1 would first trick KVM to disable all possible interceptions
by enabling APICv features and then would turn those features off;
nested_vmx_merge_msr_bitmap() only disabled interceptions, so VMX would
not intercept previously enabled MSRs even though they were not safe
with the new configuration.

Correctly re-enabling interceptions is not enough as a second bug would
still allow L1+L2 to access host's MSRs: msr bitmap was shared for all
VMCSs, so L1 could trigger a race to get the desired combination of msr
bitmap and VMX controls.

This fix allocates a msr bitmap for every L1 VCPU, allows only safe
x2APIC MSRs from L1's msr bitmap, and disables msr bitmaps if they would
have to intercept everything anyway.

Fixes: 3af18d9c ("KVM: nVMX: Prepare for using hardware MSR bitmap")
Reported-by: Jim Mattson <jmattson@google.com>
Suggested-by: Wincy Van <fanwenyi0529@gmail.com>
Reviewed-by: Wanpeng Li <wanpeng.li@hotmail.com>
Signed-off-by: Radim Krčmář <rkrcmar@redhat.com>
parent 184ca823
...@@ -435,6 +435,8 @@ struct nested_vmx { ...@@ -435,6 +435,8 @@ struct nested_vmx {
bool pi_pending; bool pi_pending;
u16 posted_intr_nv; u16 posted_intr_nv;
unsigned long *msr_bitmap;
struct hrtimer preemption_timer; struct hrtimer preemption_timer;
bool preemption_timer_expired; bool preemption_timer_expired;
...@@ -924,7 +926,6 @@ static unsigned long *vmx_msr_bitmap_legacy; ...@@ -924,7 +926,6 @@ static unsigned long *vmx_msr_bitmap_legacy;
static unsigned long *vmx_msr_bitmap_longmode; static unsigned long *vmx_msr_bitmap_longmode;
static unsigned long *vmx_msr_bitmap_legacy_x2apic; static unsigned long *vmx_msr_bitmap_legacy_x2apic;
static unsigned long *vmx_msr_bitmap_longmode_x2apic; static unsigned long *vmx_msr_bitmap_longmode_x2apic;
static unsigned long *vmx_msr_bitmap_nested;
static unsigned long *vmx_vmread_bitmap; static unsigned long *vmx_vmread_bitmap;
static unsigned long *vmx_vmwrite_bitmap; static unsigned long *vmx_vmwrite_bitmap;
...@@ -2508,7 +2509,7 @@ static void vmx_set_msr_bitmap(struct kvm_vcpu *vcpu) ...@@ -2508,7 +2509,7 @@ static void vmx_set_msr_bitmap(struct kvm_vcpu *vcpu)
unsigned long *msr_bitmap; unsigned long *msr_bitmap;
if (is_guest_mode(vcpu)) if (is_guest_mode(vcpu))
msr_bitmap = vmx_msr_bitmap_nested; msr_bitmap = to_vmx(vcpu)->nested.msr_bitmap;
else if (cpu_has_secondary_exec_ctrls() && else if (cpu_has_secondary_exec_ctrls() &&
(vmcs_read32(SECONDARY_VM_EXEC_CONTROL) & (vmcs_read32(SECONDARY_VM_EXEC_CONTROL) &
SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE)) { SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE)) {
...@@ -6363,13 +6364,6 @@ static __init int hardware_setup(void) ...@@ -6363,13 +6364,6 @@ static __init int hardware_setup(void)
if (!vmx_msr_bitmap_longmode_x2apic) if (!vmx_msr_bitmap_longmode_x2apic)
goto out4; goto out4;
if (nested) {
vmx_msr_bitmap_nested =
(unsigned long *)__get_free_page(GFP_KERNEL);
if (!vmx_msr_bitmap_nested)
goto out5;
}
vmx_vmread_bitmap = (unsigned long *)__get_free_page(GFP_KERNEL); vmx_vmread_bitmap = (unsigned long *)__get_free_page(GFP_KERNEL);
if (!vmx_vmread_bitmap) if (!vmx_vmread_bitmap)
goto out6; goto out6;
...@@ -6392,8 +6386,6 @@ static __init int hardware_setup(void) ...@@ -6392,8 +6386,6 @@ static __init int hardware_setup(void)
memset(vmx_msr_bitmap_legacy, 0xff, PAGE_SIZE); memset(vmx_msr_bitmap_legacy, 0xff, PAGE_SIZE);
memset(vmx_msr_bitmap_longmode, 0xff, PAGE_SIZE); memset(vmx_msr_bitmap_longmode, 0xff, PAGE_SIZE);
if (nested)
memset(vmx_msr_bitmap_nested, 0xff, PAGE_SIZE);
if (setup_vmcs_config(&vmcs_config) < 0) { if (setup_vmcs_config(&vmcs_config) < 0) {
r = -EIO; r = -EIO;
...@@ -6529,9 +6521,6 @@ static __init int hardware_setup(void) ...@@ -6529,9 +6521,6 @@ static __init int hardware_setup(void)
out7: out7:
free_page((unsigned long)vmx_vmread_bitmap); free_page((unsigned long)vmx_vmread_bitmap);
out6: out6:
if (nested)
free_page((unsigned long)vmx_msr_bitmap_nested);
out5:
free_page((unsigned long)vmx_msr_bitmap_longmode_x2apic); free_page((unsigned long)vmx_msr_bitmap_longmode_x2apic);
out4: out4:
free_page((unsigned long)vmx_msr_bitmap_longmode); free_page((unsigned long)vmx_msr_bitmap_longmode);
...@@ -6557,8 +6546,6 @@ static __exit void hardware_unsetup(void) ...@@ -6557,8 +6546,6 @@ static __exit void hardware_unsetup(void)
free_page((unsigned long)vmx_io_bitmap_a); free_page((unsigned long)vmx_io_bitmap_a);
free_page((unsigned long)vmx_vmwrite_bitmap); free_page((unsigned long)vmx_vmwrite_bitmap);
free_page((unsigned long)vmx_vmread_bitmap); free_page((unsigned long)vmx_vmread_bitmap);
if (nested)
free_page((unsigned long)vmx_msr_bitmap_nested);
free_kvm_area(); free_kvm_area();
} }
...@@ -6995,16 +6982,21 @@ static int handle_vmon(struct kvm_vcpu *vcpu) ...@@ -6995,16 +6982,21 @@ static int handle_vmon(struct kvm_vcpu *vcpu)
return 1; return 1;
} }
if (cpu_has_vmx_msr_bitmap()) {
vmx->nested.msr_bitmap =
(unsigned long *)__get_free_page(GFP_KERNEL);
if (!vmx->nested.msr_bitmap)
goto out_msr_bitmap;
}
vmx->nested.cached_vmcs12 = kmalloc(VMCS12_SIZE, GFP_KERNEL); vmx->nested.cached_vmcs12 = kmalloc(VMCS12_SIZE, GFP_KERNEL);
if (!vmx->nested.cached_vmcs12) if (!vmx->nested.cached_vmcs12)
return -ENOMEM; goto out_cached_vmcs12;
if (enable_shadow_vmcs) { if (enable_shadow_vmcs) {
shadow_vmcs = alloc_vmcs(); shadow_vmcs = alloc_vmcs();
if (!shadow_vmcs) { if (!shadow_vmcs)
kfree(vmx->nested.cached_vmcs12); goto out_shadow_vmcs;
return -ENOMEM;
}
/* mark vmcs as shadow */ /* mark vmcs as shadow */
shadow_vmcs->revision_id |= (1u << 31); shadow_vmcs->revision_id |= (1u << 31);
/* init shadow vmcs */ /* init shadow vmcs */
...@@ -7024,6 +7016,15 @@ static int handle_vmon(struct kvm_vcpu *vcpu) ...@@ -7024,6 +7016,15 @@ static int handle_vmon(struct kvm_vcpu *vcpu)
skip_emulated_instruction(vcpu); skip_emulated_instruction(vcpu);
nested_vmx_succeed(vcpu); nested_vmx_succeed(vcpu);
return 1; return 1;
out_shadow_vmcs:
kfree(vmx->nested.cached_vmcs12);
out_cached_vmcs12:
free_page((unsigned long)vmx->nested.msr_bitmap);
out_msr_bitmap:
return -ENOMEM;
} }
/* /*
...@@ -7098,6 +7099,10 @@ static void free_nested(struct vcpu_vmx *vmx) ...@@ -7098,6 +7099,10 @@ static void free_nested(struct vcpu_vmx *vmx)
vmx->nested.vmxon = false; vmx->nested.vmxon = false;
free_vpid(vmx->nested.vpid02); free_vpid(vmx->nested.vpid02);
nested_release_vmcs12(vmx); nested_release_vmcs12(vmx);
if (vmx->nested.msr_bitmap) {
free_page((unsigned long)vmx->nested.msr_bitmap);
vmx->nested.msr_bitmap = NULL;
}
if (enable_shadow_vmcs) if (enable_shadow_vmcs)
free_vmcs(vmx->nested.current_shadow_vmcs); free_vmcs(vmx->nested.current_shadow_vmcs);
kfree(vmx->nested.cached_vmcs12); kfree(vmx->nested.cached_vmcs12);
...@@ -9472,8 +9477,10 @@ static inline bool nested_vmx_merge_msr_bitmap(struct kvm_vcpu *vcpu, ...@@ -9472,8 +9477,10 @@ static inline bool nested_vmx_merge_msr_bitmap(struct kvm_vcpu *vcpu,
{ {
int msr; int msr;
struct page *page; struct page *page;
unsigned long *msr_bitmap; unsigned long *msr_bitmap_l1;
unsigned long *msr_bitmap_l0 = to_vmx(vcpu)->nested.msr_bitmap;
/* This shortcut is ok because we support only x2APIC MSRs so far. */
if (!nested_cpu_has_virt_x2apic_mode(vmcs12)) if (!nested_cpu_has_virt_x2apic_mode(vmcs12))
return false; return false;
...@@ -9482,63 +9489,37 @@ static inline bool nested_vmx_merge_msr_bitmap(struct kvm_vcpu *vcpu, ...@@ -9482,63 +9489,37 @@ static inline bool nested_vmx_merge_msr_bitmap(struct kvm_vcpu *vcpu,
WARN_ON(1); WARN_ON(1);
return false; return false;
} }
msr_bitmap = (unsigned long *)kmap(page); msr_bitmap_l1 = (unsigned long *)kmap(page);
if (!msr_bitmap) { if (!msr_bitmap_l1) {
nested_release_page_clean(page); nested_release_page_clean(page);
WARN_ON(1); WARN_ON(1);
return false; return false;
} }
memset(msr_bitmap_l0, 0xff, PAGE_SIZE);
if (nested_cpu_has_virt_x2apic_mode(vmcs12)) { if (nested_cpu_has_virt_x2apic_mode(vmcs12)) {
if (nested_cpu_has_apic_reg_virt(vmcs12)) if (nested_cpu_has_apic_reg_virt(vmcs12))
for (msr = 0x800; msr <= 0x8ff; msr++) for (msr = 0x800; msr <= 0x8ff; msr++)
nested_vmx_disable_intercept_for_msr( nested_vmx_disable_intercept_for_msr(
msr_bitmap, msr_bitmap_l1, msr_bitmap_l0,
vmx_msr_bitmap_nested,
msr, MSR_TYPE_R); msr, MSR_TYPE_R);
/* TPR is allowed */
nested_vmx_disable_intercept_for_msr(msr_bitmap, nested_vmx_disable_intercept_for_msr(
vmx_msr_bitmap_nested, msr_bitmap_l1, msr_bitmap_l0,
APIC_BASE_MSR + (APIC_TASKPRI >> 4), APIC_BASE_MSR + (APIC_TASKPRI >> 4),
MSR_TYPE_R | MSR_TYPE_W); MSR_TYPE_R | MSR_TYPE_W);
if (nested_cpu_has_vid(vmcs12)) { if (nested_cpu_has_vid(vmcs12)) {
/* EOI and self-IPI are allowed */
nested_vmx_disable_intercept_for_msr( nested_vmx_disable_intercept_for_msr(
msr_bitmap, msr_bitmap_l1, msr_bitmap_l0,
vmx_msr_bitmap_nested,
APIC_BASE_MSR + (APIC_EOI >> 4), APIC_BASE_MSR + (APIC_EOI >> 4),
MSR_TYPE_W); MSR_TYPE_W);
nested_vmx_disable_intercept_for_msr( nested_vmx_disable_intercept_for_msr(
msr_bitmap, msr_bitmap_l1, msr_bitmap_l0,
vmx_msr_bitmap_nested,
APIC_BASE_MSR + (APIC_SELF_IPI >> 4), APIC_BASE_MSR + (APIC_SELF_IPI >> 4),
MSR_TYPE_W); MSR_TYPE_W);
} }
} else {
/*
* Enable reading intercept of all the x2apic
* MSRs. We should not rely on vmcs12 to do any
* optimizations here, it may have been modified
* by L1.
*/
for (msr = 0x800; msr <= 0x8ff; msr++)
__vmx_enable_intercept_for_msr(
vmx_msr_bitmap_nested,
msr,
MSR_TYPE_R);
__vmx_enable_intercept_for_msr(
vmx_msr_bitmap_nested,
APIC_BASE_MSR + (APIC_TASKPRI >> 4),
MSR_TYPE_W);
__vmx_enable_intercept_for_msr(
vmx_msr_bitmap_nested,
APIC_BASE_MSR + (APIC_EOI >> 4),
MSR_TYPE_W);
__vmx_enable_intercept_for_msr(
vmx_msr_bitmap_nested,
APIC_BASE_MSR + (APIC_SELF_IPI >> 4),
MSR_TYPE_W);
} }
kunmap(page); kunmap(page);
nested_release_page_clean(page); nested_release_page_clean(page);
...@@ -9957,10 +9938,10 @@ static void prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12) ...@@ -9957,10 +9938,10 @@ static void prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12)
} }
if (cpu_has_vmx_msr_bitmap() && if (cpu_has_vmx_msr_bitmap() &&
exec_control & CPU_BASED_USE_MSR_BITMAPS) { exec_control & CPU_BASED_USE_MSR_BITMAPS &&
nested_vmx_merge_msr_bitmap(vcpu, vmcs12); nested_vmx_merge_msr_bitmap(vcpu, vmcs12))
/* MSR_BITMAP will be set by following vmx_set_efer. */ ; /* MSR_BITMAP will be set by following vmx_set_efer. */
} else else
exec_control &= ~CPU_BASED_USE_MSR_BITMAPS; exec_control &= ~CPU_BASED_USE_MSR_BITMAPS;
/* /*
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment