Commit edc90b7d authored by Xiao Guangrong's avatar Xiao Guangrong Committed by Paolo Bonzini

KVM: MMU: fix SMAP virtualization

KVM may turn a user page to a kernel page when kernel writes a readonly
user page if CR0.WP = 1. This shadow page entry will be reused after
SMAP is enabled so that kernel is allowed to access this user page

Fix it by setting SMAP && !CR0.WP into shadow page's role and reset mmu
once CR4.SMAP is updated
Signed-off-by: default avatarXiao Guangrong <guangrong.xiao@linux.intel.com>
Signed-off-by: default avatarPaolo Bonzini <pbonzini@redhat.com>
parent 428e3d08
...@@ -169,6 +169,10 @@ Shadow pages contain the following information: ...@@ -169,6 +169,10 @@ Shadow pages contain the following information:
Contains the value of cr4.smep && !cr0.wp for which the page is valid Contains the value of cr4.smep && !cr0.wp for which the page is valid
(pages for which this is true are different from other pages; see the (pages for which this is true are different from other pages; see the
treatment of cr0.wp=0 below). treatment of cr0.wp=0 below).
role.smap_andnot_wp:
Contains the value of cr4.smap && !cr0.wp for which the page is valid
(pages for which this is true are different from other pages; see the
treatment of cr0.wp=0 below).
gfn: gfn:
Either the guest page table containing the translations shadowed by this Either the guest page table containing the translations shadowed by this
page, or the base page frame for linear translations. See role.direct. page, or the base page frame for linear translations. See role.direct.
...@@ -344,10 +348,16 @@ on fault type: ...@@ -344,10 +348,16 @@ on fault type:
(user write faults generate a #PF) (user write faults generate a #PF)
In the first case there is an additional complication if CR4.SMEP is In the first case there are two additional complications:
enabled: since we've turned the page into a kernel page, the kernel may now - if CR4.SMEP is enabled: since we've turned the page into a kernel page,
execute it. We handle this by also setting spte.nx. If we get a user the kernel may now execute it. We handle this by also setting spte.nx.
fetch or read fault, we'll change spte.u=1 and spte.nx=gpte.nx back. If we get a user fetch or read fault, we'll change spte.u=1 and
spte.nx=gpte.nx back.
- if CR4.SMAP is disabled: since the page has been changed to a kernel
page, it can not be reused when CR4.SMAP is enabled. We set
CR4.SMAP && !CR0.WP into shadow page's role to avoid this case. Note,
here we do not care the case that CR4.SMAP is enabled since KVM will
directly inject #PF to guest due to failed permission check.
To prevent an spte that was converted into a kernel page with cr0.wp=0 To prevent an spte that was converted into a kernel page with cr0.wp=0
from being written by the kernel after cr0.wp has changed to 1, we make from being written by the kernel after cr0.wp has changed to 1, we make
......
...@@ -207,6 +207,7 @@ union kvm_mmu_page_role { ...@@ -207,6 +207,7 @@ union kvm_mmu_page_role {
unsigned nxe:1; unsigned nxe:1;
unsigned cr0_wp:1; unsigned cr0_wp:1;
unsigned smep_andnot_wp:1; unsigned smep_andnot_wp:1;
unsigned smap_andnot_wp:1;
}; };
}; };
......
...@@ -3736,8 +3736,8 @@ static void reset_rsvds_bits_mask_ept(struct kvm_vcpu *vcpu, ...@@ -3736,8 +3736,8 @@ static void reset_rsvds_bits_mask_ept(struct kvm_vcpu *vcpu,
} }
} }
void update_permission_bitmask(struct kvm_vcpu *vcpu, static void update_permission_bitmask(struct kvm_vcpu *vcpu,
struct kvm_mmu *mmu, bool ept) struct kvm_mmu *mmu, bool ept)
{ {
unsigned bit, byte, pfec; unsigned bit, byte, pfec;
u8 map; u8 map;
...@@ -3918,6 +3918,7 @@ static void init_kvm_tdp_mmu(struct kvm_vcpu *vcpu) ...@@ -3918,6 +3918,7 @@ static void init_kvm_tdp_mmu(struct kvm_vcpu *vcpu)
void kvm_init_shadow_mmu(struct kvm_vcpu *vcpu) void kvm_init_shadow_mmu(struct kvm_vcpu *vcpu)
{ {
bool smep = kvm_read_cr4_bits(vcpu, X86_CR4_SMEP); bool smep = kvm_read_cr4_bits(vcpu, X86_CR4_SMEP);
bool smap = kvm_read_cr4_bits(vcpu, X86_CR4_SMAP);
struct kvm_mmu *context = &vcpu->arch.mmu; struct kvm_mmu *context = &vcpu->arch.mmu;
MMU_WARN_ON(VALID_PAGE(context->root_hpa)); MMU_WARN_ON(VALID_PAGE(context->root_hpa));
...@@ -3936,6 +3937,8 @@ void kvm_init_shadow_mmu(struct kvm_vcpu *vcpu) ...@@ -3936,6 +3937,8 @@ void kvm_init_shadow_mmu(struct kvm_vcpu *vcpu)
context->base_role.cr0_wp = is_write_protection(vcpu); context->base_role.cr0_wp = is_write_protection(vcpu);
context->base_role.smep_andnot_wp context->base_role.smep_andnot_wp
= smep && !is_write_protection(vcpu); = smep && !is_write_protection(vcpu);
context->base_role.smap_andnot_wp
= smap && !is_write_protection(vcpu);
} }
EXPORT_SYMBOL_GPL(kvm_init_shadow_mmu); EXPORT_SYMBOL_GPL(kvm_init_shadow_mmu);
...@@ -4207,12 +4210,18 @@ void kvm_mmu_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa, ...@@ -4207,12 +4210,18 @@ void kvm_mmu_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa,
const u8 *new, int bytes) const u8 *new, int bytes)
{ {
gfn_t gfn = gpa >> PAGE_SHIFT; gfn_t gfn = gpa >> PAGE_SHIFT;
union kvm_mmu_page_role mask = { .word = 0 };
struct kvm_mmu_page *sp; struct kvm_mmu_page *sp;
LIST_HEAD(invalid_list); LIST_HEAD(invalid_list);
u64 entry, gentry, *spte; u64 entry, gentry, *spte;
int npte; int npte;
bool remote_flush, local_flush, zap_page; bool remote_flush, local_flush, zap_page;
union kvm_mmu_page_role mask = (union kvm_mmu_page_role) {
.cr0_wp = 1,
.cr4_pae = 1,
.nxe = 1,
.smep_andnot_wp = 1,
.smap_andnot_wp = 1,
};
/* /*
* If we don't have indirect shadow pages, it means no page is * If we don't have indirect shadow pages, it means no page is
...@@ -4238,7 +4247,6 @@ void kvm_mmu_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa, ...@@ -4238,7 +4247,6 @@ void kvm_mmu_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa,
++vcpu->kvm->stat.mmu_pte_write; ++vcpu->kvm->stat.mmu_pte_write;
kvm_mmu_audit(vcpu, AUDIT_PRE_PTE_WRITE); kvm_mmu_audit(vcpu, AUDIT_PRE_PTE_WRITE);
mask.cr0_wp = mask.cr4_pae = mask.nxe = mask.smep_andnot_wp = 1;
for_each_gfn_indirect_valid_sp(vcpu->kvm, sp, gfn) { for_each_gfn_indirect_valid_sp(vcpu->kvm, sp, gfn) {
if (detect_write_misaligned(sp, gpa, bytes) || if (detect_write_misaligned(sp, gpa, bytes) ||
detect_write_flooding(sp)) { detect_write_flooding(sp)) {
......
...@@ -71,8 +71,6 @@ enum { ...@@ -71,8 +71,6 @@ enum {
int handle_mmio_page_fault_common(struct kvm_vcpu *vcpu, u64 addr, bool direct); int handle_mmio_page_fault_common(struct kvm_vcpu *vcpu, u64 addr, bool direct);
void kvm_init_shadow_mmu(struct kvm_vcpu *vcpu); void kvm_init_shadow_mmu(struct kvm_vcpu *vcpu);
void kvm_init_shadow_ept_mmu(struct kvm_vcpu *vcpu, bool execonly); void kvm_init_shadow_ept_mmu(struct kvm_vcpu *vcpu, bool execonly);
void update_permission_bitmask(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu,
bool ept);
static inline unsigned int kvm_mmu_available_pages(struct kvm *kvm) static inline unsigned int kvm_mmu_available_pages(struct kvm *kvm)
{ {
......
...@@ -702,8 +702,9 @@ EXPORT_SYMBOL_GPL(kvm_set_xcr); ...@@ -702,8 +702,9 @@ EXPORT_SYMBOL_GPL(kvm_set_xcr);
int kvm_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4) int kvm_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4)
{ {
unsigned long old_cr4 = kvm_read_cr4(vcpu); unsigned long old_cr4 = kvm_read_cr4(vcpu);
unsigned long pdptr_bits = X86_CR4_PGE | X86_CR4_PSE | unsigned long pdptr_bits = X86_CR4_PGE | X86_CR4_PSE | X86_CR4_PAE |
X86_CR4_PAE | X86_CR4_SMEP; X86_CR4_SMEP | X86_CR4_SMAP;
if (cr4 & CR4_RESERVED_BITS) if (cr4 & CR4_RESERVED_BITS)
return 1; return 1;
...@@ -744,9 +745,6 @@ int kvm_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4) ...@@ -744,9 +745,6 @@ int kvm_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4)
(!(cr4 & X86_CR4_PCIDE) && (old_cr4 & X86_CR4_PCIDE))) (!(cr4 & X86_CR4_PCIDE) && (old_cr4 & X86_CR4_PCIDE)))
kvm_mmu_reset_context(vcpu); kvm_mmu_reset_context(vcpu);
if ((cr4 ^ old_cr4) & X86_CR4_SMAP)
update_permission_bitmask(vcpu, vcpu->arch.walk_mmu, false);
if ((cr4 ^ old_cr4) & X86_CR4_OSXSAVE) if ((cr4 ^ old_cr4) & X86_CR4_OSXSAVE)
kvm_update_cpuid(vcpu); kvm_update_cpuid(vcpu);
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment