Commit 37406aaa authored by Nadav Har'El, committed by Paolo Bonzini

nEPT: Add EPT tables support to paging_tmpl.h

This is the first patch in a series which adds nested EPT support to KVM's
nested VMX. Nested EPT means emulating EPT for an L1 guest so that L1 can use
EPT when running a nested guest L2. When L1 uses EPT, it allows the L2 guest
to set its own cr3 and take its own page faults without either of L0 or L1
getting involved. This often significantly improves L2's performance over the
previous two alternatives (shadow page tables over EPT, and shadow page
tables over shadow page tables).

This patch adds EPT support to paging_tmpl.h.

paging_tmpl.h contains the code for reading and writing page tables. The code
for 32-bit and 64-bit tables is very similar, but not identical, so
paging_tmpl.h is #include'd twice in mmu.c, once with PTTYPE=32 and once
with PTTYPE=64, and this generates the two sets of similar functions.
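
As an aside, the include-twice pattern can be sketched outside the kernel. The
following is a minimal, hypothetical example (the file names walker_tmpl.h and
demo.c and the walker*_is_present() helpers are invented for illustration, not
the kernel's code): the header deliberately has no include guard, and each
inclusion defines PTTYPE and a FNAME() prefix so one body expands into a
distinct set of functions.

/* walker_tmpl.h -- deliberately no include guard; included once per PTTYPE */
#if PTTYPE == 64
#define pt_element_t uint64_t
#define FNAME(name) walker64_##name
#elif PTTYPE == 32
#define pt_element_t uint32_t
#define FNAME(name) walker32_##name
#else
#error Invalid PTTYPE value
#endif

/* One body, expanded once per inclusion into walker64_/walker32_ variants. */
static inline int FNAME(is_present)(pt_element_t pte)
{
        return (pte & 1) != 0;          /* bit 0 is the present bit */
}

#undef pt_element_t
#undef FNAME

/* demo.c */
#include <stdint.h>
#include <stdio.h>

#define PTTYPE 64
#include "walker_tmpl.h"   /* emits walker64_is_present() */
#undef PTTYPE

#define PTTYPE 32
#include "walker_tmpl.h"   /* emits walker32_is_present() */
#undef PTTYPE

int main(void)
{
        printf("%d %d\n", walker64_is_present(0x1), walker32_is_present(0x2));
        return 0;
}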

There are subtle but important differences between the format of EPT tables
and that of ordinary x86 64-bit page tables, so for nested EPT we need a
third set of functions to read the guest EPT table and to write the shadow
EPT table.

So this patch adds a third PTTYPE, PTTYPE_EPT, which creates functions (prefixed
with "EPT") which correctly read and write EPT tables.
Reviewed-by: Xiao Guangrong <xiaoguangrong@linux.vnet.ibm.com>
Signed-off-by: Nadav Har'El <nyh@il.ibm.com>
Signed-off-by: Jun Nakajima <jun.nakajima@intel.com>
Signed-off-by: Xinhao Xu <xinhao.xu@intel.com>
Signed-off-by: Yang Zhang <yang.z.zhang@Intel.com>
Signed-off-by: Gleb Natapov <gleb@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
parent 61719a8f
@@ -3494,6 +3494,11 @@ static inline bool is_last_gpte(struct kvm_mmu *mmu, unsigned level, unsigned gp
return mmu->last_pte_bitmap & (1 << index);
}
#define PTTYPE_EPT 18 /* arbitrary */
#define PTTYPE PTTYPE_EPT
#include "paging_tmpl.h"
#undef PTTYPE
#define PTTYPE 64
#include "paging_tmpl.h"
#undef PTTYPE
...
@@ -23,6 +23,13 @@
* so the code in this file is compiled twice, once per pte size.
*/
/*
* This is used to catch non optimized PT_GUEST_(DIRTY|ACCESS)_SHIFT macro
* uses for EPT without A/D paging type.
*/
extern u64 __pure __using_nonexistent_pte_bit(void)
__compiletime_error("wrong use of PT_GUEST_(DIRTY|ACCESS)_SHIFT");
#if PTTYPE == 64
#define pt_element_t u64
#define guest_walker guest_walker64
@@ -58,6 +65,21 @@
#define PT_GUEST_DIRTY_SHIFT PT_DIRTY_SHIFT
#define PT_GUEST_ACCESSED_SHIFT PT_ACCESSED_SHIFT
#define CMPXCHG cmpxchg
#elif PTTYPE == PTTYPE_EPT
#define pt_element_t u64
#define guest_walker guest_walkerEPT
#define FNAME(name) ept_##name
#define PT_BASE_ADDR_MASK PT64_BASE_ADDR_MASK
#define PT_LVL_ADDR_MASK(lvl) PT64_LVL_ADDR_MASK(lvl)
#define PT_LVL_OFFSET_MASK(lvl) PT64_LVL_OFFSET_MASK(lvl)
#define PT_INDEX(addr, level) PT64_INDEX(addr, level)
#define PT_LEVEL_BITS PT64_LEVEL_BITS
#define PT_GUEST_ACCESSED_MASK 0
#define PT_GUEST_DIRTY_MASK 0
#define PT_GUEST_DIRTY_SHIFT __using_nonexistent_pte_bit()
#define PT_GUEST_ACCESSED_SHIFT __using_nonexistent_pte_bit()
#define CMPXCHG cmpxchg64
#define PT_MAX_FULL_LEVELS 4
#else
#error Invalid PTTYPE value
#endif
@@ -115,7 +137,11 @@ static bool FNAME(is_rsvd_bits_set)(struct kvm_mmu *mmu, u64 gpte, int level)
static inline int FNAME(is_present_gpte)(unsigned long pte)
{
#if PTTYPE != PTTYPE_EPT
return is_present_gpte(pte);
#else
return pte & 7;
#endif
}
static int FNAME(cmpxchg_gpte)(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu,
@@ -165,9 +191,14 @@ static bool FNAME(prefetch_invalid_gpte)(struct kvm_vcpu *vcpu,
static inline unsigned FNAME(gpte_access)(struct kvm_vcpu *vcpu, u64 gpte)
{
unsigned access;
#if PTTYPE == PTTYPE_EPT
access = ((gpte & VMX_EPT_WRITABLE_MASK) ? ACC_WRITE_MASK : 0) |
((gpte & VMX_EPT_EXECUTABLE_MASK) ? ACC_EXEC_MASK : 0) |
ACC_USER_MASK;
#else
access = (gpte & (PT_WRITABLE_MASK | PT_USER_MASK)) | ACC_EXEC_MASK;
access &= ~(gpte >> PT64_NX_SHIFT);
#endif
return access;
}
@@ -369,6 +400,7 @@ static int FNAME(walk_addr)(struct guest_walker *walker,
access);
}
#if PTTYPE != PTTYPE_EPT
static int FNAME(walk_addr_nested)(struct guest_walker *walker,
struct kvm_vcpu *vcpu, gva_t addr,
u32 access)
@@ -376,6 +408,7 @@ static int FNAME(walk_addr_nested)(struct guest_walker *walker,
return FNAME(walk_addr_generic)(walker, vcpu, &vcpu->arch.nested_mmu,
addr, access);
}
#endif
static bool
FNAME(prefetch_gpte)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp,
@@ -803,6 +836,7 @@ static gpa_t FNAME(gva_to_gpa)(struct kvm_vcpu *vcpu, gva_t vaddr, u32 access,
return gpa;
}
#if PTTYPE != PTTYPE_EPT
static gpa_t FNAME(gva_to_gpa_nested)(struct kvm_vcpu *vcpu, gva_t vaddr,
u32 access,
struct x86_exception *exception)
@@ -821,6 +855,7 @@ static gpa_t FNAME(gva_to_gpa_nested)(struct kvm_vcpu *vcpu, gva_t vaddr,
return gpa;
}
#endif
/*
* Using the cached information from sp->gfns is safe because:
...