Commit 8dcf5782 authored by Linus Torvalds

Merge branch 'kvm-updates-2.6.26' of git://git.kernel.org/pub/scm/linux/kernel/git/avi/kvm

* 'kvm-updates-2.6.26' of git://git.kernel.org/pub/scm/linux/kernel/git/avi/kvm:
  x86: KVM guest: make setup_secondary_clock definition dependent on local apic
  KVM: MMU: Allow more than PAGES_PER_HPAGE write protections per large page
  KVM: avoid fx_init() schedule in atomic
  KVM: Avoid spurious exceptions after setting registers
  KVM: PIT: support mode 4
  KVM: x86 emulator: disable writeback on lmsw
  KVM: ppc: deliver INTERRUPT_FP_UNAVAIL to the guest
  KVM: ppc: Handle guest idle by emulating MSR[WE] writes
  KVM: x86: task switch: fix wrong bit setting for the busy flag
  KVM: VMX: Enable EPT feature for KVM
  KVM: VMX: Prepare an identity page table for EPT in real mode
  KVM: Export necessary function for EPT
  KVM: MMU: Remove #ifdef CONFIG_X86_64 to support 4 level EPT
  KVM: MMU: Add EPT support
  KVM: Add kvm_x86_ops get_tdp_level()
  KVM: MMU: Move some definitions to a header file
  KVM: VMX: EPT Feature Detection
parents e73b65f1 b8ba5f10
@@ -49,6 +49,7 @@ struct kvm_stats_debugfs_item debugfs_entries[] = {
 	{ "inst_emu",   VCPU_STAT(emulated_inst_exits) },
 	{ "dec",        VCPU_STAT(dec_exits) },
 	{ "ext_intr",   VCPU_STAT(ext_intr_exits) },
+	{ "halt_wakeup", VCPU_STAT(halt_wakeup) },
 	{ NULL }
 };
@@ -338,6 +339,11 @@ int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu,
 		}
 		break;
 
+	case BOOKE_INTERRUPT_FP_UNAVAIL:
+		kvmppc_queue_exception(vcpu, exit_nr);
+		r = RESUME_GUEST;
+		break;
+
 	case BOOKE_INTERRUPT_DATA_STORAGE:
 		vcpu->arch.dear = vcpu->arch.fault_dear;
 		vcpu->arch.esr = vcpu->arch.fault_esr;
......
@@ -36,13 +36,12 @@ gfn_t unalias_gfn(struct kvm *kvm, gfn_t gfn)
 
 int kvm_cpu_has_interrupt(struct kvm_vcpu *v)
 {
-	/* XXX implement me */
-	return 0;
+	return !!(v->arch.pending_exceptions);
 }
 
 int kvm_arch_vcpu_runnable(struct kvm_vcpu *v)
 {
-	return 1;
+	return !(v->arch.msr & MSR_WE);
 }
@@ -214,6 +213,11 @@ static void kvmppc_decrementer_func(unsigned long data)
 	struct kvm_vcpu *vcpu = (struct kvm_vcpu *)data;
 
 	kvmppc_queue_exception(vcpu, BOOKE_INTERRUPT_DECREMENTER);
+
+	if (waitqueue_active(&vcpu->wq)) {
+		wake_up_interruptible(&vcpu->wq);
+		vcpu->stat.halt_wakeup++;
+	}
 }
 
 int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu)
@@ -339,6 +343,8 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *run)
 	int r;
 	sigset_t sigsaved;
 
+	vcpu_load(vcpu);
+
 	if (vcpu->sigset_active)
 		sigprocmask(SIG_SETMASK, &vcpu->sigset, &sigsaved);
@@ -363,12 +369,20 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *run)
 	if (vcpu->sigset_active)
 		sigprocmask(SIG_SETMASK, &sigsaved, NULL);
 
+	vcpu_put(vcpu);
+
 	return r;
 }
 
 int kvm_vcpu_ioctl_interrupt(struct kvm_vcpu *vcpu, struct kvm_interrupt *irq)
 {
 	kvmppc_queue_exception(vcpu, BOOKE_INTERRUPT_EXTERNAL);
+
+	if (waitqueue_active(&vcpu->wq)) {
+		wake_up_interruptible(&vcpu->wq);
+		vcpu->stat.halt_wakeup++;
+	}
+
 	return 0;
 }
......
@@ -133,6 +133,7 @@ static int kvm_register_clock(void)
 	return native_write_msr_safe(MSR_KVM_SYSTEM_TIME, low, high);
 }
 
+#ifdef CONFIG_X86_LOCAL_APIC
 static void kvm_setup_secondary_clock(void)
 {
 	/*
@@ -143,6 +144,7 @@ static void kvm_setup_secondary_clock(void)
 	/* ok, done with our trickery, call native */
 	setup_secondary_APIC_clock();
 }
+#endif
 
 /*
  * After the clock is registered, the host will keep writing to the
@@ -177,7 +179,9 @@ void __init kvmclock_init(void)
 	pv_time_ops.get_wallclock = kvm_get_wallclock;
 	pv_time_ops.set_wallclock = kvm_set_wallclock;
 	pv_time_ops.sched_clock = kvm_clock_read;
+#ifdef CONFIG_X86_LOCAL_APIC
 	pv_apic_ops.setup_secondary_clock = kvm_setup_secondary_clock;
+#endif
 	machine_ops.shutdown = kvm_shutdown;
 #ifdef CONFIG_KEXEC
 	machine_ops.crash_shutdown = kvm_crash_shutdown;
......
@@ -288,6 +288,8 @@ static void pit_load_count(struct kvm *kvm, int channel, u32 val)
 	 * mode 1 is one shot, mode 2 is period, otherwise del timer */
 	switch (ps->channels[0].mode) {
 	case 1:
+        /* FIXME: enhance mode 4 precision */
+	case 4:
 		create_pit_timer(&ps->pit_timer, val, 0);
 		break;
 	case 2:
......
@@ -79,36 +79,6 @@ static int dbg = 1;
 }
 #endif
 
-#define PT64_PT_BITS 9
-#define PT64_ENT_PER_PAGE (1 << PT64_PT_BITS)
-#define PT32_PT_BITS 10
-#define PT32_ENT_PER_PAGE (1 << PT32_PT_BITS)
-
-#define PT_WRITABLE_SHIFT 1
-
-#define PT_PRESENT_MASK (1ULL << 0)
-#define PT_WRITABLE_MASK (1ULL << PT_WRITABLE_SHIFT)
-#define PT_USER_MASK (1ULL << 2)
-#define PT_PWT_MASK (1ULL << 3)
-#define PT_PCD_MASK (1ULL << 4)
-#define PT_ACCESSED_MASK (1ULL << 5)
-#define PT_DIRTY_MASK (1ULL << 6)
-#define PT_PAGE_SIZE_MASK (1ULL << 7)
-#define PT_PAT_MASK (1ULL << 7)
-#define PT_GLOBAL_MASK (1ULL << 8)
-#define PT64_NX_SHIFT 63
-#define PT64_NX_MASK (1ULL << PT64_NX_SHIFT)
-
-#define PT_PAT_SHIFT 7
-#define PT_DIR_PAT_SHIFT 12
-#define PT_DIR_PAT_MASK (1ULL << PT_DIR_PAT_SHIFT)
-
-#define PT32_DIR_PSE36_SIZE 4
-#define PT32_DIR_PSE36_SHIFT 13
-#define PT32_DIR_PSE36_MASK \
-	(((1ULL << PT32_DIR_PSE36_SIZE) - 1) << PT32_DIR_PSE36_SHIFT)
-
 #define PT_FIRST_AVAIL_BITS_SHIFT 9
 #define PT64_SECOND_AVAIL_BITS_SHIFT 52
@@ -154,10 +124,6 @@ static int dbg = 1;
 #define PFERR_USER_MASK (1U << 2)
 #define PFERR_FETCH_MASK (1U << 4)
 
-#define PT64_ROOT_LEVEL 4
-#define PT32_ROOT_LEVEL 2
-#define PT32E_ROOT_LEVEL 3
-
 #define PT_DIRECTORY_LEVEL 2
 #define PT_PAGE_TABLE_LEVEL 1
@@ -186,6 +152,12 @@ static struct kmem_cache *mmu_page_header_cache;
 
 static u64 __read_mostly shadow_trap_nonpresent_pte;
 static u64 __read_mostly shadow_notrap_nonpresent_pte;
+static u64 __read_mostly shadow_base_present_pte;
+static u64 __read_mostly shadow_nx_mask;
+static u64 __read_mostly shadow_x_mask;	/* mutual exclusive with nx_mask */
+static u64 __read_mostly shadow_user_mask;
+static u64 __read_mostly shadow_accessed_mask;
+static u64 __read_mostly shadow_dirty_mask;
 
 void kvm_mmu_set_nonpresent_ptes(u64 trap_pte, u64 notrap_pte)
 {
@@ -194,6 +166,23 @@ void kvm_mmu_set_nonpresent_ptes(u64 trap_pte, u64 notrap_pte)
 }
 EXPORT_SYMBOL_GPL(kvm_mmu_set_nonpresent_ptes);
 
+void kvm_mmu_set_base_ptes(u64 base_pte)
+{
+	shadow_base_present_pte = base_pte;
+}
+EXPORT_SYMBOL_GPL(kvm_mmu_set_base_ptes);
+
+void kvm_mmu_set_mask_ptes(u64 user_mask, u64 accessed_mask,
+		u64 dirty_mask, u64 nx_mask, u64 x_mask)
+{
+	shadow_user_mask = user_mask;
+	shadow_accessed_mask = accessed_mask;
+	shadow_dirty_mask = dirty_mask;
+	shadow_nx_mask = nx_mask;
+	shadow_x_mask = x_mask;
+}
+EXPORT_SYMBOL_GPL(kvm_mmu_set_mask_ptes);
+
 static int is_write_protection(struct kvm_vcpu *vcpu)
 {
 	return vcpu->arch.cr0 & X86_CR0_WP;
@@ -232,7 +221,7 @@ static int is_writeble_pte(unsigned long pte)
 
 static int is_dirty_pte(unsigned long pte)
 {
-	return pte & PT_DIRTY_MASK;
+	return pte & shadow_dirty_mask;
 }
 
 static int is_rmap_pte(u64 pte)
@@ -387,7 +376,6 @@ static void account_shadowed(struct kvm *kvm, gfn_t gfn)
 
 	write_count = slot_largepage_idx(gfn, gfn_to_memslot(kvm, gfn));
 	*write_count += 1;
-	WARN_ON(*write_count > KVM_PAGES_PER_HPAGE);
 }
 
 static void unaccount_shadowed(struct kvm *kvm, gfn_t gfn)
@@ -547,7 +535,7 @@ static void rmap_remove(struct kvm *kvm, u64 *spte)
 		return;
 	sp = page_header(__pa(spte));
 	pfn = spte_to_pfn(*spte);
-	if (*spte & PT_ACCESSED_MASK)
+	if (*spte & shadow_accessed_mask)
 		kvm_set_pfn_accessed(pfn);
 	if (is_writeble_pte(*spte))
 		kvm_release_pfn_dirty(pfn);
@@ -1073,17 +1061,17 @@ static void mmu_set_spte(struct kvm_vcpu *vcpu, u64 *shadow_pte,
 	 * whether the guest actually used the pte (in order to detect
 	 * demand paging).
 	 */
-	spte = PT_PRESENT_MASK | PT_DIRTY_MASK;
+	spte = shadow_base_present_pte | shadow_dirty_mask;
 	if (!speculative)
 		pte_access |= PT_ACCESSED_MASK;
 	if (!dirty)
 		pte_access &= ~ACC_WRITE_MASK;
-	if (!(pte_access & ACC_EXEC_MASK))
-		spte |= PT64_NX_MASK;
-
-	spte |= PT_PRESENT_MASK;
+	if (pte_access & ACC_EXEC_MASK)
+		spte |= shadow_x_mask;
+	else
+		spte |= shadow_nx_mask;
 	if (pte_access & ACC_USER_MASK)
-		spte |= PT_USER_MASK;
+		spte |= shadow_user_mask;
 	if (largepage)
 		spte |= PT_PAGE_SIZE_MASK;
@@ -1188,8 +1176,9 @@ static int __direct_map(struct kvm_vcpu *vcpu, gpa_t v, int write,
 				return -ENOMEM;
 			}
 
-			table[index] = __pa(new_table->spt) | PT_PRESENT_MASK
-				| PT_WRITABLE_MASK | PT_USER_MASK;
+			table[index] = __pa(new_table->spt)
+				| PT_PRESENT_MASK | PT_WRITABLE_MASK
+				| shadow_user_mask | shadow_x_mask;
 		}
 		table_addr = table[index] & PT64_BASE_ADDR_MASK;
 	}
@@ -1244,7 +1233,6 @@ static void mmu_free_roots(struct kvm_vcpu *vcpu)
 	if (!VALID_PAGE(vcpu->arch.mmu.root_hpa))
 		return;
 	spin_lock(&vcpu->kvm->mmu_lock);
-#ifdef CONFIG_X86_64
 	if (vcpu->arch.mmu.shadow_root_level == PT64_ROOT_LEVEL) {
 		hpa_t root = vcpu->arch.mmu.root_hpa;
@@ -1256,7 +1244,6 @@ static void mmu_free_roots(struct kvm_vcpu *vcpu)
 		spin_unlock(&vcpu->kvm->mmu_lock);
 		return;
 	}
-#endif
 	for (i = 0; i < 4; ++i) {
 		hpa_t root = vcpu->arch.mmu.pae_root[i];
@@ -1282,7 +1269,6 @@ static void mmu_alloc_roots(struct kvm_vcpu *vcpu)
 
 	root_gfn = vcpu->arch.cr3 >> PAGE_SHIFT;
 
-#ifdef CONFIG_X86_64
 	if (vcpu->arch.mmu.shadow_root_level == PT64_ROOT_LEVEL) {
 		hpa_t root = vcpu->arch.mmu.root_hpa;
@@ -1297,7 +1283,6 @@ static void mmu_alloc_roots(struct kvm_vcpu *vcpu)
 		vcpu->arch.mmu.root_hpa = root;
 		return;
 	}
-#endif
 	metaphysical = !is_paging(vcpu);
 	if (tdp_enabled)
 		metaphysical = 1;
@@ -1377,7 +1362,7 @@ static int tdp_page_fault(struct kvm_vcpu *vcpu, gva_t gpa,
 	spin_lock(&vcpu->kvm->mmu_lock);
 	kvm_mmu_free_some_pages(vcpu);
 	r = __direct_map(vcpu, gpa, error_code & PFERR_WRITE_MASK,
-			 largepage, gfn, pfn, TDP_ROOT_LEVEL);
+			 largepage, gfn, pfn, kvm_x86_ops->get_tdp_level());
 	spin_unlock(&vcpu->kvm->mmu_lock);
 
 	return r;
@@ -1484,7 +1469,7 @@ static int init_kvm_tdp_mmu(struct kvm_vcpu *vcpu)
 	context->page_fault = tdp_page_fault;
 	context->free = nonpaging_free;
 	context->prefetch_page = nonpaging_prefetch_page;
-	context->shadow_root_level = TDP_ROOT_LEVEL;
+	context->shadow_root_level = kvm_x86_ops->get_tdp_level();
 	context->root_hpa = INVALID_PAGE;
 
 	if (!is_paging(vcpu)) {
@@ -1633,7 +1618,7 @@ static bool last_updated_pte_accessed(struct kvm_vcpu *vcpu)
 {
 	u64 *spte = vcpu->arch.last_pte_updated;
 
-	return !!(spte && (*spte & PT_ACCESSED_MASK));
+	return !!(spte && (*spte & shadow_accessed_mask));
 }
 
 static void mmu_guess_page_from_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa,
......
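Editorial note (not part of the merge): the hunks above replace the hard-coded PT_* bits in the shadow MMU with runtime masks that each backend registers through the new kvm_mmu_set_base_ptes()/kvm_mmu_set_mask_ptes() exports. A minimal sketch of how a backend might program them follows. The classic-paging values match the kvm_arch_init() call later in this merge; the EPT values are an assumption (the real call sites live in the collapsed VMX diff below), and example_setup_shadow_masks() is a hypothetical name used only for illustration.

/*
 * Editorial sketch, not part of the merge: programming the new shadow-PTE
 * mask hooks. The EPT branch is an assumed reconstruction; the real call
 * sites are in the collapsed VMX diff.
 */
static void example_setup_shadow_masks(bool enable_ept)
{
	if (enable_ept) {
		/* EPT has no NX bit: execute permission is a positive bit,
		 * and accessed/dirty are emulated with "fake" high bits. */
		kvm_mmu_set_base_ptes(VMX_EPT_READABLE_MASK |
				      VMX_EPT_WRITABLE_MASK |
				      VMX_EPT_DEFAULT_MT << VMX_EPT_MT_EPTE_SHIFT);
		kvm_mmu_set_mask_ptes(0ull, VMX_EPT_FAKE_ACCESSED_MASK,
				      VMX_EPT_FAKE_DIRTY_MASK, 0ull,
				      VMX_EPT_EXECUTABLE_MASK);
	} else {
		/* Classic x86 page-table bits, as registered by kvm_arch_init(). */
		kvm_mmu_set_base_ptes(PT_PRESENT_MASK);
		kvm_mmu_set_mask_ptes(PT_USER_MASK, PT_ACCESSED_MASK,
				      PT_DIRTY_MASK, PT64_NX_MASK, 0);
	}
}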
@@ -3,11 +3,38 @@
 
 #include <linux/kvm_host.h>
 
-#ifdef CONFIG_X86_64
-#define TDP_ROOT_LEVEL PT64_ROOT_LEVEL
-#else
-#define TDP_ROOT_LEVEL PT32E_ROOT_LEVEL
-#endif
+#define PT64_PT_BITS 9
+#define PT64_ENT_PER_PAGE (1 << PT64_PT_BITS)
+#define PT32_PT_BITS 10
+#define PT32_ENT_PER_PAGE (1 << PT32_PT_BITS)
+
+#define PT_WRITABLE_SHIFT 1
+
+#define PT_PRESENT_MASK (1ULL << 0)
+#define PT_WRITABLE_MASK (1ULL << PT_WRITABLE_SHIFT)
+#define PT_USER_MASK (1ULL << 2)
+#define PT_PWT_MASK (1ULL << 3)
+#define PT_PCD_MASK (1ULL << 4)
+#define PT_ACCESSED_MASK (1ULL << 5)
+#define PT_DIRTY_MASK (1ULL << 6)
+#define PT_PAGE_SIZE_MASK (1ULL << 7)
+#define PT_PAT_MASK (1ULL << 7)
+#define PT_GLOBAL_MASK (1ULL << 8)
+#define PT64_NX_SHIFT 63
+#define PT64_NX_MASK (1ULL << PT64_NX_SHIFT)
+
+#define PT_PAT_SHIFT 7
+#define PT_DIR_PAT_SHIFT 12
+#define PT_DIR_PAT_MASK (1ULL << PT_DIR_PAT_SHIFT)
+
+#define PT32_DIR_PSE36_SIZE 4
+#define PT32_DIR_PSE36_SHIFT 13
+#define PT32_DIR_PSE36_MASK \
+	(((1ULL << PT32_DIR_PSE36_SIZE) - 1) << PT32_DIR_PSE36_SHIFT)
+
+#define PT64_ROOT_LEVEL 4
+#define PT32_ROOT_LEVEL 2
+#define PT32E_ROOT_LEVEL 3
 
 static inline void kvm_mmu_free_some_pages(struct kvm_vcpu *vcpu)
 {
@@ -1863,6 +1863,15 @@ static bool svm_cpu_has_accelerated_tpr(void)
 	return false;
 }
 
+static int get_npt_level(void)
+{
+#ifdef CONFIG_X86_64
+	return PT64_ROOT_LEVEL;
+#else
+	return PT32E_ROOT_LEVEL;
+#endif
+}
+
 static struct kvm_x86_ops svm_x86_ops = {
 	.cpu_has_kvm_support = has_svm,
 	.disabled_by_bios = is_disabled,
@@ -1920,6 +1929,7 @@ static struct kvm_x86_ops svm_x86_ops = {
 	.inject_pending_vectors = do_interrupt_requests,
 
 	.set_tss_addr = svm_set_tss_addr,
+	.get_tdp_level = get_npt_level,
 };
 
 static int __init svm_init(void)
......
This diff is collapsed.
@@ -35,6 +35,8 @@
 #define CPU_BASED_MWAIT_EXITING 0x00000400
 #define CPU_BASED_RDPMC_EXITING 0x00000800
 #define CPU_BASED_RDTSC_EXITING 0x00001000
+#define CPU_BASED_CR3_LOAD_EXITING 0x00008000
+#define CPU_BASED_CR3_STORE_EXITING 0x00010000
 #define CPU_BASED_CR8_LOAD_EXITING 0x00080000
 #define CPU_BASED_CR8_STORE_EXITING 0x00100000
 #define CPU_BASED_TPR_SHADOW 0x00200000
@@ -49,6 +51,7 @@
  * Definitions of Secondary Processor-Based VM-Execution Controls.
  */
 #define SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES 0x00000001
+#define SECONDARY_EXEC_ENABLE_EPT 0x00000002
 #define SECONDARY_EXEC_ENABLE_VPID 0x00000020
 #define SECONDARY_EXEC_WBINVD_EXITING 0x00000040
@@ -100,10 +103,22 @@ enum vmcs_field {
 	VIRTUAL_APIC_PAGE_ADDR_HIGH = 0x00002013,
 	APIC_ACCESS_ADDR = 0x00002014,
 	APIC_ACCESS_ADDR_HIGH = 0x00002015,
+	EPT_POINTER = 0x0000201a,
+	EPT_POINTER_HIGH = 0x0000201b,
+	GUEST_PHYSICAL_ADDRESS = 0x00002400,
+	GUEST_PHYSICAL_ADDRESS_HIGH = 0x00002401,
 	VMCS_LINK_POINTER = 0x00002800,
 	VMCS_LINK_POINTER_HIGH = 0x00002801,
 	GUEST_IA32_DEBUGCTL = 0x00002802,
 	GUEST_IA32_DEBUGCTL_HIGH = 0x00002803,
+	GUEST_PDPTR0 = 0x0000280a,
+	GUEST_PDPTR0_HIGH = 0x0000280b,
+	GUEST_PDPTR1 = 0x0000280c,
+	GUEST_PDPTR1_HIGH = 0x0000280d,
+	GUEST_PDPTR2 = 0x0000280e,
+	GUEST_PDPTR2_HIGH = 0x0000280f,
+	GUEST_PDPTR3 = 0x00002810,
+	GUEST_PDPTR3_HIGH = 0x00002811,
 	PIN_BASED_VM_EXEC_CONTROL = 0x00004000,
 	CPU_BASED_VM_EXEC_CONTROL = 0x00004002,
 	EXCEPTION_BITMAP = 0x00004004,
@@ -226,6 +241,8 @@ enum vmcs_field {
 #define EXIT_REASON_MWAIT_INSTRUCTION 36
 #define EXIT_REASON_TPR_BELOW_THRESHOLD 43
 #define EXIT_REASON_APIC_ACCESS 44
+#define EXIT_REASON_EPT_VIOLATION 48
+#define EXIT_REASON_EPT_MISCONFIG 49
 #define EXIT_REASON_WBINVD 54
 
 /*
@@ -316,15 +333,36 @@ enum vmcs_field {
 #define MSR_IA32_VMX_CR4_FIXED1 0x489
 #define MSR_IA32_VMX_VMCS_ENUM 0x48a
 #define MSR_IA32_VMX_PROCBASED_CTLS2 0x48b
+#define MSR_IA32_VMX_EPT_VPID_CAP 0x48c
 
 #define MSR_IA32_FEATURE_CONTROL 0x3a
 #define MSR_IA32_FEATURE_CONTROL_LOCKED 0x1
 #define MSR_IA32_FEATURE_CONTROL_VMXON_ENABLED 0x4
 
 #define APIC_ACCESS_PAGE_PRIVATE_MEMSLOT 9
+#define IDENTITY_PAGETABLE_PRIVATE_MEMSLOT 10
 
 #define VMX_NR_VPIDS (1 << 16)
 #define VMX_VPID_EXTENT_SINGLE_CONTEXT 1
 #define VMX_VPID_EXTENT_ALL_CONTEXT 2
 
+#define VMX_EPT_EXTENT_INDIVIDUAL_ADDR 0
+#define VMX_EPT_EXTENT_CONTEXT 1
+#define VMX_EPT_EXTENT_GLOBAL 2
+#define VMX_EPT_EXTENT_INDIVIDUAL_BIT (1ull << 24)
+#define VMX_EPT_EXTENT_CONTEXT_BIT (1ull << 25)
+#define VMX_EPT_EXTENT_GLOBAL_BIT (1ull << 26)
+#define VMX_EPT_DEFAULT_GAW 3
+#define VMX_EPT_MAX_GAW 0x4
+#define VMX_EPT_MT_EPTE_SHIFT 3
+#define VMX_EPT_GAW_EPTP_SHIFT 3
+#define VMX_EPT_DEFAULT_MT 0x6ull
+#define VMX_EPT_READABLE_MASK 0x1ull
+#define VMX_EPT_WRITABLE_MASK 0x2ull
+#define VMX_EPT_EXECUTABLE_MASK 0x4ull
+#define VMX_EPT_FAKE_ACCESSED_MASK (1ull << 62)
+#define VMX_EPT_FAKE_DIRTY_MASK (1ull << 63)
+
+#define VMX_EPT_IDENTITY_PAGETABLE_ADDR 0xfffbc000ul
+
 #endif
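Editorial note (not part of the merge): the VMX_EPT_* constants above cover both the EPT pointer format (memory type and guest address width fields) and the per-entry permission bits. A hedged sketch of how an EPT pointer could be assembled from them follows; the real helper lives in the collapsed VMX diff, and example_construct_eptp() is only an assumed reconstruction for illustration.

/*
 * Editorial sketch, not part of the merge: assembling an EPT pointer from
 * the constants above. Assumes a 4-level EPT table rooted at root_hpa.
 */
static u64 example_construct_eptp(unsigned long root_hpa)
{
	u64 eptp;

	/* Write-back memory type and a guest address width encoding 4 levels. */
	eptp = VMX_EPT_DEFAULT_MT |
	       VMX_EPT_DEFAULT_GAW << VMX_EPT_GAW_EPTP_SHIFT;
	/* Physical address of the top-level EPT page. */
	eptp |= (root_hpa & PAGE_MASK);

	return eptp;
}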
@@ -2417,6 +2417,9 @@ int kvm_arch_init(void *opaque)
 	kvm_x86_ops = ops;
 	kvm_mmu_set_nonpresent_ptes(0ull, 0ull);
+	kvm_mmu_set_base_ptes(PT_PRESENT_MASK);
+	kvm_mmu_set_mask_ptes(PT_USER_MASK, PT_ACCESSED_MASK,
+			PT_DIRTY_MASK, PT64_NX_MASK, 0);
 	return 0;
 
 out:
@@ -3019,6 +3022,8 @@ int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
 	kvm_x86_ops->decache_regs(vcpu);
 
+	vcpu->arch.exception.pending = false;
+
 	vcpu_put(vcpu);
 
 	return 0;
@@ -3481,7 +3486,7 @@ int kvm_task_switch(struct kvm_vcpu *vcpu, u16 tss_selector, int reason)
 	}
 
 	if (reason == TASK_SWITCH_IRET || reason == TASK_SWITCH_JMP) {
-		cseg_desc.type &= ~(1 << 8); //clear the B flag
+		cseg_desc.type &= ~(1 << 1); //clear the B flag
 		save_guest_segment_descriptor(vcpu, tr_seg.selector,
 					      &cseg_desc);
 	}
@@ -3507,7 +3512,7 @@ int kvm_task_switch(struct kvm_vcpu *vcpu, u16 tss_selector, int reason)
 	}
 
 	if (reason != TASK_SWITCH_IRET) {
-		nseg_desc.type |= (1 << 8);
+		nseg_desc.type |= (1 << 1);
 		save_guest_segment_descriptor(vcpu, tss_selector,
 					      &nseg_desc);
 	}
@@ -3698,10 +3703,19 @@ void fx_init(struct kvm_vcpu *vcpu)
 {
 	unsigned after_mxcsr_mask;
 
+	/*
+	 * Touch the fpu the first time in non atomic context as if
+	 * this is the first fpu instruction the exception handler
+	 * will fire before the instruction returns and it'll have to
+	 * allocate ram with GFP_KERNEL.
+	 */
+	if (!used_math())
+		fx_save(&vcpu->arch.host_fx_image);
+
 	/* Initialize guest FPU by resetting ours and saving into guest's */
 	preempt_disable();
 	fx_save(&vcpu->arch.host_fx_image);
-	fpu_init();
+	fx_finit();
 	fx_save(&vcpu->arch.guest_fx_image);
 	fx_restore(&vcpu->arch.host_fx_image);
 	preempt_enable();
@@ -3906,6 +3920,8 @@ void kvm_arch_destroy_vm(struct kvm *kvm)
 	kvm_free_physmem(kvm);
 	if (kvm->arch.apic_access_page)
 		put_page(kvm->arch.apic_access_page);
+	if (kvm->arch.ept_identity_pagetable)
+		put_page(kvm->arch.ept_identity_pagetable);
 	kfree(kvm);
 }
......
@@ -1761,6 +1761,7 @@ x86_emulate_insn(struct x86_emulate_ctxt *ctxt, struct x86_emulate_ops *ops)
 		case 6: /* lmsw */
 			realmode_lmsw(ctxt->vcpu, (u16)c->src.val,
 				      &ctxt->eflags);
+			c->dst.type = OP_NONE;
 			break;
 		case 7: /* invlpg*/
 			emulate_invlpg(ctxt->vcpu, memop);
......
@@ -59,6 +59,7 @@ struct kvm_vcpu_stat {
 	u32 emulated_inst_exits;
 	u32 dec_exits;
 	u32 ext_intr_exits;
+	u32 halt_wakeup;
 };
 
 struct tlbe {
......
@@ -77,12 +77,17 @@ static inline void kvmppc_clear_exception(struct kvm_vcpu *vcpu, int exception)
 	clear_bit(priority, &vcpu->arch.pending_exceptions);
 }
 
+/* Helper function for "full" MSR writes. No need to call this if only EE is
+ * changing. */
 static inline void kvmppc_set_msr(struct kvm_vcpu *vcpu, u32 new_msr)
 {
 	if ((new_msr & MSR_PR) != (vcpu->arch.msr & MSR_PR))
 		kvmppc_mmu_priv_switch(vcpu, new_msr & MSR_PR);
 
 	vcpu->arch.msr = new_msr;
+
+	if (vcpu->arch.msr & MSR_WE)
+		kvm_vcpu_block(vcpu);
 }
 
 #endif /* __POWERPC_KVM_PPC_H__ */
@@ -314,6 +314,9 @@ struct kvm_arch{
 	struct page *apic_access_page;
 
 	gpa_t wall_clock;
+
+	struct page *ept_identity_pagetable;
+	bool ept_identity_pagetable_done;
 };
 
 struct kvm_vm_stat {
@@ -422,6 +425,7 @@ struct kvm_x86_ops {
 		       struct kvm_run *run);
 
 	int (*set_tss_addr)(struct kvm *kvm, unsigned int addr);
+	int (*get_tdp_level)(void);
 };
 
 extern struct kvm_x86_ops *kvm_x86_ops;
@@ -433,6 +437,9 @@ void kvm_mmu_destroy(struct kvm_vcpu *vcpu);
 int kvm_mmu_create(struct kvm_vcpu *vcpu);
 int kvm_mmu_setup(struct kvm_vcpu *vcpu);
 void kvm_mmu_set_nonpresent_ptes(u64 trap_pte, u64 notrap_pte);
+void kvm_mmu_set_base_ptes(u64 base_pte);
+void kvm_mmu_set_mask_ptes(u64 user_mask, u64 accessed_mask,
+		u64 dirty_mask, u64 nx_mask, u64 x_mask);
 
 int kvm_mmu_reset_context(struct kvm_vcpu *vcpu);
 void kvm_mmu_slot_remove_write_access(struct kvm *kvm, int slot);
@@ -620,7 +627,7 @@ static inline void fx_restore(struct i387_fxsave_struct *image)
 	asm("fxrstor (%0)":: "r" (image));
 }
 
-static inline void fpu_init(void)
+static inline void fx_finit(void)
 {
 	asm("finit");
 }
@@ -644,6 +651,7 @@ static inline void kvm_inject_gp(struct kvm_vcpu *vcpu, u32 error_code)
 #define ASM_VMX_VMWRITE_RSP_RDX ".byte 0x0f, 0x79, 0xd4"
 #define ASM_VMX_VMXOFF ".byte 0x0f, 0x01, 0xc4"
 #define ASM_VMX_VMXON_RAX ".byte 0xf3, 0x0f, 0xc7, 0x30"
+#define ASM_VMX_INVEPT ".byte 0x66, 0x0f, 0x38, 0x80, 0x08"
 #define ASM_VMX_INVVPID ".byte 0x66, 0x0f, 0x38, 0x81, 0x08"
 
 #define MSR_IA32_TIME_STAMP_COUNTER 0x010
......
@@ -522,6 +522,7 @@ unsigned long gfn_to_hva(struct kvm *kvm, gfn_t gfn)
 		return bad_hva();
 	return (slot->userspace_addr + (gfn - slot->base_gfn) * PAGE_SIZE);
 }
+EXPORT_SYMBOL_GPL(gfn_to_hva);
 
 /*
  * Requires current->mm->mmap_sem to be held
......