Commit 96971e9a authored by Linus Torvalds

Merge tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm

Pull kvm fixes from Paolo Bonzini:
 "This is a pretty large update.  I think it is roughly as big as what I
  usually had for the _whole_ rc period.

  There are a few bad bugs where the guest can OOPS or crash the host.
  We have also started looking at attack models for nested
  virtualization; bugs that usually result in the guest ring 0 crashing
  itself become more worrisome if you have nested virtualization,
  because the nested guest might bring down the non-nested guest as
  well.  For current uses of nested virtualization these do not really
  have a security impact, but you never know and bugs are bugs
  nevertheless.

  A lot of these bugs are in 3.17 too, resulting in a large number of
  stable@ Ccs.  I checked that all the patches apply there with no
  conflicts"

* tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm:
  kvm: vfio: fix unregister kvm_device_ops of vfio
  KVM: x86: Wrong assertion on paging_tmpl.h
  kvm: fix excessive pages un-pinning in kvm_iommu_map error path.
  KVM: x86: PREFETCH and HINT_NOP should have SrcMem flag
  KVM: x86: Emulator does not decode clflush well
  KVM: emulate: avoid accessing NULL ctxt->memopp
  KVM: x86: Decoding guest instructions which cross page boundary may fail
  kvm: x86: don't kill guest on unknown exit reason
  kvm: vmx: handle invvpid vm exit gracefully
  KVM: x86: Handle errors when RIP is set during far jumps
  KVM: x86: Emulator fixes for eip canonical checks on near branches
  KVM: x86: Fix wrong masking on relative jump/call
  KVM: x86: Improve thread safety in pit
  KVM: x86: Prevent host from panicking on shared MSR writes.
  KVM: x86: Check non-canonical addresses upon WRMSR
parents 20ca57cd 571ee1b6
@@ -989,6 +989,20 @@ static inline void kvm_inject_gp(struct kvm_vcpu *vcpu, u32 error_code)
 	kvm_queue_exception_e(vcpu, GP_VECTOR, error_code);
 }

+static inline u64 get_canonical(u64 la)
+{
+	return ((int64_t)la << 16) >> 16;
+}
+
+static inline bool is_noncanonical_address(u64 la)
+{
+#ifdef CONFIG_X86_64
+	return get_canonical(la) != la;
+#else
+	return false;
+#endif
+}
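Editor's note: for readers unfamiliar with x86-64 canonical addresses, the helper above just sign-extends bit 47 through bits 63:48. A minimal standalone illustration (plain userspace C, not kernel code):

#include <stdint.h>
#include <stdio.h>

/* Same arithmetic as get_canonical() above: with 48 implemented
 * virtual-address bits, bits 63:48 must be copies of bit 47. */
static uint64_t canon(uint64_t la)
{
	return (uint64_t)(((int64_t)la << 16) >> 16);
}

int main(void)
{
	uint64_t ok  = 0xffff800000000000ULL; /* bit 47 set, bits 63:48 set */
	uint64_t bad = 0x0000900000000000ULL; /* bit 47 set, bits 63:48 clear */

	printf("%d\n", canon(ok)  != ok);  /* 0: canonical */
	printf("%d\n", canon(bad) != bad); /* 1: non-canonical */
	return 0;
}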
 #define TSS_IOPB_BASE_OFFSET 0x66
 #define TSS_BASE_SIZE 0x68
 #define TSS_IOPB_SIZE (65536 / 8)
@@ -1050,7 +1064,7 @@ void kvm_arch_mmu_notifier_invalidate_page(struct kvm *kvm,
 					   unsigned long address);

 void kvm_define_shared_msr(unsigned index, u32 msr);
-void kvm_set_shared_msr(unsigned index, u64 val, u64 mask);
+int kvm_set_shared_msr(unsigned index, u64 val, u64 mask);
 bool kvm_is_linear_rip(struct kvm_vcpu *vcpu, unsigned long linear_rip);
...
@@ -67,6 +67,7 @@
 #define EXIT_REASON_EPT_MISCONFIG      49
 #define EXIT_REASON_INVEPT             50
 #define EXIT_REASON_PREEMPTION_TIMER   52
+#define EXIT_REASON_INVVPID            53
 #define EXIT_REASON_WBINVD             54
 #define EXIT_REASON_XSETBV             55
 #define EXIT_REASON_APIC_WRITE         56
@@ -114,6 +115,7 @@
 	{ EXIT_REASON_EOI_INDUCED,           "EOI_INDUCED" }, \
 	{ EXIT_REASON_INVALID_STATE,         "INVALID_STATE" }, \
 	{ EXIT_REASON_INVD,                  "INVD" }, \
+	{ EXIT_REASON_INVVPID,               "INVVPID" }, \
 	{ EXIT_REASON_INVPCID,               "INVPCID" }
 #endif /* _UAPIVMX_H */
...
@@ -504,11 +504,6 @@ static void rsp_increment(struct x86_emulate_ctxt *ctxt, int inc)
 	masked_increment(reg_rmw(ctxt, VCPU_REGS_RSP), stack_mask(ctxt), inc);
 }

-static inline void jmp_rel(struct x86_emulate_ctxt *ctxt, int rel)
-{
-	register_address_increment(ctxt, &ctxt->_eip, rel);
-}
-
 static u32 desc_limit_scaled(struct desc_struct *desc)
 {
 	u32 limit = get_desc_limit(desc);
@@ -569,6 +564,38 @@ static int emulate_nm(struct x86_emulate_ctxt *ctxt)
 	return emulate_exception(ctxt, NM_VECTOR, 0, false);
 }

+static inline int assign_eip_far(struct x86_emulate_ctxt *ctxt, ulong dst,
+				 int cs_l)
+{
+	switch (ctxt->op_bytes) {
+	case 2:
+		ctxt->_eip = (u16)dst;
+		break;
+	case 4:
+		ctxt->_eip = (u32)dst;
+		break;
+	case 8:
+		if ((cs_l && is_noncanonical_address(dst)) ||
+		    (!cs_l && (dst & ~(u32)-1)))
+			return emulate_gp(ctxt, 0);
+		ctxt->_eip = dst;
+		break;
+	default:
+		WARN(1, "unsupported eip assignment size\n");
+	}
+	return X86EMUL_CONTINUE;
+}
+
+static inline int assign_eip_near(struct x86_emulate_ctxt *ctxt, ulong dst)
+{
+	return assign_eip_far(ctxt, dst, ctxt->mode == X86EMUL_MODE_PROT64);
+}
+
+static inline int jmp_rel(struct x86_emulate_ctxt *ctxt, int rel)
+{
+	return assign_eip_near(ctxt, ctxt->_eip + rel);
+}
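Editor's sketch: assign_eip_far() has three cases keyed on operand size. With 2- or 4-byte operands the target is truncated, so it can never be non-canonical; only an 8-byte target in a 64-bit code segment needs the canonical check. Modeled on plain integers (hypothetical value, not kernel code):

#include <stdint.h>
#include <stdio.h>

int main(void)
{
	uint64_t dst = 0x0000900012345678ULL; /* non-canonical as 64 bits */

	printf("%#llx\n", (unsigned long long)(uint16_t)dst); /* 0x5678, op_bytes == 2 */
	printf("%#llx\n", (unsigned long long)(uint32_t)dst); /* 0x12345678, op_bytes == 4 */
	/* op_bytes == 8 with cs.l set: the value is used as-is, so the
	 * emulator must inject #GP(0) instead of loading a bad RIP. */
	return 0;
}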
 static u16 get_segment_selector(struct x86_emulate_ctxt *ctxt, unsigned seg)
 {
 	u16 selector;
@@ -751,8 +778,10 @@ static int __do_insn_fetch_bytes(struct x86_emulate_ctxt *ctxt, int op_size)
 static __always_inline int do_insn_fetch_bytes(struct x86_emulate_ctxt *ctxt,
 					       unsigned size)
 {
-	if (unlikely(ctxt->fetch.end - ctxt->fetch.ptr < size))
-		return __do_insn_fetch_bytes(ctxt, size);
+	unsigned done_size = ctxt->fetch.end - ctxt->fetch.ptr;
+
+	if (unlikely(done_size < size))
+		return __do_insn_fetch_bytes(ctxt, size - done_size);
 	else
 		return X86EMUL_CONTINUE;
 }
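Editor's sketch of why this fix matters: when an instruction crosses a page boundary some bytes are already buffered, and the old code re-requested the full size from the slow path, which could fail even though only the tail was missing. A toy model (hypothetical names, not the emulator's types):

#include <stdio.h>

/* 'have' bytes already buffered, the instruction needs 'need' in total. */
static unsigned slow_path_request(unsigned have, unsigned need, int fixed)
{
	return fixed ? need - have /* fetch only the missing bytes */
		     : need;       /* old behavior: re-request everything */
}

int main(void)
{
	/* 4 bytes buffered at the end of a page, 7-byte instruction. */
	printf("old: %u new: %u\n",
	       slow_path_request(4, 7, 0),  /* old: 7 */
	       slow_path_request(4, 7, 1)); /* new: 3 */
	return 0;
}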
@@ -1416,7 +1445,9 @@ static int write_segment_descriptor(struct x86_emulate_ctxt *ctxt,
 /* Does not support long mode */
 static int __load_segment_descriptor(struct x86_emulate_ctxt *ctxt,
-				     u16 selector, int seg, u8 cpl, bool in_task_switch)
+				     u16 selector, int seg, u8 cpl,
+				     bool in_task_switch,
+				     struct desc_struct *desc)
 {
 	struct desc_struct seg_desc, old_desc;
 	u8 dpl, rpl;
@@ -1557,6 +1588,8 @@ static int __load_segment_descriptor(struct x86_emulate_ctxt *ctxt,
 	}
 load:
 	ctxt->ops->set_segment(ctxt, selector, &seg_desc, base3, seg);
+	if (desc)
+		*desc = seg_desc;
 	return X86EMUL_CONTINUE;
 exception:
 	return emulate_exception(ctxt, err_vec, err_code, true);
@@ -1566,7 +1599,7 @@ static int load_segment_descriptor(struct x86_emulate_ctxt *ctxt,
 				   u16 selector, int seg)
 {
 	u8 cpl = ctxt->ops->cpl(ctxt);
-	return __load_segment_descriptor(ctxt, selector, seg, cpl, false);
+	return __load_segment_descriptor(ctxt, selector, seg, cpl, false, NULL);
 }
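Editor's note: the new desc argument is an optional out-parameter. The far-branch emulators need the loaded descriptor (its .l bit feeds the canonical check in assign_eip_far), while every other caller passes NULL. The pattern in miniature (standalone sketch, not kernel code):

#include <stddef.h>
#include <stdio.h>

struct desc { int l; }; /* stand-in for struct desc_struct */

static int load_seg(int sel, struct desc *out)
{
	struct desc d = { .l = sel & 1 }; /* pretend descriptor lookup */

	if (out)	/* report the descriptor only if the caller asked */
		*out = d;
	return 0;
}

int main(void)
{
	struct desc d;

	load_seg(3, &d);     /* caller that wants the descriptor */
	load_seg(3, NULL);   /* caller that does not */
	printf("%d\n", d.l); /* 1 */
	return 0;
}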
 static void write_register_operand(struct operand *op)
@@ -1960,17 +1993,31 @@ static int em_iret(struct x86_emulate_ctxt *ctxt)
 static int em_jmp_far(struct x86_emulate_ctxt *ctxt)
 {
 	int rc;
-	unsigned short sel;
+	unsigned short sel, old_sel;
+	struct desc_struct old_desc, new_desc;
+	const struct x86_emulate_ops *ops = ctxt->ops;
+	u8 cpl = ctxt->ops->cpl(ctxt);
+
+	/* Assignment of RIP may only fail in 64-bit mode */
+	if (ctxt->mode == X86EMUL_MODE_PROT64)
+		ops->get_segment(ctxt, &old_sel, &old_desc, NULL,
+				 VCPU_SREG_CS);

 	memcpy(&sel, ctxt->src.valptr + ctxt->op_bytes, 2);

-	rc = load_segment_descriptor(ctxt, sel, VCPU_SREG_CS);
+	rc = __load_segment_descriptor(ctxt, sel, VCPU_SREG_CS, cpl, false,
+				       &new_desc);
 	if (rc != X86EMUL_CONTINUE)
 		return rc;

-	ctxt->_eip = 0;
-	memcpy(&ctxt->_eip, ctxt->src.valptr, ctxt->op_bytes);
-	return X86EMUL_CONTINUE;
+	rc = assign_eip_far(ctxt, ctxt->src.val, new_desc.l);
+	if (rc != X86EMUL_CONTINUE) {
+		WARN_ON(ctxt->mode != X86EMUL_MODE_PROT64);
+		/* assigning eip failed; restore the old cs */
+		ops->set_segment(ctxt, old_sel, &old_desc, 0, VCPU_SREG_CS);
+		return rc;
+	}
+	return rc;
 }
 static int em_grp45(struct x86_emulate_ctxt *ctxt)
@@ -1981,13 +2028,15 @@ static int em_grp45(struct x86_emulate_ctxt *ctxt)
 	case 2: /* call near abs */ {
 		long int old_eip;
 		old_eip = ctxt->_eip;
-		ctxt->_eip = ctxt->src.val;
+		rc = assign_eip_near(ctxt, ctxt->src.val);
+		if (rc != X86EMUL_CONTINUE)
+			break;
 		ctxt->src.val = old_eip;
 		rc = em_push(ctxt);
 		break;
 	}
 	case 4: /* jmp abs */
-		ctxt->_eip = ctxt->src.val;
+		rc = assign_eip_near(ctxt, ctxt->src.val);
 		break;
 	case 5: /* jmp far */
 		rc = em_jmp_far(ctxt);
@@ -2022,30 +2071,47 @@ static int em_cmpxchg8b(struct x86_emulate_ctxt *ctxt)
 static int em_ret(struct x86_emulate_ctxt *ctxt)
 {
-	ctxt->dst.type = OP_REG;
-	ctxt->dst.addr.reg = &ctxt->_eip;
-	ctxt->dst.bytes = ctxt->op_bytes;
-	return em_pop(ctxt);
+	int rc;
+	unsigned long eip;
+
+	rc = emulate_pop(ctxt, &eip, ctxt->op_bytes);
+	if (rc != X86EMUL_CONTINUE)
+		return rc;
+
+	return assign_eip_near(ctxt, eip);
 }
 static int em_ret_far(struct x86_emulate_ctxt *ctxt)
 {
 	int rc;
-	unsigned long cs;
+	unsigned long eip, cs;
+	u16 old_cs;
 	int cpl = ctxt->ops->cpl(ctxt);
+	struct desc_struct old_desc, new_desc;
+	const struct x86_emulate_ops *ops = ctxt->ops;

-	rc = emulate_pop(ctxt, &ctxt->_eip, ctxt->op_bytes);
+	if (ctxt->mode == X86EMUL_MODE_PROT64)
+		ops->get_segment(ctxt, &old_cs, &old_desc, NULL,
+				 VCPU_SREG_CS);
+
+	rc = emulate_pop(ctxt, &eip, ctxt->op_bytes);
 	if (rc != X86EMUL_CONTINUE)
 		return rc;
-	if (ctxt->op_bytes == 4)
-		ctxt->_eip = (u32)ctxt->_eip;
 	rc = emulate_pop(ctxt, &cs, ctxt->op_bytes);
 	if (rc != X86EMUL_CONTINUE)
 		return rc;
 	/* Outer-privilege level return is not implemented */
 	if (ctxt->mode >= X86EMUL_MODE_PROT16 && (cs & 3) > cpl)
 		return X86EMUL_UNHANDLEABLE;
-	rc = load_segment_descriptor(ctxt, (u16)cs, VCPU_SREG_CS);
+	rc = __load_segment_descriptor(ctxt, (u16)cs, VCPU_SREG_CS, 0, false,
+				       &new_desc);
+	if (rc != X86EMUL_CONTINUE)
+		return rc;
+	rc = assign_eip_far(ctxt, eip, new_desc.l);
+	if (rc != X86EMUL_CONTINUE) {
+		WARN_ON(ctxt->mode != X86EMUL_MODE_PROT64);
+		ops->set_segment(ctxt, old_cs, &old_desc, 0, VCPU_SREG_CS);
+	}
 	return rc;
 }
@@ -2306,7 +2372,7 @@ static int em_sysexit(struct x86_emulate_ctxt *ctxt)
 {
 	const struct x86_emulate_ops *ops = ctxt->ops;
 	struct desc_struct cs, ss;
-	u64 msr_data;
+	u64 msr_data, rcx, rdx;
 	int usermode;
 	u16 cs_sel = 0, ss_sel = 0;
@@ -2322,6 +2388,9 @@ static int em_sysexit(struct x86_emulate_ctxt *ctxt)
 	else
 		usermode = X86EMUL_MODE_PROT32;

+	rcx = reg_read(ctxt, VCPU_REGS_RCX);
+	rdx = reg_read(ctxt, VCPU_REGS_RDX);
+
 	cs.dpl = 3;
 	ss.dpl = 3;
 	ops->get_msr(ctxt, MSR_IA32_SYSENTER_CS, &msr_data);
@@ -2339,6 +2408,9 @@ static int em_sysexit(struct x86_emulate_ctxt *ctxt)
 		ss_sel = cs_sel + 8;
 		cs.d = 0;
 		cs.l = 1;
+		if (is_noncanonical_address(rcx) ||
+		    is_noncanonical_address(rdx))
+			return emulate_gp(ctxt, 0);
 		break;
 	}
 	cs_sel |= SELECTOR_RPL_MASK;
@@ -2347,8 +2419,8 @@ static int em_sysexit(struct x86_emulate_ctxt *ctxt)
 	ops->set_segment(ctxt, cs_sel, &cs, 0, VCPU_SREG_CS);
 	ops->set_segment(ctxt, ss_sel, &ss, 0, VCPU_SREG_SS);

-	ctxt->_eip = reg_read(ctxt, VCPU_REGS_RDX);
-	*reg_write(ctxt, VCPU_REGS_RSP) = reg_read(ctxt, VCPU_REGS_RCX);
+	ctxt->_eip = rdx;
+	*reg_write(ctxt, VCPU_REGS_RSP) = rcx;

 	return X86EMUL_CONTINUE;
 }
@@ -2466,19 +2538,24 @@ static int load_state_from_tss16(struct x86_emulate_ctxt *ctxt,
 	 * Now load segment descriptors. If fault happens at this stage
 	 * it is handled in a context of new task
 	 */
-	ret = __load_segment_descriptor(ctxt, tss->ldt, VCPU_SREG_LDTR, cpl, true);
+	ret = __load_segment_descriptor(ctxt, tss->ldt, VCPU_SREG_LDTR, cpl,
+					true, NULL);
 	if (ret != X86EMUL_CONTINUE)
 		return ret;
-	ret = __load_segment_descriptor(ctxt, tss->es, VCPU_SREG_ES, cpl, true);
+	ret = __load_segment_descriptor(ctxt, tss->es, VCPU_SREG_ES, cpl,
+					true, NULL);
 	if (ret != X86EMUL_CONTINUE)
 		return ret;
-	ret = __load_segment_descriptor(ctxt, tss->cs, VCPU_SREG_CS, cpl, true);
+	ret = __load_segment_descriptor(ctxt, tss->cs, VCPU_SREG_CS, cpl,
+					true, NULL);
 	if (ret != X86EMUL_CONTINUE)
 		return ret;
-	ret = __load_segment_descriptor(ctxt, tss->ss, VCPU_SREG_SS, cpl, true);
+	ret = __load_segment_descriptor(ctxt, tss->ss, VCPU_SREG_SS, cpl,
+					true, NULL);
 	if (ret != X86EMUL_CONTINUE)
 		return ret;
-	ret = __load_segment_descriptor(ctxt, tss->ds, VCPU_SREG_DS, cpl, true);
+	ret = __load_segment_descriptor(ctxt, tss->ds, VCPU_SREG_DS, cpl,
+					true, NULL);
 	if (ret != X86EMUL_CONTINUE)
 		return ret;
@@ -2603,25 +2680,32 @@ static int load_state_from_tss32(struct x86_emulate_ctxt *ctxt,
 	 * Now load segment descriptors. If fault happens at this stage
 	 * it is handled in a context of new task
 	 */
-	ret = __load_segment_descriptor(ctxt, tss->ldt_selector, VCPU_SREG_LDTR, cpl, true);
+	ret = __load_segment_descriptor(ctxt, tss->ldt_selector, VCPU_SREG_LDTR,
+					cpl, true, NULL);
 	if (ret != X86EMUL_CONTINUE)
 		return ret;
-	ret = __load_segment_descriptor(ctxt, tss->es, VCPU_SREG_ES, cpl, true);
+	ret = __load_segment_descriptor(ctxt, tss->es, VCPU_SREG_ES, cpl,
+					true, NULL);
 	if (ret != X86EMUL_CONTINUE)
 		return ret;
-	ret = __load_segment_descriptor(ctxt, tss->cs, VCPU_SREG_CS, cpl, true);
+	ret = __load_segment_descriptor(ctxt, tss->cs, VCPU_SREG_CS, cpl,
+					true, NULL);
 	if (ret != X86EMUL_CONTINUE)
 		return ret;
-	ret = __load_segment_descriptor(ctxt, tss->ss, VCPU_SREG_SS, cpl, true);
+	ret = __load_segment_descriptor(ctxt, tss->ss, VCPU_SREG_SS, cpl,
+					true, NULL);
 	if (ret != X86EMUL_CONTINUE)
 		return ret;
-	ret = __load_segment_descriptor(ctxt, tss->ds, VCPU_SREG_DS, cpl, true);
+	ret = __load_segment_descriptor(ctxt, tss->ds, VCPU_SREG_DS, cpl,
+					true, NULL);
 	if (ret != X86EMUL_CONTINUE)
 		return ret;
-	ret = __load_segment_descriptor(ctxt, tss->fs, VCPU_SREG_FS, cpl, true);
+	ret = __load_segment_descriptor(ctxt, tss->fs, VCPU_SREG_FS, cpl,
+					true, NULL);
 	if (ret != X86EMUL_CONTINUE)
 		return ret;
-	ret = __load_segment_descriptor(ctxt, tss->gs, VCPU_SREG_GS, cpl, true);
+	ret = __load_segment_descriptor(ctxt, tss->gs, VCPU_SREG_GS, cpl,
+					true, NULL);
 	if (ret != X86EMUL_CONTINUE)
 		return ret;
@@ -2888,10 +2972,13 @@ static int em_aad(struct x86_emulate_ctxt *ctxt)
 static int em_call(struct x86_emulate_ctxt *ctxt)
 {
+	int rc;
 	long rel = ctxt->src.val;

 	ctxt->src.val = (unsigned long)ctxt->_eip;
-	jmp_rel(ctxt, rel);
+	rc = jmp_rel(ctxt, rel);
+	if (rc != X86EMUL_CONTINUE)
+		return rc;
 	return em_push(ctxt);
 }
@@ -2900,34 +2987,50 @@ static int em_call_far(struct x86_emulate_ctxt *ctxt)
 	u16 sel, old_cs;
 	ulong old_eip;
 	int rc;
+	struct desc_struct old_desc, new_desc;
+	const struct x86_emulate_ops *ops = ctxt->ops;
+	int cpl = ctxt->ops->cpl(ctxt);

-	old_cs = get_segment_selector(ctxt, VCPU_SREG_CS);
 	old_eip = ctxt->_eip;
+	ops->get_segment(ctxt, &old_cs, &old_desc, NULL, VCPU_SREG_CS);

 	memcpy(&sel, ctxt->src.valptr + ctxt->op_bytes, 2);
-	if (load_segment_descriptor(ctxt, sel, VCPU_SREG_CS))
+	rc = __load_segment_descriptor(ctxt, sel, VCPU_SREG_CS, cpl, false,
+				       &new_desc);
+	if (rc != X86EMUL_CONTINUE)
 		return X86EMUL_CONTINUE;

-	ctxt->_eip = 0;
-	memcpy(&ctxt->_eip, ctxt->src.valptr, ctxt->op_bytes);
+	rc = assign_eip_far(ctxt, ctxt->src.val, new_desc.l);
+	if (rc != X86EMUL_CONTINUE)
+		goto fail;

 	ctxt->src.val = old_cs;
 	rc = em_push(ctxt);
 	if (rc != X86EMUL_CONTINUE)
-		return rc;
+		goto fail;

 	ctxt->src.val = old_eip;
-	return em_push(ctxt);
+	rc = em_push(ctxt);
+	/* If we failed, we tainted the memory, but the very least we should
+	   restore cs */
+	if (rc != X86EMUL_CONTINUE)
+		goto fail;
+	return rc;
+fail:
+	ops->set_segment(ctxt, old_cs, &old_desc, 0, VCPU_SREG_CS);
+	return rc;
 }
 static int em_ret_near_imm(struct x86_emulate_ctxt *ctxt)
 {
 	int rc;
+	unsigned long eip;

-	ctxt->dst.type = OP_REG;
-	ctxt->dst.addr.reg = &ctxt->_eip;
-	ctxt->dst.bytes = ctxt->op_bytes;
-	rc = emulate_pop(ctxt, &ctxt->dst.val, ctxt->op_bytes);
+	rc = emulate_pop(ctxt, &eip, ctxt->op_bytes);
+	if (rc != X86EMUL_CONTINUE)
+		return rc;
+	rc = assign_eip_near(ctxt, eip);
 	if (rc != X86EMUL_CONTINUE)
 		return rc;
 	rsp_increment(ctxt, ctxt->src.val);
@@ -3254,20 +3357,24 @@ static int em_lmsw(struct x86_emulate_ctxt *ctxt)
 static int em_loop(struct x86_emulate_ctxt *ctxt)
 {
+	int rc = X86EMUL_CONTINUE;
+
 	register_address_increment(ctxt, reg_rmw(ctxt, VCPU_REGS_RCX), -1);
 	if ((address_mask(ctxt, reg_read(ctxt, VCPU_REGS_RCX)) != 0) &&
 	    (ctxt->b == 0xe2 || test_cc(ctxt->b ^ 0x5, ctxt->eflags)))
-		jmp_rel(ctxt, ctxt->src.val);
+		rc = jmp_rel(ctxt, ctxt->src.val);

-	return X86EMUL_CONTINUE;
+	return rc;
 }

 static int em_jcxz(struct x86_emulate_ctxt *ctxt)
 {
+	int rc = X86EMUL_CONTINUE;
+
 	if (address_mask(ctxt, reg_read(ctxt, VCPU_REGS_RCX)) == 0)
-		jmp_rel(ctxt, ctxt->src.val);
+		rc = jmp_rel(ctxt, ctxt->src.val);

-	return X86EMUL_CONTINUE;
+	return rc;
 }
 static int em_in(struct x86_emulate_ctxt *ctxt)
@@ -3355,6 +3462,12 @@ static int em_bswap(struct x86_emulate_ctxt *ctxt)
 	return X86EMUL_CONTINUE;
 }

+static int em_clflush(struct x86_emulate_ctxt *ctxt)
+{
+	/* emulating clflush regardless of cpuid */
+	return X86EMUL_CONTINUE;
+}
+
 static bool valid_cr(int nr)
 {
 	switch (nr) {
@@ -3693,6 +3806,16 @@ static const struct opcode group11[] = {
 	X7(D(Undefined)),
 };

+static const struct gprefix pfx_0f_ae_7 = {
+	I(SrcMem | ByteOp, em_clflush), N, N, N,
+};
+
+static const struct group_dual group15 = { {
+	N, N, N, N, N, N, N, GP(0, &pfx_0f_ae_7),
+}, {
+	N, N, N, N, N, N, N, N,
+} };
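Editor's note: 0F AE is a "group" opcode, where the reg field of the ModRM byte (bits 5:3) selects the actual instruction; /7 with a memory operand is CLFLUSH, which is why only slot 7 of group15 routes to pfx_0f_ae_7. A standalone illustration of the field extraction (not emulator code):

#include <stdio.h>

int main(void)
{
	unsigned char modrm = 0x38;      /* mod=00 reg=111 rm=000 */
	unsigned mod = modrm >> 6;
	unsigned reg = (modrm >> 3) & 7; /* group sub-opcode */

	/* 0F AE /7 with a memory operand (mod != 3) decodes as clflush. */
	printf("mod=%u reg=%u -> %s\n", mod, reg,
	       (reg == 7 && mod != 3) ? "clflush" : "other");
	return 0;
}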
 static const struct gprefix pfx_0f_6f_0f_7f = {
 	I(Mmx, em_mov), I(Sse | Aligned, em_mov), N, I(Sse | Unaligned, em_mov),
 };
@@ -3901,10 +4024,11 @@ static const struct opcode twobyte_table[256] = {
 	N, I(ImplicitOps | EmulateOnUD, em_syscall),
 	II(ImplicitOps | Priv, em_clts, clts), N,
 	DI(ImplicitOps | Priv, invd), DI(ImplicitOps | Priv, wbinvd), N, N,
-	N, D(ImplicitOps | ModRM), N, N,
+	N, D(ImplicitOps | ModRM | SrcMem | NoAccess), N, N,
 	/* 0x10 - 0x1F */
-	N, N, N, N, N, N, N, N,
-	D(ImplicitOps | ModRM), N, N, N, N, N, N, D(ImplicitOps | ModRM),
+	N, N, N, N, N, N, N, N,
+	D(ImplicitOps | ModRM | SrcMem | NoAccess),
+	N, N, N, N, N, N, D(ImplicitOps | ModRM | SrcMem | NoAccess),
 	/* 0x20 - 0x2F */
 	DIP(ModRM | DstMem | Priv | Op3264 | NoMod, cr_read, check_cr_read),
 	DIP(ModRM | DstMem | Priv | Op3264 | NoMod, dr_read, check_dr_read),
@@ -3956,7 +4080,7 @@ static const struct opcode twobyte_table[256] = {
 	F(DstMem | SrcReg | ModRM | BitOp | Lock | PageTable, em_bts),
 	F(DstMem | SrcReg | Src2ImmByte | ModRM, em_shrd),
 	F(DstMem | SrcReg | Src2CL | ModRM, em_shrd),
-	D(ModRM), F(DstReg | SrcMem | ModRM, em_imul),
+	GD(0, &group15), F(DstReg | SrcMem | ModRM, em_imul),
 	/* 0xB0 - 0xB7 */
 	I2bv(DstMem | SrcReg | ModRM | Lock | PageTable, em_cmpxchg),
 	I(DstReg | SrcMemFAddr | ModRM | Src2SS, em_lseg),
@@ -4473,10 +4597,10 @@ int x86_decode_insn(struct x86_emulate_ctxt *ctxt, void *insn, int insn_len)
 	/* Decode and fetch the destination operand: register or memory. */
 	rc = decode_operand(ctxt, &ctxt->dst, (ctxt->d >> DstShift) & OpMask);

-done:
 	if (ctxt->rip_relative)
 		ctxt->memopp->addr.mem.ea += ctxt->_eip;

+done:
 	return (rc != X86EMUL_CONTINUE) ? EMULATION_FAILED : EMULATION_OK;
 }
@@ -4726,7 +4850,7 @@ int x86_emulate_insn(struct x86_emulate_ctxt *ctxt)
 		break;
 	case 0x70 ... 0x7f: /* jcc (short) */
 		if (test_cc(ctxt->b, ctxt->eflags))
-			jmp_rel(ctxt, ctxt->src.val);
+			rc = jmp_rel(ctxt, ctxt->src.val);
 		break;
 	case 0x8d: /* lea r16/r32, m */
 		ctxt->dst.val = ctxt->src.addr.mem.ea;
@@ -4756,7 +4880,7 @@ int x86_emulate_insn(struct x86_emulate_ctxt *ctxt)
 		break;
 	case 0xe9: /* jmp rel */
 	case 0xeb: /* jmp rel short */
-		jmp_rel(ctxt, ctxt->src.val);
+		rc = jmp_rel(ctxt, ctxt->src.val);
 		ctxt->dst.type = OP_NONE; /* Disable writeback. */
 		break;
 	case 0xf4: /* hlt */
@@ -4881,13 +5005,11 @@ int x86_emulate_insn(struct x86_emulate_ctxt *ctxt)
 		break;
 	case 0x80 ... 0x8f: /* jnz rel, etc*/
 		if (test_cc(ctxt->b, ctxt->eflags))
-			jmp_rel(ctxt, ctxt->src.val);
+			rc = jmp_rel(ctxt, ctxt->src.val);
 		break;
 	case 0x90 ... 0x9f: /* setcc r/m8 */
 		ctxt->dst.val = test_cc(ctxt->b, ctxt->eflags);
 		break;
-	case 0xae: /* clflush */
-		break;
 	case 0xb6 ... 0xb7: /* movzx */
 		ctxt->dst.bytes = ctxt->op_bytes;
 		ctxt->dst.val = (ctxt->src.bytes == 1) ? (u8) ctxt->src.val
...
@@ -262,8 +262,10 @@ void __kvm_migrate_pit_timer(struct kvm_vcpu *vcpu)
 		return;

 	timer = &pit->pit_state.timer;
+	mutex_lock(&pit->pit_state.lock);
 	if (hrtimer_cancel(timer))
 		hrtimer_start_expires(timer, HRTIMER_MODE_ABS);
+	mutex_unlock(&pit->pit_state.lock);
 }

 static void destroy_pit_timer(struct kvm_pit *pit)
...
@@ -298,7 +298,7 @@ static int FNAME(walk_addr_generic)(struct guest_walker *walker,
 	}
 #endif
 	walker->max_level = walker->level;
-	ASSERT(!is_long_mode(vcpu) && is_pae(vcpu));
+	ASSERT(!(is_long_mode(vcpu) && !is_pae(vcpu)));

 	accessed_dirty = PT_GUEST_ACCESSED_MASK;
 	pt_access = pte_access = ACC_ALL;
...
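Editor's note: the old assertion was simply miswritten. "!is_long_mode(vcpu) && is_pae(vcpu)" is false for every long-mode guest, so the ASSERT fired on valid configurations; the invariant actually being asserted is "never long mode without PAE". Enumerating the four combinations makes the difference plain (standalone C):

#include <stdio.h>

int main(void)
{
	for (int lm = 0; lm <= 1; lm++)
		for (int pae = 0; pae <= 1; pae++)
			printf("lm=%d pae=%d old=%d new=%d\n", lm, pae,
			       !lm && pae,     /* old: 0 whenever lm=1 */
			       !(lm && !pae)); /* new: 0 only for lm=1, pae=0 */
	return 0;
}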
@@ -3251,7 +3251,7 @@ static int wrmsr_interception(struct vcpu_svm *svm)
 	msr.host_initiated = false;

 	svm->next_rip = kvm_rip_read(&svm->vcpu) + 2;
-	if (svm_set_msr(&svm->vcpu, &msr)) {
+	if (kvm_set_msr(&svm->vcpu, &msr)) {
 		trace_kvm_msr_write_ex(ecx, data);
 		kvm_inject_gp(&svm->vcpu, 0);
 	} else {
@@ -3551,9 +3551,9 @@ static int handle_exit(struct kvm_vcpu *vcpu)

 	if (exit_code >= ARRAY_SIZE(svm_exit_handlers)
 	    || !svm_exit_handlers[exit_code]) {
-		kvm_run->exit_reason = KVM_EXIT_UNKNOWN;
-		kvm_run->hw.hardware_exit_reason = exit_code;
-		return 0;
+		WARN_ONCE(1, "svm: unexpected exit reason 0x%x\n", exit_code);
+		kvm_queue_exception(vcpu, UD_VECTOR);
+		return 1;
 	}

 	return svm_exit_handlers[exit_code](svm);
...
@@ -2659,12 +2659,15 @@ static int vmx_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
 	default:
 		msr = find_msr_entry(vmx, msr_index);
 		if (msr) {
+			u64 old_msr_data = msr->data;
 			msr->data = data;
 			if (msr - vmx->guest_msrs < vmx->save_nmsrs) {
 				preempt_disable();
-				kvm_set_shared_msr(msr->index, msr->data,
-						   msr->mask);
+				ret = kvm_set_shared_msr(msr->index, msr->data,
+							 msr->mask);
 				preempt_enable();
+				if (ret)
+					msr->data = old_msr_data;
 			}
 			break;
 		}
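Editor's note: this pairs with the new int return of kvm_set_shared_msr(). If the hardware WRMSR faults, the cached msr->data must be rolled back so the software cache never disagrees with the real register. The rollback pattern in miniature (hypothetical write_hw(), not kernel code):

#include <stdio.h>

static unsigned long long cached = 0x10;

static int write_hw(unsigned long long v)
{
	return v >> 63 ? 1 : 0; /* pretend: hardware rejects bit 63 */
}

static int set_msr(unsigned long long data)
{
	unsigned long long old = cached;

	cached = data;
	if (write_hw(data)) {
		cached = old; /* keep the cache consistent on failure */
		return 1;
	}
	return 0;
}

int main(void)
{
	/* prints: 1 cached=0x10 (write refused, cache restored) */
	printf("%d cached=%#llx\n",
	       set_msr(0x8000000000000000ULL), cached);
	return 0;
}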
@@ -5291,7 +5294,7 @@ static int handle_wrmsr(struct kvm_vcpu *vcpu)
 	msr.data = data;
 	msr.index = ecx;
 	msr.host_initiated = false;
-	if (vmx_set_msr(vcpu, &msr) != 0) {
+	if (kvm_set_msr(vcpu, &msr) != 0) {
 		trace_kvm_msr_write_ex(ecx, data);
 		kvm_inject_gp(vcpu, 0);
 		return 1;
@@ -6743,6 +6746,12 @@ static int handle_invept(struct kvm_vcpu *vcpu)
 	return 1;
 }

+static int handle_invvpid(struct kvm_vcpu *vcpu)
+{
+	kvm_queue_exception(vcpu, UD_VECTOR);
+	return 1;
+}
+
 /*
  * The exit handlers return 1 if the exit was handled fully and guest execution
  * may resume. Otherwise they set the kvm_run parameter to indicate what needs
@@ -6788,6 +6797,7 @@ static int (*const kvm_vmx_exit_handlers[])(struct kvm_vcpu *vcpu) = {
 	[EXIT_REASON_MWAIT_INSTRUCTION]       = handle_mwait,
 	[EXIT_REASON_MONITOR_INSTRUCTION]     = handle_monitor,
 	[EXIT_REASON_INVEPT]                  = handle_invept,
+	[EXIT_REASON_INVVPID]                 = handle_invvpid,
 };

 static const int kvm_vmx_max_exit_handlers =
@@ -7023,7 +7033,7 @@ static bool nested_vmx_exit_handled(struct kvm_vcpu *vcpu)
 	case EXIT_REASON_VMPTRST: case EXIT_REASON_VMREAD:
 	case EXIT_REASON_VMRESUME: case EXIT_REASON_VMWRITE:
 	case EXIT_REASON_VMOFF: case EXIT_REASON_VMON:
-	case EXIT_REASON_INVEPT:
+	case EXIT_REASON_INVEPT: case EXIT_REASON_INVVPID:
 		/*
 		 * VMX instructions trap unconditionally. This allows L1 to
 		 * emulate them for its L2 guest, i.e., allows 3-level nesting!
@@ -7164,10 +7174,10 @@ static int vmx_handle_exit(struct kvm_vcpu *vcpu)
 	    && kvm_vmx_exit_handlers[exit_reason])
 		return kvm_vmx_exit_handlers[exit_reason](vcpu);
 	else {
-		vcpu->run->exit_reason = KVM_EXIT_UNKNOWN;
-		vcpu->run->hw.hardware_exit_reason = exit_reason;
+		WARN_ONCE(1, "vmx: unexpected exit reason 0x%x\n", exit_reason);
+		kvm_queue_exception(vcpu, UD_VECTOR);
+		return 1;
 	}
-	return 0;
 }

 static void update_cr8_intercept(struct kvm_vcpu *vcpu, int tpr, int irr)
...
@@ -229,20 +229,25 @@ static void kvm_shared_msr_cpu_online(void)
 		shared_msr_update(i, shared_msrs_global.msrs[i]);
 }

-void kvm_set_shared_msr(unsigned slot, u64 value, u64 mask)
+int kvm_set_shared_msr(unsigned slot, u64 value, u64 mask)
 {
 	unsigned int cpu = smp_processor_id();
 	struct kvm_shared_msrs *smsr = per_cpu_ptr(shared_msrs, cpu);
+	int err;

 	if (((value ^ smsr->values[slot].curr) & mask) == 0)
-		return;
+		return 0;
 	smsr->values[slot].curr = value;
-	wrmsrl(shared_msrs_global.msrs[slot], value);
+	err = wrmsrl_safe(shared_msrs_global.msrs[slot], value);
+	if (err)
+		return 1;
+
 	if (!smsr->registered) {
 		smsr->urn.on_user_return = kvm_on_user_return;
 		user_return_notifier_register(&smsr->urn);
 		smsr->registered = true;
 	}
+	return 0;
 }
 EXPORT_SYMBOL_GPL(kvm_set_shared_msr);
@@ -987,7 +992,6 @@ void kvm_enable_efer_bits(u64 mask)
 }
 EXPORT_SYMBOL_GPL(kvm_enable_efer_bits);

-
 /*
  * Writes msr value into the appropriate "register".
  * Returns 0 on success, non-0 otherwise.
@@ -995,8 +999,34 @@ EXPORT_SYMBOL_GPL(kvm_enable_efer_bits);
  */
 int kvm_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr)
 {
+	switch (msr->index) {
+	case MSR_FS_BASE:
+	case MSR_GS_BASE:
+	case MSR_KERNEL_GS_BASE:
+	case MSR_CSTAR:
+	case MSR_LSTAR:
+		if (is_noncanonical_address(msr->data))
+			return 1;
+		break;
+	case MSR_IA32_SYSENTER_EIP:
+	case MSR_IA32_SYSENTER_ESP:
+		/*
+		 * IA32_SYSENTER_ESP and IA32_SYSENTER_EIP cause #GP if a
+		 * non-canonical address is written on Intel but not on
+		 * AMD (which ignores the top 32-bits, because it does
+		 * not implement 64-bit SYSENTER).
+		 *
+		 * 64-bit code should hence be able to write a non-canonical
+		 * value on AMD. Making the address canonical ensures that
+		 * vmentry does not fail on Intel after writing a non-canonical
+		 * value, and that something deterministic happens if the guest
+		 * invokes 64-bit SYSENTER.
+		 */
+		msr->data = get_canonical(msr->data);
+	}
 	return kvm_x86_ops->set_msr(vcpu, msr);
 }
+EXPORT_SYMBOL_GPL(kvm_set_msr);

 /*
  * Adapt set_msr() to msr_io()'s calling convention
...
@@ -1080,6 +1080,7 @@ void kvm_device_get(struct kvm_device *dev);
 void kvm_device_put(struct kvm_device *dev);
 struct kvm_device *kvm_device_from_filp(struct file *filp);
 int kvm_register_device_ops(struct kvm_device_ops *ops, u32 type);
+void kvm_unregister_device_ops(u32 type);

 extern struct kvm_device_ops kvm_mpic_ops;
 extern struct kvm_device_ops kvm_xics_ops;
...
@@ -43,13 +43,13 @@ static void kvm_iommu_put_pages(struct kvm *kvm,
 				gfn_t base_gfn, unsigned long npages);

 static pfn_t kvm_pin_pages(struct kvm_memory_slot *slot, gfn_t gfn,
-			   unsigned long size)
+			   unsigned long npages)
 {
 	gfn_t end_gfn;
 	pfn_t pfn;

 	pfn = gfn_to_pfn_memslot(slot, gfn);
-	end_gfn = gfn + (size >> PAGE_SHIFT);
+	end_gfn = gfn + npages;
 	gfn += 1;

 	if (is_error_noslot_pfn(pfn))
@@ -119,7 +119,7 @@ int kvm_iommu_map_pages(struct kvm *kvm, struct kvm_memory_slot *slot)
 	 * Pin all pages we are about to map in memory. This is
 	 * important because we unmap and unpin in 4kb steps later.
 	 */
-	pfn = kvm_pin_pages(slot, gfn, page_size);
+	pfn = kvm_pin_pages(slot, gfn, page_size >> PAGE_SHIFT);
 	if (is_error_noslot_pfn(pfn)) {
 		gfn += 1;
 		continue;
@@ -131,7 +131,7 @@ int kvm_iommu_map_pages(struct kvm *kvm, struct kvm_memory_slot *slot)
 	if (r) {
 		printk(KERN_ERR "kvm_iommu_map_address:"
 		       "iommu failed to map pfn=%llx\n", pfn);
-		kvm_unpin_pages(kvm, pfn, page_size);
+		kvm_unpin_pages(kvm, pfn, page_size >> PAGE_SHIFT);
 		goto unmap_pages;
 	}
...
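Editor's note on the un-pinning fix: kvm_unpin_pages() takes a count of pages, but the error path passed page_size in bytes, so a failed 2 MiB huge-page mapping un-pinned far more pages than were ever pinned. The unit mismatch in two lines (standalone C):

#include <stdio.h>

#define PAGE_SHIFT 12

int main(void)
{
	unsigned long page_size = 2UL << 20; /* 2 MiB huge page, in bytes */

	printf("bytes=%lu pages=%lu\n",
	       page_size,                /* what the buggy code passed */
	       page_size >> PAGE_SHIFT); /* what kvm_unpin_pages expects: 512 */
	return 0;
}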
@@ -2354,6 +2354,12 @@ int kvm_register_device_ops(struct kvm_device_ops *ops, u32 type)
 	return 0;
 }

+void kvm_unregister_device_ops(u32 type)
+{
+	if (kvm_device_ops_table[type] != NULL)
+		kvm_device_ops_table[type] = NULL;
+}
+
 static int kvm_ioctl_create_device(struct kvm *kvm,
 				   struct kvm_create_device *cd)
 {
@@ -3328,5 +3334,6 @@ void kvm_exit(void)
 	kvm_arch_exit();
 	kvm_irqfd_exit();
 	free_cpumask_var(cpus_hardware_enabled);
+	kvm_vfio_ops_exit();
 }
 EXPORT_SYMBOL_GPL(kvm_exit);
...
@@ -283,3 +283,8 @@ int kvm_vfio_ops_init(void)
 {
 	return kvm_register_device_ops(&kvm_vfio_ops, KVM_DEV_TYPE_VFIO);
 }
+
+void kvm_vfio_ops_exit(void)
+{
+	kvm_unregister_device_ops(KVM_DEV_TYPE_VFIO);
+}
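Editor's note: the vfio fix gives the device-ops registry a teardown hook so that unloading the kvm module does not leave a dangling pointer in kvm_device_ops_table, which would make re-registration fail on the next load. The register/unregister pairing in miniature (standalone sketch, hypothetical registry):

#include <stddef.h>
#include <stdio.h>

#define DEV_TYPE_VFIO 1

struct dev_ops { const char *name; };

static struct dev_ops *table[8];

static int register_ops(struct dev_ops *ops, unsigned type)
{
	if (table[type] != NULL)
		return -1; /* slot still taken: a second load fails here */
	table[type] = ops;
	return 0;
}

static void unregister_ops(unsigned type)
{
	table[type] = NULL;
}

int main(void)
{
	struct dev_ops vfio = { "kvm-vfio" };

	register_ops(&vfio, DEV_TYPE_VFIO); /* module init */
	unregister_ops(DEV_TYPE_VFIO);      /* module exit (the fix) */
	printf("%d\n", register_ops(&vfio, DEV_TYPE_VFIO)); /* 0: ok again */
	return 0;
}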
@@ -3,11 +3,15 @@
 #ifdef CONFIG_KVM_VFIO
 int kvm_vfio_ops_init(void);
+void kvm_vfio_ops_exit(void);
 #else
 static inline int kvm_vfio_ops_init(void)
 {
 	return 0;
 }
+static inline void kvm_vfio_ops_exit(void)
+{
+}
 #endif

 #endif