Commit ceffb459 authored by Gleb Natapov, committed by Avi Kivity

KVM: Use task switch from emulator.c

Remove old task switch code from x86.c
Signed-off-by: Gleb Natapov <gleb@redhat.com>
Signed-off-by: Marcelo Tosatti <mtosatti@redhat.com>
parent 2e873022
......@@ -2291,6 +2291,7 @@ static int emulator_do_task_switch(struct x86_emulate_ctxt *ctxt,
u16 old_tss_sel = ops->get_segment_selector(VCPU_SREG_TR, ctxt->vcpu);
ulong old_tss_base =
get_cached_descriptor_base(ctxt, ops, VCPU_SREG_TR);
u32 desc_limit;
/* FIXME: old_tss_base == ~0 ? */
......@@ -2311,7 +2312,10 @@ static int emulator_do_task_switch(struct x86_emulate_ctxt *ctxt,
}
}
if (!next_tss_desc.p || desc_limit_scaled(&next_tss_desc) < 0x67) {
desc_limit = desc_limit_scaled(&next_tss_desc);
if (!next_tss_desc.p ||
((desc_limit < 0x67 && (next_tss_desc.type & 8)) ||
desc_limit < 0x2b)) {
kvm_queue_exception_e(ctxt->vcpu, TS_VECTOR,
tss_selector & 0xfffc);
return X86EMUL_PROPAGATE_FAULT;
......
......@@ -4832,557 +4832,30 @@ int kvm_arch_vcpu_ioctl_set_mpstate(struct kvm_vcpu *vcpu,
return 0;
}
/*
 * Translate a raw descriptor-table entry into a struct kvm_segment.
 *
 * @seg_desc:  descriptor to convert
 * @selector:  selector stored in the result; a zero selector marks the
 *             result as unusable
 * @kvm_desct: output structure, filled in field by field
 */
static void seg_desct_to_kvm_desct(struct desc_struct *seg_desc, u16 selector,
				   struct kvm_segment *kvm_desct)
{
	kvm_desct->base = get_desc_base(seg_desc);

	/* When the granularity bit is set, scale the limit by 4K and fill the low 12 bits. */
	kvm_desct->limit = get_desc_limit(seg_desc);
	if (seg_desc->g)
		kvm_desct->limit = (kvm_desct->limit << 12) | 0xfff;

	kvm_desct->selector = selector;

	/* Attribute bits are copied through unchanged. */
	kvm_desct->type    = seg_desc->type;
	kvm_desct->present = seg_desc->p;
	kvm_desct->dpl     = seg_desc->dpl;
	kvm_desct->db      = seg_desc->d;
	kvm_desct->s       = seg_desc->s;
	kvm_desct->l       = seg_desc->l;
	kvm_desct->g       = seg_desc->g;
	kvm_desct->avl     = seg_desc->avl;

	kvm_desct->unusable = !selector;
	kvm_desct->padding = 0;
}
/*
 * Find the descriptor table that @selector indexes and describe it in
 * @dtable.
 *
 * Bit 2 of the selector chooses the table: when clear the GDT is fetched
 * through kvm_x86_ops->get_gdt(); when set the table is described by the
 * current LDTR segment (size 0 if LDTR is unusable).
 */
static void get_segment_descriptor_dtable(struct kvm_vcpu *vcpu,
					  u16 selector,
					  struct desc_ptr *dtable)
{
	struct kvm_segment ldtr;

	if (!(selector & (1 << 2))) {
		kvm_x86_ops->get_gdt(vcpu, dtable);
		return;
	}

	kvm_get_segment(vcpu, &ldtr, VCPU_SREG_LDTR);

	dtable->size = ldtr.unusable ? 0 : ldtr.limit;
	dtable->address = ldtr.base;
}
/*
 * Read the descriptor selected by @selector from the guest's descriptor
 * table into @seg_desc.  Only valid for 8-byte descriptors.
 *
 * Returns the result of kvm_read_guest_virt_system().  If the selector
 * indexes past the end of the table, #GP(selector & 0xfffc) is queued and
 * X86EMUL_PROPAGATE_FAULT is returned.  If the read itself reports
 * X86EMUL_PROPAGATE_FAULT, a page fault for the descriptor address is
 * injected before returning.
 */
static int load_guest_segment_descriptor(struct kvm_vcpu *vcpu, u16 selector,
					 struct desc_struct *seg_desc)
{
	struct desc_ptr dtable;
	u16 index = selector >> 3;
	int ret;
	u32 err;
	gva_t addr;

	get_segment_descriptor_dtable(vcpu, selector, &dtable);

	/* The whole 8-byte entry must lie within the table. */
	if (dtable.size < index * 8 + 7) {
		kvm_queue_exception_e(vcpu, GP_VECTOR, selector & 0xfffc);
		return X86EMUL_PROPAGATE_FAULT;
	}

	/*
	 * Use the same field as the other struct desc_ptr users in this
	 * file ("address"); the previous "base" did not match them.
	 */
	addr = dtable.address + index * 8;
	ret = kvm_read_guest_virt_system(addr, seg_desc, sizeof(*seg_desc),
					 vcpu, &err);
	if (ret == X86EMUL_PROPAGATE_FAULT)
		kvm_inject_page_fault(vcpu, addr, err);

	return ret;
}
/*
 * Write @seg_desc back into the guest descriptor-table slot selected by
 * @selector.  Only valid for 8-byte descriptors.
 *
 * Returns 1 when the slot lies beyond the end of the table, otherwise the
 * result of kvm_write_guest_virt().
 */
static int save_guest_segment_descriptor(struct kvm_vcpu *vcpu, u16 selector,
					 struct desc_struct *seg_desc)
{
	struct desc_ptr table;
	u16 slot = selector >> 3;

	get_segment_descriptor_dtable(vcpu, selector, &table);

	if (slot * 8 + 7 > table.size)
		return 1;

	return kvm_write_guest_virt(table.address + slot * 8, seg_desc,
				    sizeof(*seg_desc), vcpu, NULL);
}
/*
 * Translate the base address stored in @seg_desc (truncated to 32 bits)
 * with kvm_mmu_gva_to_gpa_write() and return the result.
 */
static gpa_t get_tss_base_addr_write(struct kvm_vcpu *vcpu,
				     struct desc_struct *seg_desc)
{
	u32 tss_gva = get_desc_base(seg_desc);

	return kvm_mmu_gva_to_gpa_write(vcpu, tss_gva, NULL);
}
/*
 * Translate the base address stored in @seg_desc (truncated to 32 bits)
 * with kvm_mmu_gva_to_gpa_read() and return the result.
 */
static gpa_t get_tss_base_addr_read(struct kvm_vcpu *vcpu,
				    struct desc_struct *seg_desc)
{
	u32 tss_gva = get_desc_base(seg_desc);

	return kvm_mmu_gva_to_gpa_read(vcpu, tss_gva, NULL);
}
static u16 get_segment_selector(struct kvm_vcpu *vcpu, int seg)
{
struct kvm_segment kvm_seg;
kvm_get_segment(vcpu, &kvm_seg, seg);
return kvm_seg.selector;
}
/*
 * Load @seg the real-mode way: the base is the selector shifted left by
 * four, the limit is 0xffff, and the attributes are fixed (type 3,
 * present, DPL 3, S set).  No descriptor table is consulted.
 *
 * Always returns X86EMUL_CONTINUE.
 */
static int kvm_load_realmode_segment(struct kvm_vcpu *vcpu, u16 selector, int seg)
{
	struct kvm_segment rm_seg = {
		.selector = selector,
		.base     = selector << 4,
		.limit    = 0xffff,
		.type     = 3,
		.s        = 1,
		.dpl      = 3,
		.present  = 1,
		/* db, l, g, avl and unusable stay zero via the initializer */
	};

	kvm_x86_ops->set_segment(vcpu, &rm_seg, seg);

	return X86EMUL_CONTINUE;
}
/*
 * Return 1 if @seg is neither LDTR nor TR and the VM flag is set in the
 * vcpu's rflags; otherwise return 0.
 */
static int is_vm86_segment(struct kvm_vcpu *vcpu, int seg)
{
	/* LDTR and TR never qualify; rflags is not read for them. */
	if (seg == VCPU_SREG_LDTR || seg == VCPU_SREG_TR)
		return 0;

	return (kvm_get_rflags(vcpu) & X86_EFLAGS_VM) != 0;
}
/*
 * Load the descriptor named by `selector` into segment register `seg`.
 *
 * For vm86 segments, or when the vcpu is not in protected mode, the work is
 * delegated to kvm_load_realmode_segment().  Otherwise the descriptor is
 * read from the guest descriptor table, checked against the rules below for
 * the target register, marked accessed (for non-system segments) and
 * installed with kvm_set_segment().
 *
 * Returns X86EMUL_CONTINUE on success.  When a check fails, the exception
 * (err_vec, err_code) is queued and X86EMUL_PROPAGATE_FAULT is returned.
 * A non-zero result from load_guest_segment_descriptor() is returned as is.
 */
int kvm_load_segment_descriptor(struct kvm_vcpu *vcpu, u16 selector, int seg)
{
struct kvm_segment kvm_seg;
struct desc_struct seg_desc;
u8 dpl, rpl, cpl;
/* Until a descriptor has been read, failures raise #GP with error code 0. */
unsigned err_vec = GP_VECTOR;
u32 err_code = 0;
bool null_selector = !(selector & ~0x3); /* 0000-0003 are null */
int ret;
if (is_vm86_segment(vcpu, seg) || !is_protmode(vcpu))
return kvm_load_realmode_segment(vcpu, selector, seg);
/* NULL selector is not valid for TR, CS and SS */
if ((seg == VCPU_SREG_CS || seg == VCPU_SREG_SS || seg == VCPU_SREG_TR)
&& null_selector)
goto exception;
/* TR should be in GDT only */
if (seg == VCPU_SREG_TR && (selector & (1 << 2)))
goto exception;
ret = load_guest_segment_descriptor(vcpu, selector, &seg_desc);
if (ret)
return ret;
seg_desct_to_kvm_desct(&seg_desc, selector, &kvm_seg);
if (null_selector) { /* for NULL selector skip all following checks */
kvm_seg.unusable = 1;
goto load;
}
/* From here on, faults carry the selector (RPL bits cleared) as error code. */
err_code = selector & 0xfffc;
err_vec = GP_VECTOR;
/* can't load a system descriptor into a segment selector */
if (seg <= VCPU_SREG_GS && !kvm_seg.s)
goto exception;
/* Not-present descriptor: SS_VECTOR when loading SS, NP_VECTOR otherwise. */
if (!kvm_seg.present) {
err_vec = (seg == VCPU_SREG_SS) ? SS_VECTOR : NP_VECTOR;
goto exception;
}
rpl = selector & 3;
dpl = kvm_seg.dpl;
cpl = kvm_x86_ops->get_cpl(vcpu);
switch (seg) {
case VCPU_SREG_SS:
/*
 * segment is not a writable data segment or segment
 * selector's RPL != CPL or segment's DPL != CPL
 */
if (rpl != cpl || (kvm_seg.type & 0xa) != 0x2 || dpl != cpl)
goto exception;
break;
case VCPU_SREG_CS:
/* type bit 3 must be set for CS */
if (!(kvm_seg.type & 8))
goto exception;
if (kvm_seg.type & 4) {
/* conforming */
if (dpl > cpl)
goto exception;
} else {
/* nonconforming */
if (rpl > cpl || dpl != cpl)
goto exception;
}
/* CS(RPL) <- CPL */
/*
 * NOTE(review): kvm_seg.selector was already filled in by
 * seg_desct_to_kvm_desct() above, so this adjusted value does not
 * appear to reach the segment that is loaded below -- confirm.
 */
selector = (selector & 0xfffc) | cpl;
break;
case VCPU_SREG_TR:
/* TR accepts only system descriptors of type 1 or 9 */
if (kvm_seg.s || (kvm_seg.type != 1 && kvm_seg.type != 9))
goto exception;
break;
case VCPU_SREG_LDTR:
/* LDTR accepts only system descriptors of type 2 */
if (kvm_seg.s || kvm_seg.type != 2)
goto exception;
break;
default: /* DS, ES, FS, or GS */
/*
 * segment is not a data or readable code segment or
 * ((segment is a data or nonconforming code segment)
 * and (both RPL and CPL > DPL))
 */
if ((kvm_seg.type & 0xa) == 0x8 ||
(((kvm_seg.type & 0xc) != 0xc) && (rpl > dpl && cpl > dpl)))
goto exception;
break;
}
if (!kvm_seg.unusable && kvm_seg.s) {
/* mark segment as accessed */
kvm_seg.type |= 1;
seg_desc.type |= 1;
/* The write-back result is not checked here. */
save_guest_segment_descriptor(vcpu, selector, &seg_desc);
}
load:
kvm_set_segment(vcpu, &kvm_seg, seg);
return X86EMUL_CONTINUE;
exception:
kvm_queue_exception_e(vcpu, err_vec, err_code);
return X86EMUL_PROPAGATE_FAULT;
}
/*
 * Snapshot the vcpu's current state into a 32-bit TSS image: CR3, EIP,
 * EFLAGS, the eight general-purpose registers, the six segment selectors
 * and the LDT selector.  Other fields of @tss are left untouched.
 */
static void save_state_to_tss32(struct kvm_vcpu *vcpu,
				struct tss_segment_32 *tss)
{
	/* Control state. */
	tss->cr3    = vcpu->arch.cr3;
	tss->eip    = kvm_rip_read(vcpu);
	tss->eflags = kvm_get_rflags(vcpu);

	/* General-purpose registers. */
	tss->eax = kvm_register_read(vcpu, VCPU_REGS_RAX);
	tss->ecx = kvm_register_read(vcpu, VCPU_REGS_RCX);
	tss->edx = kvm_register_read(vcpu, VCPU_REGS_RDX);
	tss->ebx = kvm_register_read(vcpu, VCPU_REGS_RBX);
	tss->esp = kvm_register_read(vcpu, VCPU_REGS_RSP);
	tss->ebp = kvm_register_read(vcpu, VCPU_REGS_RBP);
	tss->esi = kvm_register_read(vcpu, VCPU_REGS_RSI);
	tss->edi = kvm_register_read(vcpu, VCPU_REGS_RDI);

	/* Segment selectors. */
	tss->es = get_segment_selector(vcpu, VCPU_SREG_ES);
	tss->cs = get_segment_selector(vcpu, VCPU_SREG_CS);
	tss->ss = get_segment_selector(vcpu, VCPU_SREG_SS);
	tss->ds = get_segment_selector(vcpu, VCPU_SREG_DS);
	tss->fs = get_segment_selector(vcpu, VCPU_SREG_FS);
	tss->gs = get_segment_selector(vcpu, VCPU_SREG_GS);
	tss->ldt_selector = get_segment_selector(vcpu, VCPU_SREG_LDTR);
}
/*
 * Replace only the selector of segment register @seg with @sel; the rest
 * of the segment state is read back and written unchanged.
 */
static void kvm_load_segment_selector(struct kvm_vcpu *vcpu, u16 sel, int seg)
{
	struct kvm_segment cur;

	kvm_get_segment(vcpu, &cur, seg);
	cur.selector = sel;
	kvm_set_segment(vcpu, &cur, seg);
}
/*
 * Install the state held in a 32-bit TSS image into the vcpu.
 *
 * CR3, EIP, EFLAGS (with bit 1 forced on) and the general-purpose registers
 * are written first, then all selectors, then the descriptors.
 *
 * Returns 0 on success, 1 as soon as any descriptor load fails; the
 * remaining descriptors are then not loaded.
 */
static int load_state_from_tss32(struct kvm_vcpu *vcpu,
				 struct tss_segment_32 *tss)
{
	kvm_set_cr3(vcpu, tss->cr3);

	kvm_rip_write(vcpu, tss->eip);
	kvm_set_rflags(vcpu, tss->eflags | 2);

	kvm_register_write(vcpu, VCPU_REGS_RAX, tss->eax);
	kvm_register_write(vcpu, VCPU_REGS_RCX, tss->ecx);
	kvm_register_write(vcpu, VCPU_REGS_RDX, tss->edx);
	kvm_register_write(vcpu, VCPU_REGS_RBX, tss->ebx);
	kvm_register_write(vcpu, VCPU_REGS_RSP, tss->esp);
	kvm_register_write(vcpu, VCPU_REGS_RBP, tss->ebp);
	kvm_register_write(vcpu, VCPU_REGS_RSI, tss->esi);
	kvm_register_write(vcpu, VCPU_REGS_RDI, tss->edi);

	/*
	 * Per the SDM, the segment selectors are loaded before the segment
	 * descriptors.
	 */
	kvm_load_segment_selector(vcpu, tss->ldt_selector, VCPU_SREG_LDTR);
	kvm_load_segment_selector(vcpu, tss->es, VCPU_SREG_ES);
	kvm_load_segment_selector(vcpu, tss->cs, VCPU_SREG_CS);
	kvm_load_segment_selector(vcpu, tss->ss, VCPU_SREG_SS);
	kvm_load_segment_selector(vcpu, tss->ds, VCPU_SREG_DS);
	kvm_load_segment_selector(vcpu, tss->fs, VCPU_SREG_FS);
	kvm_load_segment_selector(vcpu, tss->gs, VCPU_SREG_GS);

	/*
	 * Now load the segment descriptors.  A fault raised at this stage is
	 * handled in the context of the new task.  The || chain keeps the
	 * LDTR, ES, CS, SS, DS, FS, GS order and stops at the first failure.
	 */
	if (kvm_load_segment_descriptor(vcpu, tss->ldt_selector, VCPU_SREG_LDTR) ||
	    kvm_load_segment_descriptor(vcpu, tss->es, VCPU_SREG_ES) ||
	    kvm_load_segment_descriptor(vcpu, tss->cs, VCPU_SREG_CS) ||
	    kvm_load_segment_descriptor(vcpu, tss->ss, VCPU_SREG_SS) ||
	    kvm_load_segment_descriptor(vcpu, tss->ds, VCPU_SREG_DS) ||
	    kvm_load_segment_descriptor(vcpu, tss->fs, VCPU_SREG_FS) ||
	    kvm_load_segment_descriptor(vcpu, tss->gs, VCPU_SREG_GS))
		return 1;

	return 0;
}
/*
 * Snapshot the vcpu's current state into a 16-bit TSS image: IP, flags,
 * the eight general-purpose registers, ES/CS/SS/DS and the LDT selector.
 * Other fields of @tss are left untouched.
 */
static void save_state_to_tss16(struct kvm_vcpu *vcpu,
				struct tss_segment_16 *tss)
{
	/* Control state. */
	tss->ip   = kvm_rip_read(vcpu);
	tss->flag = kvm_get_rflags(vcpu);

	/* General-purpose registers. */
	tss->ax = kvm_register_read(vcpu, VCPU_REGS_RAX);
	tss->cx = kvm_register_read(vcpu, VCPU_REGS_RCX);
	tss->dx = kvm_register_read(vcpu, VCPU_REGS_RDX);
	tss->bx = kvm_register_read(vcpu, VCPU_REGS_RBX);
	tss->sp = kvm_register_read(vcpu, VCPU_REGS_RSP);
	tss->bp = kvm_register_read(vcpu, VCPU_REGS_RBP);
	tss->si = kvm_register_read(vcpu, VCPU_REGS_RSI);
	tss->di = kvm_register_read(vcpu, VCPU_REGS_RDI);

	/* Segment selectors. */
	tss->es  = get_segment_selector(vcpu, VCPU_SREG_ES);
	tss->cs  = get_segment_selector(vcpu, VCPU_SREG_CS);
	tss->ss  = get_segment_selector(vcpu, VCPU_SREG_SS);
	tss->ds  = get_segment_selector(vcpu, VCPU_SREG_DS);
	tss->ldt = get_segment_selector(vcpu, VCPU_SREG_LDTR);
}
/*
 * Install the state held in a 16-bit TSS image into the vcpu.
 *
 * IP, flags (with bit 1 forced on) and the general-purpose registers are
 * written first, then all selectors, then the descriptors.
 *
 * Returns 0 on success, 1 as soon as any descriptor load fails; the
 * remaining descriptors are then not loaded.
 */
static int load_state_from_tss16(struct kvm_vcpu *vcpu,
				 struct tss_segment_16 *tss)
{
	kvm_rip_write(vcpu, tss->ip);
	kvm_set_rflags(vcpu, tss->flag | 2);

	kvm_register_write(vcpu, VCPU_REGS_RAX, tss->ax);
	kvm_register_write(vcpu, VCPU_REGS_RCX, tss->cx);
	kvm_register_write(vcpu, VCPU_REGS_RDX, tss->dx);
	kvm_register_write(vcpu, VCPU_REGS_RBX, tss->bx);
	kvm_register_write(vcpu, VCPU_REGS_RSP, tss->sp);
	kvm_register_write(vcpu, VCPU_REGS_RBP, tss->bp);
	kvm_register_write(vcpu, VCPU_REGS_RSI, tss->si);
	kvm_register_write(vcpu, VCPU_REGS_RDI, tss->di);

	/*
	 * Per the SDM, the segment selectors are loaded before the segment
	 * descriptors.
	 */
	kvm_load_segment_selector(vcpu, tss->ldt, VCPU_SREG_LDTR);
	kvm_load_segment_selector(vcpu, tss->es, VCPU_SREG_ES);
	kvm_load_segment_selector(vcpu, tss->cs, VCPU_SREG_CS);
	kvm_load_segment_selector(vcpu, tss->ss, VCPU_SREG_SS);
	kvm_load_segment_selector(vcpu, tss->ds, VCPU_SREG_DS);

	/*
	 * Now load the segment descriptors.  A fault raised at this stage is
	 * handled in the context of the new task.  The || chain keeps the
	 * LDTR, ES, CS, SS, DS order and stops at the first failure.
	 */
	if (kvm_load_segment_descriptor(vcpu, tss->ldt, VCPU_SREG_LDTR) ||
	    kvm_load_segment_descriptor(vcpu, tss->es, VCPU_SREG_ES) ||
	    kvm_load_segment_descriptor(vcpu, tss->cs, VCPU_SREG_CS) ||
	    kvm_load_segment_descriptor(vcpu, tss->ss, VCPU_SREG_SS) ||
	    kvm_load_segment_descriptor(vcpu, tss->ds, VCPU_SREG_DS))
		return 1;

	return 0;
}
/*
 * Perform the memory side of a task switch using 16-bit TSS images.
 *
 * The outgoing state is merged into the TSS at @old_tss_base, the incoming
 * TSS described by @nseg_desc is read, its back link is set to @old_tss_sel
 * unless that is 0xffff, and the incoming state is loaded into the vcpu.
 *
 * Returns 1 on success and 0 if any guest memory access or the state load
 * fails.  @tss_selector is not used here.
 */
static int kvm_task_switch_16(struct kvm_vcpu *vcpu, u16 tss_selector,
			      u16 old_tss_sel, u32 old_tss_base,
			      struct desc_struct *nseg_desc)
{
	struct tss_segment_16 tss;

	/* Read-modify-write the outgoing TSS so unsaved fields are preserved. */
	if (kvm_read_guest(vcpu->kvm, old_tss_base, &tss, sizeof(tss)))
		return 0;

	save_state_to_tss16(vcpu, &tss);

	if (kvm_write_guest(vcpu->kvm, old_tss_base, &tss, sizeof(tss)))
		return 0;

	/* Fetch the incoming TSS. */
	if (kvm_read_guest(vcpu->kvm, get_tss_base_addr_read(vcpu, nseg_desc),
			   &tss, sizeof(tss)))
		return 0;

	/* 0xffff means "do not record a back link". */
	if (old_tss_sel != 0xffff) {
		tss.prev_task_link = old_tss_sel;

		if (kvm_write_guest(vcpu->kvm,
				    get_tss_base_addr_write(vcpu, nseg_desc),
				    &tss.prev_task_link,
				    sizeof(tss.prev_task_link)))
			return 0;
	}

	if (load_state_from_tss16(vcpu, &tss))
		return 0;

	return 1;
}
/*
 * Perform the memory side of a task switch using 32-bit TSS images.
 *
 * The outgoing state is merged into the TSS at @old_tss_base, the incoming
 * TSS described by @nseg_desc is read, its back link is set to @old_tss_sel
 * unless that is 0xffff, and the incoming state is loaded into the vcpu.
 *
 * Returns 1 on success and 0 if any guest memory access or the state load
 * fails.  @tss_selector is not used here.
 */
static int kvm_task_switch_32(struct kvm_vcpu *vcpu, u16 tss_selector,
			      u16 old_tss_sel, u32 old_tss_base,
			      struct desc_struct *nseg_desc)
{
	struct tss_segment_32 tss;

	/* Read-modify-write the outgoing TSS so unsaved fields are preserved. */
	if (kvm_read_guest(vcpu->kvm, old_tss_base, &tss, sizeof(tss)))
		return 0;

	save_state_to_tss32(vcpu, &tss);

	if (kvm_write_guest(vcpu->kvm, old_tss_base, &tss, sizeof(tss)))
		return 0;

	/* Fetch the incoming TSS. */
	if (kvm_read_guest(vcpu->kvm, get_tss_base_addr_read(vcpu, nseg_desc),
			   &tss, sizeof(tss)))
		return 0;

	/* 0xffff means "do not record a back link". */
	if (old_tss_sel != 0xffff) {
		tss.prev_task_link = old_tss_sel;

		if (kvm_write_guest(vcpu->kvm,
				    get_tss_base_addr_write(vcpu, nseg_desc),
				    &tss.prev_task_link,
				    sizeof(tss.prev_task_link)))
			return 0;
	}

	if (load_state_from_tss32(vcpu, &tss))
		return 0;

	return 1;
}
/*
 * Switch the vcpu to the task named by @tss_selector by delegating to the
 * instruction emulator's task-switch implementation.
 *
 * @vcpu:         vcpu performing the switch
 * @tss_selector: selector of the incoming TSS
 * @reason:       TASK_SWITCH_* value, passed through to the emulator
 *
 * The emulation context is primed with the current rflags, rip and CPU
 * mode before emulator_task_switch() is called.  If the emulator reports
 * X86EMUL_CONTINUE, the resulting eflags are written back to the vcpu.
 *
 * Returns 0 on success and 1 on any other emulator result.
 *
 * This block previously interleaved the legacy x86.c implementation with
 * the emulator-based one: "ret" was declared twice, both switch paths ran,
 * and the final return was unreachable.  Only the emulator path is kept.
 */
int kvm_task_switch(struct kvm_vcpu *vcpu, u16 tss_selector, int reason)
{
	int cs_db, cs_l, ret;

	cache_all_regs(vcpu);

	kvm_x86_ops->get_cs_db_l_bits(vcpu, &cs_db, &cs_l);

	vcpu->arch.emulate_ctxt.vcpu = vcpu;
	vcpu->arch.emulate_ctxt.eflags = kvm_x86_ops->get_rflags(vcpu);
	vcpu->arch.emulate_ctxt.eip = kvm_rip_read(vcpu);

	/* Pick the emulation mode: real, vm86, then by CS.L / CS.D. */
	if (!is_protmode(vcpu))
		vcpu->arch.emulate_ctxt.mode = X86EMUL_MODE_REAL;
	else if (vcpu->arch.emulate_ctxt.eflags & X86_EFLAGS_VM)
		vcpu->arch.emulate_ctxt.mode = X86EMUL_MODE_VM86;
	else if (cs_l)
		vcpu->arch.emulate_ctxt.mode = X86EMUL_MODE_PROT64;
	else if (cs_db)
		vcpu->arch.emulate_ctxt.mode = X86EMUL_MODE_PROT32;
	else
		vcpu->arch.emulate_ctxt.mode = X86EMUL_MODE_PROT16;

	ret = emulator_task_switch(&vcpu->arch.emulate_ctxt, &emulate_ops,
				   tss_selector, reason);

	/* Write eflags back only when the emulated switch completed. */
	if (ret == X86EMUL_CONTINUE)
		kvm_x86_ops->set_rflags(vcpu, vcpu->arch.emulate_ctxt.eflags);

	return (ret != X86EMUL_CONTINUE);
}
EXPORT_SYMBOL_GPL(kvm_task_switch);
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment