Commit d75cd22f authored by Jeremy Fitzhardinge, committed by Ingo Molnar

x86/paravirt: split sysret and sysexit

Don't conflate sysret and sysexit; they're different instructions with
different semantics, and may be in use at the same time (at least
within the same kernel, depending on whether it's an Intel or AMD
system).

sysexit - just return to userspace, does no register restoration of
    any kind; must explicitly atomically enable interrupts.

sysret - reloads flags from r11, so no need to explicitly enable
    interrupts on 64-bit, responsible for restoring usermode %gs
Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com>
Cc: xen-devel <xen-devel@lists.xensource.com>
Cc: Stephen Tweedie <sct@redhat.com>
Cc: Eduardo Habkost <ehabkost@redhat.com>
Cc: Mark McLoughlin <markmc@redhat.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
parent e04e0a63
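
For orientation before the diff: the sketch below is not part of the patch; field names, config guards, and instruction sequences are taken from the hunks that follow. It shows roughly how the two return paths end up wired into pv_cpu_ops:

    /* Sketch only; surrounding fields elided. Per the comment in the
     * patched header, these entries are jmp'd to, not called. */
    struct pv_cpu_ops {
            /* ... */
            void (*irq_enable_sysexit)(void); /* 32-bit: "sti; sysexit" */
            void (*usersp_sysret)(void);      /* 64-bit: "movq %gs:pda_oldrsp, %rsp; swapgs; sysretq" */
            void (*iret)(void);
            /* ... */
    };

    /* Native backing, chosen at build time: */
    struct pv_cpu_ops pv_cpu_ops = {
    #ifdef CONFIG_X86_32
            .irq_enable_sysexit = native_irq_enable_sysexit,
    #else
            .usersp_sysret = native_usersp_sysret,
    #endif
            .iret = native_iret,
            /* ... */
    };

On native hardware the patcher then replaces each indirect jmp site with the corresponding inline sequence, as the DEF_NATIVE definitions in the 32-bit and 64-bit patch files below show.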
@@ -111,7 +111,7 @@ void foo(void)
 	OFFSET(PV_IRQ_irq_disable, pv_irq_ops, irq_disable);
 	OFFSET(PV_IRQ_irq_enable, pv_irq_ops, irq_enable);
 	OFFSET(PV_CPU_iret, pv_cpu_ops, iret);
-	OFFSET(PV_CPU_irq_enable_syscall_ret, pv_cpu_ops, irq_enable_syscall_ret);
+	OFFSET(PV_CPU_irq_enable_sysexit, pv_cpu_ops, irq_enable_sysexit);
 	OFFSET(PV_CPU_read_cr0, pv_cpu_ops, read_cr0);
 #endif
@@ -62,7 +62,7 @@ int main(void)
 	OFFSET(PV_IRQ_irq_disable, pv_irq_ops, irq_disable);
 	OFFSET(PV_IRQ_irq_enable, pv_irq_ops, irq_enable);
 	OFFSET(PV_CPU_iret, pv_cpu_ops, iret);
-	OFFSET(PV_CPU_irq_enable_syscall_ret, pv_cpu_ops, irq_enable_syscall_ret);
+	OFFSET(PV_CPU_usersp_sysret, pv_cpu_ops, usersp_sysret);
 	OFFSET(PV_CPU_swapgs, pv_cpu_ops, swapgs);
 	OFFSET(PV_MMU_read_cr2, pv_mmu_ops, read_cr2);
 #endif
@@ -58,7 +58,7 @@
  * for paravirtualization. The following will never clobber any registers:
  *	INTERRUPT_RETURN (aka. "iret")
  *	GET_CR0_INTO_EAX (aka. "movl %cr0, %eax")
- *	ENABLE_INTERRUPTS_SYSCALL_RET (aka "sti; sysexit").
+ *	ENABLE_INTERRUPTS_SYSEXIT (aka "sti; sysexit").
  *
  * For DISABLE_INTERRUPTS/ENABLE_INTERRUPTS (aka "cli"/"sti"), you must
  * specify what registers can be overwritten (CLBR_NONE, CLBR_EAX/EDX/ECX/ANY).
@@ -349,7 +349,7 @@ sysenter_past_esp:
 	xorl %ebp,%ebp
 	TRACE_IRQS_ON
 1:	mov PT_FS(%esp), %fs
-	ENABLE_INTERRUPTS_SYSCALL_RET
+	ENABLE_INTERRUPTS_SYSEXIT
 	CFI_ENDPROC
 .pushsection .fixup,"ax"
 2:	movl $0,PT_FS(%esp)
@@ -874,10 +874,10 @@ ENTRY(native_iret)
 .previous
 END(native_iret)
-ENTRY(native_irq_enable_syscall_ret)
+ENTRY(native_irq_enable_sysexit)
 	sti
 	sysexit
-END(native_irq_enable_syscall_ret)
+END(native_irq_enable_sysexit)
 #endif
 KPROBE_ENTRY(int3)
@@ -59,7 +59,7 @@
 #endif
 #ifdef CONFIG_PARAVIRT
-ENTRY(native_irq_enable_syscall_ret)
+ENTRY(native_usersp_sysret)
 	movq %gs:pda_oldrsp,%rsp
 	swapgs
 	sysretq
@@ -275,7 +275,7 @@ sysret_check:
 	CFI_REGISTER rip,rcx
 	RESTORE_ARGS 0,-ARG_SKIP,1
 	/*CFI_REGISTER rflags,r11*/
-	ENABLE_INTERRUPTS_SYSCALL_RET
+	USERSP_SYSRET
 	CFI_RESTORE_STATE
 	/* Handle reschedules */
@@ -140,7 +140,8 @@ unsigned paravirt_patch_default(u8 type, u16 clobbers, void *insnbuf,
 		/* If the operation is a nop, then nop the callsite */
 		ret = paravirt_patch_nop();
 	else if (type == PARAVIRT_PATCH(pv_cpu_ops.iret) ||
-		 type == PARAVIRT_PATCH(pv_cpu_ops.irq_enable_syscall_ret))
+		 type == PARAVIRT_PATCH(pv_cpu_ops.irq_enable_sysexit) ||
+		 type == PARAVIRT_PATCH(pv_cpu_ops.usersp_sysret))
 		/* If operation requires a jmp, then jmp */
 		ret = paravirt_patch_jmp(insnbuf, opfunc, addr, len);
 	else
@@ -191,7 +192,8 @@ static void native_flush_tlb_single(unsigned long addr)
 /* These are in entry.S */
 extern void native_iret(void);
-extern void native_irq_enable_syscall_ret(void);
+extern void native_irq_enable_sysexit(void);
+extern void native_usersp_sysret(void);
 static int __init print_banner(void)
 {
@@ -327,7 +329,11 @@ struct pv_cpu_ops pv_cpu_ops = {
 	.write_idt_entry = native_write_idt_entry,
 	.load_sp0 = native_load_sp0,
-	.irq_enable_syscall_ret = native_irq_enable_syscall_ret,
+#ifdef CONFIG_X86_32
+	.irq_enable_sysexit = native_irq_enable_sysexit,
+#else
+	.usersp_sysret = native_usersp_sysret,
+#endif
 	.iret = native_iret,
 	.swapgs = native_swapgs,
@@ -5,7 +5,7 @@ DEF_NATIVE(pv_irq_ops, irq_enable, "sti");
 DEF_NATIVE(pv_irq_ops, restore_fl, "push %eax; popf");
 DEF_NATIVE(pv_irq_ops, save_fl, "pushf; pop %eax");
 DEF_NATIVE(pv_cpu_ops, iret, "iret");
-DEF_NATIVE(pv_cpu_ops, irq_enable_syscall_ret, "sti; sysexit");
+DEF_NATIVE(pv_cpu_ops, irq_enable_sysexit, "sti; sysexit");
 DEF_NATIVE(pv_mmu_ops, read_cr2, "mov %cr2, %eax");
 DEF_NATIVE(pv_mmu_ops, write_cr3, "mov %eax, %cr3");
 DEF_NATIVE(pv_mmu_ops, read_cr3, "mov %cr3, %eax");
@@ -29,7 +29,7 @@ unsigned native_patch(u8 type, u16 clobbers, void *ibuf,
 		PATCH_SITE(pv_irq_ops, restore_fl);
 		PATCH_SITE(pv_irq_ops, save_fl);
 		PATCH_SITE(pv_cpu_ops, iret);
-		PATCH_SITE(pv_cpu_ops, irq_enable_syscall_ret);
+		PATCH_SITE(pv_cpu_ops, irq_enable_sysexit);
 		PATCH_SITE(pv_mmu_ops, read_cr2);
 		PATCH_SITE(pv_mmu_ops, read_cr3);
 		PATCH_SITE(pv_mmu_ops, write_cr3);
@@ -15,7 +15,7 @@ DEF_NATIVE(pv_cpu_ops, clts, "clts");
 DEF_NATIVE(pv_cpu_ops, wbinvd, "wbinvd");
 /* the three commands give us more control to how to return from a syscall */
-DEF_NATIVE(pv_cpu_ops, irq_enable_syscall_ret, "movq %gs:" __stringify(pda_oldrsp) ", %rsp; swapgs; sysretq;");
+DEF_NATIVE(pv_cpu_ops, usersp_sysret, "movq %gs:" __stringify(pda_oldrsp) ", %rsp; swapgs; sysretq;");
 DEF_NATIVE(pv_cpu_ops, swapgs, "swapgs");
 unsigned native_patch(u8 type, u16 clobbers, void *ibuf,
@@ -35,7 +35,7 @@ unsigned native_patch(u8 type, u16 clobbers, void *ibuf,
 		PATCH_SITE(pv_irq_ops, irq_enable);
 		PATCH_SITE(pv_irq_ops, irq_disable);
 		PATCH_SITE(pv_cpu_ops, iret);
-		PATCH_SITE(pv_cpu_ops, irq_enable_syscall_ret);
+		PATCH_SITE(pv_cpu_ops, usersp_sysret);
 		PATCH_SITE(pv_cpu_ops, swapgs);
 		PATCH_SITE(pv_mmu_ops, read_cr2);
 		PATCH_SITE(pv_mmu_ops, read_cr3);
@@ -151,7 +151,7 @@ static unsigned vmi_patch(u8 type, u16 clobbers, void *insns,
 				      insns, ip);
 	case PARAVIRT_PATCH(pv_cpu_ops.iret):
 		return patch_internal(VMI_CALL_IRET, len, insns, ip);
-	case PARAVIRT_PATCH(pv_cpu_ops.irq_enable_syscall_ret):
+	case PARAVIRT_PATCH(pv_cpu_ops.irq_enable_sysexit):
 		return patch_internal(VMI_CALL_SYSEXIT, len, insns, ip);
 	default:
 		break;
@@ -896,7 +896,7 @@ static inline int __init activate_vmi(void)
 	 * the backend. They are performance critical anyway, so requiring
 	 * a patch is not a big problem.
 	 */
-	pv_cpu_ops.irq_enable_syscall_ret = (void *)0xfeedbab0;
+	pv_cpu_ops.irq_enable_sysexit = (void *)0xfeedbab0;
 	pv_cpu_ops.iret = (void *)0xbadbab0;
 #ifdef CONFIG_SMP
@@ -1089,7 +1089,7 @@ static const struct pv_cpu_ops xen_cpu_ops __initdata = {
 	.read_pmc = native_read_pmc,
 	.iret = xen_iret,
-	.irq_enable_syscall_ret = xen_sysexit,
+	.irq_enable_sysexit = xen_sysexit,
 	.load_tr_desc = paravirt_nop,
 	.set_ldt = xen_set_ldt,
@@ -112,13 +112,13 @@ static inline unsigned long __raw_local_irq_save(void)
 #ifdef CONFIG_X86_64
 #define INTERRUPT_RETURN	iretq
-#define ENABLE_INTERRUPTS_SYSCALL_RET			\
+#define USERSP_SYSRET					\
 			movq %gs:pda_oldrsp, %rsp;	\
 			swapgs;				\
 			sysretq;
 #else
 #define INTERRUPT_RETURN	iret
-#define ENABLE_INTERRUPTS_SYSCALL_RET	sti; sysexit
+#define ENABLE_INTERRUPTS_SYSEXIT	sti; sysexit
 #define GET_CR0_INTO_EAX	movl %cr0, %eax
 #endif
@@ -141,8 +141,9 @@ struct pv_cpu_ops {
 	u64 (*read_pmc)(int counter);
 	unsigned long long (*read_tscp)(unsigned int *aux);
-	/* These two are jmp to, not actually called. */
-	void (*irq_enable_syscall_ret)(void);
+	/* These three are jmp to, not actually called. */
+	void (*irq_enable_sysexit)(void);
+	void (*usersp_sysret)(void);
 	void (*iret)(void);
 	void (*swapgs)(void);
@@ -1480,10 +1481,10 @@ static inline unsigned long __raw_local_irq_save(void)
 		call PARA_INDIRECT(pv_irq_ops+PV_IRQ_irq_enable);	\
 		PV_RESTORE_REGS;)
-#define ENABLE_INTERRUPTS_SYSCALL_RET					\
-	PARA_SITE(PARA_PATCH(pv_cpu_ops, PV_CPU_irq_enable_syscall_ret),\
+#define ENABLE_INTERRUPTS_SYSEXIT					\
+	PARA_SITE(PARA_PATCH(pv_cpu_ops, PV_CPU_irq_enable_sysexit),	\
 		  CLBR_NONE,						\
-		  jmp PARA_INDIRECT(pv_cpu_ops+PV_CPU_irq_enable_syscall_ret))
+		  jmp PARA_INDIRECT(pv_cpu_ops+PV_CPU_irq_enable_sysexit))
 #ifdef CONFIG_X86_32
@@ -1504,6 +1505,10 @@ static inline unsigned long __raw_local_irq_save(void)
 		movq %rax, %rcx;	\
 		xorq %rax, %rax;
+#define USERSP_SYSRET						\
+	PARA_SITE(PARA_PATCH(pv_cpu_ops, PV_CPU_usersp_sysret),	\
+		  CLBR_NONE,					\
+		  jmp PARA_INDIRECT(pv_cpu_ops+PV_CPU_usersp_sysret))
 #endif
 #endif /* __ASSEMBLY__ */