Commit 76f5df43, authored by Denys Vlasenko, committed by Ingo Molnar

x86/asm/entry/64: Always allocate a complete "struct pt_regs" on the kernel stack

The 64-bit entry code was using six fewer stack slots by not
saving/restoring registers which are callee-preserved according
to the C ABI, and was not allocating space for them.

Only when syscalls needed a complete "struct pt_regs" was
the complete area allocated and filled in.

As an additional twist, on interrupt entry a "slightly less
truncated pt_regs" trick is used, to make nested interrupt
stacks easier to unwind.

This proved to be a source of significant obfuscation and subtle
bugs. For example, 'stub_fork' had to pop the return address,
extend the struct, save registers, and push the return address back.
Ugly. 'ia32_ptregs_common' pops the return address and "returns" via
a jmp insn, throwing a wrench into the CPU return stack cache.

This patch changes the code to always allocate a complete
"struct pt_regs" on the kernel stack. The saving of registers
is still done lazily.

"Partial pt_regs" trick on interrupt stack is retained.

Macros which manipulate "struct pt_regs" on stack are reworked:

 - ALLOC_PT_GPREGS_ON_STACK allocates the structure.

 - SAVE_C_REGS saves to it those registers which are clobbered
   by C code.

 - SAVE_EXTRA_REGS saves to it all other registers.

 - Corresponding RESTORE_* and REMOVE_PT_GPREGS_FROM_STACK macros
   reverse it.

'ia32_ptregs_common', 'stub_fork' and friends lost their ugly dance
with the return pointer.

LOAD_ARGS32 in ia32entry.S now uses symbolic stack offsets
instead of magic numbers.

'error_entry' and 'save_paranoid' now use SAVE_C_REGS +
SAVE_EXTRA_REGS instead of having it open-coded yet again.

Patch was run-tested: 64-bit executables, 32-bit executables,
strace works.

Timing tests did not show measurable difference in 32-bit
and 64-bit syscalls.
Signed-off-by: Denys Vlasenko <dvlasenk@redhat.com>
Signed-off-by: Andy Lutomirski <luto@amacapital.net>
Cc: Alexei Starovoitov <ast@plumgrid.com>
Cc: Borislav Petkov <bp@alien8.de>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: H. Peter Anvin <hpa@zytor.com>
Cc: Kees Cook <keescook@chromium.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Oleg Nesterov <oleg@redhat.com>
Cc: Will Drewry <wad@chromium.org>
Link: http://lkml.kernel.org/r/1423778052-21038-2-git-send-email-dvlasenk@redhat.com
Link: http://lkml.kernel.org/r/b89763d354aa23e670b9bdf3a40ae320320a7c2e.1424989793.git.luto@amacapital.net
Signed-off-by: Ingo Molnar <mingo@kernel.org>
parent 6e1327bd
...@@ -62,12 +62,12 @@ ...@@ -62,12 +62,12 @@
*/ */
.macro LOAD_ARGS32 offset, _r9=0 .macro LOAD_ARGS32 offset, _r9=0
.if \_r9 .if \_r9
movl \offset+16(%rsp),%r9d movl \offset+R9(%rsp),%r9d
.endif .endif
movl \offset+40(%rsp),%ecx movl \offset+RCX(%rsp),%ecx
movl \offset+48(%rsp),%edx movl \offset+RDX(%rsp),%edx
movl \offset+56(%rsp),%esi movl \offset+RSI(%rsp),%esi
movl \offset+64(%rsp),%edi movl \offset+RDI(%rsp),%edi
movl %eax,%eax /* zero extension */ movl %eax,%eax /* zero extension */
.endm .endm
...@@ -144,7 +144,8 @@ ENTRY(ia32_sysenter_target) ...@@ -144,7 +144,8 @@ ENTRY(ia32_sysenter_target)
CFI_REL_OFFSET rip,0 CFI_REL_OFFSET rip,0
pushq_cfi %rax pushq_cfi %rax
cld cld
SAVE_ARGS 0,1,0 ALLOC_PT_GPREGS_ON_STACK
SAVE_C_REGS_EXCEPT_R891011
/* no need to do an access_ok check here because rbp has been /* no need to do an access_ok check here because rbp has been
32bit zero extended */ 32bit zero extended */
ASM_STAC ASM_STAC
...@@ -182,7 +183,8 @@ sysexit_from_sys_call: ...@@ -182,7 +183,8 @@ sysexit_from_sys_call:
andl $~0x200,EFLAGS-ARGOFFSET(%rsp) andl $~0x200,EFLAGS-ARGOFFSET(%rsp)
movl RIP-ARGOFFSET(%rsp),%edx /* User %eip */ movl RIP-ARGOFFSET(%rsp),%edx /* User %eip */
CFI_REGISTER rip,rdx CFI_REGISTER rip,rdx
RESTORE_ARGS 0,24,0,0,0,0 RESTORE_RSI_RDI
REMOVE_PT_GPREGS_FROM_STACK 3*8
xorq %r8,%r8 xorq %r8,%r8
xorq %r9,%r9 xorq %r9,%r9
xorq %r10,%r10 xorq %r10,%r10
...@@ -256,13 +258,13 @@ sysenter_tracesys: ...@@ -256,13 +258,13 @@ sysenter_tracesys:
testl $(_TIF_WORK_SYSCALL_ENTRY & ~_TIF_SYSCALL_AUDIT),TI_flags+THREAD_INFO(%rsp,RIP-ARGOFFSET) testl $(_TIF_WORK_SYSCALL_ENTRY & ~_TIF_SYSCALL_AUDIT),TI_flags+THREAD_INFO(%rsp,RIP-ARGOFFSET)
jz sysenter_auditsys jz sysenter_auditsys
#endif #endif
SAVE_REST SAVE_EXTRA_REGS
CLEAR_RREGS CLEAR_RREGS
movq $-ENOSYS,RAX(%rsp)/* ptrace can change this for a bad syscall */ movq $-ENOSYS,RAX(%rsp)/* ptrace can change this for a bad syscall */
movq %rsp,%rdi /* &pt_regs -> arg1 */ movq %rsp,%rdi /* &pt_regs -> arg1 */
call syscall_trace_enter call syscall_trace_enter
LOAD_ARGS32 ARGOFFSET /* reload args from stack in case ptrace changed it */ LOAD_ARGS32 ARGOFFSET /* reload args from stack in case ptrace changed it */
RESTORE_REST RESTORE_EXTRA_REGS
cmpq $(IA32_NR_syscalls-1),%rax cmpq $(IA32_NR_syscalls-1),%rax
ja int_ret_from_sys_call /* sysenter_tracesys has set RAX(%rsp) */ ja int_ret_from_sys_call /* sysenter_tracesys has set RAX(%rsp) */
jmp sysenter_do_call jmp sysenter_do_call
...@@ -304,7 +306,8 @@ ENTRY(ia32_cstar_target) ...@@ -304,7 +306,8 @@ ENTRY(ia32_cstar_target)
* disabled irqs and here we enable it straight after entry: * disabled irqs and here we enable it straight after entry:
*/ */
ENABLE_INTERRUPTS(CLBR_NONE) ENABLE_INTERRUPTS(CLBR_NONE)
SAVE_ARGS 8,0,0 ALLOC_PT_GPREGS_ON_STACK 8
SAVE_C_REGS_EXCEPT_RCX_R891011
movl %eax,%eax /* zero extension */ movl %eax,%eax /* zero extension */
movq %rax,ORIG_RAX-ARGOFFSET(%rsp) movq %rax,ORIG_RAX-ARGOFFSET(%rsp)
movq %rcx,RIP-ARGOFFSET(%rsp) movq %rcx,RIP-ARGOFFSET(%rsp)
...@@ -341,7 +344,7 @@ cstar_dispatch: ...@@ -341,7 +344,7 @@ cstar_dispatch:
jnz sysretl_audit jnz sysretl_audit
sysretl_from_sys_call: sysretl_from_sys_call:
andl $~TS_COMPAT,TI_status+THREAD_INFO(%rsp,RIP-ARGOFFSET) andl $~TS_COMPAT,TI_status+THREAD_INFO(%rsp,RIP-ARGOFFSET)
RESTORE_ARGS 0,-ARG_SKIP,0,0,0 RESTORE_RSI_RDI_RDX
movl RIP-ARGOFFSET(%rsp),%ecx movl RIP-ARGOFFSET(%rsp),%ecx
CFI_REGISTER rip,rcx CFI_REGISTER rip,rcx
movl EFLAGS-ARGOFFSET(%rsp),%r11d movl EFLAGS-ARGOFFSET(%rsp),%r11d
...@@ -372,13 +375,13 @@ cstar_tracesys: ...@@ -372,13 +375,13 @@ cstar_tracesys:
jz cstar_auditsys jz cstar_auditsys
#endif #endif
xchgl %r9d,%ebp xchgl %r9d,%ebp
SAVE_REST SAVE_EXTRA_REGS
CLEAR_RREGS 0, r9 CLEAR_RREGS 0, r9
movq $-ENOSYS,RAX(%rsp) /* ptrace can change this for a bad syscall */ movq $-ENOSYS,RAX(%rsp) /* ptrace can change this for a bad syscall */
movq %rsp,%rdi /* &pt_regs -> arg1 */ movq %rsp,%rdi /* &pt_regs -> arg1 */
call syscall_trace_enter call syscall_trace_enter
LOAD_ARGS32 ARGOFFSET, 1 /* reload args from stack in case ptrace changed it */ LOAD_ARGS32 ARGOFFSET, 1 /* reload args from stack in case ptrace changed it */
RESTORE_REST RESTORE_EXTRA_REGS
xchgl %ebp,%r9d xchgl %ebp,%r9d
cmpq $(IA32_NR_syscalls-1),%rax cmpq $(IA32_NR_syscalls-1),%rax
ja int_ret_from_sys_call /* cstar_tracesys has set RAX(%rsp) */ ja int_ret_from_sys_call /* cstar_tracesys has set RAX(%rsp) */
...@@ -433,7 +436,8 @@ ENTRY(ia32_syscall) ...@@ -433,7 +436,8 @@ ENTRY(ia32_syscall)
cld cld
/* note the registers are not zero extended to the sf. /* note the registers are not zero extended to the sf.
this could be a problem. */ this could be a problem. */
SAVE_ARGS 0,1,0 ALLOC_PT_GPREGS_ON_STACK
SAVE_C_REGS_EXCEPT_R891011
orl $TS_COMPAT,TI_status+THREAD_INFO(%rsp,RIP-ARGOFFSET) orl $TS_COMPAT,TI_status+THREAD_INFO(%rsp,RIP-ARGOFFSET)
testl $_TIF_WORK_SYSCALL_ENTRY,TI_flags+THREAD_INFO(%rsp,RIP-ARGOFFSET) testl $_TIF_WORK_SYSCALL_ENTRY,TI_flags+THREAD_INFO(%rsp,RIP-ARGOFFSET)
jnz ia32_tracesys jnz ia32_tracesys
...@@ -446,16 +450,16 @@ ia32_sysret: ...@@ -446,16 +450,16 @@ ia32_sysret:
movq %rax,RAX-ARGOFFSET(%rsp) movq %rax,RAX-ARGOFFSET(%rsp)
ia32_ret_from_sys_call: ia32_ret_from_sys_call:
CLEAR_RREGS -ARGOFFSET CLEAR_RREGS -ARGOFFSET
jmp int_ret_from_sys_call jmp int_ret_from_sys_call
ia32_tracesys: ia32_tracesys:
SAVE_REST SAVE_EXTRA_REGS
CLEAR_RREGS CLEAR_RREGS
movq $-ENOSYS,RAX(%rsp) /* ptrace can change this for a bad syscall */ movq $-ENOSYS,RAX(%rsp) /* ptrace can change this for a bad syscall */
movq %rsp,%rdi /* &pt_regs -> arg1 */ movq %rsp,%rdi /* &pt_regs -> arg1 */
call syscall_trace_enter call syscall_trace_enter
LOAD_ARGS32 ARGOFFSET /* reload args from stack in case ptrace changed it */ LOAD_ARGS32 ARGOFFSET /* reload args from stack in case ptrace changed it */
RESTORE_REST RESTORE_EXTRA_REGS
cmpq $(IA32_NR_syscalls-1),%rax cmpq $(IA32_NR_syscalls-1),%rax
ja int_ret_from_sys_call /* ia32_tracesys has set RAX(%rsp) */ ja int_ret_from_sys_call /* ia32_tracesys has set RAX(%rsp) */
jmp ia32_do_call jmp ia32_do_call
...@@ -492,7 +496,6 @@ GLOBAL(stub32_clone) ...@@ -492,7 +496,6 @@ GLOBAL(stub32_clone)
ALIGN ALIGN
ia32_ptregs_common: ia32_ptregs_common:
popq %r11
CFI_ENDPROC CFI_ENDPROC
CFI_STARTPROC32 simple CFI_STARTPROC32 simple
CFI_SIGNAL_FRAME CFI_SIGNAL_FRAME
...@@ -507,9 +510,9 @@ ia32_ptregs_common: ...@@ -507,9 +510,9 @@ ia32_ptregs_common:
/* CFI_REL_OFFSET rflags,EFLAGS-ARGOFFSET*/ /* CFI_REL_OFFSET rflags,EFLAGS-ARGOFFSET*/
CFI_REL_OFFSET rsp,RSP-ARGOFFSET CFI_REL_OFFSET rsp,RSP-ARGOFFSET
/* CFI_REL_OFFSET ss,SS-ARGOFFSET*/ /* CFI_REL_OFFSET ss,SS-ARGOFFSET*/
SAVE_REST SAVE_EXTRA_REGS 8
call *%rax call *%rax
RESTORE_REST RESTORE_EXTRA_REGS 8
jmp ia32_sysret /* misbalances the return cache */ ret
CFI_ENDPROC CFI_ENDPROC
END(ia32_ptregs_common) END(ia32_ptregs_common)
...@@ -55,143 +55,137 @@ For 32-bit we have the following conventions - kernel is built with ...@@ -55,143 +55,137 @@ For 32-bit we have the following conventions - kernel is built with
* for assembly code: * for assembly code:
*/ */
#define R15 0 /* The layout forms the "struct pt_regs" on the stack: */
#define R14 8 /*
#define R13 16 * C ABI says these regs are callee-preserved. They aren't saved on kernel entry
#define R12 24 * unless syscall needs a complete, fully filled "struct pt_regs".
#define RBP 32 */
#define RBX 40 #define R15 0*8
#define R14 1*8
/* arguments: interrupts/non tracing syscalls only save up to here: */ #define R13 2*8
#define R11 48 #define R12 3*8
#define R10 56 #define RBP 4*8
#define R9 64 #define RBX 5*8
#define R8 72 /* These regs are callee-clobbered. Always saved on kernel entry. */
#define RAX 80 #define R11 6*8
#define RCX 88 #define R10 7*8
#define RDX 96 #define R9 8*8
#define RSI 104 #define R8 9*8
#define RDI 112 #define RAX 10*8
#define ORIG_RAX 120 /* + error_code */ #define RCX 11*8
/* end of arguments */ #define RDX 12*8
#define RSI 13*8
/* cpu exception frame or undefined in case of fast syscall: */ #define RDI 14*8
#define RIP 128 /*
#define CS 136 * On syscall entry, this is syscall#. On CPU exception, this is error code.
#define EFLAGS 144 * On hw interrupt, it's IRQ number:
#define RSP 152 */
#define SS 160 #define ORIG_RAX 15*8
/* Return frame for iretq */
#define ARGOFFSET R11 #define RIP 16*8
#define CS 17*8
.macro SAVE_ARGS addskip=0, save_rcx=1, save_r891011=1, rax_enosys=0 #define EFLAGS 18*8
subq $9*8+\addskip, %rsp #define RSP 19*8
CFI_ADJUST_CFA_OFFSET 9*8+\addskip #define SS 20*8
movq_cfi rdi, 8*8
movq_cfi rsi, 7*8 #define ARGOFFSET 0
movq_cfi rdx, 6*8
.macro ALLOC_PT_GPREGS_ON_STACK addskip=0
.if \save_rcx subq $15*8+\addskip, %rsp
movq_cfi rcx, 5*8 CFI_ADJUST_CFA_OFFSET 15*8+\addskip
.endif .endm
.if \rax_enosys .macro SAVE_C_REGS_HELPER offset=0 rax=1 rcx=1 r8plus=1
movq $-ENOSYS, 4*8(%rsp) .if \r8plus
.else movq_cfi r11, 6*8+\offset
movq_cfi rax, 4*8 movq_cfi r10, 7*8+\offset
movq_cfi r9, 8*8+\offset
movq_cfi r8, 9*8+\offset
.endif .endif
.if \rax
.if \save_r891011 movq_cfi rax, 10*8+\offset
movq_cfi r8, 3*8 .endif
movq_cfi r9, 2*8 .if \rcx
movq_cfi r10, 1*8 movq_cfi rcx, 11*8+\offset
movq_cfi r11, 0*8
.endif .endif
movq_cfi rdx, 12*8+\offset
movq_cfi rsi, 13*8+\offset
movq_cfi rdi, 14*8+\offset
.endm
.macro SAVE_C_REGS offset=0
SAVE_C_REGS_HELPER \offset, 1, 1, 1
.endm
.macro SAVE_C_REGS_EXCEPT_RAX_RCX offset=0
SAVE_C_REGS_HELPER \offset, 0, 0, 1
.endm
.macro SAVE_C_REGS_EXCEPT_R891011
SAVE_C_REGS_HELPER 0, 1, 1, 0
.endm
.macro SAVE_C_REGS_EXCEPT_RCX_R891011
SAVE_C_REGS_HELPER 0, 1, 0, 0
.endm
.macro SAVE_EXTRA_REGS offset=0
movq_cfi r15, 0*8+\offset
movq_cfi r14, 1*8+\offset
movq_cfi r13, 2*8+\offset
movq_cfi r12, 3*8+\offset
movq_cfi rbp, 4*8+\offset
movq_cfi rbx, 5*8+\offset
.endm
.macro SAVE_EXTRA_REGS_RBP offset=0
movq_cfi rbp, 4*8+\offset
.endm .endm
#define ARG_SKIP (9*8) .macro RESTORE_EXTRA_REGS offset=0
movq_cfi_restore 0*8+\offset, r15
movq_cfi_restore 1*8+\offset, r14
movq_cfi_restore 2*8+\offset, r13
movq_cfi_restore 3*8+\offset, r12
movq_cfi_restore 4*8+\offset, rbp
movq_cfi_restore 5*8+\offset, rbx
.endm
.macro RESTORE_ARGS rstor_rax=1, addskip=0, rstor_rcx=1, rstor_r11=1, \ .macro RESTORE_C_REGS_HELPER rstor_rax=1, rstor_rcx=1, rstor_r11=1, rstor_r8910=1, rstor_rdx=1
rstor_r8910=1, rstor_rdx=1
.if \rstor_r11 .if \rstor_r11
movq_cfi_restore 0*8, r11 movq_cfi_restore 6*8, r11
.endif .endif
.if \rstor_r8910 .if \rstor_r8910
movq_cfi_restore 1*8, r10 movq_cfi_restore 7*8, r10
movq_cfi_restore 2*8, r9 movq_cfi_restore 8*8, r9
movq_cfi_restore 3*8, r8 movq_cfi_restore 9*8, r8
.endif .endif
.if \rstor_rax .if \rstor_rax
movq_cfi_restore 4*8, rax movq_cfi_restore 10*8, rax
.endif .endif
.if \rstor_rcx .if \rstor_rcx
movq_cfi_restore 5*8, rcx movq_cfi_restore 11*8, rcx
.endif .endif
.if \rstor_rdx .if \rstor_rdx
movq_cfi_restore 6*8, rdx movq_cfi_restore 12*8, rdx
.endif
movq_cfi_restore 7*8, rsi
movq_cfi_restore 8*8, rdi
.if ARG_SKIP+\addskip > 0
addq $ARG_SKIP+\addskip, %rsp
CFI_ADJUST_CFA_OFFSET -(ARG_SKIP+\addskip)
.endif .endif
movq_cfi_restore 13*8, rsi
movq_cfi_restore 14*8, rdi
.endm .endm
.macro RESTORE_C_REGS
.macro LOAD_ARGS offset, skiprax=0 RESTORE_C_REGS_HELPER 1,1,1,1,1
movq \offset(%rsp), %r11
movq \offset+8(%rsp), %r10
movq \offset+16(%rsp), %r9
movq \offset+24(%rsp), %r8
movq \offset+40(%rsp), %rcx
movq \offset+48(%rsp), %rdx
movq \offset+56(%rsp), %rsi
movq \offset+64(%rsp), %rdi
.if \skiprax
.else
movq \offset+72(%rsp), %rax
.endif
.endm .endm
.macro RESTORE_C_REGS_EXCEPT_RAX
#define REST_SKIP (6*8) RESTORE_C_REGS_HELPER 0,1,1,1,1
.macro SAVE_REST
subq $REST_SKIP, %rsp
CFI_ADJUST_CFA_OFFSET REST_SKIP
movq_cfi rbx, 5*8
movq_cfi rbp, 4*8
movq_cfi r12, 3*8
movq_cfi r13, 2*8
movq_cfi r14, 1*8
movq_cfi r15, 0*8
.endm .endm
.macro RESTORE_C_REGS_EXCEPT_RCX
.macro RESTORE_REST RESTORE_C_REGS_HELPER 1,0,1,1,1
movq_cfi_restore 0*8, r15
movq_cfi_restore 1*8, r14
movq_cfi_restore 2*8, r13
movq_cfi_restore 3*8, r12
movq_cfi_restore 4*8, rbp
movq_cfi_restore 5*8, rbx
addq $REST_SKIP, %rsp
CFI_ADJUST_CFA_OFFSET -(REST_SKIP)
.endm .endm
.macro RESTORE_RSI_RDI
.macro SAVE_ALL RESTORE_C_REGS_HELPER 0,0,0,0,0
SAVE_ARGS .endm
SAVE_REST .macro RESTORE_RSI_RDI_RDX
RESTORE_C_REGS_HELPER 0,0,0,0,1
.endm .endm
.macro RESTORE_ALL addskip=0 .macro REMOVE_PT_GPREGS_FROM_STACK addskip=0
RESTORE_REST addq $15*8+\addskip, %rsp
RESTORE_ARGS 1, \addskip CFI_ADJUST_CFA_OFFSET -(15*8+\addskip)
.endm .endm
.macro icebp .macro icebp
......
...@@ -171,9 +171,9 @@ static inline int arch_irqs_disabled(void) ...@@ -171,9 +171,9 @@ static inline int arch_irqs_disabled(void)
#define ARCH_LOCKDEP_SYS_EXIT_IRQ \ #define ARCH_LOCKDEP_SYS_EXIT_IRQ \
TRACE_IRQS_ON; \ TRACE_IRQS_ON; \
sti; \ sti; \
SAVE_REST; \ SAVE_EXTRA_REGS; \
LOCKDEP_SYS_EXIT; \ LOCKDEP_SYS_EXIT; \
RESTORE_REST; \ RESTORE_EXTRA_REGS; \
cli; \ cli; \
TRACE_IRQS_OFF; TRACE_IRQS_OFF;
......
...@@ -49,7 +49,6 @@ ...@@ -49,7 +49,6 @@
#define EFLAGS 144 #define EFLAGS 144
#define RSP 152 #define RSP 152
#define SS 160 #define SS 160
#define ARGOFFSET R11
#endif /* __ASSEMBLY__ */ #endif /* __ASSEMBLY__ */
/* top of stack page */ /* top of stack page */
......
This diff is collapsed.
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment