Commit bf9ad37d authored by Oleg Nesterov, committed by Thomas Gleixner

signal, x86: Delay calling signals in atomic on RT enabled kernels

On x86_64 we must disable preemption before we enable interrupts
for stack faults, int3 and debugging, because the current task is using
a per CPU debug stack defined by the IST. If we schedule out, another task
can come in and use the same stack and cause the stack to be corrupted
and crash the kernel on return.

When CONFIG_PREEMPT_RT is enabled, spinlock_t locks become sleeping locks, and
one of these is the spinlock used in signal handling.

Some of the debug code (int3) causes do_trap() to send a signal.
This function takes a spinlock_t lock that has been converted to a
sleeping lock. If this happens, the above issue of a corrupted
stack is possible.

Instead of sending the signal right away, on PREEMPT_RT and x86
the signal information is stored in the task's task_struct and
TIF_NOTIFY_RESUME is set. Then on exit of the trap, the signal resume
code will send the signal when preemption is enabled.

[ rostedt: Switched from #ifdef CONFIG_PREEMPT_RT to
  ARCH_RT_DELAYS_SIGNAL_SEND and added comments to the code. ]
[bigeasy: Add on 32bit as per Yang Shi, minor rewording. ]
[ tglx: Use a config option ]
Signed-off-by: Oleg Nesterov <oleg@redhat.com>
Signed-off-by: Steven Rostedt <rostedt@goodmis.org>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Link: https://lore.kernel.org/r/Ygq5aBB/qMQw6aP5@linutronix.de
parent 0ce055f8
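
For intuition, here is a stand-alone user-space analogy of the scheme described above (illustration only, not kernel code; the names fake_task, fake_siginfo and the simplified functions are invented for this sketch): instead of delivering a signal immediately from an atomic section, the signal number is recorded in a per-task slot and delivered at the next safe point, mirroring forced_info and TIF_NOTIFY_RESUME in the patch below.

/*
 * User-space analogy of the deferral scheme, illustration only. All names
 * here (fake_task, fake_siginfo, ...) are invented for the example.
 */
#include <stdbool.h>
#include <stdio.h>

struct fake_siginfo {
	int si_signo;				/* 0 means "no signal pending" */
};

struct fake_task {
	struct fake_siginfo forced_info;	/* stands in for task_struct::forced_info */
	bool notify_resume;			/* stands in for TIF_NOTIFY_RESUME */
};

/* "Atomic context" path: only record the signal, do not deliver it. */
static bool force_sig_delayed(struct fake_task *t, int signo)
{
	if (t->forced_info.si_signo)
		return true;			/* at most one delayed signal at a time */
	t->forced_info.si_signo = signo;
	t->notify_resume = true;
	return true;
}

/* Safe point ("exit to user mode"): deliver the recorded signal and clear it. */
static void raise_delayed_signal(struct fake_task *t)
{
	if (t->forced_info.si_signo) {
		printf("delivering delayed signal %d\n", t->forced_info.si_signo);
		t->forced_info.si_signo = 0;
		t->notify_resume = false;
	}
}

int main(void)
{
	struct fake_task task = { { 0 }, false };

	force_sig_delayed(&task, 5);	/* a "trap" records the signal */
	raise_delayed_signal(&task);	/* later, the safe point delivers it */
	return 0;
}

Compiled as a normal C program, this prints the delayed delivery once the safe point is reached; the patch below does the same with force_sig_info() from the exit-to-user-mode loop.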
@@ -120,6 +120,7 @@ config X86
 	select ARCH_WANTS_NO_INSTR
 	select ARCH_WANT_HUGE_PMD_SHARE
 	select ARCH_WANT_LD_ORPHAN_WARN
+	select ARCH_WANTS_RT_DELAYED_SIGNALS
 	select ARCH_WANTS_THP_SWAP		if X86_64
 	select ARCH_HAS_PARANOID_L1D_FLUSH
 	select BUILDTIME_TABLE_SORT
...
@@ -1087,6 +1087,9 @@ struct task_struct {
 	/* Restored if set_restore_sigmask() was used: */
 	sigset_t			saved_sigmask;
 	struct sigpending		pending;
+#ifdef CONFIG_RT_DELAYED_SIGNALS
+	struct kernel_siginfo		forced_info;
+#endif
 	unsigned long			sas_ss_sp;
 	size_t				sas_ss_size;
 	unsigned int			sas_ss_flags;
...
@@ -132,4 +132,14 @@ config SCHED_CORE
 	  which is the likely usage by Linux distributions, there should
 	  be no measurable impact on performance.
+
+config ARCH_WANTS_RT_DELAYED_SIGNALS
+	bool
+	help
+	  This option is selected by architectures where raising signals
+	  can happen in atomic contexts on PREEMPT_RT enabled kernels. This
+	  option delays raising the signal until the return to user space
+	  loop where it is also delivered. X86 requires this to deliver
+	  signals from trap handlers which run on IST stacks.
+
+config RT_DELAYED_SIGNALS
+	def_bool PREEMPT_RT && ARCH_WANTS_RT_DELAYED_SIGNALS
@@ -148,6 +148,18 @@ static void handle_signal_work(struct pt_regs *regs, unsigned long ti_work)
 		arch_do_signal_or_restart(regs, ti_work & _TIF_SIGPENDING);
 }
 
+#ifdef CONFIG_RT_DELAYED_SIGNALS
+static inline void raise_delayed_signal(void)
+{
+	if (unlikely(current->forced_info.si_signo)) {
+		force_sig_info(&current->forced_info);
+		current->forced_info.si_signo = 0;
+	}
+}
+#else
+static inline void raise_delayed_signal(void) { }
+#endif
+
 static unsigned long exit_to_user_mode_loop(struct pt_regs *regs,
 					    unsigned long ti_work)
 {
@@ -162,6 +174,8 @@ static unsigned long exit_to_user_mode_loop(struct pt_regs *regs,
 		if (ti_work & _TIF_NEED_RESCHED)
 			schedule();
 
+		raise_delayed_signal();
+
 		if (ti_work & _TIF_UPROBE)
 			uprobe_notify_resume(regs);
...
@@ -1307,6 +1307,43 @@ enum sig_handler {
 	HANDLER_EXIT,	 /* Only visible as the process exit code */
 };
 
+/*
+ * On some architectures, PREEMPT_RT has to delay sending a signal from a
+ * trap since it cannot enable preemption, and the signal code's
+ * spin_locks turn into mutexes. Instead, it must set TIF_NOTIFY_RESUME
+ * which will send the signal on exit of the trap.
+ */
+#ifdef CONFIG_RT_DELAYED_SIGNALS
+static inline bool force_sig_delayed(struct kernel_siginfo *info,
+				     struct task_struct *t)
+{
+	if (!in_atomic())
+		return false;
+
+	if (WARN_ON_ONCE(t->forced_info.si_signo))
+		return true;
+
+	if (is_si_special(info)) {
+		WARN_ON_ONCE(info != SEND_SIG_PRIV);
+		t->forced_info.si_signo = info->si_signo;
+		t->forced_info.si_errno = 0;
+		t->forced_info.si_code = SI_KERNEL;
+		t->forced_info.si_pid = 0;
+		t->forced_info.si_uid = 0;
+	} else {
+		t->forced_info = *info;
+	}
+	set_tsk_thread_flag(t, TIF_NOTIFY_RESUME);
+	return true;
+}
+#else
+static inline bool force_sig_delayed(struct kernel_siginfo *info,
+				     struct task_struct *t)
+{
+	return false;
+}
+#endif
+
 /*
  * Force a signal that the process can't ignore: if necessary
  * we unblock the signal and change any SIG_IGN to SIG_DFL.
@@ -1327,6 +1364,9 @@ force_sig_info_to_task(struct kernel_siginfo *info, struct task_struct *t,
 	struct k_sigaction *action;
 	int sig = info->si_signo;
 
+	if (force_sig_delayed(info, t))
+		return 0;
+
 	spin_lock_irqsave(&t->sighand->siglock, flags);
 	action = &t->sighand->action[sig-1];
 	ignored = action->sa.sa_handler == SIG_IGN;
...
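
For orientation, a hedged caller-side sketch (a hypothetical handler in kernel style, not part of this patch and not buildable on its own): trap code keeps calling the existing force_sig()/force_sig_fault() APIs unchanged; when CONFIG_RT_DELAYED_SIGNALS is enabled and in_atomic() is true, force_sig_info_to_task() takes the force_sig_delayed() path, records the siginfo in current->forced_info, and the signal is raised later by raise_delayed_signal() in exit_to_user_mode_loop().

/*
 * Hypothetical trap-path caller, illustration only; the handler name is
 * invented, force_sig_fault() is the existing kernel API.
 */
static void example_breakpoint_trap(struct pt_regs *regs)
{
	/*
	 * Runs with preemption disabled on an IST stack. On PREEMPT_RT this
	 * call no longer takes the (now sleeping) siglock here; the signal
	 * is only recorded in current->forced_info, TIF_NOTIFY_RESUME is
	 * set, and delivery happens on return to user space.
	 */
	force_sig_fault(SIGTRAP, TRAP_BRKPT,
			(void __user *)instruction_pointer(regs));
}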