Commit 2a06bf3e authored by Nicholas Piggin, committed by Michael Ellerman

powerpc/64: context tracking remove _TIF_NOHZ

Add context tracking to the system call handler explicitly, and remove
_TIF_NOHZ.

This improves system call performance when nohz_full is enabled. On a
POWER9, the cost of a gettid scv system call improves from 1129 to 1004
cycles on a nohz_full CPU, and from 550 to 430 cycles on a housekeeping
CPU.
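
As a rough illustration of how such numbers can be gathered (a minimal
sketch, not the harness behind the figures above: it reports wall-clock
nanoseconds per call rather than cycles, goes through the generic
syscall(2) wrapper, and whether that uses scv or sc depends on the
libc/kernel combination; the iteration count is arbitrary):

  /* Hypothetical microbenchmark: time a cheap syscall in a tight loop.
   * Pin it to a nohz_full or a housekeeping CPU (e.g. with taskset)
   * to compare the two cases.
   */
  #include <stdio.h>
  #include <time.h>
  #include <unistd.h>
  #include <sys/syscall.h>

  int main(void)
  {
          enum { ITERS = 10 * 1000 * 1000 };      /* arbitrary */
          struct timespec t0, t1;

          clock_gettime(CLOCK_MONOTONIC, &t0);
          for (long i = 0; i < ITERS; i++)
                  syscall(SYS_gettid);    /* cheap, side-effect free */
          clock_gettime(CLOCK_MONOTONIC, &t1);

          printf("%.1f ns/call\n",
                 ((t1.tv_sec - t0.tv_sec) * 1e9 +
                  (t1.tv_nsec - t0.tv_nsec)) / ITERS);
          return 0;
  }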
Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
Link: https://lore.kernel.org/r/20210130130852.2952424-31-npiggin@gmail.com
parent e6f8a6c8
@@ -196,7 +196,6 @@ config PPC
 	select HAVE_STACKPROTECTOR		if PPC64 && $(cc-option,-mstack-protector-guard=tls -mstack-protector-guard-reg=r13)
 	select HAVE_STACKPROTECTOR		if PPC32 && $(cc-option,-mstack-protector-guard=tls -mstack-protector-guard-reg=r2)
 	select HAVE_CONTEXT_TRACKING		if PPC64
-	select HAVE_TIF_NOHZ			if PPC64
 	select HAVE_DEBUG_KMEMLEAK
 	select HAVE_DEBUG_STACKOVERFLOW
 	select HAVE_DYNAMIC_FTRACE
...
@@ -94,7 +94,6 @@ void arch_setup_new_exec(void);
 #define TIF_PATCH_PENDING	6	/* pending live patching update */
 #define TIF_SYSCALL_AUDIT	7	/* syscall auditing active */
 #define TIF_SINGLESTEP		8	/* singlestepping active */
-#define TIF_NOHZ		9	/* in adaptive nohz mode */
 #define TIF_SECCOMP		10	/* secure computing */
 #define TIF_RESTOREALL		11	/* Restore all regs (implies NOERROR) */
 #define TIF_NOERROR		12	/* Force successful syscall return */
@@ -128,11 +127,10 @@ void arch_setup_new_exec(void);
 #define _TIF_UPROBE		(1<<TIF_UPROBE)
 #define _TIF_SYSCALL_TRACEPOINT	(1<<TIF_SYSCALL_TRACEPOINT)
 #define _TIF_EMULATE_STACK_STORE	(1<<TIF_EMULATE_STACK_STORE)
-#define _TIF_NOHZ		(1<<TIF_NOHZ)
 #define _TIF_SYSCALL_EMU	(1<<TIF_SYSCALL_EMU)
 #define _TIF_SYSCALL_DOTRACE	(_TIF_SYSCALL_TRACE | _TIF_SYSCALL_AUDIT | \
 				 _TIF_SECCOMP | _TIF_SYSCALL_TRACEPOINT | \
-				 _TIF_NOHZ | _TIF_SYSCALL_EMU)
+				 _TIF_SYSCALL_EMU)
 #define _TIF_USER_WORK_MASK	(_TIF_SIGPENDING | _TIF_NEED_RESCHED | \
 				 _TIF_NOTIFY_RESUME | _TIF_UPROBE | \
...
@@ -262,8 +262,6 @@ long do_syscall_trace_enter(struct pt_regs *regs)
 {
 	u32 flags;

-	user_exit();
-
 	flags = READ_ONCE(current_thread_info()->flags) &
 		(_TIF_SYSCALL_EMU | _TIF_SYSCALL_TRACE);
@@ -340,8 +338,6 @@ void do_syscall_trace_leave(struct pt_regs *regs)
 	step = test_thread_flag(TIF_SINGLESTEP);
 	if (step || test_thread_flag(TIF_SYSCALL_TRACE))
 		tracehook_report_syscall_exit(regs, step);
-
-	user_enter();
 }

 void __init pt_regs_check(void);
...
@@ -282,8 +282,6 @@ static void do_signal(struct task_struct *tsk)
 void do_notify_resume(struct pt_regs *regs, unsigned long thread_info_flags)
 {
-	user_exit();
-
 	if (thread_info_flags & _TIF_UPROBE)
 		uprobe_notify_resume(regs);
@@ -299,8 +297,6 @@ void do_notify_resume(struct pt_regs *regs, unsigned long thread_info_flags)
 		tracehook_notify_resume(regs);
 		rseq_handle_notify_resume(NULL, regs);
 	}
-
-	user_enter();
 }

 static unsigned long get_tm_stackpointer(struct task_struct *tsk)
...
 // SPDX-License-Identifier: GPL-2.0-or-later

+#include <linux/context_tracking.h>
 #include <linux/err.h>
 #include <asm/asm-prototypes.h>
 #include <asm/kup.h>
 #include <asm/cputime.h>
+#include <asm/interrupt.h>
 #include <asm/hw_irq.h>
 #include <asm/interrupt.h>
 #include <asm/kprobes.h>
@@ -28,6 +30,9 @@ notrace long system_call_exception(long r3, long r4, long r5,
 	if (IS_ENABLED(CONFIG_PPC_IRQ_SOFT_MASK_DEBUG))
 		BUG_ON(irq_soft_mask_return() != IRQS_ALL_DISABLED);

+	CT_WARN_ON(ct_state() == CONTEXT_KERNEL);
+	user_exit_irqoff();
+
 	trace_hardirqs_off(); /* finish reconciling */

 	if (IS_ENABLED(CONFIG_PPC_BOOK3S))
@@ -144,7 +149,7 @@ notrace long system_call_exception(long r3, long r4, long r5,
  * enabled when the interrupt handler returns (indicating a process-context /
  * synchronous interrupt) then irqs_enabled should be true.
  */
-static notrace inline bool prep_irq_for_enabled_exit(bool clear_ri, bool irqs_enabled)
+static notrace inline bool __prep_irq_for_enabled_exit(bool clear_ri)
 {
 	/* This must be done with RI=1 because tracing may touch vmaps */
 	trace_hardirqs_on();
@@ -161,29 +166,6 @@ static notrace inline bool prep_irq_for_enabled_exit(bool clear_ri, bool irqs_en
 		trace_hardirqs_off();
 		local_paca->irq_happened |= PACA_IRQ_HARD_DIS;

-		/*
-		 * Must replay pending soft-masked interrupts now. Don't just
-		 * local_irq_enable(); local_irq_disable(); because if we are
-		 * returning from an asynchronous interrupt here, another one
-		 * might hit after irqs are enabled, and it would exit via this
-		 * same path allowing another to fire, and so on unbounded.
-		 *
-		 * If interrupts were enabled when this interrupt exited,
-		 * indicating a process context (synchronous) interrupt,
-		 * local_irq_enable/disable can be used, which will enable
-		 * interrupts rather than keeping them masked (unclear how
-		 * much benefit this is over just replaying for all cases,
-		 * because we immediately disable again, so all we're really
-		 * doing is allowing hard interrupts to execute directly for
-		 * a very small time, rather than being masked and replayed).
-		 */
-		if (irqs_enabled) {
-			local_irq_enable();
-			local_irq_disable();
-		} else {
-			replay_soft_interrupts();
-		}
-
 		return false;
 	}

 	local_paca->irq_happened = 0;
@@ -192,6 +174,37 @@ static notrace inline bool prep_irq_for_enabled_exit(bool clear_ri, bool irqs_en
 	return true;
 }

+static notrace inline bool prep_irq_for_enabled_exit(bool clear_ri, bool irqs_enabled)
+{
+	if (__prep_irq_for_enabled_exit(clear_ri))
+		return true;
+
+	/*
+	 * Must replay pending soft-masked interrupts now. Don't just
+	 * local_irq_enable(); local_irq_disable(); because if we are
+	 * returning from an asynchronous interrupt here, another one
+	 * might hit after irqs are enabled, and it would exit via this
+	 * same path allowing another to fire, and so on unbounded.
+	 *
+	 * If interrupts were enabled when this interrupt exited,
+	 * indicating a process context (synchronous) interrupt,
+	 * local_irq_enable/disable can be used, which will enable
+	 * interrupts rather than keeping them masked (unclear how
+	 * much benefit this is over just replaying for all cases,
+	 * because we immediately disable again, so all we're really
+	 * doing is allowing hard interrupts to execute directly for
+	 * a very small time, rather than being masked and replayed).
+	 */
+	if (irqs_enabled) {
+		local_irq_enable();
+		local_irq_disable();
+	} else {
+		replay_soft_interrupts();
+	}
+
+	return false;
+}

 /*
  * This should be called after a syscall returns, with r3 the return value
  * from the syscall. If this function returns non-zero, the system call
@@ -209,6 +222,8 @@ notrace unsigned long syscall_exit_prepare(unsigned long r3,
 	unsigned long ti_flags;
 	unsigned long ret = 0;

+	CT_WARN_ON(ct_state() == CONTEXT_USER);
+
 	kuap_check_amr();

 	regs->result = r3;
@@ -240,9 +255,9 @@ notrace unsigned long syscall_exit_prepare(unsigned long r3,
 		ret |= _TIF_RESTOREALL;
 	}

+again:
 	local_irq_disable();
-again:
 	ti_flags = READ_ONCE(*ti_flagsp);
 	while (unlikely(ti_flags & (_TIF_USER_WORK_MASK & ~_TIF_RESTORE_TM))) {
 		local_irq_enable();
@@ -286,9 +301,14 @@ notrace unsigned long syscall_exit_prepare(unsigned long r3,
 		}
 	}

+	user_enter_irqoff();
+
 	/* scv need not set RI=0 because SRRs are not used */
-	if (unlikely(!prep_irq_for_enabled_exit(!scv, true)))
+	if (unlikely(!__prep_irq_for_enabled_exit(!scv))) {
+		user_exit_irqoff();
+		local_irq_enable();
 		goto again;
+	}

 #ifdef CONFIG_PPC_TRANSACTIONAL_MEM
 	local_paca->tm_scratch = regs->msr;
...
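
Condensed from the hunks above (a paraphrase with elisions, not literal
kernel source): syscall entry and exit now bracket kernel execution with
explicit context-tracking calls, and the exit path unwinds its
user-context transition when pending soft-masked interrupts force a
retry:

  notrace long system_call_exception(...)
  {
          ...
          /* coming from userspace: context tracking still says user */
          CT_WARN_ON(ct_state() == CONTEXT_KERNEL);
          user_exit_irqoff();             /* enter kernel context */
          ...
  }

  notrace unsigned long syscall_exit_prepare(...)
  {
          CT_WARN_ON(ct_state() == CONTEXT_USER);
          ...
  again:
          local_irq_disable();
          ... /* handle signals, reschedule, etc. */
          user_enter_irqoff();            /* about to return to user */
          if (unlikely(!__prep_irq_for_enabled_exit(!scv))) {
                  /* soft-masked interrupts pending: back out, retry */
                  user_exit_irqoff();
                  local_irq_enable();     /* replays pending interrupts */
                  goto again;
          }
          ...
  }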