Commit 0d40a6d8 authored by Sebastian Andrzej Siewior, committed by Peter Zijlstra

perf: Move swevent_htable::recursion into task_struct.

The swevent_htable::recursion counter guards against recursion: it
prevents a swevent from being created while another event is being
processed. The counter is per-CPU, and preemption must be disabled to
keep it stable. perf_pending_task() disables preemption to access the
counter and then send the signal. This is problematic on PREEMPT_RT
because sending a signal acquires a spinlock_t, which becomes a
sleeping lock on PREEMPT_RT and must not be acquired in atomic context.
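
For context, the counter is taken and released around event processing
roughly as follows (a minimal sketch: the caller shown is hypothetical,
the two helpers are the real ones touched below):

	/* hypothetical caller: the usual pattern around software-event code */
	int rctx;

	rctx = perf_swevent_get_recursion_context();
	if (rctx < 0)
		return;		/* already handling an event in this context */

	/* ... process the software event ... */

	perf_swevent_put_recursion_context(rctx);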

The atomic context can be avoided by moving the counter into
task_struct. There is a 4-byte hole between futex_state (usually always
enabled) and the following perf pointer (perf_event_ctxp). Now that the
recursion counter has been shrunk to an array of u8, it fits that hole
perfectly.
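
Illustratively, the relevant task_struct fragment after this change (a
sketch with other futex fields elided; size annotations are for a
typical 64-bit build, not taken from real pahole output):

	#ifdef CONFIG_FUTEX
		unsigned int			futex_state;	/* 4 bytes; a 4-byte hole followed */
	#endif
	#ifdef CONFIG_PERF_EVENTS
		u8				perf_recursion[PERF_NR_CONTEXTS]; /* 4 x u8 fills the hole */
		struct perf_event_context	*perf_event_ctxp;	/* 8-byte aligned pointer follows */
	#endif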

Move swevent_htable::recursion into task_struct.
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Tested-by: Marco Elver <elver@google.com>
Link: https://lore.kernel.org/r/20240704170424.1466941-6-bigeasy@linutronix.de
parent 5af42f92
--- a/include/linux/perf_event.h
+++ b/include/linux/perf_event.h
@@ -970,12 +970,6 @@ struct perf_event_context {
 	local_t				nr_pending;
 };
 
-/*
- * Number of contexts where an event can trigger:
- *	task, softirq, hardirq, nmi.
- */
-#define PERF_NR_CONTEXTS	4
-
 struct perf_cpu_pmu_context {
 	struct perf_event_pmu_context	epc;
 	struct perf_event_pmu_context	*task_epc;
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -734,6 +734,12 @@ enum perf_event_task_context {
 	perf_nr_task_contexts,
 };
 
+/*
+ * Number of contexts where an event can trigger:
+ *	task, softirq, hardirq, nmi.
+ */
+#define PERF_NR_CONTEXTS	4
+
 struct wake_q_node {
 	struct wake_q_node *next;
 };
@@ -1256,6 +1262,7 @@ struct task_struct {
 	unsigned int			futex_state;
 #endif
 #ifdef CONFIG_PERF_EVENTS
+	u8				perf_recursion[PERF_NR_CONTEXTS];
 	struct perf_event_context	*perf_event_ctxp;
 	struct mutex			perf_event_mutex;
 	struct list_head		perf_event_list;
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -9763,11 +9763,7 @@ struct swevent_htable {
 	struct swevent_hlist		*swevent_hlist;
 	struct mutex			hlist_mutex;
 	int				hlist_refcount;
-
-	/* Recursion avoidance in each contexts */
-	u8				recursion[PERF_NR_CONTEXTS];
 };
 
 static DEFINE_PER_CPU(struct swevent_htable, swevent_htable);
 
 /*
@@ -9965,17 +9961,13 @@ DEFINE_PER_CPU(struct pt_regs, __perf_regs[4]);
 
 int perf_swevent_get_recursion_context(void)
 {
-	struct swevent_htable *swhash = this_cpu_ptr(&swevent_htable);
-
-	return get_recursion_context(swhash->recursion);
+	return get_recursion_context(current->perf_recursion);
 }
 EXPORT_SYMBOL_GPL(perf_swevent_get_recursion_context);
 
 void perf_swevent_put_recursion_context(int rctx)
 {
-	struct swevent_htable *swhash = this_cpu_ptr(&swevent_htable);
-
-	put_recursion_context(swhash->recursion, rctx);
+	put_recursion_context(current->perf_recursion, rctx);
 }
 
 void ___perf_sw_event(u32 event_id, u64 nr, struct pt_regs *regs, u64 addr)
@@ -13642,6 +13634,7 @@ int perf_event_init_task(struct task_struct *child, u64 clone_flags)
 {
 	int ret;
 
+	memset(child->perf_recursion, 0, sizeof(child->perf_recursion));
 	child->perf_event_ctxp = NULL;
 	mutex_init(&child->perf_event_mutex);
 	INIT_LIST_HEAD(&child->perf_event_list);
--- a/kernel/events/internal.h
+++ b/kernel/events/internal.h
@@ -221,7 +221,7 @@ static inline int get_recursion_context(u8 *recursion)
 	return rctx;
 }
 
-static inline void put_recursion_context(u8 *recursion, int rctx)
+static inline void put_recursion_context(u8 *recursion, unsigned char rctx)
 {
 	barrier();
 	recursion[rctx]--;