Commit 32671e37 authored by Peter Zijlstra

perf: Disallow mis-matched inherited group reads

Because group consistency is non-atomic between parent (filedesc) and children
(inherited) events, it is possible for a PERF_FORMAT_GROUP read() to try to sum
non-matching counter groups -- with nonsensical results.
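
For context, this is roughly the buffer a PERF_FORMAT_GROUP read() fills in, sketched here assuming read_format = PERF_FORMAT_GROUP | PERF_FORMAT_ID with the time fields left out (the struct name is illustrative; see perf_event_open(2) for the full layout). The counts are positional -- slot 0 is the leader, slot 1 the first sibling, and so on -- which is why summing a parent group and an inherited child group with different compositions mixes unrelated counters:

#include <stdint.h>

/* Illustrative layout of a PERF_FORMAT_GROUP | PERF_FORMAT_ID read(). */
struct group_read_format {
        uint64_t nr;                    /* number of events in the group */
        struct {
                uint64_t value;         /* count, summed over parent + inherited children */
                uint64_t id;            /* kernel-assigned id (PERF_FORMAT_ID) */
        } values[];                     /* one entry per event, leader first, positional */
};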

Add group_generation to distinguish the case where a parent group removes and
adds an event, and thus ends up with the same number of events as the inherited
groups but a different composition.

This became a problem when commit fa8c2693 ("perf/core: Invert
perf_read_group() loops") flipped the order of child_list and sibling_list.
Previously it would iterate the group (sibling_list) first, and for each
sibling traverse the child_list. In this order, only the group composition of
the parent is relevant. By flipping the order, the group composition of the
child (inherited) events becomes an issue and the mismatch in group
composition becomes evident.
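
Purely as an illustration (a throwaway userspace toy, not kernel code; the names toy_group and add_group are made up), this models the per-group positional accumulation that __perf_read_group_add() now performs for each child, and shows how a child group with a different composition misattributes counts:

#include <stdint.h>
#include <stdio.h>

/* Toy group: an ordered list of named counters. */
struct toy_group {
        int nr;
        const char *name[4];
        uint64_t count[4];
};

/* Positional accumulation, modelling what __perf_read_group_add() does per group. */
static void add_group(uint64_t *values, const struct toy_group *g)
{
        for (int i = 0; i < g->nr; i++)
                values[i] += g->count[i];
}

int main(void)
{
        /* Parent group {cycles, instructions}; the child group lost "cycles". */
        struct toy_group parent = { 2, { "cycles", "instructions" }, { 100, 200 } };
        struct toy_group child  = { 1, { "instructions" },           {  50 } };
        uint64_t values[4] = { 0 };

        add_group(values, &parent);
        add_group(values, &child);      /* child's instructions land in the cycles slot */

        for (int i = 0; i < parent.nr; i++)
                printf("%-12s = %llu\n", parent.name[i], (unsigned long long)values[i]);
        return 0;
}

Run, the toy prints cycles = 150 and instructions = 200: the child's instruction count has been folded into the parent's cycles slot, which is exactly the kind of nonsense the check below rejects.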

That said, even prior to this commit, while reading a group that is not
equally inherited was not broken, it still made no sense.

(Ab)use ECHILD as error return to indicate issues with child process group
composition.

Fixes: fa8c2693 ("perf/core: Invert perf_read_group() loops")
Reported-by: Budimir Markovic <markovicbudimir@gmail.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Link: https://lkml.kernel.org/r/20231018115654.GK33217@noisy.programming.kicks-ass.net
parent 58720809
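
For completeness, a minimal userspace sketch of the read side (illustrative only; the counter choice, buffer size and workload placeholder are arbitrary, and error handling is trimmed) showing where the new -ECHILD surfaces and how a caller might interpret it:

#define _GNU_SOURCE
#include <errno.h>
#include <inttypes.h>
#include <linux/perf_event.h>
#include <stdint.h>
#include <stdio.h>
#include <sys/ioctl.h>
#include <sys/syscall.h>
#include <sys/types.h>
#include <unistd.h>

static int perf_event_open(struct perf_event_attr *attr, pid_t pid, int cpu,
                           int group_fd, unsigned long flags)
{
        return syscall(__NR_perf_event_open, attr, pid, cpu, group_fd, flags);
}

int main(void)
{
        struct perf_event_attr attr = {
                .size        = sizeof(attr),
                .type        = PERF_TYPE_HARDWARE,
                .config      = PERF_COUNT_HW_CPU_CYCLES,
                .read_format = PERF_FORMAT_GROUP | PERF_FORMAT_ID,
                .disabled    = 1,
                .inherit     = 1,       /* forked children inherit the whole group */
        };
        uint64_t buf[16];
        int leader, sibling;

        leader = perf_event_open(&attr, 0, -1, -1, 0);
        if (leader < 0) {
                perror("perf_event_open(leader)");
                return 1;
        }

        attr.config   = PERF_COUNT_HW_INSTRUCTIONS;
        attr.disabled = 0;
        sibling = perf_event_open(&attr, 0, -1, leader, 0);
        if (sibling < 0) {
                perror("perf_event_open(sibling)");
                return 1;
        }

        ioctl(leader, PERF_EVENT_IOC_ENABLE, PERF_IOC_FLAG_GROUP);
        /* ... run the workload, possibly fork()ing children ... */
        ioctl(leader, PERF_EVENT_IOC_DISABLE, PERF_IOC_FLAG_GROUP);

        if (read(leader, buf, sizeof(buf)) < 0) {
                if (errno == ECHILD)    /* new with this patch */
                        fprintf(stderr, "group composition changed; values unreliable\n");
                else
                        perror("read");
                return 1;
        }

        /* buf[0] = nr, then {value, id} pairs: leader first, then the sibling. */
        printf("nr=%" PRIu64 " cycles=%" PRIu64 " instructions=%" PRIu64 "\n",
               buf[0], buf[1], buf[3]);
        return 0;
}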
@@ -704,6 +704,7 @@ struct perf_event {
 	/* The cumulative AND of all event_caps for events in this group. */
 	int group_caps;
+	unsigned int group_generation;
 	struct perf_event *group_leader;
 	/*
 	 * event->pmu will always point to pmu in which this event belongs.
...
@@ -1954,6 +1954,7 @@ static void perf_group_attach(struct perf_event *event)
 	list_add_tail(&event->sibling_list, &group_leader->sibling_list);
 	group_leader->nr_siblings++;
+	group_leader->group_generation++;
 	perf_event__header_size(group_leader);
@@ -2144,6 +2145,7 @@ static void perf_group_detach(struct perf_event *event)
 	if (leader != event) {
 		list_del_init(&event->sibling_list);
 		event->group_leader->nr_siblings--;
+		event->group_leader->group_generation++;
 		goto out;
 	}
@@ -5440,7 +5442,7 @@ static int __perf_read_group_add(struct perf_event *leader,
 					u64 read_format, u64 *values)
 {
 	struct perf_event_context *ctx = leader->ctx;
-	struct perf_event *sub;
+	struct perf_event *sub, *parent;
 	unsigned long flags;
 	int n = 1; /* skip @nr */
 	int ret;
@@ -5450,6 +5452,33 @@ static int __perf_read_group_add(struct perf_event *leader,
 		return ret;
 	raw_spin_lock_irqsave(&ctx->lock, flags);
+	/*
+	 * Verify the grouping between the parent and child (inherited)
+	 * events is still intact.
+	 *
+	 * Specifically:
+	 *  - leader->ctx->lock pins leader->sibling_list
+	 *  - parent->child_mutex pins parent->child_list
+	 *  - parent->ctx->mutex pins parent->sibling_list
+	 *
+	 * Because parent->ctx != leader->ctx (and child_list nests inside
+	 * ctx->mutex), group destruction is not atomic between children, also
+	 * see perf_event_release_kernel(). Additionally, parent can grow the
+	 * group.
+	 *
+	 * Therefore it is possible to have parent and child groups in a
+	 * different configuration and summing over such a beast makes no sense
+	 * whatsoever.
+	 *
+	 * Reject this.
+	 */
+	parent = leader->parent;
+	if (parent &&
+	    (parent->group_generation != leader->group_generation ||
+	     parent->nr_siblings != leader->nr_siblings)) {
+		ret = -ECHILD;
+		goto unlock;
+	}
 	/*
 	 * Since we co-schedule groups, {enabled,running} times of siblings
@@ -5483,8 +5512,9 @@ static int __perf_read_group_add(struct perf_event *leader,
 		values[n++] = atomic64_read(&sub->lost_samples);
 	}
+unlock:
 	raw_spin_unlock_irqrestore(&ctx->lock, flags);
-	return 0;
+	return ret;
 }
 static int perf_read_group(struct perf_event *event,
@@ -5503,10 +5533,6 @@ static int perf_read_group(struct perf_event *event,
 	values[0] = 1 + leader->nr_siblings;
-	/*
-	 * By locking the child_mutex of the leader we effectively
-	 * lock the child list of all siblings.. XXX explain how.
-	 */
 	mutex_lock(&leader->child_mutex);
 	ret = __perf_read_group_add(leader, read_format, values);
@@ -13346,6 +13372,7 @@ static int inherit_group(struct perf_event *parent_event,
 		    !perf_get_aux_event(child_ctr, leader))
 			return -EINVAL;
 	}
+	leader->group_generation = parent_event->group_generation;
 	return 0;
 }
...