Commit 49de0493 authored by Thomas Gleixner, committed by Ingo Molnar

x86/perf/intel/cstate: Make cstate hotplug handling actually work

The current implementation, aside from being an incomprehensible mess, is broken.

  # cat /sys/bus/event_source/devices/cstate_core/cpumask
  0-17

That's on a quad socket machine with 72 physical cores! Qualitee stuff.

So it's not a surprise that event migration in case of CPU hotplug does not
work either.

  # perf stat -e cstate_core/c6-residency/ -C 1 sleep 60 &
  # echo 0 >/sys/devices/system/cpu/cpu1/online

Tracing cstate_pmu_event_update gives me:

 [001] cstate_pmu_event_update <-event_sched_out

After the fix it properly moves the event:

 [001] cstate_pmu_event_update <-event_sched_out
 [073] cstate_pmu_event_update <-__perf_event_read
 [073] cstate_pmu_event_update <-event_sched_out

The migration of pkg events does not work either. Not that I'm surprised.

I really could not be bothered to decode that loop mess and simply replaced it
by querying the proper cpumasks, which give us the answer in a comprehensible
way.

This also requires directing the event to the currently active reader CPU in
cstate_pmu_event_init(), otherwise the hotplug logic can't work.
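
A minimal sketch (not part of the patch) of the idea: the two questions,
"which CPU is the designated reader for this core?" and "which sibling takes
over when the reader goes offline?", reduce to single cpumask queries. The
helpers pick_core_reader() and pick_core_exit_target() are made-up names for
illustration; the cpumask/topology functions are the ones the patch itself
uses:

  #include <linux/cpumask.h>
  #include <linux/topology.h>

  /* Any CPU that is both a sibling of @cpu and already in the reader mask;
   * >= nr_cpu_ids means this core has no designated reader yet. */
  static unsigned int pick_core_reader(unsigned int cpu)
  {
          return cpumask_any_and(&cstate_core_cpu_mask,
                                 topology_sibling_cpumask(cpu));
  }

  /* Any sibling other than @cpu; >= nr_cpu_ids means the last thread of the
   * core is going away and there is nowhere to migrate the events to. */
  static unsigned int pick_core_exit_target(unsigned int cpu)
  {
          return cpumask_any_but(topology_sibling_cpumask(cpu), cpu);
  }

The package case is identical, just with cstate_pkg_cpu_mask and
topology_core_cpumask().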
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
[ Added event->cpu < 0 test to not explode]
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Borislav Petkov <bp@suse.de>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: Kan Liang <kan.liang@intel.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Stephane Eranian <eranian@google.com>
Cc: Vince Weaver <vincent.weaver@maine.edu>
Link: http://lkml.kernel.org/r/20160320185623.422519970@linutronix.de
Signed-off-by: Ingo Molnar <mingo@kernel.org>
parent 4b6e2571
@@ -385,7 +385,7 @@ static ssize_t cstate_get_attr_cpumask(struct device *dev,
 static int cstate_pmu_event_init(struct perf_event *event)
 {
         u64 cfg = event->attr.config;
-        int ret = 0;
+        int cpu;
 
         if (event->attr.type != event->pmu->type)
                 return -ENOENT;
@@ -400,26 +400,36 @@ static int cstate_pmu_event_init(struct perf_event *event)
             event->attr.sample_period) /* no sampling */
                 return -EINVAL;
 
+        if (event->cpu < 0)
+                return -EINVAL;
+
         if (event->pmu == &cstate_core_pmu) {
                 if (cfg >= PERF_CSTATE_CORE_EVENT_MAX)
                         return -EINVAL;
                 if (!core_msr[cfg].attr)
                         return -EINVAL;
                 event->hw.event_base = core_msr[cfg].msr;
+                cpu = cpumask_any_and(&cstate_core_cpu_mask,
+                                      topology_sibling_cpumask(event->cpu));
         } else if (event->pmu == &cstate_pkg_pmu) {
                 if (cfg >= PERF_CSTATE_PKG_EVENT_MAX)
                         return -EINVAL;
                 if (!pkg_msr[cfg].attr)
                         return -EINVAL;
                 event->hw.event_base = pkg_msr[cfg].msr;
-        } else
+                cpu = cpumask_any_and(&cstate_pkg_cpu_mask,
+                                      topology_core_cpumask(event->cpu));
+        } else {
                 return -ENOENT;
+        }
 
-        /* must be done before validate_group */
+        if (cpu >= nr_cpu_ids)
+                return -ENODEV;
+
+        event->cpu = cpu;
         event->hw.config = cfg;
         event->hw.idx = -1;
-
-        return ret;
+        return 0;
 }
 
 static inline u64 cstate_pmu_read_counter(struct perf_event *event)
@@ -469,102 +479,76 @@ static int cstate_pmu_event_add(struct perf_event *event, int mode)
         return 0;
 }
 
+/*
+ * Check if exiting cpu is the designated reader. If so migrate the
+ * events when there is a valid target available
+ */
 static void cstate_cpu_exit(int cpu)
 {
-        int i, id, target;
+        unsigned int target;
 
-        /* cpu exit for cstate core */
-        if (has_cstate_core) {
-                id = topology_core_id(cpu);
-                target = -1;
-
-                /* Migrate events if there is a valid target */
-                for_each_online_cpu(i) {
-                        if (i == cpu)
-                                continue;
-                        if (id == topology_core_id(i)) {
-                                target = i;
-                                break;
-                        }
-                }
-                if (cpumask_test_and_clear_cpu(cpu, &cstate_core_cpu_mask) && target >= 0)
+        if (has_cstate_core &&
+            cpumask_test_and_clear_cpu(cpu, &cstate_core_cpu_mask)) {
+
+                target = cpumask_any_but(topology_sibling_cpumask(cpu), cpu);
+                /* Migrate events if there is a valid target */
+                if (target < nr_cpu_ids) {
                         cpumask_set_cpu(target, &cstate_core_cpu_mask);
-                WARN_ON(cpumask_empty(&cstate_core_cpu_mask));
-                if (target >= 0)
                         perf_pmu_migrate_context(&cstate_core_pmu, cpu, target);
+                }
         }
 
-        /* cpu exit for cstate pkg */
-        if (has_cstate_pkg) {
-                id = topology_physical_package_id(cpu);
-                target = -1;
-
-                /* Migrate events if there is a valid target */
-                for_each_online_cpu(i) {
-                        if (i == cpu)
-                                continue;
-                        if (id == topology_physical_package_id(i)) {
-                                target = i;
-                                break;
-                        }
-                }
-                if (cpumask_test_and_clear_cpu(cpu, &cstate_pkg_cpu_mask) && target >= 0)
+        if (has_cstate_pkg &&
+            cpumask_test_and_clear_cpu(cpu, &cstate_pkg_cpu_mask)) {
+
+                target = cpumask_any_but(topology_core_cpumask(cpu), cpu);
+                /* Migrate events if there is a valid target */
+                if (target < nr_cpu_ids) {
                         cpumask_set_cpu(target, &cstate_pkg_cpu_mask);
-                WARN_ON(cpumask_empty(&cstate_pkg_cpu_mask));
-                if (target >= 0)
                         perf_pmu_migrate_context(&cstate_pkg_pmu, cpu, target);
+                }
         }
 }
 
 static void cstate_cpu_init(int cpu)
 {
-        int i, id;
+        unsigned int target;
 
-        /* cpu init for cstate core */
-        if (has_cstate_core) {
-                id = topology_core_id(cpu);
-                for_each_cpu(i, &cstate_core_cpu_mask) {
-                        if (id == topology_core_id(i))
-                                break;
-                }
-                if (i >= nr_cpu_ids)
-                        cpumask_set_cpu(cpu, &cstate_core_cpu_mask);
-        }
+        /*
+         * If this is the first online thread of that core, set it in
         * the core cpu mask as the designated reader.
+         */
+        target = cpumask_any_and(&cstate_core_cpu_mask,
+                                 topology_sibling_cpumask(cpu));
 
-        /* cpu init for cstate pkg */
-        if (has_cstate_pkg) {
-                id = topology_physical_package_id(cpu);
-                for_each_cpu(i, &cstate_pkg_cpu_mask) {
-                        if (id == topology_physical_package_id(i))
-                                break;
-                }
-                if (i >= nr_cpu_ids)
-                        cpumask_set_cpu(cpu, &cstate_pkg_cpu_mask);
-        }
+        if (has_cstate_core && target >= nr_cpu_ids)
+                cpumask_set_cpu(cpu, &cstate_core_cpu_mask);
+
+        /*
+         * If this is the first online thread of that package, set it
+         * in the package cpu mask as the designated reader.
+         */
+        target = cpumask_any_and(&cstate_pkg_cpu_mask,
+                                 topology_core_cpumask(cpu));
+        if (has_cstate_pkg && target >= nr_cpu_ids)
+                cpumask_set_cpu(cpu, &cstate_pkg_cpu_mask);
 }
 
 static int cstate_cpu_notifier(struct notifier_block *self,
                                unsigned long action, void *hcpu)
 {
         unsigned int cpu = (long)hcpu;
 
         switch (action & ~CPU_TASKS_FROZEN) {
-        case CPU_UP_PREPARE:
-                break;
         case CPU_STARTING:
                 cstate_cpu_init(cpu);
                 break;
-        case CPU_UP_CANCELED:
-        case CPU_DYING:
-                break;
-        case CPU_ONLINE:
-        case CPU_DEAD:
-                break;
         case CPU_DOWN_PREPARE:
                 cstate_cpu_exit(cpu);
                 break;
         default:
                 break;
         }
         return NOTIFY_OK;
 }