Commit b36817e8 authored by Stephane Eranian, committed by Ingo Molnar

perf/x86: Add Intel LBR sharing logic

The Intel LBR on some recent processors is capable
of filtering branches by type. The filter is configurable
via the LBR_SELECT MSR register.

There are limitations on how this register can be used.

On Nehalem/Westmere, the LBR_SELECT is shared by the two HT threads
when HT is on. It is private to each core when HT is off.

On SandyBridge, the LBR_SELECT register is private to each thread
when HT is on. It is private to each core when HT is off.

The kernel must manage the sharing of LBR_SELECT. It allows
multiple users on the same logical CPU to use LBR_SELECT as
long as they all program it with the same value. On NHM/WSM,
the same restriction applies across sibling CPUs (HT threads).
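The rule reduces to a refcount-plus-value check on the er_account backing
the shared MSR: a slot may be handed out when it is unused, or when the new
user wants exactly the value already programmed. A minimal sketch of that
check, assuming the struct er_account fields from perf_event.h
(lbr_sel_compatible() is a hypothetical helper; the real check lives inline
in __intel_shared_reg_get_constraints(), shown in the diff below):

/*
 * Hypothetical helper illustrating the sharing rule; the actual
 * logic is open-coded in __intel_shared_reg_get_constraints().
 */
static bool lbr_sel_compatible(struct er_account *era, u64 wanted_config)
{
	/* free slot: the first user may program any value */
	if (!atomic_read(&era->ref))
		return true;

	/* occupied slot: only admit users asking for the identical value */
	return era->config == wanted_config;
}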

This patch implements this sharing logic by leveraging the
mechanism put in place for managing the offcore_response
shared MSR.

We modify __intel_shared_reg_get_constraints() so that
x86_get_event_constraints() still gets called, because events
that use the LBR may also be counter-constrained.
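Concretely, a NULL return from the shared-regs path now means "no verdict
yet" rather than "unconstrained", so the caller keeps looking. Roughly how
the fall-through works in intel_get_event_constraints(), abridged from
perf_event_intel.c of this era:

static struct event_constraint *
intel_get_event_constraints(struct cpu_hw_events *cpuc, struct perf_event *event)
{
	struct event_constraint *c;

	c = intel_bts_constraints(event);
	if (c)
		return c;

	c = intel_pebs_constraints(event);
	if (c)
		return c;

	/* NULL now means "no verdict yet", not "unconstrained" */
	c = intel_shared_regs_constraints(cpuc, event);
	if (c)
		return c;

	/* fall through to the regular per-event constraint table */
	return x86_get_event_constraints(cpuc, event);
}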
Signed-off-by: Stephane Eranian <eranian@google.com>
Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Link: http://lkml.kernel.org/r/1328826068-11713-4-git-send-email-eranian@google.com
Signed-off-by: Ingo Molnar <mingo@elte.hu>
parent 225ce539
arch/x86/kernel/cpu/perf_event.c
@@ -426,6 +426,10 @@ static int __x86_pmu_event_init(struct perf_event *event)
 	/* mark unused */
 	event->hw.extra_reg.idx = EXTRA_REG_NONE;

+	/* mark not used */
+	event->hw.extra_reg.idx = EXTRA_REG_NONE;
+	event->hw.branch_reg.idx = EXTRA_REG_NONE;
+
 	return x86_pmu.hw_config(event);
 }
arch/x86/kernel/cpu/perf_event.h
@@ -33,6 +33,7 @@ enum extra_reg_type {
 	EXTRA_REG_RSP_0 = 0,	/* offcore_response_0 */
 	EXTRA_REG_RSP_1 = 1,	/* offcore_response_1 */
+	EXTRA_REG_LBR   = 2,	/* lbr_select */

 	EXTRA_REG_MAX		/* number of entries needed */
 };
@@ -130,6 +131,7 @@ struct cpu_hw_events {
 	void				*lbr_context;
 	struct perf_branch_stack	lbr_stack;
 	struct perf_branch_entry	lbr_entries[MAX_LBR_ENTRIES];
+	struct er_account		*lbr_sel;

 	/*
 	 * Intel host/guest exclude bits
@@ -342,6 +344,8 @@ struct x86_pmu {
 	 */
 	unsigned long	lbr_tos, lbr_from, lbr_to; /* MSR base regs */
 	int		lbr_nr;			   /* hardware stack size */
+	u64		lbr_sel_mask;		   /* LBR_SELECT valid bits */
+	const int	*lbr_sel_map;		   /* lbr_select mappings */

 	/*
 	 * Extra registers for events
arch/x86/kernel/cpu/perf_event_intel.c
@@ -1123,17 +1123,17 @@ static bool intel_try_alt_er(struct perf_event *event, int orig_idx)
  */
 static struct event_constraint *
 __intel_shared_reg_get_constraints(struct cpu_hw_events *cpuc,
-				   struct perf_event *event)
+				   struct perf_event *event,
+				   struct hw_perf_event_extra *reg)
 {
 	struct event_constraint *c = &emptyconstraint;
-	struct hw_perf_event_extra *reg = &event->hw.extra_reg;
 	struct er_account *era;
 	unsigned long flags;
 	int orig_idx = reg->idx;

 	/* already allocated shared msr */
 	if (reg->alloc)
-		return &unconstrained;
+		return NULL; /* call x86_get_event_constraint() */

 again:
 	era = &cpuc->shared_regs->regs[reg->idx];
@@ -1156,14 +1156,10 @@ __intel_shared_reg_get_constraints(struct cpu_hw_events *cpuc,
 		reg->alloc = 1;

 		/*
-		 * All events using extra_reg are unconstrained.
-		 * Avoids calling x86_get_event_constraints()
-		 *
-		 * Must revisit if extra_reg controlling events
-		 * ever have constraints. Worst case we go through
-		 * the regular event constraint table.
+		 * need to call x86_get_event_constraint()
+		 * to check if associated event has constraints
 		 */
-		c = &unconstrained;
+		c = NULL;
 	} else if (intel_try_alt_er(event, orig_idx)) {
 		raw_spin_unlock_irqrestore(&era->lock, flags);
 		goto again;
@@ -1200,11 +1196,23 @@ static struct event_constraint *
 intel_shared_regs_constraints(struct cpu_hw_events *cpuc,
 			      struct perf_event *event)
 {
-	struct event_constraint *c = NULL;
+	struct event_constraint *c = NULL, *d;
+	struct hw_perf_event_extra *xreg, *breg;

-	if (event->hw.extra_reg.idx != EXTRA_REG_NONE)
-		c = __intel_shared_reg_get_constraints(cpuc, event);
-
+	xreg = &event->hw.extra_reg;
+	if (xreg->idx != EXTRA_REG_NONE) {
+		c = __intel_shared_reg_get_constraints(cpuc, event, xreg);
+		if (c == &emptyconstraint)
+			return c;
+	}
+	breg = &event->hw.branch_reg;
+	if (breg->idx != EXTRA_REG_NONE) {
+		d = __intel_shared_reg_get_constraints(cpuc, event, breg);
+		if (d == &emptyconstraint) {
+			__intel_shared_reg_put_constraints(cpuc, xreg);
+			c = d;
+		}
+	}
 	return c;
 }
@@ -1252,6 +1260,10 @@ intel_put_shared_regs_event_constraints(struct cpu_hw_events *cpuc,
 	reg = &event->hw.extra_reg;
 	if (reg->idx != EXTRA_REG_NONE)
 		__intel_shared_reg_put_constraints(cpuc, reg);
+
+	reg = &event->hw.branch_reg;
+	if (reg->idx != EXTRA_REG_NONE)
+		__intel_shared_reg_put_constraints(cpuc, reg);
 }

 static void intel_put_event_constraints(struct cpu_hw_events *cpuc,
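For reference, the put helper called twice above is unchanged by this patch:
it drops one reference so the slot can be reprogrammed once its last user is
gone. Abridged from perf_event_intel.c of this era:

static void
__intel_shared_reg_put_constraints(struct cpu_hw_events *cpuc,
				   struct hw_perf_event_extra *reg)
{
	struct er_account *era;

	/* nothing to do if the shared MSR was never allocated */
	if (!reg->alloc)
		return;

	era = &cpuc->shared_regs->regs[reg->idx];

	/* one fewer user of this shared value */
	atomic_dec(&era->ref);

	/* force re-allocation on the next get */
	reg->alloc = 0;
}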
@@ -1431,7 +1443,7 @@ static int intel_pmu_cpu_prepare(int cpu)
 {
 	struct cpu_hw_events *cpuc = &per_cpu(cpu_hw_events, cpu);

-	if (!x86_pmu.extra_regs)
+	if (!(x86_pmu.extra_regs || x86_pmu.lbr_sel_map))
 		return NOTIFY_OK;

 	cpuc->shared_regs = allocate_shared_regs(cpu);
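Note that allocate_shared_regs() itself needs no change: its regs[] array is
already sized by EXTRA_REG_MAX, so adding EXTRA_REG_LBR to the enum
automatically reserves an er_account slot for LBR_SELECT. Abridged from
perf_event_intel.c of this era:

static struct intel_shared_regs *allocate_shared_regs(int cpu)
{
	struct intel_shared_regs *regs;
	int i;

	regs = kzalloc_node(sizeof(struct intel_shared_regs),
			    GFP_KERNEL, cpu_to_node(cpu));
	if (regs) {
		/* initialize one lock per extra-register slot for lockdep */
		for (i = 0; i < EXTRA_REG_MAX; i++)
			raw_spin_lock_init(&regs->regs[i].lock);

		regs->core_id = -1;
	}
	return regs;
}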
@@ -1453,9 +1465,12 @@ static void intel_pmu_cpu_starting(int cpu)
 	 */
 	intel_pmu_lbr_reset();

-	if (!cpuc->shared_regs || (x86_pmu.er_flags & ERF_NO_HT_SHARING))
+	cpuc->lbr_sel = NULL;
+
+	if (!cpuc->shared_regs)
 		return;

+	if (!(x86_pmu.er_flags & ERF_NO_HT_SHARING)) {
 	for_each_cpu(i, topology_thread_cpumask(cpu)) {
 		struct intel_shared_regs *pc;

@@ -1466,9 +1481,12 @@ static void intel_pmu_cpu_starting(int cpu)
 			break;
 		}
 	}
-
 	cpuc->shared_regs->core_id = core_id;
 	cpuc->shared_regs->refcnt++;
+	}
+
+	if (x86_pmu.lbr_sel_map)
+		cpuc->lbr_sel = &cpuc->shared_regs->regs[EXTRA_REG_LBR];
 }
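The cpuc->lbr_sel pointer cached above is consumed when the LBR is enabled:
the companion LBR filtering patch in this series (not part of this diff) has
__intel_pmu_lbr_enable() write the agreed-upon value into the MSR, roughly
as follows:

static void __intel_pmu_lbr_enable(void)
{
	u64 debugctl;
	struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);

	/* program the shared, refcounted LBR_SELECT value, if any */
	if (cpuc->lbr_sel)
		wrmsrl(MSR_LBR_SELECT, cpuc->lbr_sel->config);

	rdmsrl(MSR_IA32_DEBUGCTLMSR, debugctl);
	debugctl |= (DEBUGCTLMSR_LBR | DEBUGCTLMSR_FREEZE_LBRS_ON_PMI);
	wrmsrl(MSR_IA32_DEBUGCTLMSR, debugctl);
}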