Commit b51f86e9 authored by Linus Torvalds

Merge tag 'perf_urgent_for_v5.18_rc2' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip

Pull perf fixes from Borislav Petkov:

 - A couple of fixes to cgroup-related handling of perf events

 - A couple of fixes to event encoding on Sapphire Rapids

 - Pass event caps of inherited events so that perf doesn't fail wrongly
   at fork()

 - Add support for a new Raptor Lake CPU

* tag 'perf_urgent_for_v5.18_rc2' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip:
  perf/core: Always set cpuctx cgrp when enable cgroup event
  perf/core: Fix perf_cgroup_switch()
  perf/core: Use perf_cgroup_info->active to check if cgroup is active
  perf/core: Don't pass task around when ctx sched in
  perf/x86/intel: Update the FRONTEND MSR mask on Sapphire Rapids
  perf/x86/intel: Don't extend the pseudo-encoding to GP counters
  perf/core: Inherit event_caps
  perf/x86/uncore: Add Raptor Lake uncore support
  perf/x86/msr: Add Raptor Lake CPU support
  perf/x86/cstate: Add Raptor Lake support
  perf/x86: Add Intel Raptor Lake support
parents 50c94de6 e19cd0b6
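
Most of the kernel/events/core.c changes in this pull rework how cgroup-scoped perf events are scheduled across context switches. For orientation only (this is not part of the commit), such events are the ones created with PERF_FLAG_PID_CGROUP, where the pid argument of perf_event_open(2) is an fd of a cgroupfs directory and the event is always bound to a CPU. A minimal userspace sketch, with the cgroup path and event choice as placeholders:

/* Open a cgroup-scoped hardware event on CPU 0, per perf_event_open(2).
 * The cgroup path below is an example, not taken from this commit. */
#include <fcntl.h>
#include <linux/perf_event.h>
#include <stdio.h>
#include <string.h>
#include <sys/syscall.h>
#include <unistd.h>

int main(void)
{
	struct perf_event_attr attr;
	long long count = 0;
	int cgroup_fd, event_fd;

	memset(&attr, 0, sizeof(attr));
	attr.size = sizeof(attr);
	attr.type = PERF_TYPE_HARDWARE;
	attr.config = PERF_COUNT_HW_CPU_CYCLES;

	/* With PERF_FLAG_PID_CGROUP, "pid" is an fd of a cgroupfs directory. */
	cgroup_fd = open("/sys/fs/cgroup/mygroup", O_RDONLY);
	if (cgroup_fd < 0) {
		perror("open cgroup");
		return 1;
	}

	/* cgroup events are system-wide: cpu must be >= 0, one fd per CPU. */
	event_fd = syscall(__NR_perf_event_open, &attr, cgroup_fd, 0, -1,
			   PERF_FLAG_PID_CGROUP);
	if (event_fd < 0) {
		perror("perf_event_open");
		return 1;
	}

	if (read(event_fd, &count, sizeof(count)) == sizeof(count))
		printf("cycles: %lld\n", count);

	close(event_fd);
	close(cgroup_fd);
	return 0;
}

Because such events are per-CPU, the scheduling paths patched below can rely on the cached cpuctx->cgrp pointer instead of re-deriving the cgroup from the task at every switch, which is essentially what the perf_cgroup_switch()/perf_cgroup_set_timestamp() rework does.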
--- a/arch/x86/events/intel/core.c
+++ b/arch/x86/events/intel/core.c
@@ -302,7 +302,7 @@ static struct extra_reg intel_spr_extra_regs[] __read_mostly = {
 	INTEL_UEVENT_EXTRA_REG(0x012a, MSR_OFFCORE_RSP_0, 0x3fffffffffull, RSP_0),
 	INTEL_UEVENT_EXTRA_REG(0x012b, MSR_OFFCORE_RSP_1, 0x3fffffffffull, RSP_1),
 	INTEL_UEVENT_PEBS_LDLAT_EXTRA_REG(0x01cd),
-	INTEL_UEVENT_EXTRA_REG(0x01c6, MSR_PEBS_FRONTEND, 0x7fff17, FE),
+	INTEL_UEVENT_EXTRA_REG(0x01c6, MSR_PEBS_FRONTEND, 0x7fff1f, FE),
 	INTEL_UEVENT_EXTRA_REG(0x40ad, MSR_PEBS_FRONTEND, 0x7, FE),
 	INTEL_UEVENT_EXTRA_REG(0x04c2, MSR_PEBS_FRONTEND, 0x8, FE),
 	EVENT_EXTRA_END
@@ -5536,7 +5536,11 @@ static void intel_pmu_check_event_constraints(struct event_constraint *event_con
 
 			/* Disabled fixed counters which are not in CPUID */
 			c->idxmsk64 &= intel_ctrl;
-			if (c->idxmsk64 != INTEL_PMC_MSK_FIXED_REF_CYCLES)
+			/*
+			 * Don't extend the pseudo-encoding to the
+			 * generic counters
+			 */
+			if (!use_fixed_pseudo_encoding(c->code))
 				c->idxmsk64 |= (1ULL << num_counters) - 1;
 		}
 		c->idxmsk64 &=
@@ -6212,6 +6216,7 @@ __init int intel_pmu_init(void)
 
 	case INTEL_FAM6_ALDERLAKE:
 	case INTEL_FAM6_ALDERLAKE_L:
+	case INTEL_FAM6_RAPTORLAKE:
 		/*
 		 * Alder Lake has 2 types of CPU, core and atom.
 		 *
...
--- a/arch/x86/events/intel/cstate.c
+++ b/arch/x86/events/intel/cstate.c
@@ -40,7 +40,7 @@
  * Model specific counters:
  *	MSR_CORE_C1_RES: CORE C1 Residency Counter
  *			 perf code: 0x00
- *			 Available model: SLM,AMT,GLM,CNL,ICX,TNT,ADL
+ *			 Available model: SLM,AMT,GLM,CNL,ICX,TNT,ADL,RPL
  *			 Scope: Core (each processor core has a MSR)
  *	MSR_CORE_C3_RESIDENCY: CORE C3 Residency Counter
  *			       perf code: 0x01
@@ -51,49 +51,50 @@
  *			       perf code: 0x02
  *			       Available model: SLM,AMT,NHM,WSM,SNB,IVB,HSW,BDW,
  *						SKL,KNL,GLM,CNL,KBL,CML,ICL,ICX,
- *						TGL,TNT,RKL,ADL
+ *						TGL,TNT,RKL,ADL,RPL
  *			       Scope: Core
  *	MSR_CORE_C7_RESIDENCY: CORE C7 Residency Counter
  *			       perf code: 0x03
  *			       Available model: SNB,IVB,HSW,BDW,SKL,CNL,KBL,CML,
- *						ICL,TGL,RKL,ADL
+ *						ICL,TGL,RKL,ADL,RPL
  *			       Scope: Core
  *	MSR_PKG_C2_RESIDENCY:  Package C2 Residency Counter.
  *			       perf code: 0x00
  *			       Available model: SNB,IVB,HSW,BDW,SKL,KNL,GLM,CNL,
- *						KBL,CML,ICL,ICX,TGL,TNT,RKL,ADL
+ *						KBL,CML,ICL,ICX,TGL,TNT,RKL,ADL,
+ *						RPL
  *			       Scope: Package (physical package)
  *	MSR_PKG_C3_RESIDENCY:  Package C3 Residency Counter.
  *			       perf code: 0x01
  *			       Available model: NHM,WSM,SNB,IVB,HSW,BDW,SKL,KNL,
  *						GLM,CNL,KBL,CML,ICL,TGL,TNT,RKL,
- *						ADL
+ *						ADL,RPL
  *			       Scope: Package (physical package)
  *	MSR_PKG_C6_RESIDENCY:  Package C6 Residency Counter.
  *			       perf code: 0x02
  *			       Available model: SLM,AMT,NHM,WSM,SNB,IVB,HSW,BDW,
  *						SKL,KNL,GLM,CNL,KBL,CML,ICL,ICX,
- *						TGL,TNT,RKL,ADL
+ *						TGL,TNT,RKL,ADL,RPL
  *			       Scope: Package (physical package)
  *	MSR_PKG_C7_RESIDENCY:  Package C7 Residency Counter.
  *			       perf code: 0x03
  *			       Available model: NHM,WSM,SNB,IVB,HSW,BDW,SKL,CNL,
- *						KBL,CML,ICL,TGL,RKL,ADL
+ *						KBL,CML,ICL,TGL,RKL,ADL,RPL
  *			       Scope: Package (physical package)
  *	MSR_PKG_C8_RESIDENCY:  Package C8 Residency Counter.
  *			       perf code: 0x04
  *			       Available model: HSW ULT,KBL,CNL,CML,ICL,TGL,RKL,
- *						ADL
+ *						ADL,RPL
  *			       Scope: Package (physical package)
  *	MSR_PKG_C9_RESIDENCY:  Package C9 Residency Counter.
  *			       perf code: 0x05
  *			       Available model: HSW ULT,KBL,CNL,CML,ICL,TGL,RKL,
- *						ADL
+ *						ADL,RPL
  *			       Scope: Package (physical package)
  *	MSR_PKG_C10_RESIDENCY: Package C10 Residency Counter.
  *			       perf code: 0x06
  *			       Available model: HSW ULT,KBL,GLM,CNL,CML,ICL,TGL,
- *						TNT,RKL,ADL
+ *						TNT,RKL,ADL,RPL
  *			       Scope: Package (physical package)
  *
  */
@@ -680,6 +681,7 @@ static const struct x86_cpu_id intel_cstates_match[] __initconst = {
 	X86_MATCH_INTEL_FAM6_MODEL(ROCKETLAKE,	&icl_cstates),
 	X86_MATCH_INTEL_FAM6_MODEL(ALDERLAKE,	&adl_cstates),
 	X86_MATCH_INTEL_FAM6_MODEL(ALDERLAKE_L,	&adl_cstates),
+	X86_MATCH_INTEL_FAM6_MODEL(RAPTORLAKE,	&adl_cstates),
 	{ },
 };
 MODULE_DEVICE_TABLE(x86cpu, intel_cstates_match);
...
--- a/arch/x86/events/intel/uncore.c
+++ b/arch/x86/events/intel/uncore.c
@@ -1828,6 +1828,7 @@ static const struct x86_cpu_id intel_uncore_match[] __initconst = {
 	X86_MATCH_INTEL_FAM6_MODEL(ROCKETLAKE,		&rkl_uncore_init),
 	X86_MATCH_INTEL_FAM6_MODEL(ALDERLAKE,		&adl_uncore_init),
 	X86_MATCH_INTEL_FAM6_MODEL(ALDERLAKE_L,		&adl_uncore_init),
+	X86_MATCH_INTEL_FAM6_MODEL(RAPTORLAKE,		&adl_uncore_init),
 	X86_MATCH_INTEL_FAM6_MODEL(SAPPHIRERAPIDS_X,	&spr_uncore_init),
 	X86_MATCH_INTEL_FAM6_MODEL(ATOM_TREMONT_D,	&snr_uncore_init),
 	{},
...
--- a/arch/x86/events/intel/uncore_snb.c
+++ b/arch/x86/events/intel/uncore_snb.c
@@ -79,6 +79,10 @@
 #define PCI_DEVICE_ID_INTEL_ADL_14_IMC		0x4650
 #define PCI_DEVICE_ID_INTEL_ADL_15_IMC		0x4668
 #define PCI_DEVICE_ID_INTEL_ADL_16_IMC		0x4670
+#define PCI_DEVICE_ID_INTEL_RPL_1_IMC		0xA700
+#define PCI_DEVICE_ID_INTEL_RPL_2_IMC		0xA702
+#define PCI_DEVICE_ID_INTEL_RPL_3_IMC		0xA706
+#define PCI_DEVICE_ID_INTEL_RPL_4_IMC		0xA709
 
 /* SNB event control */
 #define SNB_UNC_CTL_EV_SEL_MASK			0x000000ff
@@ -1406,6 +1410,22 @@ static const struct pci_device_id tgl_uncore_pci_ids[] = {
 		PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_ADL_16_IMC),
 		.driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0),
 	},
+	{ /* IMC */
+		PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_RPL_1_IMC),
+		.driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0),
+	},
+	{ /* IMC */
+		PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_RPL_2_IMC),
+		.driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0),
+	},
+	{ /* IMC */
+		PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_RPL_3_IMC),
+		.driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0),
+	},
+	{ /* IMC */
+		PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_RPL_4_IMC),
+		.driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0),
+	},
 	{ /* end: all zeroes */ }
 };
...
--- a/arch/x86/events/msr.c
+++ b/arch/x86/events/msr.c
@@ -103,6 +103,7 @@ static bool test_intel(int idx, void *data)
 	case INTEL_FAM6_ROCKETLAKE:
 	case INTEL_FAM6_ALDERLAKE:
 	case INTEL_FAM6_ALDERLAKE_L:
+	case INTEL_FAM6_RAPTORLAKE:
 		if (idx == PERF_MSR_SMI || idx == PERF_MSR_PPERF)
 			return true;
 		break;
...
--- a/arch/x86/include/asm/perf_event.h
+++ b/arch/x86/include/asm/perf_event.h
@@ -241,6 +241,11 @@ struct x86_pmu_capability {
 #define INTEL_PMC_IDX_FIXED_SLOTS	(INTEL_PMC_IDX_FIXED + 3)
 #define INTEL_PMC_MSK_FIXED_SLOTS	(1ULL << INTEL_PMC_IDX_FIXED_SLOTS)
 
+static inline bool use_fixed_pseudo_encoding(u64 code)
+{
+	return !(code & 0xff);
+}
+
 /*
  * We model BTS tracing as another fixed-mode PMC.
  *
...
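
For readers unfamiliar with the fixed-counter "pseudo-encodings", the new helper keys off the event-select byte (bits 7:0) of a constraint code: fixed counters whose events have no general-purpose equivalent use an event select of 0 (commonly cited examples are 0x0300 for REF_TSC and 0x0400 for TOPDOWN.SLOTS in the fixed-counter constraint tables), while events such as INST_RETIRED.ANY (0x00c0) carry a real encoding. A minimal standalone sketch of the same predicate; the sample codes are illustrative and not taken from this diff:

#include <stdbool.h>
#include <stddef.h>
#include <stdint.h>
#include <stdio.h>

/* Same predicate as the new helper: a zero event-select byte marks
 * a fixed-counter pseudo-encoding. */
static bool use_fixed_pseudo_encoding(uint64_t code)
{
	return !(code & 0xff);
}

int main(void)
{
	/* Event select in bits 7:0, umask in bits 15:8; example values. */
	const uint64_t codes[] = { 0x00c0, 0x003c, 0x0300, 0x0400 };

	for (size_t i = 0; i < sizeof(codes) / sizeof(codes[0]); i++)
		printf("0x%04llx -> %s\n", (unsigned long long)codes[i],
		       use_fixed_pseudo_encoding(codes[i]) ?
		       "pseudo-encoding (fixed counter only)" :
		       "real encoding (may extend to GP counters)");
	return 0;
}

The effect in intel_pmu_check_event_constraints() above is that only constraints carrying a real event encoding get widened onto the general-purpose counter mask; pure pseudo-encodings stay confined to their fixed counter.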
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -574,8 +574,7 @@ static void cpu_ctx_sched_out(struct perf_cpu_context *cpuctx,
 			      enum event_type_t event_type);
 
 static void cpu_ctx_sched_in(struct perf_cpu_context *cpuctx,
-			     enum event_type_t event_type,
-			     struct task_struct *task);
+			     enum event_type_t event_type);
 
 static void update_context_time(struct perf_event_context *ctx);
 static u64 perf_event_time(struct perf_event *event);
@@ -781,7 +780,6 @@ static inline void update_cgrp_time_from_cpuctx(struct perf_cpu_context *cpuctx,
 static inline void update_cgrp_time_from_event(struct perf_event *event)
 {
 	struct perf_cgroup_info *info;
-	struct perf_cgroup *cgrp;
 
 	/*
 	 * ensure we access cgroup data only when needed and
@@ -790,21 +788,19 @@ static inline void update_cgrp_time_from_event(struct perf_event *event)
 	if (!is_cgroup_event(event))
 		return;
 
-	cgrp = perf_cgroup_from_task(current, event->ctx);
+	info = this_cpu_ptr(event->cgrp->info);
 	/*
 	 * Do not update time when cgroup is not active
 	 */
-	if (cgroup_is_descendant(cgrp->css.cgroup, event->cgrp->css.cgroup)) {
-		info = this_cpu_ptr(event->cgrp->info);
+	if (info->active)
 		__update_cgrp_time(info, perf_clock(), true);
-	}
 }
 
 static inline void
-perf_cgroup_set_timestamp(struct task_struct *task,
-			  struct perf_event_context *ctx)
+perf_cgroup_set_timestamp(struct perf_cpu_context *cpuctx)
 {
-	struct perf_cgroup *cgrp;
+	struct perf_event_context *ctx = &cpuctx->ctx;
+	struct perf_cgroup *cgrp = cpuctx->cgrp;
 	struct perf_cgroup_info *info;
 	struct cgroup_subsys_state *css;
@@ -813,10 +809,10 @@ perf_cgroup_set_timestamp(struct task_struct *task,
 	 * ensure we do not access cgroup data
 	 * unless we have the cgroup pinned (css_get)
 	 */
-	if (!task || !ctx->nr_cgroups)
+	if (!cgrp)
 		return;
 
-	cgrp = perf_cgroup_from_task(task, ctx);
+	WARN_ON_ONCE(!ctx->nr_cgroups);
 
 	for (css = &cgrp->css; css; css = css->parent) {
 		cgrp = container_of(css, struct perf_cgroup, css);
@@ -828,17 +824,12 @@ perf_cgroup_set_timestamp(struct task_struct *task,
 
 static DEFINE_PER_CPU(struct list_head, cgrp_cpuctx_list);
 
-#define PERF_CGROUP_SWOUT	0x1 /* cgroup switch out every event */
-#define PERF_CGROUP_SWIN	0x2 /* cgroup switch in events based on task */
-
 /*
  * reschedule events based on the cgroup constraint of task.
- *
- * mode SWOUT : schedule out everything
- * mode SWIN : schedule in based on cgroup for next
  */
-static void perf_cgroup_switch(struct task_struct *task, int mode)
+static void perf_cgroup_switch(struct task_struct *task)
 {
+	struct perf_cgroup *cgrp;
 	struct perf_cpu_context *cpuctx, *tmp;
 	struct list_head *list;
 	unsigned long flags;
@@ -849,35 +840,31 @@ static void perf_cgroup_switch(struct task_struct *task, int mode)
 	 */
 	local_irq_save(flags);
 
+	cgrp = perf_cgroup_from_task(task, NULL);
+
 	list = this_cpu_ptr(&cgrp_cpuctx_list);
 	list_for_each_entry_safe(cpuctx, tmp, list, cgrp_cpuctx_entry) {
 		WARN_ON_ONCE(cpuctx->ctx.nr_cgroups == 0);
 
+		if (READ_ONCE(cpuctx->cgrp) == cgrp)
+			continue;
+
 		perf_ctx_lock(cpuctx, cpuctx->task_ctx);
 		perf_pmu_disable(cpuctx->ctx.pmu);
 
-		if (mode & PERF_CGROUP_SWOUT) {
-			cpu_ctx_sched_out(cpuctx, EVENT_ALL);
-			/*
-			 * must not be done before ctxswout due
-			 * to event_filter_match() in event_sched_out()
-			 */
-			cpuctx->cgrp = NULL;
-		}
-
-		if (mode & PERF_CGROUP_SWIN) {
-			WARN_ON_ONCE(cpuctx->cgrp);
-			/*
-			 * set cgrp before ctxsw in to allow
-			 * event_filter_match() to not have to pass
-			 * task around
-			 * we pass the cpuctx->ctx to perf_cgroup_from_task()
-			 * because cgorup events are only per-cpu
-			 */
-			cpuctx->cgrp = perf_cgroup_from_task(task,
-							     &cpuctx->ctx);
-			cpu_ctx_sched_in(cpuctx, EVENT_ALL, task);
-		}
+		cpu_ctx_sched_out(cpuctx, EVENT_ALL);
+		/*
+		 * must not be done before ctxswout due
+		 * to update_cgrp_time_from_cpuctx() in
+		 * ctx_sched_out()
+		 */
+		cpuctx->cgrp = cgrp;
+		/*
+		 * set cgrp before ctxsw in to allow
+		 * perf_cgroup_set_timestamp() in ctx_sched_in()
+		 * to not have to pass task around
+		 */
+		cpu_ctx_sched_in(cpuctx, EVENT_ALL);
+
 		perf_pmu_enable(cpuctx->ctx.pmu);
 		perf_ctx_unlock(cpuctx, cpuctx->task_ctx);
 	}
@@ -885,58 +872,6 @@ static void perf_cgroup_switch(struct task_struct *task, int mode)
 	local_irq_restore(flags);
 }
 
-static inline void perf_cgroup_sched_out(struct task_struct *task,
-					 struct task_struct *next)
-{
-	struct perf_cgroup *cgrp1;
-	struct perf_cgroup *cgrp2 = NULL;
-
-	rcu_read_lock();
-	/*
-	 * we come here when we know perf_cgroup_events > 0
-	 * we do not need to pass the ctx here because we know
-	 * we are holding the rcu lock
-	 */
-	cgrp1 = perf_cgroup_from_task(task, NULL);
-	cgrp2 = perf_cgroup_from_task(next, NULL);
-
-	/*
-	 * only schedule out current cgroup events if we know
-	 * that we are switching to a different cgroup. Otherwise,
-	 * do no touch the cgroup events.
-	 */
-	if (cgrp1 != cgrp2)
-		perf_cgroup_switch(task, PERF_CGROUP_SWOUT);
-
-	rcu_read_unlock();
-}
-
-static inline void perf_cgroup_sched_in(struct task_struct *prev,
-					struct task_struct *task)
-{
-	struct perf_cgroup *cgrp1;
-	struct perf_cgroup *cgrp2 = NULL;
-
-	rcu_read_lock();
-	/*
-	 * we come here when we know perf_cgroup_events > 0
-	 * we do not need to pass the ctx here because we know
-	 * we are holding the rcu lock
-	 */
-	cgrp1 = perf_cgroup_from_task(task, NULL);
-	cgrp2 = perf_cgroup_from_task(prev, NULL);
-
-	/*
-	 * only need to schedule in cgroup events if we are changing
-	 * cgroup during ctxsw. Cgroup events were not scheduled
-	 * out of ctxsw out if that was not the case.
-	 */
-	if (cgrp1 != cgrp2)
-		perf_cgroup_switch(task, PERF_CGROUP_SWIN);
-
-	rcu_read_unlock();
-}
-
 static int perf_cgroup_ensure_storage(struct perf_event *event,
 				struct cgroup_subsys_state *css)
 {
@@ -1032,22 +967,10 @@ perf_cgroup_event_enable(struct perf_event *event, struct perf_event_context *ct
 	 */
 	cpuctx = container_of(ctx, struct perf_cpu_context, ctx);
 
-	/*
-	 * Since setting cpuctx->cgrp is conditional on the current @cgrp
-	 * matching the event's cgroup, we must do this for every new event,
-	 * because if the first would mismatch, the second would not try again
-	 * and we would leave cpuctx->cgrp unset.
-	 */
-	if (ctx->is_active && !cpuctx->cgrp) {
-		struct perf_cgroup *cgrp = perf_cgroup_from_task(current, ctx);
-
-		if (cgroup_is_descendant(cgrp->css.cgroup, event->cgrp->css.cgroup))
-			cpuctx->cgrp = cgrp;
-	}
-
 	if (ctx->nr_cgroups++)
 		return;
 
+	cpuctx->cgrp = perf_cgroup_from_task(current, ctx);
 	list_add(&cpuctx->cgrp_cpuctx_entry,
 			per_cpu_ptr(&cgrp_cpuctx_list, event->cpu));
 }
@@ -1069,9 +992,7 @@ perf_cgroup_event_disable(struct perf_event *event, struct perf_event_context *c
 	if (--ctx->nr_cgroups)
 		return;
 
-	if (ctx->is_active && cpuctx->cgrp)
-		cpuctx->cgrp = NULL;
+	cpuctx->cgrp = NULL;
 
 	list_del(&cpuctx->cgrp_cpuctx_entry);
 }
@@ -1100,16 +1021,6 @@ static inline void update_cgrp_time_from_cpuctx(struct perf_cpu_context *cpuctx,
 {
 }
 
-static inline void perf_cgroup_sched_out(struct task_struct *task,
-					 struct task_struct *next)
-{
-}
-
-static inline void perf_cgroup_sched_in(struct task_struct *prev,
-					struct task_struct *task)
-{
-}
-
 static inline int perf_cgroup_connect(pid_t pid, struct perf_event *event,
 				      struct perf_event_attr *attr,
 				      struct perf_event *group_leader)
@@ -1118,13 +1029,7 @@ static inline int perf_cgroup_connect(pid_t pid, struct perf_event *event,
 }
 
 static inline void
-perf_cgroup_set_timestamp(struct task_struct *task,
-			  struct perf_event_context *ctx)
-{
-}
-
-static inline void
-perf_cgroup_switch(struct task_struct *task, struct task_struct *next)
+perf_cgroup_set_timestamp(struct perf_cpu_context *cpuctx)
 {
 }
 
@@ -1147,6 +1052,10 @@ static inline void
 perf_cgroup_event_disable(struct perf_event *event, struct perf_event_context *ctx)
 {
 }
 
+static void perf_cgroup_switch(struct task_struct *task)
+{
+}
+
 #endif
 
 /*
@@ -2713,8 +2622,7 @@ static void ctx_sched_out(struct perf_event_context *ctx,
 static void
 ctx_sched_in(struct perf_event_context *ctx,
 	     struct perf_cpu_context *cpuctx,
-	     enum event_type_t event_type,
-	     struct task_struct *task);
+	     enum event_type_t event_type);
 
 static void task_ctx_sched_out(struct perf_cpu_context *cpuctx,
 			       struct perf_event_context *ctx,
@@ -2730,15 +2638,14 @@ static void task_ctx_sched_out(struct perf_cpu_context *cpuctx,
 }
 
 static void perf_event_sched_in(struct perf_cpu_context *cpuctx,
-				struct perf_event_context *ctx,
-				struct task_struct *task)
+				struct perf_event_context *ctx)
 {
-	cpu_ctx_sched_in(cpuctx, EVENT_PINNED, task);
+	cpu_ctx_sched_in(cpuctx, EVENT_PINNED);
 	if (ctx)
-		ctx_sched_in(ctx, cpuctx, EVENT_PINNED, task);
-	cpu_ctx_sched_in(cpuctx, EVENT_FLEXIBLE, task);
+		ctx_sched_in(ctx, cpuctx, EVENT_PINNED);
+	cpu_ctx_sched_in(cpuctx, EVENT_FLEXIBLE);
 	if (ctx)
-		ctx_sched_in(ctx, cpuctx, EVENT_FLEXIBLE, task);
+		ctx_sched_in(ctx, cpuctx, EVENT_FLEXIBLE);
 }
 
 /*
@@ -2788,7 +2695,7 @@ static void ctx_resched(struct perf_cpu_context *cpuctx,
 	else if (ctx_event_type & EVENT_PINNED)
 		cpu_ctx_sched_out(cpuctx, EVENT_FLEXIBLE);
 
-	perf_event_sched_in(cpuctx, task_ctx, current);
+	perf_event_sched_in(cpuctx, task_ctx);
 	perf_pmu_enable(cpuctx->ctx.pmu);
 }
@@ -3011,7 +2918,7 @@ static void __perf_event_enable(struct perf_event *event,
 		return;
 
 	if (!event_filter_match(event)) {
-		ctx_sched_in(ctx, cpuctx, EVENT_TIME, current);
+		ctx_sched_in(ctx, cpuctx, EVENT_TIME);
 		return;
 	}
@@ -3020,7 +2927,7 @@ static void __perf_event_enable(struct perf_event *event,
 	 * then don't put it on unless the group is on.
 	 */
 	if (leader != event && leader->state != PERF_EVENT_STATE_ACTIVE) {
-		ctx_sched_in(ctx, cpuctx, EVENT_TIME, current);
+		ctx_sched_in(ctx, cpuctx, EVENT_TIME);
 		return;
 	}
@@ -3668,7 +3575,7 @@ void __perf_event_task_sched_out(struct task_struct *task,
 	 * cgroup event are system-wide mode only
 	 */
 	if (atomic_read(this_cpu_ptr(&perf_cgroup_events)))
-		perf_cgroup_sched_out(task, next);
+		perf_cgroup_switch(next);
 }
 
 /*
@@ -3865,8 +3772,7 @@ ctx_flexible_sched_in(struct perf_event_context *ctx,
 static void
 ctx_sched_in(struct perf_event_context *ctx,
 	     struct perf_cpu_context *cpuctx,
-	     enum event_type_t event_type,
-	     struct task_struct *task)
+	     enum event_type_t event_type)
 {
 	int is_active = ctx->is_active;
@@ -3878,7 +3784,7 @@ ctx_sched_in(struct perf_event_context *ctx,
 	if (is_active ^ EVENT_TIME) {
 		/* start ctx time */
 		__update_context_time(ctx, false);
-		perf_cgroup_set_timestamp(task, ctx);
+		perf_cgroup_set_timestamp(cpuctx);
 		/*
 		 * CPU-release for the below ->is_active store,
 		 * see __load_acquire() in perf_event_time_now()
@@ -3909,12 +3815,11 @@ ctx_sched_in(struct perf_event_context *ctx,
 }
 
 static void cpu_ctx_sched_in(struct perf_cpu_context *cpuctx,
-			     enum event_type_t event_type,
-			     struct task_struct *task)
+			     enum event_type_t event_type)
 {
 	struct perf_event_context *ctx = &cpuctx->ctx;
 
-	ctx_sched_in(ctx, cpuctx, event_type, task);
+	ctx_sched_in(ctx, cpuctx, event_type);
 }
 
 static void perf_event_context_sched_in(struct perf_event_context *ctx,
@@ -3956,7 +3861,7 @@ static void perf_event_context_sched_in(struct perf_event_context *ctx,
 	 */
 	if (!RB_EMPTY_ROOT(&ctx->pinned_groups.tree))
 		cpu_ctx_sched_out(cpuctx, EVENT_FLEXIBLE);
-	perf_event_sched_in(cpuctx, ctx, task);
+	perf_event_sched_in(cpuctx, ctx);
 
 	if (cpuctx->sched_cb_usage && pmu->sched_task)
 		pmu->sched_task(cpuctx->task_ctx, true);
@@ -3984,16 +3889,6 @@ void __perf_event_task_sched_in(struct task_struct *prev,
 	struct perf_event_context *ctx;
 	int ctxn;
 
-	/*
-	 * If cgroup events exist on this CPU, then we need to check if we have
-	 * to switch in PMU state; cgroup event are system-wide mode only.
-	 *
-	 * Since cgroup events are CPU events, we must schedule these in before
-	 * we schedule in the task events.
-	 */
-	if (atomic_read(this_cpu_ptr(&perf_cgroup_events)))
-		perf_cgroup_sched_in(prev, task);
-
 	for_each_task_context_nr(ctxn) {
 		ctx = task->perf_event_ctxp[ctxn];
 		if (likely(!ctx))
@@ -4267,7 +4162,7 @@ static bool perf_rotate_context(struct perf_cpu_context *cpuctx)
 	if (cpu_event)
 		rotate_ctx(&cpuctx->ctx, cpu_event);
 
-	perf_event_sched_in(cpuctx, task_ctx, current);
+	perf_event_sched_in(cpuctx, task_ctx);
 
 	perf_pmu_enable(cpuctx->ctx.pmu);
 	perf_ctx_unlock(cpuctx, cpuctx->task_ctx);
@@ -4339,7 +4234,7 @@ static void perf_event_enable_on_exec(int ctxn)
 		clone_ctx = unclone_ctx(ctx);
 		ctx_resched(cpuctx, ctx, event_type);
 	} else {
-		ctx_sched_in(ctx, cpuctx, EVENT_TIME, current);
+		ctx_sched_in(ctx, cpuctx, EVENT_TIME);
 	}
 	perf_ctx_unlock(cpuctx, ctx);
@@ -11635,6 +11530,9 @@ perf_event_alloc(struct perf_event_attr *attr, int cpu,
 	event->state = PERF_EVENT_STATE_INACTIVE;
 
+	if (parent_event)
+		event->event_caps = parent_event->event_caps;
+
 	if (event->attr.sigtrap)
 		atomic_set(&event->event_limit, 1);
@@ -13562,7 +13460,7 @@ static int __perf_cgroup_move(void *info)
 {
 	struct task_struct *task = info;
 	rcu_read_lock();
-	perf_cgroup_switch(task, PERF_CGROUP_SWOUT | PERF_CGROUP_SWIN);
+	perf_cgroup_switch(task);
 	rcu_read_unlock();
 	return 0;
 }
...