Commit 79078c53 authored by Linus Torvalds

Merge branch 'perf-urgent-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip

Pull perf fixes from Ingo Molnar:
 "Misc race fixes uncovered by fuzzing efforts, a Sparse fix, two PMU
  driver fixes, plus miscellaneous tooling fixes"

* 'perf-urgent-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip:
  perf/x86: Reject non sampling events with precise_ip
  perf/x86/intel: Account interrupts for PEBS errors
  perf/core: Fix concurrent sys_perf_event_open() vs. 'move_group' race
  perf/core: Fix sys_perf_event_open() vs. hotplug
  perf/x86/intel: Use ULL constant to prevent undefined shift behaviour
  perf/x86/intel/uncore: Fix hardcoded socket 0 assumption in the Haswell init code
  perf/x86: Set pmu->module in Intel PMU modules
  perf probe: Fix to probe on gcc generated symbols for offline kernel
  perf probe: Fix --funcs to show correct symbols for offline module
  perf symbols: Robustify reading of build-id from sysfs
  perf tools: Install tools/lib/traceevent plugins with install-bin
  tools lib traceevent: Fix prev/next_prio for deadline tasks
  perf record: Fix --switch-output documentation and comment
  perf record: Make __record_options static
  tools lib subcmd: Add OPT_STRING_OPTARG_SET option
  perf probe: Fix to get correct modname from elf header
  samples/bpf trace_output_user: Remove duplicate sys/ioctl.h include
  samples/bpf sock_example: Avoid getting ethhdr from two includes
  perf sched timehist: Show total scheduling time
parents 255e6140 18e7a45a
...@@ -505,6 +505,10 @@ int x86_pmu_hw_config(struct perf_event *event) ...@@ -505,6 +505,10 @@ int x86_pmu_hw_config(struct perf_event *event)
if (event->attr.precise_ip > precise) if (event->attr.precise_ip > precise)
return -EOPNOTSUPP; return -EOPNOTSUPP;
/* There's no sense in having PEBS for non sampling events: */
if (!is_sampling_event(event))
return -EINVAL;
} }
/* /*
* check that PEBS LBR correction does not conflict with * check that PEBS LBR correction does not conflict with
......
...@@ -3987,7 +3987,7 @@ __init int intel_pmu_init(void) ...@@ -3987,7 +3987,7 @@ __init int intel_pmu_init(void)
x86_pmu.num_counters, INTEL_PMC_MAX_GENERIC); x86_pmu.num_counters, INTEL_PMC_MAX_GENERIC);
x86_pmu.num_counters = INTEL_PMC_MAX_GENERIC; x86_pmu.num_counters = INTEL_PMC_MAX_GENERIC;
} }
x86_pmu.intel_ctrl = (1 << x86_pmu.num_counters) - 1; x86_pmu.intel_ctrl = (1ULL << x86_pmu.num_counters) - 1;
if (x86_pmu.num_counters_fixed > INTEL_PMC_MAX_FIXED) { if (x86_pmu.num_counters_fixed > INTEL_PMC_MAX_FIXED) {
WARN(1, KERN_ERR "hw perf events fixed %d > max(%d), clipping!", WARN(1, KERN_ERR "hw perf events fixed %d > max(%d), clipping!",
......
...@@ -434,6 +434,7 @@ static struct pmu cstate_core_pmu = { ...@@ -434,6 +434,7 @@ static struct pmu cstate_core_pmu = {
.stop = cstate_pmu_event_stop, .stop = cstate_pmu_event_stop,
.read = cstate_pmu_event_update, .read = cstate_pmu_event_update,
.capabilities = PERF_PMU_CAP_NO_INTERRUPT, .capabilities = PERF_PMU_CAP_NO_INTERRUPT,
.module = THIS_MODULE,
}; };
static struct pmu cstate_pkg_pmu = { static struct pmu cstate_pkg_pmu = {
...@@ -447,6 +448,7 @@ static struct pmu cstate_pkg_pmu = { ...@@ -447,6 +448,7 @@ static struct pmu cstate_pkg_pmu = {
.stop = cstate_pmu_event_stop, .stop = cstate_pmu_event_stop,
.read = cstate_pmu_event_update, .read = cstate_pmu_event_update,
.capabilities = PERF_PMU_CAP_NO_INTERRUPT, .capabilities = PERF_PMU_CAP_NO_INTERRUPT,
.module = THIS_MODULE,
}; };
static const struct cstate_model nhm_cstates __initconst = { static const struct cstate_model nhm_cstates __initconst = {
......
...@@ -1389,9 +1389,13 @@ static void intel_pmu_drain_pebs_nhm(struct pt_regs *iregs) ...@@ -1389,9 +1389,13 @@ static void intel_pmu_drain_pebs_nhm(struct pt_regs *iregs)
continue; continue;
/* log dropped samples number */ /* log dropped samples number */
if (error[bit]) if (error[bit]) {
perf_log_lost_samples(event, error[bit]); perf_log_lost_samples(event, error[bit]);
if (perf_event_account_interrupt(event))
x86_pmu_stop(event, 0);
}
if (counts[bit]) { if (counts[bit]) {
__intel_pmu_pebs_event(event, iregs, base, __intel_pmu_pebs_event(event, iregs, base,
top, bit, counts[bit]); top, bit, counts[bit]);
......
...@@ -697,6 +697,7 @@ static int __init init_rapl_pmus(void) ...@@ -697,6 +697,7 @@ static int __init init_rapl_pmus(void)
rapl_pmus->pmu.start = rapl_pmu_event_start; rapl_pmus->pmu.start = rapl_pmu_event_start;
rapl_pmus->pmu.stop = rapl_pmu_event_stop; rapl_pmus->pmu.stop = rapl_pmu_event_stop;
rapl_pmus->pmu.read = rapl_pmu_event_read; rapl_pmus->pmu.read = rapl_pmu_event_read;
rapl_pmus->pmu.module = THIS_MODULE;
return 0; return 0;
} }
......
...@@ -733,6 +733,7 @@ static int uncore_pmu_register(struct intel_uncore_pmu *pmu) ...@@ -733,6 +733,7 @@ static int uncore_pmu_register(struct intel_uncore_pmu *pmu)
.start = uncore_pmu_event_start, .start = uncore_pmu_event_start,
.stop = uncore_pmu_event_stop, .stop = uncore_pmu_event_stop,
.read = uncore_pmu_event_read, .read = uncore_pmu_event_read,
.module = THIS_MODULE,
}; };
} else { } else {
pmu->pmu = *pmu->type->pmu; pmu->pmu = *pmu->type->pmu;
......
...@@ -2686,7 +2686,7 @@ static struct intel_uncore_type *hswep_msr_uncores[] = { ...@@ -2686,7 +2686,7 @@ static struct intel_uncore_type *hswep_msr_uncores[] = {
void hswep_uncore_cpu_init(void) void hswep_uncore_cpu_init(void)
{ {
int pkg = topology_phys_to_logical_pkg(0); int pkg = boot_cpu_data.logical_proc_id;
if (hswep_uncore_cbox.num_boxes > boot_cpu_data.x86_max_cores) if (hswep_uncore_cbox.num_boxes > boot_cpu_data.x86_max_cores)
hswep_uncore_cbox.num_boxes = boot_cpu_data.x86_max_cores; hswep_uncore_cbox.num_boxes = boot_cpu_data.x86_max_cores;
......
...@@ -1259,6 +1259,7 @@ extern void perf_event_disable(struct perf_event *event); ...@@ -1259,6 +1259,7 @@ extern void perf_event_disable(struct perf_event *event);
extern void perf_event_disable_local(struct perf_event *event); extern void perf_event_disable_local(struct perf_event *event);
extern void perf_event_disable_inatomic(struct perf_event *event); extern void perf_event_disable_inatomic(struct perf_event *event);
extern void perf_event_task_tick(void); extern void perf_event_task_tick(void);
extern int perf_event_account_interrupt(struct perf_event *event);
#else /* !CONFIG_PERF_EVENTS: */ #else /* !CONFIG_PERF_EVENTS: */
static inline void * static inline void *
perf_aux_output_begin(struct perf_output_handle *handle, perf_aux_output_begin(struct perf_output_handle *handle,
......
...@@ -2249,7 +2249,7 @@ static int __perf_install_in_context(void *info) ...@@ -2249,7 +2249,7 @@ static int __perf_install_in_context(void *info)
struct perf_event_context *ctx = event->ctx; struct perf_event_context *ctx = event->ctx;
struct perf_cpu_context *cpuctx = __get_cpu_context(ctx); struct perf_cpu_context *cpuctx = __get_cpu_context(ctx);
struct perf_event_context *task_ctx = cpuctx->task_ctx; struct perf_event_context *task_ctx = cpuctx->task_ctx;
bool activate = true; bool reprogram = true;
int ret = 0; int ret = 0;
raw_spin_lock(&cpuctx->ctx.lock); raw_spin_lock(&cpuctx->ctx.lock);
...@@ -2257,27 +2257,26 @@ static int __perf_install_in_context(void *info) ...@@ -2257,27 +2257,26 @@ static int __perf_install_in_context(void *info)
raw_spin_lock(&ctx->lock); raw_spin_lock(&ctx->lock);
task_ctx = ctx; task_ctx = ctx;
/* If we're on the wrong CPU, try again */ reprogram = (ctx->task == current);
if (task_cpu(ctx->task) != smp_processor_id()) {
ret = -ESRCH;
goto unlock;
}
/* /*
* If we're on the right CPU, see if the task we target is * If the task is running, it must be running on this CPU,
* current, if not we don't have to activate the ctx, a future * otherwise we cannot reprogram things.
* context switch will do that for us. *
* If it's not running, we don't care, ctx->lock will
* serialize against it becoming runnable.
*/ */
if (ctx->task != current) if (task_curr(ctx->task) && !reprogram) {
activate = false; ret = -ESRCH;
else goto unlock;
WARN_ON_ONCE(cpuctx->task_ctx && cpuctx->task_ctx != ctx); }
WARN_ON_ONCE(reprogram && cpuctx->task_ctx && cpuctx->task_ctx != ctx);
} else if (task_ctx) { } else if (task_ctx) {
raw_spin_lock(&task_ctx->lock); raw_spin_lock(&task_ctx->lock);
} }
if (activate) { if (reprogram) {
ctx_sched_out(ctx, cpuctx, EVENT_TIME); ctx_sched_out(ctx, cpuctx, EVENT_TIME);
add_event_to_ctx(event, ctx); add_event_to_ctx(event, ctx);
ctx_resched(cpuctx, task_ctx); ctx_resched(cpuctx, task_ctx);
...@@ -2328,13 +2327,36 @@ perf_install_in_context(struct perf_event_context *ctx, ...@@ -2328,13 +2327,36 @@ perf_install_in_context(struct perf_event_context *ctx,
/* /*
* Installing events is tricky because we cannot rely on ctx->is_active * Installing events is tricky because we cannot rely on ctx->is_active
* to be set in case this is the nr_events 0 -> 1 transition. * to be set in case this is the nr_events 0 -> 1 transition.
*
* Instead we use task_curr(), which tells us if the task is running.
* However, since we use task_curr() outside of rq::lock, we can race
* against the actual state. This means the result can be wrong.
*
* If we get a false positive, we retry, this is harmless.
*
* If we get a false negative, things are complicated. If we are after
* perf_event_context_sched_in() ctx::lock will serialize us, and the
* value must be correct. If we're before, it doesn't matter since
* perf_event_context_sched_in() will program the counter.
*
* However, this hinges on the remote context switch having observed
* our task->perf_event_ctxp[] store, such that it will in fact take
* ctx::lock in perf_event_context_sched_in().
*
* We do this by task_function_call(), if the IPI fails to hit the task
* we know any future context switch of task must see the
* perf_event_ctxp[] store.
*/ */
again:
/* /*
* Cannot use task_function_call() because we need to run on the task's * This smp_mb() orders the task->perf_event_ctxp[] store with the
* CPU regardless of whether its current or not. * task_cpu() load, such that if the IPI then does not find the task
* running, a future context switch of that task must observe the
* store.
*/ */
if (!cpu_function_call(task_cpu(task), __perf_install_in_context, event)) smp_mb();
again:
if (!task_function_call(task, __perf_install_in_context, event))
return; return;
raw_spin_lock_irq(&ctx->lock); raw_spin_lock_irq(&ctx->lock);
...@@ -2348,12 +2370,16 @@ perf_install_in_context(struct perf_event_context *ctx, ...@@ -2348,12 +2370,16 @@ perf_install_in_context(struct perf_event_context *ctx,
raw_spin_unlock_irq(&ctx->lock); raw_spin_unlock_irq(&ctx->lock);
return; return;
} }
raw_spin_unlock_irq(&ctx->lock);
/* /*
* Since !ctx->is_active doesn't mean anything, we must IPI * If the task is not running, ctx->lock will avoid it becoming so,
* unconditionally. * thus we can safely install the event.
*/ */
goto again; if (task_curr(task)) {
raw_spin_unlock_irq(&ctx->lock);
goto again;
}
add_event_to_ctx(event, ctx);
raw_spin_unlock_irq(&ctx->lock);
} }
/* /*
...@@ -7034,25 +7060,12 @@ static void perf_log_itrace_start(struct perf_event *event) ...@@ -7034,25 +7060,12 @@ static void perf_log_itrace_start(struct perf_event *event)
perf_output_end(&handle); perf_output_end(&handle);
} }
/* static int
* Generic event overflow handling, sampling. __perf_event_account_interrupt(struct perf_event *event, int throttle)
*/
static int __perf_event_overflow(struct perf_event *event,
int throttle, struct perf_sample_data *data,
struct pt_regs *regs)
{ {
int events = atomic_read(&event->event_limit);
struct hw_perf_event *hwc = &event->hw; struct hw_perf_event *hwc = &event->hw;
u64 seq;
int ret = 0; int ret = 0;
u64 seq;
/*
* Non-sampling counters might still use the PMI to fold short
* hardware counters, ignore those.
*/
if (unlikely(!is_sampling_event(event)))
return 0;
seq = __this_cpu_read(perf_throttled_seq); seq = __this_cpu_read(perf_throttled_seq);
if (seq != hwc->interrupts_seq) { if (seq != hwc->interrupts_seq) {
...@@ -7080,6 +7093,34 @@ static int __perf_event_overflow(struct perf_event *event, ...@@ -7080,6 +7093,34 @@ static int __perf_event_overflow(struct perf_event *event,
perf_adjust_period(event, delta, hwc->last_period, true); perf_adjust_period(event, delta, hwc->last_period, true);
} }
return ret;
}
/*
 * Exported wrapper around __perf_event_account_interrupt() that always
 * requests throttling (throttle == 1) and returns that helper's result
 * unchanged.
 *
 * NOTE(review): the PEBS drain path in this same change stops the event
 * when this returns non-zero, so a non-zero result presumably means the
 * event was throttled -- confirm against __perf_event_account_interrupt().
 */
int perf_event_account_interrupt(struct perf_event *event)
{
return __perf_event_account_interrupt(event, 1);
}
/*
* Generic event overflow handling, sampling.
*/
static int __perf_event_overflow(struct perf_event *event,
int throttle, struct perf_sample_data *data,
struct pt_regs *regs)
{
int events = atomic_read(&event->event_limit);
int ret = 0;
/*
* Non-sampling counters might still use the PMI to fold short
* hardware counters, ignore those.
*/
if (unlikely(!is_sampling_event(event)))
return 0;
ret = __perf_event_account_interrupt(event, throttle);
/* /*
* XXX event_limit might not quite work as expected on inherited * XXX event_limit might not quite work as expected on inherited
* events * events
...@@ -9503,6 +9544,37 @@ static int perf_event_set_clock(struct perf_event *event, clockid_t clk_id) ...@@ -9503,6 +9544,37 @@ static int perf_event_set_clock(struct perf_event *event, clockid_t clk_id)
return 0; return 0;
} }
/*
* Variation on perf_event_ctx_lock_nested(), except we take two context
* mutexes.
*/
/*
 * Lock both the group leader's current context and @ctx.
 *
 * group_leader->ctx can change while we are not holding its mutex, so the
 * context is sampled, pinned with a reference, locked, and then re-checked;
 * the whole sequence is retried until the sampled context is still the
 * leader's context once both mutexes are held.
 *
 * Returns the group leader's context (gctx) with gctx->mutex and
 * ctx->mutex both held and one extra reference taken on gctx.
 * NOTE(review): callers in this change release gctx with
 * perf_event_ctx_unlock(), which presumably drops both the mutex and the
 * reference -- confirm.
 */
static struct perf_event_context *
__perf_event_ctx_lock_double(struct perf_event *group_leader,
struct perf_event_context *ctx)
{
struct perf_event_context *gctx;
again:
/* Sample the leader's context and try to pin it under RCU. */
rcu_read_lock();
gctx = READ_ONCE(group_leader->ctx);
if (!atomic_inc_not_zero(&gctx->refcount)) {
/* Refcount already hit zero: re-sample and try again. */
rcu_read_unlock();
goto again;
}
rcu_read_unlock();
mutex_lock_double(&gctx->mutex, &ctx->mutex);
/*
 * Re-check now that both mutexes are held: if the leader was moved to
 * another context in the meantime, undo everything and start over.
 */
if (group_leader->ctx != gctx) {
mutex_unlock(&ctx->mutex);
mutex_unlock(&gctx->mutex);
put_ctx(gctx);
goto again;
}
return gctx;
}
/** /**
* sys_perf_event_open - open a performance event, associate it to a task/cpu * sys_perf_event_open - open a performance event, associate it to a task/cpu
* *
...@@ -9746,12 +9818,31 @@ SYSCALL_DEFINE5(perf_event_open, ...@@ -9746,12 +9818,31 @@ SYSCALL_DEFINE5(perf_event_open,
} }
if (move_group) { if (move_group) {
gctx = group_leader->ctx; gctx = __perf_event_ctx_lock_double(group_leader, ctx);
mutex_lock_double(&gctx->mutex, &ctx->mutex);
if (gctx->task == TASK_TOMBSTONE) { if (gctx->task == TASK_TOMBSTONE) {
err = -ESRCH; err = -ESRCH;
goto err_locked; goto err_locked;
} }
/*
* Check if we raced against another sys_perf_event_open() call
* moving the software group underneath us.
*/
if (!(group_leader->group_caps & PERF_EV_CAP_SOFTWARE)) {
/*
* If someone moved the group out from under us, check
* if this new event wound up on the same ctx, if so
* it's the regular !move_group case, otherwise fail.
*/
if (gctx != ctx) {
err = -EINVAL;
goto err_locked;
} else {
perf_event_ctx_unlock(group_leader, gctx);
move_group = 0;
}
}
} else { } else {
mutex_lock(&ctx->mutex); mutex_lock(&ctx->mutex);
} }
...@@ -9853,7 +9944,7 @@ SYSCALL_DEFINE5(perf_event_open, ...@@ -9853,7 +9944,7 @@ SYSCALL_DEFINE5(perf_event_open,
perf_unpin_context(ctx); perf_unpin_context(ctx);
if (move_group) if (move_group)
mutex_unlock(&gctx->mutex); perf_event_ctx_unlock(group_leader, gctx);
mutex_unlock(&ctx->mutex); mutex_unlock(&ctx->mutex);
if (task) { if (task) {
...@@ -9879,7 +9970,7 @@ SYSCALL_DEFINE5(perf_event_open, ...@@ -9879,7 +9970,7 @@ SYSCALL_DEFINE5(perf_event_open,
err_locked: err_locked:
if (move_group) if (move_group)
mutex_unlock(&gctx->mutex); perf_event_ctx_unlock(group_leader, gctx);
mutex_unlock(&ctx->mutex); mutex_unlock(&ctx->mutex);
/* err_file: */ /* err_file: */
fput(event_file); fput(event_file);
......
...@@ -4,7 +4,7 @@ ...@@ -4,7 +4,7 @@
#include <unistd.h> #include <unistd.h>
#include <string.h> #include <string.h>
#include <errno.h> #include <errno.h>
#include <net/ethernet.h> #include <linux/if_ether.h>
#include <net/if.h> #include <net/if.h>
#include <linux/if_packet.h> #include <linux/if_packet.h>
#include <arpa/inet.h> #include <arpa/inet.h>
......
...@@ -9,7 +9,6 @@ ...@@ -9,7 +9,6 @@
#include <string.h> #include <string.h>
#include <fcntl.h> #include <fcntl.h>
#include <poll.h> #include <poll.h>
#include <sys/ioctl.h>
#include <linux/perf_event.h> #include <linux/perf_event.h>
#include <linux/bpf.h> #include <linux/bpf.h>
#include <errno.h> #include <errno.h>
......
...@@ -213,6 +213,9 @@ static int get_value(struct parse_opt_ctx_t *p, ...@@ -213,6 +213,9 @@ static int get_value(struct parse_opt_ctx_t *p,
else else
err = get_arg(p, opt, flags, (const char **)opt->value); err = get_arg(p, opt, flags, (const char **)opt->value);
if (opt->set)
*(bool *)opt->set = true;
/* PARSE_OPT_NOEMPTY: Allow NULL but disallow empty string. */ /* PARSE_OPT_NOEMPTY: Allow NULL but disallow empty string. */
if (opt->flags & PARSE_OPT_NOEMPTY) { if (opt->flags & PARSE_OPT_NOEMPTY) {
const char *val = *(const char **)opt->value; const char *val = *(const char **)opt->value;
......
...@@ -137,6 +137,11 @@ struct option { ...@@ -137,6 +137,11 @@ struct option {
{ .type = OPTION_STRING, .short_name = (s), .long_name = (l), \ { .type = OPTION_STRING, .short_name = (s), .long_name = (l), \
.value = check_vtype(v, const char **), (a), .help = (h), \ .value = check_vtype(v, const char **), (a), .help = (h), \
.flags = PARSE_OPT_OPTARG, .defval = (intptr_t)(d) } .flags = PARSE_OPT_OPTARG, .defval = (intptr_t)(d) }
#define OPT_STRING_OPTARG_SET(s, l, v, os, a, h, d) \
{ .type = OPTION_STRING, .short_name = (s), .long_name = (l), \
.value = check_vtype(v, const char **), (a), .help = (h), \
.flags = PARSE_OPT_OPTARG, .defval = (intptr_t)(d), \
.set = check_vtype(os, bool *)}
#define OPT_STRING_NOEMPTY(s, l, v, a, h) { .type = OPTION_STRING, .short_name = (s), .long_name = (l), .value = check_vtype(v, const char **), (a), .help = (h), .flags = PARSE_OPT_NOEMPTY} #define OPT_STRING_NOEMPTY(s, l, v, a, h) { .type = OPTION_STRING, .short_name = (s), .long_name = (l), .value = check_vtype(v, const char **), (a), .help = (h), .flags = PARSE_OPT_NOEMPTY}
#define OPT_DATE(s, l, v, h) \ #define OPT_DATE(s, l, v, h) \
{ .type = OPTION_CALLBACK, .short_name = (s), .long_name = (l), .value = (v), .argh = "time", .help = (h), .callback = parse_opt_approxidate_cb } { .type = OPTION_CALLBACK, .short_name = (s), .long_name = (l), .value = (v), .argh = "time", .help = (h), .callback = parse_opt_approxidate_cb }
......
...@@ -111,7 +111,7 @@ static int sched_switch_handler(struct trace_seq *s, ...@@ -111,7 +111,7 @@ static int sched_switch_handler(struct trace_seq *s,
trace_seq_printf(s, "%lld ", val); trace_seq_printf(s, "%lld ", val);
if (pevent_get_field_val(s, event, "prev_prio", record, &val, 0) == 0) if (pevent_get_field_val(s, event, "prev_prio", record, &val, 0) == 0)
trace_seq_printf(s, "[%lld] ", val); trace_seq_printf(s, "[%d] ", (int) val);
if (pevent_get_field_val(s, event, "prev_state", record, &val, 0) == 0) if (pevent_get_field_val(s, event, "prev_state", record, &val, 0) == 0)
write_state(s, val); write_state(s, val);
...@@ -129,7 +129,7 @@ static int sched_switch_handler(struct trace_seq *s, ...@@ -129,7 +129,7 @@ static int sched_switch_handler(struct trace_seq *s,
trace_seq_printf(s, "%lld", val); trace_seq_printf(s, "%lld", val);
if (pevent_get_field_val(s, event, "next_prio", record, &val, 0) == 0) if (pevent_get_field_val(s, event, "next_prio", record, &val, 0) == 0)
trace_seq_printf(s, " [%lld]", val); trace_seq_printf(s, " [%d]", (int) val);
return 0; return 0;
} }
......
...@@ -430,6 +430,10 @@ that gets then processed, possibly via a perf script, to decide if that ...@@ -430,6 +430,10 @@ that gets then processed, possibly via a perf script, to decide if that
particular perf.data snapshot should be kept or not. particular perf.data snapshot should be kept or not.
Implies --timestamp-filename, --no-buildid and --no-buildid-cache. Implies --timestamp-filename, --no-buildid and --no-buildid-cache.
The reason for the latter two is to reduce the data file switching
overhead. You can still switch them on with:
--switch-output --no-no-buildid --no-no-buildid-cache
--dry-run:: --dry-run::
Parse options then exit. --dry-run can be used to detect errors in cmdline Parse options then exit. --dry-run can be used to detect errors in cmdline
......
...@@ -704,9 +704,9 @@ install-tests: all install-gtk ...@@ -704,9 +704,9 @@ install-tests: all install-gtk
$(INSTALL) -d -m 755 '$(DESTDIR_SQ)$(perfexec_instdir_SQ)/tests/attr'; \ $(INSTALL) -d -m 755 '$(DESTDIR_SQ)$(perfexec_instdir_SQ)/tests/attr'; \
$(INSTALL) tests/attr/* '$(DESTDIR_SQ)$(perfexec_instdir_SQ)/tests/attr' $(INSTALL) tests/attr/* '$(DESTDIR_SQ)$(perfexec_instdir_SQ)/tests/attr'
install-bin: install-tools install-tests install-bin: install-tools install-tests install-traceevent-plugins
install: install-bin try-install-man install-traceevent-plugins install: install-bin try-install-man
install-python_ext: install-python_ext:
$(PYTHON_WORD) util/setup.py --quiet install --root='/$(DESTDIR_SQ)' $(PYTHON_WORD) util/setup.py --quiet install --root='/$(DESTDIR_SQ)'
......
...@@ -1405,7 +1405,7 @@ static bool dry_run; ...@@ -1405,7 +1405,7 @@ static bool dry_run;
* perf_evlist__prepare_workload, etc instead of fork+exec'in 'perf record', * perf_evlist__prepare_workload, etc instead of fork+exec'in 'perf record',
* using pipes, etc. * using pipes, etc.
*/ */
struct option __record_options[] = { static struct option __record_options[] = {
OPT_CALLBACK('e', "event", &record.evlist, "event", OPT_CALLBACK('e', "event", &record.evlist, "event",
"event selector. use 'perf list' to list available events", "event selector. use 'perf list' to list available events",
parse_events_option), parse_events_option),
...@@ -1636,7 +1636,7 @@ int cmd_record(int argc, const char **argv, const char *prefix __maybe_unused) ...@@ -1636,7 +1636,7 @@ int cmd_record(int argc, const char **argv, const char *prefix __maybe_unused)
* overhead. Still generate buildid if they are required * overhead. Still generate buildid if they are required
* explicitly using * explicitly using
* *
* perf record --signal-trigger --no-no-buildid \ * perf record --switch-output --no-no-buildid \
* --no-no-buildid-cache * --no-no-buildid-cache
* *
* Following code equals to: * Following code equals to:
......
...@@ -209,6 +209,7 @@ struct perf_sched { ...@@ -209,6 +209,7 @@ struct perf_sched {
u64 skipped_samples; u64 skipped_samples;
const char *time_str; const char *time_str;
struct perf_time_interval ptime; struct perf_time_interval ptime;
struct perf_time_interval hist_time;
}; };
/* per thread run time data */ /* per thread run time data */
...@@ -2460,6 +2461,11 @@ static int timehist_sched_change_event(struct perf_tool *tool, ...@@ -2460,6 +2461,11 @@ static int timehist_sched_change_event(struct perf_tool *tool,
timehist_print_sample(sched, sample, &al, thread, t); timehist_print_sample(sched, sample, &al, thread, t);
out: out:
if (sched->hist_time.start == 0 && t >= ptime->start)
sched->hist_time.start = t;
if (ptime->end == 0 || t <= ptime->end)
sched->hist_time.end = t;
if (tr) { if (tr) {
/* time of this sched_switch event becomes last time task seen */ /* time of this sched_switch event becomes last time task seen */
tr->last_time = sample->time; tr->last_time = sample->time;
...@@ -2624,6 +2630,7 @@ static void timehist_print_summary(struct perf_sched *sched, ...@@ -2624,6 +2630,7 @@ static void timehist_print_summary(struct perf_sched *sched,
struct thread *t; struct thread *t;
struct thread_runtime *r; struct thread_runtime *r;
int i; int i;
u64 hist_time = sched->hist_time.end - sched->hist_time.start;
memset(&totals, 0, sizeof(totals)); memset(&totals, 0, sizeof(totals));
...@@ -2665,7 +2672,7 @@ static void timehist_print_summary(struct perf_sched *sched, ...@@ -2665,7 +2672,7 @@ static void timehist_print_summary(struct perf_sched *sched,
totals.sched_count += r->run_stats.n; totals.sched_count += r->run_stats.n;
printf(" CPU %2d idle for ", i); printf(" CPU %2d idle for ", i);
print_sched_time(r->total_run_time, 6); print_sched_time(r->total_run_time, 6);
printf(" msec\n"); printf(" msec (%6.2f%%)\n", 100.0 * r->total_run_time / hist_time);
} else } else
printf(" CPU %2d idle entire time window\n", i); printf(" CPU %2d idle entire time window\n", i);
} }
...@@ -2701,12 +2708,16 @@ static void timehist_print_summary(struct perf_sched *sched, ...@@ -2701,12 +2708,16 @@ static void timehist_print_summary(struct perf_sched *sched,
printf("\n" printf("\n"
" Total number of unique tasks: %" PRIu64 "\n" " Total number of unique tasks: %" PRIu64 "\n"
"Total number of context switches: %" PRIu64 "\n" "Total number of context switches: %" PRIu64 "\n",
" Total run time (msec): ",
totals.task_count, totals.sched_count); totals.task_count, totals.sched_count);
printf(" Total run time (msec): ");
print_sched_time(totals.total_run_time, 2); print_sched_time(totals.total_run_time, 2);
printf("\n"); printf("\n");
printf(" Total scheduling time (msec): ");
print_sched_time(hist_time, 2);
printf(" (x %d)\n", sched->max_cpu);
} }
typedef int (*sched_handler)(struct perf_tool *tool, typedef int (*sched_handler)(struct perf_tool *tool,
......
...@@ -163,7 +163,7 @@ static struct map *kernel_get_module_map(const char *module) ...@@ -163,7 +163,7 @@ static struct map *kernel_get_module_map(const char *module)
/* A file path -- this is an offline module */ /* A file path -- this is an offline module */
if (module && strchr(module, '/')) if (module && strchr(module, '/'))
return machine__findnew_module_map(host_machine, 0, module); return dso__new_map(module);
if (!module) if (!module)
module = "kernel"; module = "kernel";
...@@ -173,6 +173,7 @@ static struct map *kernel_get_module_map(const char *module) ...@@ -173,6 +173,7 @@ static struct map *kernel_get_module_map(const char *module)
if (strncmp(pos->dso->short_name + 1, module, if (strncmp(pos->dso->short_name + 1, module,
pos->dso->short_name_len - 2) == 0 && pos->dso->short_name_len - 2) == 0 &&
module[pos->dso->short_name_len - 2] == '\0') { module[pos->dso->short_name_len - 2] == '\0') {
map__get(pos);
return pos; return pos;
} }
} }
...@@ -188,15 +189,6 @@ struct map *get_target_map(const char *target, bool user) ...@@ -188,15 +189,6 @@ struct map *get_target_map(const char *target, bool user)
return kernel_get_module_map(target); return kernel_get_module_map(target);
} }
static void put_target_map(struct map *map, bool user)
{
if (map && user) {
/* Only the user map needs to be released */
map__put(map);
}
}
static int convert_exec_to_group(const char *exec, char **result) static int convert_exec_to_group(const char *exec, char **result)
{ {
char *ptr1, *ptr2, *exec_copy; char *ptr1, *ptr2, *exec_copy;
...@@ -267,21 +259,6 @@ static bool kprobe_warn_out_range(const char *symbol, unsigned long address) ...@@ -267,21 +259,6 @@ static bool kprobe_warn_out_range(const char *symbol, unsigned long address)
return true; return true;
} }
/*
* NOTE:
* '.gnu.linkonce.this_module' section of kernel module elf directly
* maps to 'struct module' from linux/module.h. This section contains
* actual module name which will be used by kernel after loading it.
* But, we cannot use 'struct module' here since linux/module.h is not
* exposed to user-space. Offset of 'name' has remained same from long
* time, so hardcoding it here.
*/
#ifdef __LP64__
#define MOD_NAME_OFFSET 24
#else
#define MOD_NAME_OFFSET 12
#endif
/* /*
* @module can be module name or module file path. In case of path, * @module can be module name or module file path. In case of path,
* inspect elf and find out what is actual module name. * inspect elf and find out what is actual module name.
...@@ -296,6 +273,7 @@ static char *find_module_name(const char *module) ...@@ -296,6 +273,7 @@ static char *find_module_name(const char *module)
Elf_Data *data; Elf_Data *data;
Elf_Scn *sec; Elf_Scn *sec;
char *mod_name = NULL; char *mod_name = NULL;
int name_offset;
fd = open(module, O_RDONLY); fd = open(module, O_RDONLY);
if (fd < 0) if (fd < 0)
...@@ -317,7 +295,21 @@ static char *find_module_name(const char *module) ...@@ -317,7 +295,21 @@ static char *find_module_name(const char *module)
if (!data || !data->d_buf) if (!data || !data->d_buf)
goto ret_err; goto ret_err;
mod_name = strdup((char *)data->d_buf + MOD_NAME_OFFSET); /*
* NOTE:
* '.gnu.linkonce.this_module' section of kernel module elf directly
* maps to 'struct module' from linux/module.h. This section contains
* actual module name which will be used by kernel after loading it.
* But, we cannot use 'struct module' here since linux/module.h is not
* exposed to user-space. Offset of 'name' has remained same from long
* time, so hardcoding it here.
*/
if (ehdr.e_ident[EI_CLASS] == ELFCLASS32)
name_offset = 12;
else /* expect ELFCLASS64 by default */
name_offset = 24;
mod_name = strdup((char *)data->d_buf + name_offset);
ret_err: ret_err:
elf_end(elf); elf_end(elf);
...@@ -412,7 +404,7 @@ static int find_alternative_probe_point(struct debuginfo *dinfo, ...@@ -412,7 +404,7 @@ static int find_alternative_probe_point(struct debuginfo *dinfo,
} }
out: out:
put_target_map(map, uprobes); map__put(map);
return ret; return ret;
} }
...@@ -618,6 +610,51 @@ static int find_perf_probe_point_from_dwarf(struct probe_trace_point *tp, ...@@ -618,6 +610,51 @@ static int find_perf_probe_point_from_dwarf(struct probe_trace_point *tp,
return ret ? : -ENOENT; return ret ? : -ENOENT;
} }
/*
* Rename DWARF symbols to ELF symbols -- gcc sometimes optimizes functions
* and generates new symbols with suffixes such as .constprop.N or .isra.N
* etc. Since those symbols are not recorded in DWARF, we have to find
* correct generated symbols from offline ELF binary.
* For online kernel or uprobes we don't need this because those are
* rebased on _text, or already a section relative address.
*/
/*
 * Rewrite each probe trace event so that it names the ELF symbol actually
 * found at its address in the offline binary @pathname, instead of the
 * DWARF name.
 *
 * @tevs:     array of probe trace events to fix up in place
 * @ntevs:    number of entries in @tevs
 * @pathname: path of the offline ELF binary to look symbols up in
 *
 * For every event whose address resolves to a differently named symbol,
 * point.symbol is replaced by a copy of the ELF name and point.offset is
 * recomputed relative to that symbol's start. The previous name is kept
 * in point.realname when no realname was set, otherwise it is freed.
 *
 * Returns 0 on success, -EINVAL if the map or the text start address
 * cannot be obtained, -ENOMEM if a symbol name cannot be duplicated.
 */
static int
post_process_offline_probe_trace_events(struct probe_trace_event *tevs,
					int ntevs, const char *pathname)
{
	struct symbol *sym;
	struct map *map;
	unsigned long stext = 0;
	char *name;
	u64 addr;
	int ret = 0;
	int i;

	/* Prepare a map for offline binary */
	map = dso__new_map(pathname);
	if (!map || get_text_start_address(pathname, &stext) < 0) {
		pr_warning("Failed to get ELF symbols for %s\n", pathname);
		ret = -EINVAL;
		goto out;
	}

	for (i = 0; i < ntevs; i++) {
		addr = tevs[i].point.address + tevs[i].point.offset - stext;
		sym = map__find_symbol(map, addr);
		if (!sym)
			continue;
		if (!strcmp(sym->name, tevs[i].point.symbol))
			continue;

		/*
		 * Duplicate the new name before touching the event, so a
		 * failed allocation leaves the event fully intact.
		 */
		name = strdup(sym->name);
		if (!name) {
			ret = -ENOMEM;
			break;
		}

		/* If we have no realname, use symbol for it */
		if (!tevs[i].point.realname)
			tevs[i].point.realname = tevs[i].point.symbol;
		else
			free(tevs[i].point.symbol);
		tevs[i].point.symbol = name;
		tevs[i].point.offset = addr - sym->start;
	}

out:
	/* Drop the map reference on every path, including the error one. */
	if (map)
		map__put(map);
	return ret;
}
static int add_exec_to_probe_trace_events(struct probe_trace_event *tevs, static int add_exec_to_probe_trace_events(struct probe_trace_event *tevs,
int ntevs, const char *exec) int ntevs, const char *exec)
{ {
...@@ -679,7 +716,8 @@ post_process_kernel_probe_trace_events(struct probe_trace_event *tevs, ...@@ -679,7 +716,8 @@ post_process_kernel_probe_trace_events(struct probe_trace_event *tevs,
/* Skip post process if the target is an offline kernel */ /* Skip post process if the target is an offline kernel */
if (symbol_conf.ignore_vmlinux_buildid) if (symbol_conf.ignore_vmlinux_buildid)
return 0; return post_process_offline_probe_trace_events(tevs, ntevs,
symbol_conf.vmlinux_name);
reloc_sym = kernel_get_ref_reloc_sym(); reloc_sym = kernel_get_ref_reloc_sym();
if (!reloc_sym) { if (!reloc_sym) {
...@@ -2869,7 +2907,7 @@ static int find_probe_trace_events_from_map(struct perf_probe_event *pev, ...@@ -2869,7 +2907,7 @@ static int find_probe_trace_events_from_map(struct perf_probe_event *pev,
} }
out: out:
put_target_map(map, pev->uprobes); map__put(map);
free(syms); free(syms);
return ret; return ret;
...@@ -3362,10 +3400,7 @@ int show_available_funcs(const char *target, struct strfilter *_filter, ...@@ -3362,10 +3400,7 @@ int show_available_funcs(const char *target, struct strfilter *_filter,
return ret; return ret;
/* Get a symbol map */ /* Get a symbol map */
if (user) map = get_target_map(target, user);
map = dso__new_map(target);
else
map = kernel_get_module_map(target);
if (!map) { if (!map) {
pr_err("Failed to get a map for %s\n", (target) ? : "kernel"); pr_err("Failed to get a map for %s\n", (target) ? : "kernel");
return -EINVAL; return -EINVAL;
...@@ -3397,9 +3432,7 @@ int show_available_funcs(const char *target, struct strfilter *_filter, ...@@ -3397,9 +3432,7 @@ int show_available_funcs(const char *target, struct strfilter *_filter,
} }
end: end:
if (user) { map__put(map);
map__put(map);
}
exit_probe_symbol_maps(); exit_probe_symbol_maps();
return ret; return ret;
......
...@@ -537,6 +537,12 @@ int sysfs__read_build_id(const char *filename, void *build_id, size_t size) ...@@ -537,6 +537,12 @@ int sysfs__read_build_id(const char *filename, void *build_id, size_t size)
break; break;
} else { } else {
int n = namesz + descsz; int n = namesz + descsz;
if (n > (int)sizeof(bf)) {
n = sizeof(bf);
pr_debug("%s: truncating reading of build id in sysfs file %s: n_namesz=%u, n_descsz=%u.\n",
__func__, filename, nhdr.n_namesz, nhdr.n_descsz);
}
if (read(fd, bf, n) != n) if (read(fd, bf, n) != n)
break; break;
} }
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment