Commit 4b0c53e9 authored by Ingo Molnar

Merge tag 'perf-core-for-mingo' of...

Merge tag 'perf-core-for-mingo' of git://git.kernel.org/pub/scm/linux/kernel/git/acme/linux into perf/core

Pull perf/core improvements and fixes from Arnaldo Carvalho de Melo:

New features:

  - Introduce PERF_RECORD_SWITCH(_CPU_WIDE) and use it in 'record' to
    ask for context switches, allowing non-privileged tasks to know
    when they are switched in and out, which wasn't possible with
    the other context switch tracepoint and software events, see the
    patch description for a comprehensive justification (Adrian Hunter)

  - Stop collecting /proc/kallsyms in perf.data files, saving about
    4.5MB on a typical x86-64 system, use the symbol resolution
    routines used in all the other tools (report, top, etc) now that
    we can ask libtraceevent to use perf's symbol resolution code.
    (Arnaldo Carvalho de Melo)

User visible fixes:

  - Expose perf's symbol resolver to libtraceevent, so that its plugins can
    resolve tracepoint fields to kernel functions, like the 'function' field
    in the "timer:hrtimer_start tracepoint" (Arnaldo Carvalho de Melo)

Infrastructure changes:

  - Map propagation of thread and cpu maps improvements, prep work for
    'perf stat' new features (Jiri Olsa)
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Signed-off-by: Ingo Molnar <mingo@kernel.org>
parents a11c51ac 7c14898b
......@@ -330,7 +330,8 @@ struct perf_event_attr {
mmap2 : 1, /* include mmap with inode data */
comm_exec : 1, /* flag comm events that are due to an exec */
use_clockid : 1, /* use @clockid for time fields */
__reserved_1 : 38;
context_switch : 1, /* context switch data */
__reserved_1 : 37;
union {
__u32 wakeup_events; /* wakeup every n events */
......@@ -572,9 +573,11 @@ struct perf_event_mmap_page {
/*
* PERF_RECORD_MISC_MMAP_DATA and PERF_RECORD_MISC_COMM_EXEC are used on
* different events so can reuse the same bit position.
* Ditto PERF_RECORD_MISC_SWITCH_OUT.
*/
#define PERF_RECORD_MISC_MMAP_DATA (1 << 13)
#define PERF_RECORD_MISC_COMM_EXEC (1 << 13)
#define PERF_RECORD_MISC_SWITCH_OUT (1 << 13)
/*
* Indicates that the content of PERF_SAMPLE_IP points to
* the actual instruction that triggered the event. See also
......@@ -818,6 +821,32 @@ enum perf_event_type {
*/
PERF_RECORD_LOST_SAMPLES = 13,
/*
* Records a context switch in or out (flagged by
* PERF_RECORD_MISC_SWITCH_OUT). See also
* PERF_RECORD_SWITCH_CPU_WIDE.
*
* struct {
* struct perf_event_header header;
* struct sample_id sample_id;
* };
*/
PERF_RECORD_SWITCH = 14,
/*
* CPU-wide version of PERF_RECORD_SWITCH with next_prev_pid and
* next_prev_tid that are the next (switching out) or previous
* (switching in) pid/tid.
*
* struct {
* struct perf_event_header header;
* u32 next_prev_pid;
* u32 next_prev_tid;
* struct sample_id sample_id;
* };
*/
PERF_RECORD_SWITCH_CPU_WIDE = 15,
PERF_RECORD_MAX, /* non-ABI */
};
......
......@@ -163,6 +163,7 @@ static atomic_t nr_mmap_events __read_mostly;
static atomic_t nr_comm_events __read_mostly;
static atomic_t nr_task_events __read_mostly;
static atomic_t nr_freq_events __read_mostly;
static atomic_t nr_switch_events __read_mostly;
static LIST_HEAD(pmus);
static DEFINE_MUTEX(pmus_lock);
......@@ -2619,6 +2620,9 @@ static void perf_pmu_sched_task(struct task_struct *prev,
local_irq_restore(flags);
}
static void perf_event_switch(struct task_struct *task,
struct task_struct *next_prev, bool sched_in);
#define for_each_task_context_nr(ctxn) \
for ((ctxn) = 0; (ctxn) < perf_nr_task_contexts; (ctxn)++)
......@@ -2641,6 +2645,9 @@ void __perf_event_task_sched_out(struct task_struct *task,
if (__this_cpu_read(perf_sched_cb_usages))
perf_pmu_sched_task(task, next, false);
if (atomic_read(&nr_switch_events))
perf_event_switch(task, next, false);
for_each_task_context_nr(ctxn)
perf_event_context_sched_out(task, ctxn, next);
......@@ -2831,6 +2838,9 @@ void __perf_event_task_sched_in(struct task_struct *prev,
if (atomic_read(this_cpu_ptr(&perf_cgroup_events)))
perf_cgroup_sched_in(prev, task);
if (atomic_read(&nr_switch_events))
perf_event_switch(task, prev, true);
if (__this_cpu_read(perf_sched_cb_usages))
perf_pmu_sched_task(prev, task, true);
}
......@@ -3454,6 +3464,10 @@ static void unaccount_event(struct perf_event *event)
atomic_dec(&nr_task_events);
if (event->attr.freq)
atomic_dec(&nr_freq_events);
if (event->attr.context_switch) {
static_key_slow_dec_deferred(&perf_sched_events);
atomic_dec(&nr_switch_events);
}
if (is_cgroup_event(event))
static_key_slow_dec_deferred(&perf_sched_events);
if (has_branch_stack(event))
......@@ -5981,6 +5995,91 @@ void perf_log_lost_samples(struct perf_event *event, u64 lost)
perf_output_end(&handle);
}
/*
* context_switch tracking
*/
/*
 * Scratch state shared by perf_event_switch() and
 * perf_event_switch_output() while one context switch is being logged.
 */
struct perf_switch_event {
/* first task argument handed to perf_event_switch() */
struct task_struct *task;
/* the other task of the switch; source of next_prev_pid/next_prev_tid */
struct task_struct *next_prev;
/*
 * Record image copied to the output buffer: only 'header' for
 * PERF_RECORD_SWITCH, the whole struct for PERF_RECORD_SWITCH_CPU_WIDE.
 */
struct {
struct perf_event_header header;
u32 next_prev_pid;
u32 next_prev_tid;
} event_id;
};
static int perf_event_switch_match(struct perf_event *event)
{
return event->attr.context_switch;
}
static void perf_event_switch_output(struct perf_event *event, void *data)
{
struct perf_switch_event *se = data;
struct perf_output_handle handle;
struct perf_sample_data sample;
int ret;
if (!perf_event_switch_match(event))
return;
/* Only CPU-wide events are allowed to see next/prev pid/tid */
if (event->ctx->task) {
se->event_id.header.type = PERF_RECORD_SWITCH;
se->event_id.header.size = sizeof(se->event_id.header);
} else {
se->event_id.header.type = PERF_RECORD_SWITCH_CPU_WIDE;
se->event_id.header.size = sizeof(se->event_id);
se->event_id.next_prev_pid =
perf_event_pid(event, se->next_prev);
se->event_id.next_prev_tid =
perf_event_tid(event, se->next_prev);
}
perf_event_header__init_id(&se->event_id.header, &sample, event);
ret = perf_output_begin(&handle, event, se->event_id.header.size);
if (ret)
return;
if (event->ctx->task)
perf_output_put(&handle, se->event_id.header);
else
perf_output_put(&handle, se->event_id);
perf_event__output_id_sample(event, &handle, &sample);
perf_output_end(&handle);
}
/*
 * Log a context switch of @task.
 *
 * @next_prev is the other task involved in the switch and @sched_in selects
 * the direction: switch-out records carry PERF_RECORD_MISC_SWITCH_OUT in
 * header.misc, switch-in records carry 0.  The header type/size and the
 * next_prev pid/tid start out zeroed and are filled in per event by
 * perf_event_switch_output().
 */
static void perf_event_switch(struct task_struct *task,
			      struct task_struct *next_prev, bool sched_in)
{
	/* N.B. caller checks nr_switch_events != 0 */
	struct perf_switch_event switch_event = {
		.task			= task,
		.next_prev		= next_prev,
		.event_id.header.misc	= sched_in ? 0 : PERF_RECORD_MISC_SWITCH_OUT,
	};

	perf_event_aux(perf_event_switch_output, &switch_event, NULL);
}
/*
* IRQ throttle logging
*/
......@@ -7479,6 +7578,10 @@ static void account_event(struct perf_event *event)
if (atomic_inc_return(&nr_freq_events) == 1)
tick_nohz_full_kick_all();
}
if (event->attr.context_switch) {
atomic_inc(&nr_switch_events);
static_key_slow_inc(&perf_sched_events.key);
}
if (has_branch_stack(event))
static_key_slow_inc(&perf_sched_events.key);
if (is_cgroup_event(event))
......
......@@ -418,7 +418,7 @@ static int func_map_init(struct pevent *pevent)
}
static struct func_map *
find_func(struct pevent *pevent, unsigned long long addr)
__find_func(struct pevent *pevent, unsigned long long addr)
{
struct func_map *func;
struct func_map key;
......@@ -434,6 +434,71 @@ find_func(struct pevent *pevent, unsigned long long addr)
return func;
}
/*
 * Alternative function resolver installed via
 * pevent_set_function_resolver().
 */
struct func_resolver {
/* callback used by find_func() instead of the built-in lookup */
pevent_func_resolver_t *func;
/* opaque pointer handed back to 'func' as its first argument */
void *priv;
/* storage for the result find_func() returns; overwritten on every lookup */
struct func_map map;
};
/**
 * pevent_set_function_resolver - install an alternative function resolver
 * @pevent: handle for the pevent
 * @func: callback that resolves an address to a function name
 * @priv: private state passed back to @func on every call
 *
 * Lets a tool that already knows how to resolve kernel functions reuse that
 * code instead of duplicating every entry inside pevent->funclist.  A
 * resolver installed earlier is freed and replaced.
 *
 * Returns 0 on success, -1 if the resolver could not be allocated (in which
 * case the previous resolver, if any, is left in place).
 */
int pevent_set_function_resolver(struct pevent *pevent,
				 pevent_func_resolver_t *func, void *priv)
{
	struct func_resolver *new_resolver;

	new_resolver = malloc(sizeof(*new_resolver));
	if (!new_resolver)
		return -1;

	new_resolver->priv = priv;
	new_resolver->func = func;

	/* Release the old resolver only once the new one is ready. */
	free(pevent->func_resolver);
	pevent->func_resolver = new_resolver;

	return 0;
}
/**
 * pevent_reset_function_resolver - drop the alternative function resolver
 * @pevent: handle for the pevent
 *
 * Frees whatever alternative resolver was installed and clears the pointer,
 * so that find_func() goes back to the default lookup.
 */
void pevent_reset_function_resolver(struct pevent *pevent)
{
	struct func_resolver *old_resolver = pevent->func_resolver;

	pevent->func_resolver = NULL;
	free(old_resolver);
}
static struct func_map *
find_func(struct pevent *pevent, unsigned long long addr)
{
struct func_map *map;
if (!pevent->func_resolver)
return __find_func(pevent, addr);
map = &pevent->func_resolver->map;
map->mod = NULL;
map->addr = addr;
map->func = pevent->func_resolver->func(pevent->func_resolver->priv,
&map->addr, &map->mod);
if (map->func == NULL)
return NULL;
return map;
}
/**
* pevent_find_function - find a function by a given address
* @pevent: handle for the pevent
......@@ -6564,6 +6629,7 @@ void pevent_free(struct pevent *pevent)
free(pevent->trace_clock);
free(pevent->events);
free(pevent->sort_events);
free(pevent->func_resolver);
free(pevent);
}
......
......@@ -453,6 +453,10 @@ struct cmdline_list;
struct func_map;
struct func_list;
struct event_handler;
struct func_resolver;
typedef char *(pevent_func_resolver_t)(void *priv,
unsigned long long *addrp, char **modp);
struct pevent {
int ref_count;
......@@ -481,6 +485,7 @@ struct pevent {
int cmdline_count;
struct func_map *func_map;
struct func_resolver *func_resolver;
struct func_list *funclist;
unsigned int func_count;
......@@ -611,6 +616,9 @@ enum trace_flag_type {
TRACE_FLAG_SOFTIRQ = 0x10,
};
int pevent_set_function_resolver(struct pevent *pevent,
pevent_func_resolver_t *func, void *priv);
void pevent_reset_function_resolver(struct pevent *pevent);
int pevent_register_comm(struct pevent *pevent, const char *comm, int pid);
int pevent_register_trace_clock(struct pevent *pevent, const char *trace_clock);
int pevent_register_function(struct pevent *pevent, char *name,
......
......@@ -293,6 +293,10 @@ When processing pre-existing threads /proc/XXX/mmap, it may take a long time,
because the file may be huge. A time out is needed in such cases.
This option sets the time out limit. The default value is 500 ms.
--switch-events::
Record context switch events i.e. events of type PERF_RECORD_SWITCH or
PERF_RECORD_SWITCH_CPU_WIDE.
SEE ALSO
--------
linkperf:perf-stat[1], linkperf:perf-list[1]
......@@ -222,6 +222,10 @@ OPTIONS
--show-mmap-events
Display mmap related events (e.g. MMAP, MMAP2).
--show-switch-events
Display context switch events i.e. events of type PERF_RECORD_SWITCH or
PERF_RECORD_SWITCH_CPU_WIDE.
--header
Show perf.data header.
......
......@@ -561,6 +561,7 @@ int cmd_inject(int argc, const char **argv, const char *prefix __maybe_unused)
.lost = perf_event__repipe,
.aux = perf_event__repipe,
.itrace_start = perf_event__repipe,
.context_switch = perf_event__repipe,
.read = perf_event__repipe_sample,
.throttle = perf_event__repipe,
.unthrottle = perf_event__repipe,
......
......@@ -1075,6 +1075,8 @@ struct option __record_options[] = {
"opts", "AUX area tracing Snapshot Mode", ""),
OPT_UINTEGER(0, "proc-map-timeout", &record.opts.proc_map_timeout,
"per thread proc mmap processing timeout in ms"),
OPT_BOOLEAN(0, "switch-events", &record.opts.record_switch_events,
"Record context switch events"),
OPT_END()
};
......@@ -1102,6 +1104,11 @@ int cmd_record(int argc, const char **argv, const char *prefix __maybe_unused)
" system-wide mode\n");
usage_with_options(record_usage, record_options);
}
if (rec->opts.record_switch_events &&
!perf_can_record_switch_events()) {
ui__error("kernel does not support recording context switch events (--switch-events option)\n");
usage_with_options(record_usage, record_options);
}
if (!rec->itr) {
rec->itr = auxtrace_record__init(rec->evlist, &err);
......
......@@ -623,6 +623,7 @@ struct perf_script {
struct perf_session *session;
bool show_task_events;
bool show_mmap_events;
bool show_switch_events;
};
static int process_attr(struct perf_tool *tool, union perf_event *event,
......@@ -661,7 +662,7 @@ static int process_comm_event(struct perf_tool *tool,
struct thread *thread;
struct perf_script *script = container_of(tool, struct perf_script, tool);
struct perf_session *session = script->session;
struct perf_evsel *evsel = perf_evlist__first(session->evlist);
struct perf_evsel *evsel = perf_evlist__id2evsel(session->evlist, sample->id);
int ret = -1;
thread = machine__findnew_thread(machine, event->comm.pid, event->comm.tid);
......@@ -695,7 +696,7 @@ static int process_fork_event(struct perf_tool *tool,
struct thread *thread;
struct perf_script *script = container_of(tool, struct perf_script, tool);
struct perf_session *session = script->session;
struct perf_evsel *evsel = perf_evlist__first(session->evlist);
struct perf_evsel *evsel = perf_evlist__id2evsel(session->evlist, sample->id);
if (perf_event__process_fork(tool, event, sample, machine) < 0)
return -1;
......@@ -727,7 +728,7 @@ static int process_exit_event(struct perf_tool *tool,
struct thread *thread;
struct perf_script *script = container_of(tool, struct perf_script, tool);
struct perf_session *session = script->session;
struct perf_evsel *evsel = perf_evlist__first(session->evlist);
struct perf_evsel *evsel = perf_evlist__id2evsel(session->evlist, sample->id);
thread = machine__findnew_thread(machine, event->fork.pid, event->fork.tid);
if (thread == NULL) {
......@@ -759,7 +760,7 @@ static int process_mmap_event(struct perf_tool *tool,
struct thread *thread;
struct perf_script *script = container_of(tool, struct perf_script, tool);
struct perf_session *session = script->session;
struct perf_evsel *evsel = perf_evlist__first(session->evlist);
struct perf_evsel *evsel = perf_evlist__id2evsel(session->evlist, sample->id);
if (perf_event__process_mmap(tool, event, sample, machine) < 0)
return -1;
......@@ -790,7 +791,7 @@ static int process_mmap2_event(struct perf_tool *tool,
struct thread *thread;
struct perf_script *script = container_of(tool, struct perf_script, tool);
struct perf_session *session = script->session;
struct perf_evsel *evsel = perf_evlist__first(session->evlist);
struct perf_evsel *evsel = perf_evlist__id2evsel(session->evlist, sample->id);
if (perf_event__process_mmap2(tool, event, sample, machine) < 0)
return -1;
......@@ -813,6 +814,32 @@ static int process_mmap2_event(struct perf_tool *tool,
return 0;
}
/*
 * 'perf script' handler for context switch records: run the generic switch
 * processing, then print the sample prefix followed by the event itself.
 *
 * Returns 0 on success, -1 if generic processing fails or the thread for
 * sample->pid/tid cannot be found or created.
 */
static int process_switch_event(struct perf_tool *tool,
				union perf_event *event,
				struct perf_sample *sample,
				struct machine *machine)
{
	struct perf_script *script = container_of(tool, struct perf_script, tool);
	struct perf_evsel *evsel = perf_evlist__id2evsel(script->session->evlist,
							 sample->id);
	struct thread *thread;

	if (perf_event__process_switch(tool, event, sample, machine) < 0)
		return -1;

	thread = machine__findnew_thread(machine, sample->pid, sample->tid);
	if (!thread) {
		pr_debug("problem processing SWITCH event, skipping it.\n");
		return -1;
	}

	print_sample_start(sample, thread, evsel);
	perf_event__fprintf(event, stdout);

	/* Drop the reference obtained from machine__findnew_thread(). */
	thread__put(thread);
	return 0;
}
static void sig_handler(int sig __maybe_unused)
{
session_done = 1;
......@@ -834,6 +861,8 @@ static int __cmd_script(struct perf_script *script)
script->tool.mmap = process_mmap_event;
script->tool.mmap2 = process_mmap2_event;
}
if (script->show_switch_events)
script->tool.context_switch = process_switch_event;
ret = perf_session__process_events(script->session);
......@@ -1618,6 +1647,8 @@ int cmd_script(int argc, const char **argv, const char *prefix __maybe_unused)
"Show the fork/comm/exit events"),
OPT_BOOLEAN('\0', "show-mmap-events", &script.show_mmap_events,
"Show the mmap events"),
OPT_BOOLEAN('\0', "show-switch-events", &script.show_switch_events,
"Show context switch events (if recorded)"),
OPT_BOOLEAN('f', "force", &file.force, "don't complain, do it"),
OPT_CALLBACK_OPTARG(0, "itrace", &itrace_synth_opts, NULL, "opts",
"Instruction Tracing options",
......@@ -1830,6 +1861,13 @@ int cmd_script(int argc, const char **argv, const char *prefix __maybe_unused)
else
symbol_conf.use_callchain = false;
if (pevent_set_function_resolver(session->tevent.pevent,
machine__resolve_kernel_addr,
&session->machines.host) < 0) {
pr_err("%s: failed to set libtraceevent function resolver\n", __func__);
return -1;
}
if (generate_script_lang) {
struct stat perf_stat;
int input;
......
......@@ -1489,6 +1489,9 @@ static int trace__symbols_init(struct trace *trace, struct perf_evlist *evlist)
if (trace->host == NULL)
return -ENOMEM;
if (trace_event__register_resolver(trace->host) < 0)
return -errno;
err = __machine__synthesize_threads(trace->host, &trace->tool, &trace->opts.target,
evlist->threads, trace__tool_process, false,
trace->opts.proc_map_timeout);
......
......@@ -57,6 +57,7 @@ struct record_opts {
bool running_time;
bool full_auxtrace;
bool auxtrace_snapshot_mode;
bool record_switch_events;
unsigned int freq;
unsigned int mmap_pages;
unsigned int auxtrace_mmap_pages;
......
......@@ -20,6 +20,8 @@ int test__thread_map(void)
TEST_ASSERT_VAL("wrong comm",
thread_map__comm(map, 0) &&
!strcmp(thread_map__comm(map, 0), "perf"));
TEST_ASSERT_VAL("wrong refcnt",
atomic_read(&map->refcnt) == 1);
thread_map__put(map);
/* test dummy pid */
......@@ -33,6 +35,8 @@ int test__thread_map(void)
TEST_ASSERT_VAL("wrong comm",
thread_map__comm(map, 0) &&
!strcmp(thread_map__comm(map, 0), "dummy"));
TEST_ASSERT_VAL("wrong refcnt",
atomic_read(&map->refcnt) == 1);
thread_map__put(map);
return 0;
}
......@@ -137,6 +137,10 @@ struct dso {
struct rb_node rb_node; /* rbtree node sorted by long name */
struct rb_root symbols[MAP__NR_TYPES];
struct rb_root symbol_names[MAP__NR_TYPES];
struct {
u64 addr;
struct symbol *symbol;
} last_find_result[MAP__NR_TYPES];
void *a2l;
char *symsrc_filename;
unsigned int a2l_fails;
......
......@@ -26,6 +26,8 @@ static const char *perf_event__names[] = {
[PERF_RECORD_AUX] = "AUX",
[PERF_RECORD_ITRACE_START] = "ITRACE_START",
[PERF_RECORD_LOST_SAMPLES] = "LOST_SAMPLES",
[PERF_RECORD_SWITCH] = "SWITCH",
[PERF_RECORD_SWITCH_CPU_WIDE] = "SWITCH_CPU_WIDE",
[PERF_RECORD_HEADER_ATTR] = "ATTR",
[PERF_RECORD_HEADER_EVENT_TYPE] = "EVENT_TYPE",
[PERF_RECORD_HEADER_TRACING_DATA] = "TRACING_DATA",
......@@ -749,6 +751,14 @@ int perf_event__process_lost_samples(struct perf_tool *tool __maybe_unused,
return machine__process_lost_samples_event(machine, event, sample);
}
/*
 * Default tool callback for context switch records; @tool and @sample are
 * unused and the event is simply forwarded to the machine layer.
 */
int perf_event__process_switch(struct perf_tool *tool __maybe_unused,
			       union perf_event *event,
			       struct perf_sample *sample __maybe_unused,
			       struct machine *machine)
{
	int ret = machine__process_switch_event(machine, event);

	return ret;
}
size_t perf_event__fprintf_mmap(union perf_event *event, FILE *fp)
{
return fprintf(fp, " %d/%d: [%#" PRIx64 "(%#" PRIx64 ") @ %#" PRIx64 "]: %c %s\n",
......@@ -827,6 +837,20 @@ size_t perf_event__fprintf_itrace_start(union perf_event *event, FILE *fp)
event->itrace_start.pid, event->itrace_start.tid);
}
/*
 * Print the payload of a context switch record to @fp.
 *
 * PERF_RECORD_SWITCH prints just the direction ("IN " or "OUT"); the
 * CPU-wide variant additionally prints the next (when switching out) or
 * previous (when switching in) pid/tid.  Returns fprintf()'s result.
 */
size_t perf_event__fprintf_switch(union perf_event *event, FILE *fp)
{
	bool switch_out = event->header.misc & PERF_RECORD_MISC_SWITCH_OUT;
	const char *direction = switch_out ? "OUT" : "IN ";
	const char *peer;

	if (event->header.type == PERF_RECORD_SWITCH)
		return fprintf(fp, " %s\n", direction);

	peer = switch_out ? "next" : "prev";

	return fprintf(fp, " %s  %s pid/tid: %5u/%-5u\n",
		       direction, peer,
		       event->context_switch.next_prev_pid,
		       event->context_switch.next_prev_tid);
}
size_t perf_event__fprintf(union perf_event *event, FILE *fp)
{
size_t ret = fprintf(fp, "PERF_RECORD_%s",
......@@ -852,6 +876,10 @@ size_t perf_event__fprintf(union perf_event *event, FILE *fp)
case PERF_RECORD_ITRACE_START:
ret += perf_event__fprintf_itrace_start(event, fp);
break;
case PERF_RECORD_SWITCH:
case PERF_RECORD_SWITCH_CPU_WIDE:
ret += perf_event__fprintf_switch(event, fp);
break;
default:
ret += fprintf(fp, "\n");
}
......
......@@ -348,6 +348,12 @@ struct itrace_start_event {
u32 pid, tid;
};
/*
 * Tool-side view of a context switch record.  The two pid/tid fields are
 * only read (printed, byte-swapped) for PERF_RECORD_SWITCH_CPU_WIDE.
 */
struct context_switch_event {
struct perf_event_header header;
u32 next_prev_pid;
u32 next_prev_tid;
};
union perf_event {
struct perf_event_header header;
struct mmap_event mmap;
......@@ -369,6 +375,7 @@ union perf_event {
struct auxtrace_error_event auxtrace_error;
struct aux_event aux;
struct itrace_start_event itrace_start;
struct context_switch_event context_switch;
};
void perf_event__print_totals(void);
......@@ -418,6 +425,10 @@ int perf_event__process_itrace_start(struct perf_tool *tool,
union perf_event *event,
struct perf_sample *sample,
struct machine *machine);
int perf_event__process_switch(struct perf_tool *tool,
union perf_event *event,
struct perf_sample *sample,
struct machine *machine);
int perf_event__process_mmap(struct perf_tool *tool,
union perf_event *event,
struct perf_sample *sample,
......@@ -480,6 +491,7 @@ size_t perf_event__fprintf_mmap2(union perf_event *event, FILE *fp);
size_t perf_event__fprintf_task(union perf_event *event, FILE *fp);
size_t perf_event__fprintf_aux(union perf_event *event, FILE *fp);
size_t perf_event__fprintf_itrace_start(union perf_event *event, FILE *fp);
size_t perf_event__fprintf_switch(union perf_event *event, FILE *fp);
size_t perf_event__fprintf(union perf_event *event, FILE *fp);
u64 kallsyms__get_function_start(const char *kallsyms_filename,
......
......@@ -1102,7 +1102,7 @@ int perf_evlist__mmap(struct perf_evlist *evlist, unsigned int pages,
}
static int perf_evlist__propagate_maps(struct perf_evlist *evlist,
struct target *target)
bool has_user_cpus)
{
struct perf_evsel *evsel;
......@@ -1111,15 +1111,16 @@ static int perf_evlist__propagate_maps(struct perf_evlist *evlist,
* We already have cpus for evsel (via PMU sysfs) so
* keep it, if there's no target cpu list defined.
*/
if (evsel->cpus && target->cpu_list)
if (evsel->cpus && has_user_cpus)
cpu_map__put(evsel->cpus);
if (!evsel->cpus || target->cpu_list)
if (!evsel->cpus || has_user_cpus)
evsel->cpus = cpu_map__get(evlist->cpus);
evsel->threads = thread_map__get(evlist->threads);
if (!evsel->cpus || !evsel->threads)
if ((evlist->cpus && !evsel->cpus) ||
(evlist->threads && !evsel->threads))
return -ENOMEM;
}
......@@ -1142,7 +1143,7 @@ int perf_evlist__create_maps(struct perf_evlist *evlist, struct target *target)
if (evlist->cpus == NULL)
goto out_delete_threads;
return perf_evlist__propagate_maps(evlist, target);
return perf_evlist__propagate_maps(evlist, !!target->cpu_list);
out_delete_threads:
thread_map__put(evlist->threads);
......@@ -1150,6 +1151,23 @@ int perf_evlist__create_maps(struct perf_evlist *evlist, struct target *target)
return -1;
}
/*
 * Replace the evlist's cpu and thread maps and propagate them to its evsels.
 *
 * Any map currently held by the evlist is put before being replaced.  The
 * new pointers are stored as given: no additional reference is taken here.
 * Returns the result of perf_evlist__propagate_maps().
 */
int perf_evlist__set_maps(struct perf_evlist *evlist,
			  struct cpu_map *cpus,
			  struct thread_map *threads)
{
	struct cpu_map *old_cpus = evlist->cpus;
	struct thread_map *old_threads = evlist->threads;

	if (old_cpus != NULL)
		cpu_map__put(old_cpus);
	evlist->cpus = cpus;

	if (old_threads != NULL)
		thread_map__put(old_threads);
	evlist->threads = threads;

	/* has_user_cpus == false: evsels keep cpus they already have. */
	return perf_evlist__propagate_maps(evlist, false);
}
int perf_evlist__apply_filters(struct perf_evlist *evlist, struct perf_evsel **err_evsel)
{
struct perf_evsel *evsel;
......
......@@ -114,6 +114,7 @@ void perf_evlist__close(struct perf_evlist *evlist);
void perf_evlist__set_id_pos(struct perf_evlist *evlist);
bool perf_can_sample_identifier(void);
bool perf_can_record_switch_events(void);
void perf_evlist__config(struct perf_evlist *evlist, struct record_opts *opts);
int record_opts__config(struct record_opts *opts);
......@@ -152,14 +153,9 @@ int perf_evlist__enable_event_idx(struct perf_evlist *evlist,
void perf_evlist__set_selected(struct perf_evlist *evlist,
struct perf_evsel *evsel);
static inline void perf_evlist__set_maps(struct perf_evlist *evlist,
int perf_evlist__set_maps(struct perf_evlist *evlist,
struct cpu_map *cpus,
struct thread_map *threads)
{
evlist->cpus = cpus;
evlist->threads = threads;
}
struct thread_map *threads);
int perf_evlist__create_maps(struct perf_evlist *evlist, struct target *target);
int perf_evlist__apply_filters(struct perf_evlist *evlist, struct perf_evsel **err_evsel);
......
......@@ -738,6 +738,9 @@ void perf_evsel__config(struct perf_evsel *evsel, struct record_opts *opts)
attr->mmap2 = track && !perf_missing_features.mmap2;
attr->comm = track;
if (opts->record_switch_events)
attr->context_switch = track;
if (opts->sample_transaction)
perf_evsel__set_sample_bit(evsel, TRANSACTION);
......@@ -1127,6 +1130,7 @@ int perf_event_attr__fprintf(FILE *fp, struct perf_event_attr *attr,
PRINT_ATTRf(mmap2, p_unsigned);
PRINT_ATTRf(comm_exec, p_unsigned);
PRINT_ATTRf(use_clockid, p_unsigned);
PRINT_ATTRf(context_switch, p_unsigned);
PRINT_ATTRn("{ wakeup_events, wakeup_watermark }", wakeup_events, p_unsigned);
PRINT_ATTRf(bp_type, p_unsigned);
......
......@@ -923,17 +923,13 @@ static void print_cmdline(struct perf_header *ph, int fd __maybe_unused,
FILE *fp)
{
int nr, i;
char *str;
nr = ph->env.nr_cmdline;
str = ph->env.cmdline;
fprintf(fp, "# cmdline : ");
for (i = 0; i < nr; i++) {
fprintf(fp, "%s ", str);
str += strlen(str) + 1;
}
for (i = 0; i < nr; i++)
fprintf(fp, "%s ", ph->env.cmdline_argv[i]);
fputc('\n', fp);
}
......@@ -1541,14 +1537,13 @@ process_event_desc(struct perf_file_section *section __maybe_unused,
return 0;
}
static int process_cmdline(struct perf_file_section *section __maybe_unused,
static int process_cmdline(struct perf_file_section *section,
struct perf_header *ph, int fd,
void *data __maybe_unused)
{
ssize_t ret;
char *str;
u32 nr, i;
struct strbuf sb;
char *str, *cmdline = NULL, **argv = NULL;
u32 nr, i, len = 0;
ret = readn(fd, &nr, sizeof(nr));
if (ret != sizeof(nr))
......@@ -1558,22 +1553,32 @@ static int process_cmdline(struct perf_file_section *section __maybe_unused,
nr = bswap_32(nr);
ph->env.nr_cmdline = nr;
strbuf_init(&sb, 128);
cmdline = zalloc(section->size + nr + 1);
if (!cmdline)
return -1;
argv = zalloc(sizeof(char *) * (nr + 1));
if (!argv)
goto error;
for (i = 0; i < nr; i++) {
str = do_read_string(fd, ph);
if (!str)
goto error;
/* include a NULL character at the end */
strbuf_add(&sb, str, strlen(str) + 1);
argv[i] = cmdline + len;
memcpy(argv[i], str, strlen(str) + 1);
len += strlen(str) + 1;
free(str);
}
ph->env.cmdline = strbuf_detach(&sb, NULL);
ph->env.cmdline = cmdline;
ph->env.cmdline_argv = (const char **) argv;
return 0;
error:
strbuf_release(&sb);
free(argv);
free(cmdline);
return -1;
}
......
......@@ -84,6 +84,7 @@ struct perf_session_env {
int nr_pmu_mappings;
int nr_groups;
char *cmdline;
const char **cmdline_argv;
char *sibling_cores;
char *sibling_threads;
char *numa_nodes;
......
......@@ -550,6 +550,14 @@ int machine__process_itrace_start_event(struct machine *machine __maybe_unused,
return 0;
}
/*
 * Machine-level handling of a context switch record: nothing is tracked,
 * the event is only printed to stdout when dump_trace is set.
 * Always returns 0.
 */
int machine__process_switch_event(struct machine *machine __maybe_unused,
				  union perf_event *event)
{
	if (!dump_trace)
		return 0;

	perf_event__fprintf_switch(event, stdout);
	return 0;
}
struct map *machine__findnew_module_map(struct machine *machine, u64 start,
const char *filename)
{
......@@ -1451,6 +1459,9 @@ int machine__process_event(struct machine *machine, union perf_event *event,
ret = machine__process_itrace_start_event(machine, event); break;
case PERF_RECORD_LOST_SAMPLES:
ret = machine__process_lost_samples_event(machine, event, sample); break;
case PERF_RECORD_SWITCH:
case PERF_RECORD_SWITCH_CPU_WIDE:
ret = machine__process_switch_event(machine, event); break;
default:
ret = -1;
break;
......@@ -1993,3 +2004,17 @@ struct dso *machine__findnew_dso(struct machine *machine, const char *filename)
{
return dsos__findnew(&machine->dsos, filename);
}
/*
 * Resolve a kernel address to a symbol name using @vmachine's kernel maps.
 *
 * On success *@addrp is rewritten to the symbol's start address (unmapped
 * through the owning map), *@modp is set to the dso short name when the map
 * is a module (NULL otherwise) and the symbol name is returned.  When no
 * symbol is found NULL is returned and neither output is touched.
 */
char *machine__resolve_kernel_addr(void *vmachine, unsigned long long *addrp, char **modp)
{
	struct machine *machine = vmachine;
	struct map *kmap;
	struct symbol *sym;

	sym = map_groups__find_symbol(&machine->kmaps, MAP__FUNCTION,
				      *addrp, &kmap, NULL);
	if (!sym)
		return NULL;

	if (__map__is_kmodule(kmap))
		*modp = (char *)kmap->dso->short_name;
	else
		*modp = NULL;

	*addrp = kmap->unmap_ip(kmap, sym->start);

	return sym->name;
}
......@@ -87,6 +87,8 @@ int machine__process_aux_event(struct machine *machine,
union perf_event *event);
int machine__process_itrace_start_event(struct machine *machine,
union perf_event *event);
int machine__process_switch_event(struct machine *machine __maybe_unused,
union perf_event *event);
int machine__process_mmap_event(struct machine *machine, union perf_event *event,
struct perf_sample *sample);
int machine__process_mmap2_event(struct machine *machine, union perf_event *event,
......@@ -237,5 +239,9 @@ int machine__synthesize_threads(struct machine *machine, struct target *target,
pid_t machine__get_current_tid(struct machine *machine, int cpu);
int machine__set_current_tid(struct machine *machine, int cpu, pid_t pid,
pid_t tid);
/*
* For use with libtraceevent's pevent_set_function_resolver()
*/
char *machine__resolve_kernel_addr(void *vmachine, unsigned long long *addrp, char **modp);
#endif /* __PERF_MACHINE_H */
......@@ -224,6 +224,20 @@ struct map *map__new2(u64 start, struct dso *dso, enum map_type type)
return map;
}
/*
* Use this and __map__is_kmodule() for map instances that are in
* machine->kmaps, and thus have map->groups->machine all properly set, to
* disambiguate between the kernel and modules.
*
* When the need arises, introduce map__is_{kernel,kmodule)() that
* checks (map->groups != NULL && map->groups->machine != NULL &&
* map->dso->kernel) before calling __map__is_{kernel,kmodule}())
*/
bool __map__is_kernel(const struct map *map)
{
return map->groups->machine->vmlinux_maps[map->type] == map;
}
static void map__exit(struct map *map)
{
BUG_ON(!RB_EMPTY_NODE(&map->rb_node));
......
......@@ -256,4 +256,11 @@ int map_groups__fixup_overlappings(struct map_groups *mg, struct map *map,
struct map *map_groups__find_by_name(struct map_groups *mg,
enum map_type type, const char *name);
bool __map__is_kernel(const struct map *map);
/* A map is treated as a module map when it is not the kernel map. */
static inline bool __map__is_kmodule(const struct map *map)
{
	if (__map__is_kernel(map))
		return false;

	return true;
}
#endif /* __PERF_MAP_H */
......@@ -85,6 +85,11 @@ static void perf_probe_comm_exec(struct perf_evsel *evsel)
evsel->attr.comm_exec = 1;
}
static void perf_probe_context_switch(struct perf_evsel *evsel)
{
evsel->attr.context_switch = 1;
}
bool perf_can_sample_identifier(void)
{
return perf_probe_api(perf_probe_sample_identifier);
......@@ -95,6 +100,11 @@ static bool perf_can_comm_exec(void)
return perf_probe_api(perf_probe_comm_exec);
}
/*
 * Report whether the API probe with attr.context_switch set succeeds,
 * i.e. the result of perf_probe_api(perf_probe_context_switch).
 */
bool perf_can_record_switch_events(void)
{
	bool supported = perf_probe_api(perf_probe_context_switch);

	return supported;
}
void perf_evlist__config(struct perf_evlist *evlist, struct record_opts *opts)
{
struct perf_evsel *evsel;
......
......@@ -180,6 +180,7 @@ static void perf_session_env__delete(struct perf_session_env *env)
zfree(&env->cpuid);
zfree(&env->cmdline);
zfree(&env->cmdline_argv);
zfree(&env->sibling_cores);
zfree(&env->sibling_threads);
zfree(&env->numa_nodes);
......@@ -332,6 +333,8 @@ void perf_tool__fill_defaults(struct perf_tool *tool)
tool->aux = perf_event__process_aux;
if (tool->itrace_start == NULL)
tool->itrace_start = perf_event__process_itrace_start;
if (tool->context_switch == NULL)
tool->context_switch = perf_event__process_switch;
if (tool->read == NULL)
tool->read = process_event_sample_stub;
if (tool->throttle == NULL)
......@@ -470,6 +473,19 @@ static void perf_event__itrace_start_swap(union perf_event *event,
swap_sample_id_all(event, &event->itrace_start + 1);
}
/*
 * Byte-swap a context switch record read from a file of the other
 * endianness.
 *
 * The two record types have different layouts:
 *
 *   PERF_RECORD_SWITCH:          { header; sample_id; }
 *   PERF_RECORD_SWITCH_CPU_WIDE: { header; u32 next_prev_pid;
 *                                  u32 next_prev_tid; sample_id; }
 *
 * so the sample_id trailer starts right after the header for the former and
 * only after the pid/tid pair for the latter.  Using the end of
 * struct context_switch_event for both would skip the first 8 bytes of
 * sample_id in a plain PERF_RECORD_SWITCH record and leave them unswapped.
 *
 * @event: record to fix up in place
 * @sample_id_all: whether a sample_id trailer follows the fixed part
 */
static void perf_event__switch_swap(union perf_event *event, bool sample_id_all)
{
	void *sample_id = &event->header + 1;

	if (event->header.type == PERF_RECORD_SWITCH_CPU_WIDE) {
		event->context_switch.next_prev_pid =
				bswap_32(event->context_switch.next_prev_pid);
		event->context_switch.next_prev_tid =
				bswap_32(event->context_switch.next_prev_tid);
		/* CPU-wide records carry pid/tid before the trailer. */
		sample_id = &event->context_switch + 1;
	}

	if (sample_id_all)
		swap_sample_id_all(event, sample_id);
}
static void perf_event__throttle_swap(union perf_event *event,
bool sample_id_all)
{
......@@ -632,6 +648,8 @@ static perf_event__swap_op perf_event__swap_ops[] = {
[PERF_RECORD_AUX] = perf_event__aux_swap,
[PERF_RECORD_ITRACE_START] = perf_event__itrace_start_swap,
[PERF_RECORD_LOST_SAMPLES] = perf_event__all64_swap,
[PERF_RECORD_SWITCH] = perf_event__switch_swap,
[PERF_RECORD_SWITCH_CPU_WIDE] = perf_event__switch_swap,
[PERF_RECORD_HEADER_ATTR] = perf_event__hdr_attr_swap,
[PERF_RECORD_HEADER_EVENT_TYPE] = perf_event__event_type_swap,
[PERF_RECORD_HEADER_TRACING_DATA] = perf_event__tracing_data_swap,
......@@ -1093,6 +1111,9 @@ static int machines__deliver_event(struct machines *machines,
return tool->aux(tool, event, sample, machine);
case PERF_RECORD_ITRACE_START:
return tool->itrace_start(tool, event, sample, machine);
case PERF_RECORD_SWITCH:
case PERF_RECORD_SWITCH_CPU_WIDE:
return tool->context_switch(tool, event, sample, machine);
default:
++evlist->stats.nr_unknown_events;
return -1;
......
......@@ -444,7 +444,12 @@ static struct symbol *symbols__find_by_name(struct rb_root *symbols,
/*
 * Find the symbol covering @addr in @dso's symbol table for map @type.
 *
 * The last lookup per map type is remembered in dso->last_find_result[type]
 * so that a repeated query for the same address does not call
 * symbols__find() again.  Only successful lookups are reused: a cached NULL
 * cannot be told apart from a slot that has never been filled (which would
 * wrongly match addr == 0 if the slot starts out zeroed), and a miss may
 * turn into a hit once more symbols are added to the table, so a NULL entry
 * always triggers a fresh search.
 *
 * Returns whatever symbols__find() yields for @addr (possibly NULL).
 */
struct symbol *dso__find_symbol(struct dso *dso,
				enum map_type type, u64 addr)
{
	if (dso->last_find_result[type].addr != addr ||
	    !dso->last_find_result[type].symbol) {
		dso->last_find_result[type].addr   = addr;
		dso->last_find_result[type].symbol =
			symbols__find(&dso->symbols[type], addr);
	}

	return dso->last_find_result[type].symbol;
}
struct symbol *dso__first_symbol(struct dso *dso, enum map_type type)
......
......@@ -46,6 +46,7 @@ struct perf_tool {
lost_samples,
aux,
itrace_start,
context_switch,
throttle,
unthrottle;
event_attr_op attr;
......
......@@ -341,20 +341,14 @@ static int record_event_files(struct tracepoint_path *tps)
/*
 * Emit the kallsyms section of the tracing data to output_fd.
 *
 * NOTE(review): this span comes from a rendered diff and appears to
 * interleave the removed implementation (stat() /proc/kallsyms, then either
 * write a zero size or record_file() it) with the added one (always write a
 * 4-byte zero size).  The duplicate "size" declaration and the statements
 * after "return record_file(...)" are consistent with that - confirm against
 * the real file before relying on either half.
 *
 * Both variants return 0 on success and -EIO when the 4-byte size field
 * cannot be written in full.
 */
static int record_proc_kallsyms(void)
{
unsigned int size;
const char *path = "/proc/kallsyms";
struct stat st;
int ret, err = 0;
ret = stat(path, &st);
if (ret < 0) {
/* not found */
size = 0;
if (write(output_fd, &size, 4) != 4)
err = -EIO;
return err;
}
return record_file(path, 4);
unsigned long long size = 0;
/*
* Just to keep older perf.data file parsers happy, record a zero
* sized kallsyms file, i.e. do the same thing that was done when
* /proc/kallsyms (or something specified via --kallsyms, in a
* different path) couldn't be read.
*/
/*
 * Only the first 4 bytes of the wider "size" object are written; because the
 * value is zero, those bytes are zero regardless of byte order.
 */
return write(output_fd, &size, 4) != 4 ? -EIO : 0;
}
static int record_ftrace_printk(void)
......
......@@ -135,36 +135,6 @@ void event_format__print(struct event_format *event,
return event_format__fprintf(event, cpu, data, size, stdout);
}
/*
 * Register every symbol found in a /proc/kallsyms-style text buffer with
 * @pevent through pevent_register_function().
 *
 * @pevent: handle the functions are registered with
 * @file:   NUL-terminated buffer holding the text; it is tokenized in place
 *          by strtok_r(), so its contents are modified
 * @size:   unused
 *
 * Each line is parsed as:
 *
 *	<hex address> <one token, ignored> <name>[\t[<module>]]
 *
 * Lines that lack an address or a name are skipped instead of handing a
 * NULL string to strtoull() or pevent_register_function().
 */
void parse_proc_kallsyms(struct pevent *pevent,
			 char *file, unsigned int size __maybe_unused)
{
	char *next = NULL;
	char *line;

	/* strtok_r() with a NULL string and a NULL save pointer is invalid */
	if (!file)
		return;

	for (line = strtok_r(file, "\n", &next); line;
	     line = strtok_r(NULL, "\n", &next)) {
		unsigned long long addr;
		char *fmt = NULL;
		char *addr_str;
		char *func;
		char *mod;

		/* a line made only of spaces yields no first token */
		addr_str = strtok_r(line, " ", &fmt);
		if (!addr_str)
			continue;
		addr = strtoull(addr_str, NULL, 16);

		/* skip character */
		strtok_r(NULL, " ", &fmt);

		func = strtok_r(NULL, "\t", &fmt);
		if (!func)
			continue;

		mod = strtok_r(NULL, "]", &fmt);
		/* truncate the extra '[' */
		if (mod)
			mod++;

		pevent_register_function(pevent, func, addr, mod);
	}
}
void parse_ftrace_printk(struct pevent *pevent,
char *file, unsigned int size __maybe_unused)
{
......
......@@ -162,25 +162,23 @@ static char *read_string(void)
/*
 * Step over the kallsyms section of the tracing data.
 *
 * The section starts with a 4-byte size obtained via read4(); a size of
 * zero means there is no payload.  Otherwise the payload is skipped by
 * seeking input_fd forward and accounted for in trace_data_size.
 *
 * Returns 0 on success, -1 if the payload could not be skipped (in which
 * case trace_data_size is left untouched, since the stream position did not
 * advance).
 */
static int read_proc_kallsyms(struct pevent *pevent)
{
	unsigned int size;

	size = read4(pevent);
	if (!size)
		return 0;

	/*
	 * Just skip it, now that we configure libtraceevent to use the
	 * tools/perf/ symbol resolver.
	 *
	 * We need to skip it so that we can continue parsing old perf.data
	 * files that contain this /proc/kallsyms payload.
	 *
	 * Newer perf.data files will have just the 4-bytes zeros "kallsyms
	 * payload", so that older tools can continue reading it and interpret
	 * it as "no kallsyms payload is present".
	 */
	if (lseek(input_fd, size, SEEK_CUR) < 0)
		return -1;

	trace_data_size += size;
	return 0;
}
......
......@@ -9,6 +9,7 @@
#include <linux/kernel.h>
#include <traceevent/event-parse.h>
#include "trace-event.h"
#include "machine.h"
#include "util.h"
/*
......@@ -19,6 +20,7 @@
* there.
*/
static struct trace_event tevent;
static bool tevent_initialized;
int trace_event__init(struct trace_event *t)
{
......@@ -32,6 +34,32 @@ int trace_event__init(struct trace_event *t)
return pevent ? 0 : -1;
}
static int trace_event__init2(void)
{
int be = traceevent_host_bigendian();
struct pevent *pevent;
if (trace_event__init(&tevent))
return -1;
pevent = tevent.pevent;
pevent_set_flag(pevent, PEVENT_NSEC_OUTPUT);
pevent_set_file_bigendian(pevent, be);
pevent_set_host_bigendian(pevent, be);
tevent_initialized = true;
return 0;
}
/*
 * Install machine__resolve_kernel_addr() as the function resolver of the
 * file-scope tevent's pevent, with @machine as its private argument.
 *
 * tevent is initialized first if that has not happened yet.  Returns -1 when
 * that initialization fails, otherwise the return value of
 * pevent_set_function_resolver().
 */
int trace_event__register_resolver(struct machine *machine)
{
	if (!tevent_initialized) {
		if (trace_event__init2() != 0)
			return -1;
	}

	return pevent_set_function_resolver(tevent.pevent,
					    machine__resolve_kernel_addr,
					    machine);
}
void trace_event__cleanup(struct trace_event *t)
{
traceevent_unload_plugins(t->plugin_list, t->pevent);
......@@ -62,21 +90,8 @@ tp_format(const char *sys, const char *name)
/*
 * Return the result of tp_format() for tracepoint @sys:@name, making sure
 * the file-scope tevent has been initialized first.  Returns NULL when that
 * initialization fails.
 *
 * NOTE(review): this span comes from a rendered diff and appears to
 * interleave the removed open-coded setup (function-local "initialized"
 * flag, trace_event__init() plus the pevent_set_*() calls) with the added
 * one-line "!tevent_initialized && trace_event__init2()" check - confirm
 * against the real file which lines are live.
 */
struct event_format*
trace_event__tp_format(const char *sys, const char *name)
{
static bool initialized;
if (!initialized) {
int be = traceevent_host_bigendian();
struct pevent *pevent;
if (trace_event__init(&tevent))
/* Lazy one-time setup; trace_event__init2() is only called while the flag is still false. */
if (!tevent_initialized && trace_event__init2())
return NULL;
pevent = tevent.pevent;
pevent_set_flag(pevent, PEVENT_NSEC_OUTPUT);
pevent_set_file_bigendian(pevent, be);
pevent_set_host_bigendian(pevent, be);
initialized = true;
}
return tp_format(sys, name);
}
......@@ -18,6 +18,7 @@ struct trace_event {
int trace_event__init(struct trace_event *t);
void trace_event__cleanup(struct trace_event *t);
int trace_event__register_resolver(struct machine *machine);
struct event_format*
trace_event__tp_format(const char *sys, const char *name);
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment