Commit c4937a91 authored by Kan Liang's avatar Kan Liang Committed by Ingo Molnar

perf tools: handle PERF_RECORD_LOST_SAMPLES

This patch modifies the perf tool to handle the new RECORD type,
PERF_RECORD_LOST_SAMPLES.

The number of lost-sample events is stored in
.nr_events[PERF_RECORD_LOST_SAMPLES]. The exact number of samples
which the kernel dropped is stored in total_lost_samples.

When the percentage of dropped samples is greater than 5%, a warning
is printed.

Here are some examples:

Eg 1, Recording different frequently-occurring events is safe with the
      patch. Only a very low drop rate is associated with such actions.

$ perf record -e '{cycles:p,instructions:p}' -c 20003 --no-time ~/tchain ~/tchain

$ perf report -D | tail
          SAMPLE events:     120243
           MMAP2 events:          5
    LOST_SAMPLES events:         24
  FINISHED_ROUND events:         15
cycles:p stats:
           TOTAL events:      59348
          SAMPLE events:      59348
instructions:p stats:
           TOTAL events:      60895
          SAMPLE events:      60895

$ perf report --stdio --group
 # To display the perf.data header info, please use --header/--header-only options.
 #
 #
 # Total Lost Samples: 24
 #
 # Samples: 120K of event 'anon group { cycles:p, instructions:p }'
 # Event count (approx.): 24048600000
 #
 #         Overhead  Command      Shared Object     Symbol
 # ................  ...........  ................
 ..................................
 #
    99.74%  99.86%  tchain_edit  tchain_edit       [.] f3
     0.09%   0.02%  tchain_edit  tchain_edit       [.] f2
     0.04%   0.00%  tchain_edit  [kernel.vmlinux]  [k] ixgbe_read_reg

Eg 2, Recording the same thing multiple times can lead to high drop
      rate, but it is not a useful configuration.

$ perf record -e '{cycles:p,cycles:p}' -c 20003 --no-time ~/tchain
Warning: Processed 600592 samples and lost 99.73% samples!
[perf record: Woken up 148 times to write data]
[perf record: Captured and wrote 36.922 MB perf.data (1206322 samples)]
[perf record: Woken up 1 times to write data]
[perf record: Captured and wrote 0.121 MB perf.data (1629 samples)]
Signed-off-by: default avatarKan Liang <kan.liang@intel.com>
Signed-off-by: default avatarPeter Zijlstra (Intel) <peterz@infradead.org>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: H. Peter Anvin <hpa@zytor.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: acme@infradead.org
Cc: eranian@google.com
Link: http://lkml.kernel.org/r/1431285195-14269-9-git-send-email-kan.liang@intel.comSigned-off-by: default avatarIngo Molnar <mingo@kernel.org>
parent f38b0dbb
...@@ -320,6 +320,7 @@ static int perf_evlist__tty_browse_hists(struct perf_evlist *evlist, ...@@ -320,6 +320,7 @@ static int perf_evlist__tty_browse_hists(struct perf_evlist *evlist,
{ {
struct perf_evsel *pos; struct perf_evsel *pos;
fprintf(stdout, "#\n# Total Lost Samples: %lu\n#\n", evlist->stats.total_lost_samples);
evlist__for_each(evlist, pos) { evlist__for_each(evlist, pos) {
struct hists *hists = evsel__hists(pos); struct hists *hists = evsel__hists(pos);
const char *evname = perf_evsel__name(pos); const char *evname = perf_evsel__name(pos);
......
...@@ -25,6 +25,7 @@ static const char *perf_event__names[] = { ...@@ -25,6 +25,7 @@ static const char *perf_event__names[] = {
[PERF_RECORD_SAMPLE] = "SAMPLE", [PERF_RECORD_SAMPLE] = "SAMPLE",
[PERF_RECORD_AUX] = "AUX", [PERF_RECORD_AUX] = "AUX",
[PERF_RECORD_ITRACE_START] = "ITRACE_START", [PERF_RECORD_ITRACE_START] = "ITRACE_START",
[PERF_RECORD_LOST_SAMPLES] = "LOST_SAMPLES",
[PERF_RECORD_HEADER_ATTR] = "ATTR", [PERF_RECORD_HEADER_ATTR] = "ATTR",
[PERF_RECORD_HEADER_EVENT_TYPE] = "EVENT_TYPE", [PERF_RECORD_HEADER_EVENT_TYPE] = "EVENT_TYPE",
[PERF_RECORD_HEADER_TRACING_DATA] = "TRACING_DATA", [PERF_RECORD_HEADER_TRACING_DATA] = "TRACING_DATA",
...@@ -712,6 +713,14 @@ int perf_event__process_itrace_start(struct perf_tool *tool __maybe_unused, ...@@ -712,6 +713,14 @@ int perf_event__process_itrace_start(struct perf_tool *tool __maybe_unused,
return machine__process_itrace_start_event(machine, event); return machine__process_itrace_start_event(machine, event);
} }
int perf_event__process_lost_samples(struct perf_tool *tool __maybe_unused,
union perf_event *event,
struct perf_sample *sample,
struct machine *machine)
{
return machine__process_lost_samples_event(machine, event, sample);
}
size_t perf_event__fprintf_mmap(union perf_event *event, FILE *fp) size_t perf_event__fprintf_mmap(union perf_event *event, FILE *fp)
{ {
return fprintf(fp, " %d/%d: [%#" PRIx64 "(%#" PRIx64 ") @ %#" PRIx64 "]: %c %s\n", return fprintf(fp, " %d/%d: [%#" PRIx64 "(%#" PRIx64 ") @ %#" PRIx64 "]: %c %s\n",
......
...@@ -52,6 +52,11 @@ struct lost_event { ...@@ -52,6 +52,11 @@ struct lost_event {
u64 lost; u64 lost;
}; };
struct lost_samples_event {
struct perf_event_header header;
u64 lost;
};
/* /*
* PERF_FORMAT_ENABLED | PERF_FORMAT_RUNNING | PERF_FORMAT_ID * PERF_FORMAT_ENABLED | PERF_FORMAT_RUNNING | PERF_FORMAT_ID
*/ */
...@@ -235,6 +240,12 @@ enum auxtrace_error_type { ...@@ -235,6 +240,12 @@ enum auxtrace_error_type {
* total_lost tells exactly how many events the kernel in fact lost, i.e. it is * total_lost tells exactly how many events the kernel in fact lost, i.e. it is
* the sum of all struct lost_event.lost fields reported. * the sum of all struct lost_event.lost fields reported.
* *
* The kernel discards mixed up samples and sends the number in a
* PERF_RECORD_LOST_SAMPLES event. The number of lost-samples events is stored
* in .nr_events[PERF_RECORD_LOST_SAMPLES] while total_lost_samples tells
* exactly how many samples the kernel in fact dropped, i.e. it is the sum of
* all struct lost_samples_event.lost fields reported.
*
* The total_period is needed because by default auto-freq is used, so * The total_period is needed because by default auto-freq is used, so
* multipling nr_events[PERF_EVENT_SAMPLE] by a frequency isn't possible to get * multipling nr_events[PERF_EVENT_SAMPLE] by a frequency isn't possible to get
* the total number of low level events, it is necessary to to sum all struct * the total number of low level events, it is necessary to to sum all struct
...@@ -244,6 +255,7 @@ struct events_stats { ...@@ -244,6 +255,7 @@ struct events_stats {
u64 total_period; u64 total_period;
u64 total_non_filtered_period; u64 total_non_filtered_period;
u64 total_lost; u64 total_lost;
u64 total_lost_samples;
u64 total_invalid_chains; u64 total_invalid_chains;
u32 nr_events[PERF_RECORD_HEADER_MAX]; u32 nr_events[PERF_RECORD_HEADER_MAX];
u32 nr_non_filtered_samples; u32 nr_non_filtered_samples;
...@@ -342,6 +354,7 @@ union perf_event { ...@@ -342,6 +354,7 @@ union perf_event {
struct comm_event comm; struct comm_event comm;
struct fork_event fork; struct fork_event fork;
struct lost_event lost; struct lost_event lost;
struct lost_samples_event lost_samples;
struct read_event read; struct read_event read;
struct throttle_event throttle; struct throttle_event throttle;
struct sample_event sample; struct sample_event sample;
...@@ -390,6 +403,10 @@ int perf_event__process_lost(struct perf_tool *tool, ...@@ -390,6 +403,10 @@ int perf_event__process_lost(struct perf_tool *tool,
union perf_event *event, union perf_event *event,
struct perf_sample *sample, struct perf_sample *sample,
struct machine *machine); struct machine *machine);
int perf_event__process_lost_samples(struct perf_tool *tool,
union perf_event *event,
struct perf_sample *sample,
struct machine *machine);
int perf_event__process_aux(struct perf_tool *tool, int perf_event__process_aux(struct perf_tool *tool,
union perf_event *event, union perf_event *event,
struct perf_sample *sample, struct perf_sample *sample,
......
...@@ -482,6 +482,14 @@ int machine__process_lost_event(struct machine *machine __maybe_unused, ...@@ -482,6 +482,14 @@ int machine__process_lost_event(struct machine *machine __maybe_unused,
return 0; return 0;
} }
int machine__process_lost_samples_event(struct machine *machine __maybe_unused,
union perf_event *event, struct perf_sample *sample)
{
dump_printf(": id:%" PRIu64 ": lost samples :%" PRIu64 "\n",
sample->id, event->lost_samples.lost);
return 0;
}
static struct dso* static struct dso*
machine__module_dso(struct machine *machine, struct kmod_path *m, machine__module_dso(struct machine *machine, struct kmod_path *m,
const char *filename) const char *filename)
...@@ -1419,6 +1427,8 @@ int machine__process_event(struct machine *machine, union perf_event *event, ...@@ -1419,6 +1427,8 @@ int machine__process_event(struct machine *machine, union perf_event *event,
ret = machine__process_aux_event(machine, event); break; ret = machine__process_aux_event(machine, event); break;
case PERF_RECORD_ITRACE_START: case PERF_RECORD_ITRACE_START:
ret = machine__process_itrace_start_event(machine, event); ret = machine__process_itrace_start_event(machine, event);
case PERF_RECORD_LOST_SAMPLES:
ret = machine__process_lost_samples_event(machine, event, sample); break;
break; break;
default: default:
ret = -1; ret = -1;
......
...@@ -81,6 +81,8 @@ int machine__process_fork_event(struct machine *machine, union perf_event *event ...@@ -81,6 +81,8 @@ int machine__process_fork_event(struct machine *machine, union perf_event *event
struct perf_sample *sample); struct perf_sample *sample);
int machine__process_lost_event(struct machine *machine, union perf_event *event, int machine__process_lost_event(struct machine *machine, union perf_event *event,
struct perf_sample *sample); struct perf_sample *sample);
int machine__process_lost_samples_event(struct machine *machine, union perf_event *event,
struct perf_sample *sample);
int machine__process_aux_event(struct machine *machine, int machine__process_aux_event(struct machine *machine,
union perf_event *event); union perf_event *event);
int machine__process_itrace_start_event(struct machine *machine, int machine__process_itrace_start_event(struct machine *machine,
......
...@@ -325,6 +325,8 @@ void perf_tool__fill_defaults(struct perf_tool *tool) ...@@ -325,6 +325,8 @@ void perf_tool__fill_defaults(struct perf_tool *tool)
tool->exit = process_event_stub; tool->exit = process_event_stub;
if (tool->lost == NULL) if (tool->lost == NULL)
tool->lost = perf_event__process_lost; tool->lost = perf_event__process_lost;
if (tool->lost_samples == NULL)
tool->lost_samples = perf_event__process_lost_samples;
if (tool->aux == NULL) if (tool->aux == NULL)
tool->aux = perf_event__process_aux; tool->aux = perf_event__process_aux;
if (tool->itrace_start == NULL) if (tool->itrace_start == NULL)
...@@ -606,6 +608,7 @@ static perf_event__swap_op perf_event__swap_ops[] = { ...@@ -606,6 +608,7 @@ static perf_event__swap_op perf_event__swap_ops[] = {
[PERF_RECORD_SAMPLE] = perf_event__all64_swap, [PERF_RECORD_SAMPLE] = perf_event__all64_swap,
[PERF_RECORD_AUX] = perf_event__aux_swap, [PERF_RECORD_AUX] = perf_event__aux_swap,
[PERF_RECORD_ITRACE_START] = perf_event__itrace_start_swap, [PERF_RECORD_ITRACE_START] = perf_event__itrace_start_swap,
[PERF_RECORD_LOST_SAMPLES] = perf_event__all64_swap,
[PERF_RECORD_HEADER_ATTR] = perf_event__hdr_attr_swap, [PERF_RECORD_HEADER_ATTR] = perf_event__hdr_attr_swap,
[PERF_RECORD_HEADER_EVENT_TYPE] = perf_event__event_type_swap, [PERF_RECORD_HEADER_EVENT_TYPE] = perf_event__event_type_swap,
[PERF_RECORD_HEADER_TRACING_DATA] = perf_event__tracing_data_swap, [PERF_RECORD_HEADER_TRACING_DATA] = perf_event__tracing_data_swap,
...@@ -1049,6 +1052,10 @@ static int machines__deliver_event(struct machines *machines, ...@@ -1049,6 +1052,10 @@ static int machines__deliver_event(struct machines *machines,
if (tool->lost == perf_event__process_lost) if (tool->lost == perf_event__process_lost)
evlist->stats.total_lost += event->lost.lost; evlist->stats.total_lost += event->lost.lost;
return tool->lost(tool, event, sample, machine); return tool->lost(tool, event, sample, machine);
case PERF_RECORD_LOST_SAMPLES:
if (tool->lost_samples == perf_event__process_lost_samples)
evlist->stats.total_lost_samples += event->lost_samples.lost;
return tool->lost_samples(tool, event, sample, machine);
case PERF_RECORD_READ: case PERF_RECORD_READ:
return tool->read(tool, event, sample, evsel, machine); return tool->read(tool, event, sample, evsel, machine);
case PERF_RECORD_THROTTLE: case PERF_RECORD_THROTTLE:
...@@ -1286,6 +1293,18 @@ static void perf_session__warn_about_errors(const struct perf_session *session) ...@@ -1286,6 +1293,18 @@ static void perf_session__warn_about_errors(const struct perf_session *session)
stats->nr_events[PERF_RECORD_LOST]); stats->nr_events[PERF_RECORD_LOST]);
} }
if (session->tool->lost_samples == perf_event__process_lost_samples) {
double drop_rate;
drop_rate = (double)stats->total_lost_samples /
(double) (stats->nr_events[PERF_RECORD_SAMPLE] + stats->total_lost_samples);
if (drop_rate > 0.05) {
ui__warning("Processed %lu samples and lost %3.2f%% samples!\n\n",
stats->nr_events[PERF_RECORD_SAMPLE] + stats->total_lost_samples,
drop_rate * 100.0);
}
}
if (stats->nr_unknown_events != 0) { if (stats->nr_unknown_events != 0) {
ui__warning("Found %u unknown events!\n\n" ui__warning("Found %u unknown events!\n\n"
"Is this an older tool processing a perf.data " "Is this an older tool processing a perf.data "
......
...@@ -43,6 +43,7 @@ struct perf_tool { ...@@ -43,6 +43,7 @@ struct perf_tool {
fork, fork,
exit, exit,
lost, lost,
lost_samples,
aux, aux,
itrace_start, itrace_start,
throttle, throttle,
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment