Commit 2f80dd44 authored by Josef Bacik's avatar Josef Bacik Committed by Arnaldo Carvalho de Melo

perf sched: Add option to merge like comms to lat output

Sometimes when debugging large multi-threaded applications it is helpful
to collate all of the latency numbers into one bulk record to get an
idea of what is going on.

This patch does this by merging any entries that belong to the same comm
into one entry and then spits out those totals.

I've also slightly changed the output so you can see how many threads
were merged in the processing.  Here is the new default output format

 -----------------------------------------------------------------------------------------------------------
  Task                 | Runtime ms  | Switches | Average delay ms | Maximum delay ms | Maximum delay at    |
 -----------------------------------------------------------------------------------------------------------
  chrome:(23)          |  740.878 ms |     2612 | avg:    0.022 ms | max:    0.845 ms | max at: 7935.254223 s
  pulseaudio:1523      |   94.440 ms |      597 | avg:    0.027 ms | max:    0.110 ms | max at: 7934.668372 s
  threaded-ml:6042     |   72.554 ms |      386 | avg:    0.035 ms | max:    1.186 ms | max at: 7935.330911 s
  Chrome_IOThread:3832 |   52.388 ms |      456 | avg:    0.021 ms | max:    1.365 ms | max at: 7935.330602 s
  Chrome_ChildIOT:(7)  |   50.694 ms |      743 | avg:    0.021 ms | max:    1.448 ms | max at: 7935.256659 s
  Compositor:5510      |   30.012 ms |      192 | avg:    0.019 ms | max:    0.131 ms | max at: 7936.636815 s
  plugin_audio_th:6043 |   24.828 ms |      314 | avg:    0.018 ms | max:    0.143 ms | max at: 7936.205994 s
  CompositorTileW:(2)  |   14.099 ms |       45 | avg:    0.022 ms | max:    0.153 ms | max at: 7937.521800 s

the (#) after the task is the number of tasks merged, and then if there were
no tasks merged it just shows the pid.  Here is the same trace file with the -p
option to print the per-pid latency numbers

 -----------------------------------------------------------------------------------------------------------
  Task                 | Runtime ms  | Switches | Average delay ms | Maximum delay ms | Maximum delay at    |
 -----------------------------------------------------------------------------------------------------------
  chrome:5500          |  386.872 ms |      387 | avg:    0.023 ms | max:    0.241 ms | max at: 7936.001694 s
  pulseaudio:1523      |   94.440 ms |      597 | avg:    0.027 ms | max:    0.110 ms | max at: 7934.668372 s
  threaded-ml:6042     |   72.554 ms |      386 | avg:    0.035 ms | max:    1.186 ms | max at: 7935.330911 s
  chrome:10226         |   69.710 ms |      251 | avg:    0.023 ms | max:    0.764 ms | max at: 7935.992305 s
  chrome:4267          |   64.551 ms |      418 | avg:    0.021 ms | max:    0.294 ms | max at: 7937.862427 s
  chrome:4827          |   62.268 ms |       54 | avg:    0.029 ms | max:    0.666 ms | max at: 7935.992813 s
  Chrome_IOThread:3832 |   52.388 ms |      456 | avg:    0.021 ms | max:    1.365 ms | max at: 7935.330602 s
  chrome:3776          |   46.150 ms |      349 | avg:    0.023 ms | max:    0.845 ms | max at: 7935.254223 s
Signed-off-by: default avatarJosef Bacik <jbacik@fb.com>
Acked-by: default avatarIngo Molnar <mingo@kernel.org>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: kernel-team@fb.com
Link: http://lkml.kernel.org/r/1432300720-30478-1-git-send-email-jbacik@fb.comSigned-off-by: default avatarArnaldo Carvalho de Melo <acme@redhat.com>
parent e8b7ea43
...@@ -95,6 +95,7 @@ struct work_atoms { ...@@ -95,6 +95,7 @@ struct work_atoms {
u64 total_lat; u64 total_lat;
u64 nb_atoms; u64 nb_atoms;
u64 total_runtime; u64 total_runtime;
int num_merged;
}; };
typedef int (*sort_fn_t)(struct work_atoms *, struct work_atoms *); typedef int (*sort_fn_t)(struct work_atoms *, struct work_atoms *);
...@@ -168,9 +169,10 @@ struct perf_sched { ...@@ -168,9 +169,10 @@ struct perf_sched {
u64 all_runtime; u64 all_runtime;
u64 all_count; u64 all_count;
u64 cpu_last_switched[MAX_CPUS]; u64 cpu_last_switched[MAX_CPUS];
struct rb_root atom_root, sorted_atom_root; struct rb_root atom_root, sorted_atom_root, merged_atom_root;
struct list_head sort_list, cmp_pid; struct list_head sort_list, cmp_pid;
bool force; bool force;
bool skip_merge;
}; };
static u64 get_nsecs(void) static u64 get_nsecs(void)
...@@ -1182,6 +1184,9 @@ static void output_lat_thread(struct perf_sched *sched, struct work_atoms *work_ ...@@ -1182,6 +1184,9 @@ static void output_lat_thread(struct perf_sched *sched, struct work_atoms *work_
sched->all_runtime += work_list->total_runtime; sched->all_runtime += work_list->total_runtime;
sched->all_count += work_list->nb_atoms; sched->all_count += work_list->nb_atoms;
if (work_list->num_merged > 1)
ret = printf(" %s:(%d) ", thread__comm_str(work_list->thread), work_list->num_merged);
else
ret = printf(" %s:%d ", thread__comm_str(work_list->thread), work_list->thread->tid); ret = printf(" %s:%d ", thread__comm_str(work_list->thread), work_list->thread->tid);
for (i = 0; i < 24 - ret; i++) for (i = 0; i < 24 - ret; i++)
...@@ -1302,17 +1307,22 @@ static int sort_dimension__add(const char *tok, struct list_head *list) ...@@ -1302,17 +1307,22 @@ static int sort_dimension__add(const char *tok, struct list_head *list)
static void perf_sched__sort_lat(struct perf_sched *sched) static void perf_sched__sort_lat(struct perf_sched *sched)
{ {
struct rb_node *node; struct rb_node *node;
struct rb_root *root = &sched->atom_root;
again:
for (;;) { for (;;) {
struct work_atoms *data; struct work_atoms *data;
node = rb_first(&sched->atom_root); node = rb_first(root);
if (!node) if (!node)
break; break;
rb_erase(node, &sched->atom_root); rb_erase(node, root);
data = rb_entry(node, struct work_atoms, node); data = rb_entry(node, struct work_atoms, node);
__thread_latency_insert(&sched->sorted_atom_root, data, &sched->sort_list); __thread_latency_insert(&sched->sorted_atom_root, data, &sched->sort_list);
} }
if (root == &sched->atom_root) {
root = &sched->merged_atom_root;
goto again;
}
} }
static int process_sched_wakeup_event(struct perf_tool *tool, static int process_sched_wakeup_event(struct perf_tool *tool,
...@@ -1572,6 +1582,59 @@ static void print_bad_events(struct perf_sched *sched) ...@@ -1572,6 +1582,59 @@ static void print_bad_events(struct perf_sched *sched)
} }
} }
static void __merge_work_atoms(struct rb_root *root, struct work_atoms *data)
{
struct rb_node **new = &(root->rb_node), *parent = NULL;
struct work_atoms *this;
const char *comm = thread__comm_str(data->thread), *this_comm;
while (*new) {
int cmp;
this = container_of(*new, struct work_atoms, node);
parent = *new;
this_comm = thread__comm_str(this->thread);
cmp = strcmp(comm, this_comm);
if (cmp > 0) {
new = &((*new)->rb_left);
} else if (cmp < 0) {
new = &((*new)->rb_right);
} else {
this->num_merged++;
this->total_runtime += data->total_runtime;
this->nb_atoms += data->nb_atoms;
this->total_lat += data->total_lat;
list_splice(&data->work_list, &this->work_list);
if (this->max_lat < data->max_lat) {
this->max_lat = data->max_lat;
this->max_lat_at = data->max_lat_at;
}
zfree(&data);
return;
}
}
data->num_merged++;
rb_link_node(&data->node, parent, new);
rb_insert_color(&data->node, root);
}
static void perf_sched__merge_lat(struct perf_sched *sched)
{
struct work_atoms *data;
struct rb_node *node;
if (sched->skip_merge)
return;
while ((node = rb_first(&sched->atom_root))) {
rb_erase(node, &sched->atom_root);
data = rb_entry(node, struct work_atoms, node);
__merge_work_atoms(&sched->merged_atom_root, data);
}
}
static int perf_sched__lat(struct perf_sched *sched) static int perf_sched__lat(struct perf_sched *sched)
{ {
struct rb_node *next; struct rb_node *next;
...@@ -1581,6 +1644,7 @@ static int perf_sched__lat(struct perf_sched *sched) ...@@ -1581,6 +1644,7 @@ static int perf_sched__lat(struct perf_sched *sched)
if (perf_sched__read_events(sched)) if (perf_sched__read_events(sched))
return -1; return -1;
perf_sched__merge_lat(sched);
perf_sched__sort_lat(sched); perf_sched__sort_lat(sched);
printf("\n -----------------------------------------------------------------------------------------------------------------\n"); printf("\n -----------------------------------------------------------------------------------------------------------------\n");
...@@ -1732,6 +1796,7 @@ int cmd_sched(int argc, const char **argv, const char *prefix __maybe_unused) ...@@ -1732,6 +1796,7 @@ int cmd_sched(int argc, const char **argv, const char *prefix __maybe_unused)
.profile_cpu = -1, .profile_cpu = -1,
.next_shortname1 = 'A', .next_shortname1 = 'A',
.next_shortname2 = '0', .next_shortname2 = '0',
.skip_merge = 0,
}; };
const struct option latency_options[] = { const struct option latency_options[] = {
OPT_STRING('s', "sort", &sched.sort_order, "key[,key2...]", OPT_STRING('s', "sort", &sched.sort_order, "key[,key2...]",
...@@ -1742,6 +1807,8 @@ int cmd_sched(int argc, const char **argv, const char *prefix __maybe_unused) ...@@ -1742,6 +1807,8 @@ int cmd_sched(int argc, const char **argv, const char *prefix __maybe_unused)
"CPU to profile on"), "CPU to profile on"),
OPT_BOOLEAN('D', "dump-raw-trace", &dump_trace, OPT_BOOLEAN('D', "dump-raw-trace", &dump_trace,
"dump raw trace in ASCII"), "dump raw trace in ASCII"),
OPT_BOOLEAN('p', "pids", &sched.skip_merge,
"latency stats per pid instead of per comm"),
OPT_END() OPT_END()
}; };
const struct option replay_options[] = { const struct option replay_options[] = {
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment