Commit 9f6f941e authored by Ingo Molnar's avatar Ingo Molnar

Merge tag 'perf-core-for-mingo-4.11-20170117' of...

Merge tag 'perf-core-for-mingo-4.11-20170117' of git://git.kernel.org/pub/scm/linux/kernel/git/acme/linux into perf/core

Pull perf/core improvements and fixes from Arnaldo Carvalho de Melo:

New features:

 - Account thread wait time (off CPU time) separately: sleep, iowait and
   preempt, based on the prev_state of the last event, show the breakdown
   when using "perf sched timehist --state" (Namhyumg Kim)

Infrastructure changes:

 - Factor out PMU scale conversion code (Andi Kleen)

 - Remove unnecessary feature-dwarf warning (David Carrillo-Cisneros)

 - Add missing member name in OPT_() macros (Soramichi AKIYAMA)

 - Move variables referenced in libperf.a object files from perf's main()
   file, so that other tools can use libperf.a with a different main()
   (Soramichi AKIYAMA)

Documentation changes:

 - Fix 'perf script' man page about --dump-raw-trace option (Michael Petlan)

 - Also allow forcing reading of non-root owned files by root in 'perf
   script' (Yannick Brosseau)
Signed-off-by: default avatarArnaldo Carvalho de Melo <acme@redhat.com>
Signed-off-by: default avatarIngo Molnar <mingo@kernel.org>
parents 5b485629 d94386f2
......@@ -133,32 +133,32 @@ struct option {
#define OPT_UINTEGER(s, l, v, h) { .type = OPTION_UINTEGER, .short_name = (s), .long_name = (l), .value = check_vtype(v, unsigned int *), .help = (h) }
#define OPT_LONG(s, l, v, h) { .type = OPTION_LONG, .short_name = (s), .long_name = (l), .value = check_vtype(v, long *), .help = (h) }
#define OPT_U64(s, l, v, h) { .type = OPTION_U64, .short_name = (s), .long_name = (l), .value = check_vtype(v, u64 *), .help = (h) }
#define OPT_STRING(s, l, v, a, h) { .type = OPTION_STRING, .short_name = (s), .long_name = (l), .value = check_vtype(v, const char **), (a), .help = (h) }
#define OPT_STRING(s, l, v, a, h) { .type = OPTION_STRING, .short_name = (s), .long_name = (l), .value = check_vtype(v, const char **), .argh = (a), .help = (h) }
#define OPT_STRING_OPTARG(s, l, v, a, h, d) \
{ .type = OPTION_STRING, .short_name = (s), .long_name = (l), \
.value = check_vtype(v, const char **), (a), .help = (h), \
.value = check_vtype(v, const char **), .argh =(a), .help = (h), \
.flags = PARSE_OPT_OPTARG, .defval = (intptr_t)(d) }
#define OPT_STRING_OPTARG_SET(s, l, v, os, a, h, d) \
{ .type = OPTION_STRING, .short_name = (s), .long_name = (l), \
.value = check_vtype(v, const char **), (a), .help = (h), \
.value = check_vtype(v, const char **), .argh = (a), .help = (h), \
.flags = PARSE_OPT_OPTARG, .defval = (intptr_t)(d), \
.set = check_vtype(os, bool *)}
#define OPT_STRING_NOEMPTY(s, l, v, a, h) { .type = OPTION_STRING, .short_name = (s), .long_name = (l), .value = check_vtype(v, const char **), (a), .help = (h), .flags = PARSE_OPT_NOEMPTY}
#define OPT_STRING_NOEMPTY(s, l, v, a, h) { .type = OPTION_STRING, .short_name = (s), .long_name = (l), .value = check_vtype(v, const char **), .argh = (a), .help = (h), .flags = PARSE_OPT_NOEMPTY}
#define OPT_DATE(s, l, v, h) \
{ .type = OPTION_CALLBACK, .short_name = (s), .long_name = (l), .value = (v), .argh = "time", .help = (h), .callback = parse_opt_approxidate_cb }
#define OPT_CALLBACK(s, l, v, a, h, f) \
{ .type = OPTION_CALLBACK, .short_name = (s), .long_name = (l), .value = (v), (a), .help = (h), .callback = (f) }
{ .type = OPTION_CALLBACK, .short_name = (s), .long_name = (l), .value = (v), .argh = (a), .help = (h), .callback = (f) }
#define OPT_CALLBACK_NOOPT(s, l, v, a, h, f) \
{ .type = OPTION_CALLBACK, .short_name = (s), .long_name = (l), .value = (v), (a), .help = (h), .callback = (f), .flags = PARSE_OPT_NOARG }
{ .type = OPTION_CALLBACK, .short_name = (s), .long_name = (l), .value = (v), .argh = (a), .help = (h), .callback = (f), .flags = PARSE_OPT_NOARG }
#define OPT_CALLBACK_DEFAULT(s, l, v, a, h, f, d) \
{ .type = OPTION_CALLBACK, .short_name = (s), .long_name = (l), .value = (v), (a), .help = (h), .callback = (f), .defval = (intptr_t)d, .flags = PARSE_OPT_LASTARG_DEFAULT }
{ .type = OPTION_CALLBACK, .short_name = (s), .long_name = (l), .value = (v), .argh = (a), .help = (h), .callback = (f), .defval = (intptr_t)d, .flags = PARSE_OPT_LASTARG_DEFAULT }
#define OPT_CALLBACK_DEFAULT_NOOPT(s, l, v, a, h, f, d) \
{ .type = OPTION_CALLBACK, .short_name = (s), .long_name = (l),\
.value = (v), (a), .help = (h), .callback = (f), .defval = (intptr_t)d,\
.value = (v), .arg = (a), .help = (h), .callback = (f), .defval = (intptr_t)d,\
.flags = PARSE_OPT_LASTARG_DEFAULT | PARSE_OPT_NOARG}
#define OPT_CALLBACK_OPTARG(s, l, v, d, a, h, f) \
{ .type = OPTION_CALLBACK, .short_name = (s), .long_name = (l), \
.value = (v), (a), .help = (h), .callback = (f), \
.value = (v), .argh = (a), .help = (h), .callback = (f), \
.flags = PARSE_OPT_OPTARG, .data = (d) }
/* parse_options() will filter out the processed options and leave the
......
......@@ -40,8 +40,7 @@ CFLAGS_builtin-help.o += $(paths)
CFLAGS_builtin-timechart.o += $(paths)
CFLAGS_perf.o += -DPERF_HTML_PATH="BUILD_STR($(htmldir_SQ))" \
-DPERF_EXEC_PATH="BUILD_STR($(perfexecdir_SQ))" \
-DPREFIX="BUILD_STR($(prefix_SQ))" \
-include $(OUTPUT)PERF-VERSION-FILE
-DPREFIX="BUILD_STR($(prefix_SQ))"
CFLAGS_builtin-trace.o += -DSTRACE_GROUPS_DIR="BUILD_STR($(STRACE_GROUPS_DIR_SQ))"
CFLAGS_builtin-report.o += -DTIPDIR="BUILD_STR($(tipdir_SQ))"
CFLAGS_builtin-report.o += -DDOCDIR="BUILD_STR($(srcdir_SQ)/Documentation)"
......
......@@ -143,6 +143,8 @@ OPTIONS for 'perf sched timehist'
stop time is not given (i.e, time string is 'x.y,') then analysis goes
to end of file.
--state::
Show task state when it switched out.
SEE ALSO
--------
......
......@@ -36,7 +36,7 @@ There are several variants of perf script:
'perf script report <script> [args]' to run and display the results
of <script>. <script> is the name displayed in the output of 'perf
trace --list' i.e. the actual script name minus any language
script --list' i.e. the actual script name minus any language
extension. The perf.data output from a previous run of 'perf script
record <script>' is used and should be present for this command to
succeed. [args] refers to the (mainly optional) args expected by
......@@ -76,7 +76,7 @@ OPTIONS
Any command you can specify in a shell.
-D::
--dump-raw-script=::
--dump-raw-trace=::
Display verbose dump of the trace data.
-L::
......
......@@ -291,8 +291,10 @@ else
endif
endif
ifneq ($(feature-dwarf), 1)
ifndef NO_DWARF
msg := $(warning No libdw.h found or old libdw.h found or elfutils is older than 0.138, disables dwarf support. Please install new elfutils-devel/libdw-dev);
NO_DWARF := 1
endif
else
ifneq ($(feature-dwarf_getlocations), 1)
msg := $(warning Old libdw.h, finding variables at given 'perf probe' point will not work, install elfutils-devel/libdw-dev >= 0.157);
......
......@@ -77,6 +77,22 @@ struct sched_atom {
#define TASK_STATE_TO_CHAR_STR "RSDTtZXxKWP"
/* task state bitmask, copied from include/linux/sched.h */
#define TASK_RUNNING 0
#define TASK_INTERRUPTIBLE 1
#define TASK_UNINTERRUPTIBLE 2
#define __TASK_STOPPED 4
#define __TASK_TRACED 8
/* in tsk->exit_state */
#define EXIT_DEAD 16
#define EXIT_ZOMBIE 32
#define EXIT_TRACE (EXIT_ZOMBIE | EXIT_DEAD)
/* in tsk->state again */
#define TASK_DEAD 64
#define TASK_WAKEKILL 128
#define TASK_WAKING 256
#define TASK_PARKED 512
enum thread_state {
THREAD_SLEEPING = 0,
THREAD_WAIT_CPU,
......@@ -206,6 +222,7 @@ struct perf_sched {
bool show_cpu_visual;
bool show_wakeups;
bool show_migrations;
bool show_state;
u64 skipped_samples;
const char *time_str;
struct perf_time_interval ptime;
......@@ -216,13 +233,20 @@ struct perf_sched {
struct thread_runtime {
u64 last_time; /* time of previous sched in/out event */
u64 dt_run; /* run time */
u64 dt_wait; /* time between CPU access (off cpu) */
u64 dt_sleep; /* time between CPU access by sleep (off cpu) */
u64 dt_iowait; /* time between CPU access by iowait (off cpu) */
u64 dt_preempt; /* time between CPU access by preempt (off cpu) */
u64 dt_delay; /* time between wakeup and sched-in */
u64 ready_to_run; /* time of wakeup */
struct stats run_stats;
u64 total_run_time;
u64 total_sleep_time;
u64 total_iowait_time;
u64 total_preempt_time;
u64 total_delay_time;
int last_state;
u64 migrations;
};
......@@ -1821,6 +1845,9 @@ static void timehist_header(struct perf_sched *sched)
printf(" %-*s %9s %9s %9s", comm_width,
"task name", "wait time", "sch delay", "run time");
if (sched->show_state)
printf(" %s", "state");
printf("\n");
/*
......@@ -1831,9 +1858,14 @@ static void timehist_header(struct perf_sched *sched)
if (sched->show_cpu_visual)
printf(" %*s ", ncpus, "");
printf(" %-*s %9s %9s %9s\n", comm_width,
printf(" %-*s %9s %9s %9s", comm_width,
"[tid/pid]", "(msec)", "(msec)", "(msec)");
if (sched->show_state)
printf(" %5s", "");
printf("\n");
/*
* separator
*/
......@@ -1846,18 +1878,34 @@ static void timehist_header(struct perf_sched *sched)
graph_dotted_line, graph_dotted_line, graph_dotted_line,
graph_dotted_line);
if (sched->show_state)
printf(" %.5s", graph_dotted_line);
printf("\n");
}
static char task_state_char(struct thread *thread, int state)
{
static const char state_to_char[] = TASK_STATE_TO_CHAR_STR;
unsigned bit = state ? ffs(state) : 0;
/* 'I' for idle */
if (thread->tid == 0)
return 'I';
return bit < sizeof(state_to_char) - 1 ? state_to_char[bit] : '?';
}
static void timehist_print_sample(struct perf_sched *sched,
struct perf_sample *sample,
struct addr_location *al,
struct thread *thread,
u64 t)
u64 t, int state)
{
struct thread_runtime *tr = thread__priv(thread);
u32 max_cpus = sched->max_cpu + 1;
char tstr[64];
u64 wait_time;
timestamp__scnprintf_usec(t, tstr, sizeof(tstr));
printf("%15s [%04d] ", tstr, sample->cpu);
......@@ -1880,10 +1928,15 @@ static void timehist_print_sample(struct perf_sched *sched,
printf(" %-*s ", comm_width, timehist_get_commstr(thread));
print_sched_time(tr->dt_wait, 6);
wait_time = tr->dt_sleep + tr->dt_iowait + tr->dt_preempt;
print_sched_time(wait_time, 6);
print_sched_time(tr->dt_delay, 6);
print_sched_time(tr->dt_run, 6);
if (sched->show_state)
printf(" %5c ", task_state_char(thread, state));
if (sched->show_wakeups)
printf(" %-*s", comm_width, "");
......@@ -1930,8 +1983,11 @@ static void timehist_update_runtime_stats(struct thread_runtime *r,
u64 t, u64 tprev)
{
r->dt_delay = 0;
r->dt_wait = 0;
r->dt_sleep = 0;
r->dt_iowait = 0;
r->dt_preempt = 0;
r->dt_run = 0;
if (tprev) {
r->dt_run = t - tprev;
if (r->ready_to_run) {
......@@ -1943,12 +1999,25 @@ static void timehist_update_runtime_stats(struct thread_runtime *r,
if (r->last_time > tprev)
pr_debug("time travel: last sched out time for task > previous sched_switch event\n");
else if (r->last_time)
r->dt_wait = tprev - r->last_time;
else if (r->last_time) {
u64 dt_wait = tprev - r->last_time;
if (r->last_state == TASK_RUNNING)
r->dt_preempt = dt_wait;
else if (r->last_state == TASK_UNINTERRUPTIBLE)
r->dt_iowait = dt_wait;
else
r->dt_sleep = dt_wait;
}
}
update_stats(&r->run_stats, r->dt_run);
r->total_run_time += r->dt_run;
r->total_delay_time += r->dt_delay;
r->total_sleep_time += r->dt_sleep;
r->total_iowait_time += r->dt_iowait;
r->total_preempt_time += r->dt_preempt;
}
static bool is_idle_sample(struct perf_sample *sample,
......@@ -2373,6 +2442,8 @@ static int timehist_sched_change_event(struct perf_tool *tool,
struct thread_runtime *tr = NULL;
u64 tprev, t = sample->time;
int rc = 0;
int state = perf_evsel__intval(evsel, sample, "prev_state");
if (machine__resolve(machine, &al, sample) < 0) {
pr_err("problem processing %d event. skipping it\n",
......@@ -2447,8 +2518,10 @@ static int timehist_sched_change_event(struct perf_tool *tool,
* time. we only care total run time and run stat.
*/
last_tr->dt_run = 0;
last_tr->dt_wait = 0;
last_tr->dt_delay = 0;
last_tr->dt_sleep = 0;
last_tr->dt_iowait = 0;
last_tr->dt_preempt = 0;
if (itr->cursor.nr)
callchain_append(&itr->callchain, &itr->cursor, t - tprev);
......@@ -2458,7 +2531,7 @@ static int timehist_sched_change_event(struct perf_tool *tool,
}
if (!sched->summary_only)
timehist_print_sample(sched, sample, &al, thread, t);
timehist_print_sample(sched, sample, &al, thread, t, state);
out:
if (sched->hist_time.start == 0 && t >= ptime->start)
......@@ -2470,6 +2543,9 @@ static int timehist_sched_change_event(struct perf_tool *tool,
/* time of this sched_switch event becomes last time task seen */
tr->last_time = sample->time;
/* last state is used to determine where to account wait time */
tr->last_state = state;
/* sched out event for task so reset ready to run time */
tr->ready_to_run = 0;
}
......@@ -2526,7 +2602,26 @@ static void print_thread_runtime(struct thread *t,
printf("\n");
}
static void print_thread_waittime(struct thread *t,
struct thread_runtime *r)
{
printf("%*s %5d %9" PRIu64 " ",
comm_width, timehist_get_commstr(t), t->ppid,
(u64) r->run_stats.n);
print_sched_time(r->total_run_time, 8);
print_sched_time(r->total_sleep_time, 6);
printf(" ");
print_sched_time(r->total_iowait_time, 6);
printf(" ");
print_sched_time(r->total_preempt_time, 6);
printf(" ");
print_sched_time(r->total_delay_time, 6);
printf("\n");
}
struct total_run_stats {
struct perf_sched *sched;
u64 sched_count;
u64 task_count;
u64 total_run_time;
......@@ -2545,6 +2640,10 @@ static int __show_thread_runtime(struct thread *t, void *priv)
stats->task_count++;
stats->sched_count += r->run_stats.n;
stats->total_run_time += r->total_run_time;
if (stats->sched->show_state)
print_thread_waittime(t, r);
else
print_thread_runtime(t, r);
}
......@@ -2633,18 +2732,24 @@ static void timehist_print_summary(struct perf_sched *sched,
u64 hist_time = sched->hist_time.end - sched->hist_time.start;
memset(&totals, 0, sizeof(totals));
totals.sched = sched;
if (sched->idle_hist) {
printf("\nIdle-time summary\n");
printf("%*s parent sched-out ", comm_width, "comm");
printf(" idle-time min-idle avg-idle max-idle stddev migrations\n");
} else if (sched->show_state) {
printf("\nWait-time summary\n");
printf("%*s parent sched-in ", comm_width, "comm");
printf(" run-time sleep iowait preempt delay\n");
} else {
printf("\nRuntime summary\n");
printf("%*s parent sched-in ", comm_width, "comm");
printf(" run-time min-run avg-run max-run stddev migrations\n");
}
printf("%*s (count) ", comm_width, "");
printf(" (msec) (msec) (msec) (msec) %%\n");
printf(" (msec) (msec) (msec) (msec) %s\n",
sched->show_state ? "(msec)" : "%");
printf("%.117s\n", graph_dotted_line);
machine__for_each_thread(m, show_thread_runtime, &totals);
......@@ -3240,6 +3345,7 @@ int cmd_sched(int argc, const char **argv, const char *prefix __maybe_unused)
OPT_BOOLEAN('I', "idle-hist", &sched.idle_hist, "Show idle events only"),
OPT_STRING(0, "time", &sched.time_str, "str",
"Time span for analysis (start,stop)"),
OPT_BOOLEAN(0, "state", &sched.show_state, "Show task state when sched-out"),
OPT_PARENT(sched_options)
};
......
......@@ -2180,7 +2180,7 @@ int cmd_script(int argc, const char **argv, const char *prefix __maybe_unused)
"Show the mmap events"),
OPT_BOOLEAN('\0', "show-switch-events", &script.show_switch_events,
"Show context switch events (if recorded)"),
OPT_BOOLEAN('f', "force", &file.force, "don't complain, do it"),
OPT_BOOLEAN('f', "force", &symbol_conf.force, "don't complain, do it"),
OPT_BOOLEAN(0, "ns", &nanosecs,
"Use 9 decimal places when displaying time"),
OPT_CALLBACK_OPTARG(0, "itrace", &itrace_synth_opts, NULL, "opts",
......@@ -2212,6 +2212,7 @@ int cmd_script(int argc, const char **argv, const char *prefix __maybe_unused)
PARSE_OPT_STOP_AT_NON_OPTION);
file.path = input_name;
file.force = symbol_conf.force;
if (argc > 1 && !strncmp(argv[0], "rec", strlen("rec"))) {
rec_script_path = get_script_path(argv[1], RECORD_SUFFIX);
......
......@@ -29,7 +29,6 @@ const char perf_usage_string[] =
const char perf_more_info_string[] =
"See 'perf help COMMAND' for more information on a specific command.";
int use_browser = -1;
static int use_pager = -1;
const char *input_name;
......@@ -330,8 +329,6 @@ static int handle_alias(int *argcp, const char ***argv)
return ret;
}
const char perf_version_string[] = PERF_VERSION;
#define RUN_SETUP (1<<0)
#define USE_PAGER (1<<1)
......
......@@ -7,6 +7,7 @@
pthread_mutex_t ui__lock = PTHREAD_MUTEX_INITIALIZER;
void *perf_gtk_handle;
int use_browser = -1;
#ifdef HAVE_GTK2_SUPPORT
static int setup_gtk_browser(void)
......
......@@ -162,6 +162,7 @@ CFLAGS_rbtree.o += -Wno-unused-parameter -DETC_PERFCONFIG="BUILD_STR($(ET
CFLAGS_libstring.o += -Wno-unused-parameter -DETC_PERFCONFIG="BUILD_STR($(ETC_PERFCONFIG_SQ))"
CFLAGS_hweight.o += -Wno-unused-parameter -DETC_PERFCONFIG="BUILD_STR($(ETC_PERFCONFIG_SQ))"
CFLAGS_parse-events.o += -Wno-redundant-decls
CFLAGS_header.o += -include $(OUTPUT)PERF-VERSION-FILE
$(OUTPUT)util/kallsyms.o: ../lib/symbol/kallsyms.c FORCE
$(call rule_mkdir)
......
......@@ -41,6 +41,8 @@ static const u64 __perf_magic2_sw = 0x50455246494c4532ULL;
#define PERF_MAGIC __perf_magic2
const char perf_version_string[] = PERF_VERSION;
struct perf_file_attr {
struct perf_event_attr attr;
struct perf_file_section ids;
......
......@@ -94,32 +94,10 @@ static int pmu_format(const char *name, struct list_head *format)
return 0;
}
static int perf_pmu__parse_scale(struct perf_pmu_alias *alias, char *dir, char *name)
static int convert_scale(const char *scale, char **end, double *sval)
{
struct stat st;
ssize_t sret;
char scale[128];
int fd, ret = -1;
char path[PATH_MAX];
char *lc;
snprintf(path, PATH_MAX, "%s/%s.scale", dir, name);
fd = open(path, O_RDONLY);
if (fd == -1)
return -1;
if (fstat(fd, &st) < 0)
goto error;
sret = read(fd, scale, sizeof(scale)-1);
if (sret < 0)
goto error;
if (scale[sret - 1] == '\n')
scale[sret - 1] = '\0';
else
scale[sret] = '\0';
int ret = 0;
/*
* save current locale
......@@ -134,7 +112,7 @@ static int perf_pmu__parse_scale(struct perf_pmu_alias *alias, char *dir, char *
lc = strdup(lc);
if (!lc) {
ret = -ENOMEM;
goto error;
goto out;
}
/*
......@@ -144,14 +122,42 @@ static int perf_pmu__parse_scale(struct perf_pmu_alias *alias, char *dir, char *
*/
setlocale(LC_NUMERIC, "C");
alias->scale = strtod(scale, NULL);
*sval = strtod(scale, end);
out:
/* restore locale */
setlocale(LC_NUMERIC, lc);
free(lc);
return ret;
}
static int perf_pmu__parse_scale(struct perf_pmu_alias *alias, char *dir, char *name)
{
struct stat st;
ssize_t sret;
char scale[128];
int fd, ret = -1;
char path[PATH_MAX];
snprintf(path, PATH_MAX, "%s/%s.scale", dir, name);
fd = open(path, O_RDONLY);
if (fd == -1)
return -1;
if (fstat(fd, &st) < 0)
goto error;
sret = read(fd, scale, sizeof(scale)-1);
if (sret < 0)
goto error;
if (scale[sret - 1] == '\n')
scale[sret - 1] = '\0';
else
scale[sret] = '\0';
ret = 0;
ret = convert_scale(scale, NULL, &alias->scale);
error:
close(fd);
return ret;
......
......@@ -1191,7 +1191,7 @@ static int
u64 sample_type = evsel->attr.sample_type;
u64 read_format = evsel->attr.read_format;
/* Standard sample delievery. */
/* Standard sample delivery. */
if (!(sample_type & PERF_SAMPLE_READ))
return tool->sample(tool, event, sample, evsel, machine);
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment