Commit ba11ba65 authored by Adrian Hunter's avatar Adrian Hunter Committed by Arnaldo Carvalho de Melo

perf intel-pt: Add mispred-all config option to aid use with autofdo

autofdo incorrectly expects branch flags to include either mispred or
predicted.  In fact mispred = predicted = 0 is valid and means the flags
are not supported, which they aren't by Intel PT.

To make autofdo work, add a config option which will cause Intel PT
decoder to set the mispred flag on all branches.

Below is an example of using Intel PT with autofdo.  The example is
also added to the Intel PT documentation.  It requires autofdo
(https://github.com/google/autofdo) and gcc version 5.  The bubble
sort example is from the AutoFDO tutorial (https://gcc.gnu.org/wiki/AutoFDO/Tutorial)
amended to take the number of elements as a parameter.

	$ gcc-5 -O3 sort.c -o sort_optimized
	$ ./sort_optimized 30000
	Bubble sorting array of 30000 elements
	2254 ms

	$ cat ~/.perfconfig
	[intel-pt]
		mispred-all

	$ perf record -e intel_pt//u ./sort 3000
	Bubble sorting array of 3000 elements
	58 ms
	[ perf record: Woken up 2 times to write data ]
	[ perf record: Captured and wrote 3.939 MB perf.data ]
	$ perf inject -i perf.data -o inj --itrace=i100usle --strip
	$ ./create_gcov --binary=./sort --profile=inj --gcov=sort.gcov -gcov_version=1
	$ gcc-5 -O3 -fauto-profile=sort.gcov sort.c -o sort_autofdo
	$ ./sort_autofdo 30000
	Bubble sorting array of 30000 elements
	2155 ms

Note there is currently no advantage to using Intel PT instead of LBR,
but that may change in the future if greater use is made of the data.
Signed-off-by: default avatarAdrian Hunter <adrian.hunter@intel.com>
Cc: Jiri Olsa <jolsa@redhat.com>
Link: http://lkml.kernel.org/r/1443186956-18718-26-git-send-email-adrian.hunter@intel.comSigned-off-by: default avatarArnaldo Carvalho de Melo <acme@redhat.com>
parent f56fb986
...@@ -764,3 +764,32 @@ perf inject also accepts the --itrace option in which case tracing data is ...@@ -764,3 +764,32 @@ perf inject also accepts the --itrace option in which case tracing data is
removed and replaced with the synthesized events. e.g. removed and replaced with the synthesized events. e.g.
perf inject --itrace -i perf.data -o perf.data.new perf inject --itrace -i perf.data -o perf.data.new
Below is an example of using Intel PT with autofdo. It requires autofdo
(https://github.com/google/autofdo) and gcc version 5. The bubble
sort example is from the AutoFDO tutorial (https://gcc.gnu.org/wiki/AutoFDO/Tutorial)
amended to take the number of elements as a parameter.
$ gcc-5 -O3 sort.c -o sort_optimized
$ ./sort_optimized 30000
Bubble sorting array of 30000 elements
2254 ms
$ cat ~/.perfconfig
[intel-pt]
mispred-all
$ perf record -e intel_pt//u ./sort 3000
Bubble sorting array of 3000 elements
58 ms
[ perf record: Woken up 2 times to write data ]
[ perf record: Captured and wrote 3.939 MB perf.data ]
$ perf inject -i perf.data -o inj --itrace=i100usle --strip
$ ./create_gcov --binary=./sort --profile=inj --gcov=sort.gcov -gcov_version=1
$ gcc-5 -O3 -fauto-profile=sort.gcov sort.c -o sort_autofdo
$ ./sort_autofdo 30000
Bubble sorting array of 30000 elements
2155 ms
Note there is currently no advantage to using Intel PT instead of LBR, but
that may change in the future if greater use is made of the data.
...@@ -64,6 +64,7 @@ struct intel_pt { ...@@ -64,6 +64,7 @@ struct intel_pt {
bool data_queued; bool data_queued;
bool est_tsc; bool est_tsc;
bool sync_switch; bool sync_switch;
bool mispred_all;
int have_sched_switch; int have_sched_switch;
u32 pmu_type; u32 pmu_type;
u64 kernel_start; u64 kernel_start;
...@@ -943,6 +944,7 @@ static void intel_pt_update_last_branch_rb(struct intel_pt_queue *ptq) ...@@ -943,6 +944,7 @@ static void intel_pt_update_last_branch_rb(struct intel_pt_queue *ptq)
be->flags.abort = !!(state->flags & INTEL_PT_ABORT_TX); be->flags.abort = !!(state->flags & INTEL_PT_ABORT_TX);
be->flags.in_tx = !!(state->flags & INTEL_PT_IN_TX); be->flags.in_tx = !!(state->flags & INTEL_PT_IN_TX);
/* No support for mispredict */ /* No support for mispredict */
be->flags.mispred = ptq->pt->mispred_all;
if (bs->nr < ptq->pt->synth_opts.last_branch_sz) if (bs->nr < ptq->pt->synth_opts.last_branch_sz)
bs->nr += 1; bs->nr += 1;
...@@ -1967,6 +1969,16 @@ static bool intel_pt_find_switch(struct perf_evlist *evlist) ...@@ -1967,6 +1969,16 @@ static bool intel_pt_find_switch(struct perf_evlist *evlist)
return false; return false;
} }
static int intel_pt_perf_config(const char *var, const char *value, void *data)
{
struct intel_pt *pt = data;
if (!strcmp(var, "intel-pt.mispred-all"))
pt->mispred_all = perf_config_bool(var, value);
return 0;
}
static const char * const intel_pt_info_fmts[] = { static const char * const intel_pt_info_fmts[] = {
[INTEL_PT_PMU_TYPE] = " PMU Type %"PRId64"\n", [INTEL_PT_PMU_TYPE] = " PMU Type %"PRId64"\n",
[INTEL_PT_TIME_SHIFT] = " Time Shift %"PRIu64"\n", [INTEL_PT_TIME_SHIFT] = " Time Shift %"PRIu64"\n",
...@@ -2011,6 +2023,8 @@ int intel_pt_process_auxtrace_info(union perf_event *event, ...@@ -2011,6 +2023,8 @@ int intel_pt_process_auxtrace_info(union perf_event *event,
if (!pt) if (!pt)
return -ENOMEM; return -ENOMEM;
perf_config(intel_pt_perf_config, pt);
err = auxtrace_queues__init(&pt->queues); err = auxtrace_queues__init(&pt->queues);
if (err) if (err)
goto err_free; goto err_free;
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment