Commit 770e9612 authored by Ingo Molnar

Merge tag 'perf-core-for-mingo-4.14-20170901' of...

Merge tag 'perf-core-for-mingo-4.14-20170901' of git://git.kernel.org/pub/scm/linux/kernel/git/acme/linux into perf/urgent

Pull perf/core improvements and fixes from Arnaldo Carvalho de Melo:

- Support syscall name glob matching in 'perf trace' (Arnaldo Carvalho de Melo)

  e.g.:

   # perf trace -e pkey_*
   32.784 (0.006 ms): pkey/16018 pkey_alloc(init_val: DISABLE_WRITE) = -1 EINVAL Invalid argument
   32.795 (0.004 ms): pkey/16018 pkey_mprotect(start: 0x7f380d0a6000, len: 4096, prot: READ|WRITE, pkey: -1) = 0
   32.801 (0.002 ms): pkey/16018 pkey_free(pkey: -1                ) = -1 EINVAL Invalid argument
   ^C#

- Do not auto merge counts for explicitly specified events in
  'perf stat' (Arnaldo Carvalho de Melo)

- Fix syntax in documentation of .perfconfig intel-pt option (Jack Henschel)

- Calculate the average cycles of iterations for loops detected by the
  branch history support in 'perf report' (Jin Yao)

- Support PERF_SAMPLE_PHYS_ADDR as a sort key "phys_daddr" in 'perf script', 'mem',
  'top' and 'report'. Also add a test entry for it in 'perf test' (Kan Liang)

- Fix 'Object code reading' 'perf test' entry in PowerPC (Ravi Bangoria)

- Remove some duplicate Power9 vendor events (described in JSON
  files) (Sukadev Bhattiprolu)

- Add help entry in the TUI annotate browser about cycling through the hottest
  instructions with TAB/shift+TAB (Arnaldo Carvalho de Melo)
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Signed-off-by: Ingo Molnar <mingo@kernel.org>
parents 24e700e2 eba9fac0
...@@ -139,8 +139,9 @@ enum perf_event_sample_format { ...@@ -139,8 +139,9 @@ enum perf_event_sample_format {
PERF_SAMPLE_IDENTIFIER = 1U << 16, PERF_SAMPLE_IDENTIFIER = 1U << 16,
PERF_SAMPLE_TRANSACTION = 1U << 17, PERF_SAMPLE_TRANSACTION = 1U << 17,
PERF_SAMPLE_REGS_INTR = 1U << 18, PERF_SAMPLE_REGS_INTR = 1U << 18,
PERF_SAMPLE_PHYS_ADDR = 1U << 19,
PERF_SAMPLE_MAX = 1U << 19, /* non-ABI */ PERF_SAMPLE_MAX = 1U << 20, /* non-ABI */
}; };
/* /*
...@@ -814,6 +815,7 @@ enum perf_event_type { ...@@ -814,6 +815,7 @@ enum perf_event_type {
* { u64 transaction; } && PERF_SAMPLE_TRANSACTION * { u64 transaction; } && PERF_SAMPLE_TRANSACTION
* { u64 abi; # enum perf_sample_regs_abi * { u64 abi; # enum perf_sample_regs_abi
* u64 regs[weight(mask)]; } && PERF_SAMPLE_REGS_INTR * u64 regs[weight(mask)]; } && PERF_SAMPLE_REGS_INTR
* { u64 phys_addr;} && PERF_SAMPLE_PHYS_ADDR
* }; * };
*/ */
PERF_RECORD_SAMPLE = 9, PERF_RECORD_SAMPLE = 9,
......
...@@ -873,7 +873,7 @@ amended to take the number of elements as a parameter. ...@@ -873,7 +873,7 @@ amended to take the number of elements as a parameter.
$ cat ~/.perfconfig $ cat ~/.perfconfig
[intel-pt] [intel-pt]
mispred-all mispred-all = on
$ perf record -e intel_pt//u ./sort 3000 $ perf record -e intel_pt//u ./sort 3000
Bubble sorting array of 3000 elements Bubble sorting array of 3000 elements
......
...@@ -59,6 +59,10 @@ OPTIONS ...@@ -59,6 +59,10 @@ OPTIONS
--ldload:: --ldload::
Specify desired latency for loads event. Specify desired latency for loads event.
-p::
--phys-data::
Record/Report sample physical addresses
SEE ALSO SEE ALSO
-------- --------
linkperf:perf-record[1], linkperf:perf-report[1] linkperf:perf-record[1], linkperf:perf-report[1]
...@@ -249,7 +249,10 @@ OPTIONS ...@@ -249,7 +249,10 @@ OPTIONS
-d:: -d::
--data:: --data::
Record the sample addresses. Record the sample virtual addresses.
--phys-data::
Record the sample physical addresses.
-T:: -T::
--timestamp:: --timestamp::
......
...@@ -137,6 +137,7 @@ OPTIONS ...@@ -137,6 +137,7 @@ OPTIONS
- mem: type of memory access for the data at the time of the sample - mem: type of memory access for the data at the time of the sample
- snoop: type of snoop (if any) for the data at the time of the sample - snoop: type of snoop (if any) for the data at the time of the sample
- dcacheline: the cacheline the data address is on at the time of the sample - dcacheline: the cacheline the data address is on at the time of the sample
- phys_daddr: physical address of data being executed on at the time of sample
And the default sort keys are changed to local_weight, mem, sym, dso, And the default sort keys are changed to local_weight, mem, sym, dso,
symbol_daddr, dso_daddr, snoop, tlb, locked, see '--mem-mode'. symbol_daddr, dso_daddr, snoop, tlb, locked, see '--mem-mode'.
......
...@@ -117,7 +117,7 @@ OPTIONS ...@@ -117,7 +117,7 @@ OPTIONS
Comma separated list of fields to print. Options are: Comma separated list of fields to print. Options are:
comm, tid, pid, time, cpu, event, trace, ip, sym, dso, addr, symoff, comm, tid, pid, time, cpu, event, trace, ip, sym, dso, addr, symoff,
srcline, period, iregs, brstack, brstacksym, flags, bpf-output, brstackinsn, brstackoff, srcline, period, iregs, brstack, brstacksym, flags, bpf-output, brstackinsn, brstackoff,
callindent, insn, insnlen, synth. callindent, insn, insnlen, synth, phys_addr.
Field list can be prepended with the type, trace, sw or hw, Field list can be prepended with the type, trace, sw or hw,
to indicate to which event type the field list applies. to indicate to which event type the field list applies.
e.g., -F sw:comm,tid,time,ip,sym and -F trace:time,cpu,trace e.g., -F sw:comm,tid,time,ip,sym and -F trace:time,cpu,trace
......
...@@ -37,7 +37,7 @@ OPTIONS ...@@ -37,7 +37,7 @@ OPTIONS
--expr:: --expr::
--event:: --event::
List of syscalls and other perf events (tracepoints, HW cache events, List of syscalls and other perf events (tracepoints, HW cache events,
etc) to show. etc) to show. Globbing is supported, e.g.: "epoll_*", "*msg*", etc.
See 'perf list' for a complete list of events. See 'perf list' for a complete list of events.
Prefixing with ! shows all syscalls but the ones specified. You may Prefixing with ! shows all syscalls but the ones specified. You may
need to escape it. need to escape it.
......
...@@ -23,6 +23,7 @@ struct perf_mem { ...@@ -23,6 +23,7 @@ struct perf_mem {
bool hide_unresolved; bool hide_unresolved;
bool dump_raw; bool dump_raw;
bool force; bool force;
bool phys_addr;
int operation; int operation;
const char *cpu_list; const char *cpu_list;
DECLARE_BITMAP(cpu_bitmap, MAX_NR_CPUS); DECLARE_BITMAP(cpu_bitmap, MAX_NR_CPUS);
...@@ -101,6 +102,9 @@ static int __cmd_record(int argc, const char **argv, struct perf_mem *mem) ...@@ -101,6 +102,9 @@ static int __cmd_record(int argc, const char **argv, struct perf_mem *mem)
rec_argv[i++] = "-d"; rec_argv[i++] = "-d";
if (mem->phys_addr)
rec_argv[i++] = "--phys-data";
for (j = 0; j < PERF_MEM_EVENTS__MAX; j++) { for (j = 0; j < PERF_MEM_EVENTS__MAX; j++) {
if (!perf_mem_events[j].record) if (!perf_mem_events[j].record)
continue; continue;
...@@ -161,30 +165,60 @@ dump_raw_samples(struct perf_tool *tool, ...@@ -161,30 +165,60 @@ dump_raw_samples(struct perf_tool *tool,
if (al.map != NULL) if (al.map != NULL)
al.map->dso->hit = 1; al.map->dso->hit = 1;
if (symbol_conf.field_sep) { if (mem->phys_addr) {
fmt = "%d%s%d%s0x%"PRIx64"%s0x%"PRIx64"%s%"PRIu64 if (symbol_conf.field_sep) {
"%s0x%"PRIx64"%s%s:%s\n"; fmt = "%d%s%d%s0x%"PRIx64"%s0x%"PRIx64"%s0x%016"PRIx64
"%s%"PRIu64"%s0x%"PRIx64"%s%s:%s\n";
} else {
fmt = "%5d%s%5d%s0x%016"PRIx64"%s0x016%"PRIx64
"%s0x%016"PRIx64"%s%5"PRIu64"%s0x%06"PRIx64
"%s%s:%s\n";
symbol_conf.field_sep = " ";
}
printf(fmt,
sample->pid,
symbol_conf.field_sep,
sample->tid,
symbol_conf.field_sep,
sample->ip,
symbol_conf.field_sep,
sample->addr,
symbol_conf.field_sep,
sample->phys_addr,
symbol_conf.field_sep,
sample->weight,
symbol_conf.field_sep,
sample->data_src,
symbol_conf.field_sep,
al.map ? (al.map->dso ? al.map->dso->long_name : "???") : "???",
al.sym ? al.sym->name : "???");
} else { } else {
fmt = "%5d%s%5d%s0x%016"PRIx64"%s0x016%"PRIx64 if (symbol_conf.field_sep) {
"%s%5"PRIu64"%s0x%06"PRIx64"%s%s:%s\n"; fmt = "%d%s%d%s0x%"PRIx64"%s0x%"PRIx64"%s%"PRIu64
symbol_conf.field_sep = " "; "%s0x%"PRIx64"%s%s:%s\n";
} } else {
fmt = "%5d%s%5d%s0x%016"PRIx64"%s0x016%"PRIx64
"%s%5"PRIu64"%s0x%06"PRIx64"%s%s:%s\n";
symbol_conf.field_sep = " ";
}
printf(fmt, printf(fmt,
sample->pid, sample->pid,
symbol_conf.field_sep, symbol_conf.field_sep,
sample->tid, sample->tid,
symbol_conf.field_sep, symbol_conf.field_sep,
sample->ip, sample->ip,
symbol_conf.field_sep, symbol_conf.field_sep,
sample->addr, sample->addr,
symbol_conf.field_sep, symbol_conf.field_sep,
sample->weight, sample->weight,
symbol_conf.field_sep, symbol_conf.field_sep,
sample->data_src, sample->data_src,
symbol_conf.field_sep, symbol_conf.field_sep,
al.map ? (al.map->dso ? al.map->dso->long_name : "???") : "???", al.map ? (al.map->dso ? al.map->dso->long_name : "???") : "???",
al.sym ? al.sym->name : "???"); al.sym ? al.sym->name : "???");
}
out_put: out_put:
addr_location__put(&al); addr_location__put(&al);
return 0; return 0;
...@@ -224,7 +258,10 @@ static int report_raw_events(struct perf_mem *mem) ...@@ -224,7 +258,10 @@ static int report_raw_events(struct perf_mem *mem)
if (ret < 0) if (ret < 0)
goto out_delete; goto out_delete;
printf("# PID, TID, IP, ADDR, LOCAL WEIGHT, DSRC, SYMBOL\n"); if (mem->phys_addr)
printf("# PID, TID, IP, ADDR, PHYS ADDR, LOCAL WEIGHT, DSRC, SYMBOL\n");
else
printf("# PID, TID, IP, ADDR, LOCAL WEIGHT, DSRC, SYMBOL\n");
ret = perf_session__process_events(session); ret = perf_session__process_events(session);
...@@ -254,9 +291,16 @@ static int report_events(int argc, const char **argv, struct perf_mem *mem) ...@@ -254,9 +291,16 @@ static int report_events(int argc, const char **argv, struct perf_mem *mem)
* there is no weight (cost) associated with stores, so don't print * there is no weight (cost) associated with stores, so don't print
* the column * the column
*/ */
if (!(mem->operation & MEM_OPERATION_LOAD)) if (!(mem->operation & MEM_OPERATION_LOAD)) {
rep_argv[i++] = "--sort=mem,sym,dso,symbol_daddr," if (mem->phys_addr)
"dso_daddr,tlb,locked"; rep_argv[i++] = "--sort=mem,sym,dso,symbol_daddr,"
"dso_daddr,tlb,locked,phys_daddr";
else
rep_argv[i++] = "--sort=mem,sym,dso,symbol_daddr,"
"dso_daddr,tlb,locked";
} else if (mem->phys_addr)
rep_argv[i++] = "--sort=local_weight,mem,sym,dso,symbol_daddr,"
"dso_daddr,snoop,tlb,locked,phys_daddr";
for (j = 1; j < argc; j++, i++) for (j = 1; j < argc; j++, i++)
rep_argv[i] = argv[j]; rep_argv[i] = argv[j];
...@@ -373,6 +417,7 @@ int cmd_mem(int argc, const char **argv) ...@@ -373,6 +417,7 @@ int cmd_mem(int argc, const char **argv)
"separator for columns, no spaces will be added" "separator for columns, no spaces will be added"
" between columns '.' is reserved."), " between columns '.' is reserved."),
OPT_BOOLEAN('f', "force", &mem.force, "don't complain, do it"), OPT_BOOLEAN('f', "force", &mem.force, "don't complain, do it"),
OPT_BOOLEAN('p', "phys-data", &mem.phys_addr, "Record/Report sample physical addresses"),
OPT_END() OPT_END()
}; };
const char *const mem_subcommands[] = { "record", "report", NULL }; const char *const mem_subcommands[] = { "record", "report", NULL };
......
...@@ -1604,6 +1604,8 @@ static struct option __record_options[] = { ...@@ -1604,6 +1604,8 @@ static struct option __record_options[] = {
OPT_BOOLEAN('s', "stat", &record.opts.inherit_stat, OPT_BOOLEAN('s', "stat", &record.opts.inherit_stat,
"per thread counts"), "per thread counts"),
OPT_BOOLEAN('d', "data", &record.opts.sample_address, "Record the sample addresses"), OPT_BOOLEAN('d', "data", &record.opts.sample_address, "Record the sample addresses"),
OPT_BOOLEAN(0, "phys-data", &record.opts.sample_phys_addr,
"Record the sample physical addresses"),
OPT_BOOLEAN(0, "sample-cpu", &record.opts.sample_cpu, "Record the sample cpu"), OPT_BOOLEAN(0, "sample-cpu", &record.opts.sample_cpu, "Record the sample cpu"),
OPT_BOOLEAN_SET('T', "timestamp", &record.opts.sample_time, OPT_BOOLEAN_SET('T', "timestamp", &record.opts.sample_time,
&record.opts.sample_time_set, &record.opts.sample_time_set,
......
...@@ -87,6 +87,7 @@ enum perf_output_field { ...@@ -87,6 +87,7 @@ enum perf_output_field {
PERF_OUTPUT_BRSTACKINSN = 1U << 23, PERF_OUTPUT_BRSTACKINSN = 1U << 23,
PERF_OUTPUT_BRSTACKOFF = 1U << 24, PERF_OUTPUT_BRSTACKOFF = 1U << 24,
PERF_OUTPUT_SYNTH = 1U << 25, PERF_OUTPUT_SYNTH = 1U << 25,
PERF_OUTPUT_PHYS_ADDR = 1U << 26,
}; };
struct output_option { struct output_option {
...@@ -119,6 +120,7 @@ struct output_option { ...@@ -119,6 +120,7 @@ struct output_option {
{.str = "brstackinsn", .field = PERF_OUTPUT_BRSTACKINSN}, {.str = "brstackinsn", .field = PERF_OUTPUT_BRSTACKINSN},
{.str = "brstackoff", .field = PERF_OUTPUT_BRSTACKOFF}, {.str = "brstackoff", .field = PERF_OUTPUT_BRSTACKOFF},
{.str = "synth", .field = PERF_OUTPUT_SYNTH}, {.str = "synth", .field = PERF_OUTPUT_SYNTH},
{.str = "phys_addr", .field = PERF_OUTPUT_PHYS_ADDR},
}; };
enum { enum {
...@@ -175,7 +177,8 @@ static struct { ...@@ -175,7 +177,8 @@ static struct {
PERF_OUTPUT_EVNAME | PERF_OUTPUT_IP | PERF_OUTPUT_EVNAME | PERF_OUTPUT_IP |
PERF_OUTPUT_SYM | PERF_OUTPUT_DSO | PERF_OUTPUT_SYM | PERF_OUTPUT_DSO |
PERF_OUTPUT_PERIOD | PERF_OUTPUT_ADDR | PERF_OUTPUT_PERIOD | PERF_OUTPUT_ADDR |
PERF_OUTPUT_DATA_SRC | PERF_OUTPUT_WEIGHT, PERF_OUTPUT_DATA_SRC | PERF_OUTPUT_WEIGHT |
PERF_OUTPUT_PHYS_ADDR,
.invalid_fields = PERF_OUTPUT_TRACE | PERF_OUTPUT_BPF_OUTPUT, .invalid_fields = PERF_OUTPUT_TRACE | PERF_OUTPUT_BPF_OUTPUT,
}, },
...@@ -382,6 +385,11 @@ static int perf_evsel__check_attr(struct perf_evsel *evsel, ...@@ -382,6 +385,11 @@ static int perf_evsel__check_attr(struct perf_evsel *evsel,
PERF_OUTPUT_IREGS)) PERF_OUTPUT_IREGS))
return -EINVAL; return -EINVAL;
if (PRINT_FIELD(PHYS_ADDR) &&
perf_evsel__check_stype(evsel, PERF_SAMPLE_PHYS_ADDR, "PHYS_ADDR",
PERF_OUTPUT_PHYS_ADDR))
return -EINVAL;
return 0; return 0;
} }
...@@ -1446,6 +1454,9 @@ static void process_event(struct perf_script *script, ...@@ -1446,6 +1454,9 @@ static void process_event(struct perf_script *script,
if (perf_evsel__is_bpf_output(evsel) && PRINT_FIELD(BPF_OUTPUT)) if (perf_evsel__is_bpf_output(evsel) && PRINT_FIELD(BPF_OUTPUT))
print_sample_bpf_output(sample); print_sample_bpf_output(sample);
print_insn(sample, attr, thread, machine); print_insn(sample, attr, thread, machine);
if (PRINT_FIELD(PHYS_ADDR))
printf("%16" PRIx64, sample->phys_addr);
printf("\n"); printf("\n");
} }
...@@ -2729,7 +2740,7 @@ int cmd_script(int argc, const char **argv) ...@@ -2729,7 +2740,7 @@ int cmd_script(int argc, const char **argv)
"Valid types: hw,sw,trace,raw,synth. " "Valid types: hw,sw,trace,raw,synth. "
"Fields: comm,tid,pid,time,cpu,event,trace,ip,sym,dso," "Fields: comm,tid,pid,time,cpu,event,trace,ip,sym,dso,"
"addr,symoff,period,iregs,brstack,brstacksym,flags," "addr,symoff,period,iregs,brstack,brstacksym,flags,"
"bpf-output,callindent,insn,insnlen,brstackinsn,synth", "bpf-output,callindent,insn,insnlen,brstackinsn,synth,phys_addr",
parse_output_fields), parse_output_fields),
OPT_BOOLEAN('a', "all-cpus", &system_wide, OPT_BOOLEAN('a', "all-cpus", &system_wide,
"system-wide collection from all CPUs"), "system-wide collection from all CPUs"),
......
...@@ -1257,7 +1257,7 @@ static bool collect_data(struct perf_evsel *counter, ...@@ -1257,7 +1257,7 @@ static bool collect_data(struct perf_evsel *counter,
if (counter->merged_stat) if (counter->merged_stat)
return false; return false;
cb(counter, data, true); cb(counter, data, true);
if (!no_merge) if (!no_merge && counter->auto_merge_stats)
collect_all_aliases(counter, cb, data); collect_all_aliases(counter, cb, data);
return true; return true;
} }
......
...@@ -1261,6 +1261,7 @@ static int trace__read_syscall_info(struct trace *trace, int id) ...@@ -1261,6 +1261,7 @@ static int trace__read_syscall_info(struct trace *trace, int id)
static int trace__validate_ev_qualifier(struct trace *trace) static int trace__validate_ev_qualifier(struct trace *trace)
{ {
int err = 0, i; int err = 0, i;
size_t nr_allocated;
struct str_node *pos; struct str_node *pos;
trace->ev_qualifier_ids.nr = strlist__nr_entries(trace->ev_qualifier); trace->ev_qualifier_ids.nr = strlist__nr_entries(trace->ev_qualifier);
...@@ -1274,13 +1275,18 @@ static int trace__validate_ev_qualifier(struct trace *trace) ...@@ -1274,13 +1275,18 @@ static int trace__validate_ev_qualifier(struct trace *trace)
goto out; goto out;
} }
nr_allocated = trace->ev_qualifier_ids.nr;
i = 0; i = 0;
strlist__for_each_entry(pos, trace->ev_qualifier) { strlist__for_each_entry(pos, trace->ev_qualifier) {
const char *sc = pos->s; const char *sc = pos->s;
int id = syscalltbl__id(trace->sctbl, sc); int id = syscalltbl__id(trace->sctbl, sc), match_next = -1;
if (id < 0) { if (id < 0) {
id = syscalltbl__strglobmatch_first(trace->sctbl, sc, &match_next);
if (id >= 0)
goto matches;
if (err == 0) { if (err == 0) {
fputs("Error:\tInvalid syscall ", trace->output); fputs("Error:\tInvalid syscall ", trace->output);
err = -EINVAL; err = -EINVAL;
...@@ -1290,13 +1296,37 @@ static int trace__validate_ev_qualifier(struct trace *trace) ...@@ -1290,13 +1296,37 @@ static int trace__validate_ev_qualifier(struct trace *trace)
fputs(sc, trace->output); fputs(sc, trace->output);
} }
matches:
trace->ev_qualifier_ids.entries[i++] = id; trace->ev_qualifier_ids.entries[i++] = id;
if (match_next == -1)
continue;
while (1) {
id = syscalltbl__strglobmatch_next(trace->sctbl, sc, &match_next);
if (id < 0)
break;
if (nr_allocated == trace->ev_qualifier_ids.nr) {
void *entries;
nr_allocated += 8;
entries = realloc(trace->ev_qualifier_ids.entries,
nr_allocated * sizeof(trace->ev_qualifier_ids.entries[0]));
if (entries == NULL) {
err = -ENOMEM;
fputs("\nError:\t Not enough memory for parsing\n", trace->output);
goto out_free;
}
trace->ev_qualifier_ids.entries = entries;
}
trace->ev_qualifier_ids.nr++;
trace->ev_qualifier_ids.entries[i++] = id;
}
} }
if (err < 0) { if (err < 0) {
fputs("\nHint:\ttry 'perf list syscalls:sys_enter_*'" fputs("\nHint:\ttry 'perf list syscalls:sys_enter_*'"
"\nHint:\tand: 'man syscalls'\n", trace->output); "\nHint:\tand: 'man syscalls'\n", trace->output);
out_free:
zfree(&trace->ev_qualifier_ids.entries); zfree(&trace->ev_qualifier_ids.entries);
trace->ev_qualifier_ids.nr = 0; trace->ev_qualifier_ids.nr = 0;
} }
...@@ -2814,7 +2844,7 @@ static int trace__parse_events_option(const struct option *opt, const char *str, ...@@ -2814,7 +2844,7 @@ static int trace__parse_events_option(const struct option *opt, const char *str,
struct trace *trace = (struct trace *)opt->value; struct trace *trace = (struct trace *)opt->value;
const char *s = str; const char *s = str;
char *sep = NULL, *lists[2] = { NULL, NULL, }; char *sep = NULL, *lists[2] = { NULL, NULL, };
int len = strlen(str) + 1, err = -1, list; int len = strlen(str) + 1, err = -1, list, idx;
char *strace_groups_dir = system_path(STRACE_GROUPS_DIR); char *strace_groups_dir = system_path(STRACE_GROUPS_DIR);
char group_name[PATH_MAX]; char group_name[PATH_MAX];
...@@ -2831,7 +2861,8 @@ static int trace__parse_events_option(const struct option *opt, const char *str, ...@@ -2831,7 +2861,8 @@ static int trace__parse_events_option(const struct option *opt, const char *str,
*sep = '\0'; *sep = '\0';
list = 0; list = 0;
if (syscalltbl__id(trace->sctbl, s) >= 0) { if (syscalltbl__id(trace->sctbl, s) >= 0 ||
syscalltbl__strglobmatch_first(trace->sctbl, s, &idx) >= 0) {
list = 1; list = 1;
} else { } else {
path__join(group_name, sizeof(group_name), strace_groups_dir, s); path__join(group_name, sizeof(group_name), strace_groups_dir, s);
......
...@@ -43,6 +43,7 @@ struct record_opts { ...@@ -43,6 +43,7 @@ struct record_opts {
bool no_samples; bool no_samples;
bool raw_samples; bool raw_samples;
bool sample_address; bool sample_address;
bool sample_phys_addr;
bool sample_weight; bool sample_weight;
bool sample_time; bool sample_time;
bool sample_time_set; bool sample_time_set;
......
...@@ -79,11 +79,6 @@ ...@@ -79,11 +79,6 @@
"EventName": "PM_LD_MISS_L1", "EventName": "PM_LD_MISS_L1",
"BriefDescription": "Load Missed L1, counted at execution time (can be greater than loads finished). LMQ merges are not included in this count. i.e. if a load instruction misses on an address that is already allocated on the LMQ, this event will not increment for that load). Note that this count is per slice, so if a load spans multiple slices this event will increment multiple times for a single load." "BriefDescription": "Load Missed L1, counted at execution time (can be greater than loads finished). LMQ merges are not included in this count. i.e. if a load instruction misses on an address that is already allocated on the LMQ, this event will not increment for that load). Note that this count is per slice, so if a load spans multiple slices this event will increment multiple times for a single load."
}, },
{,
"EventCode": "0x400F0",
"EventName": "PM_LD_MISS_L1",
"BriefDescription": "Load Missed L1, counted at execution time (can be greater than loads finished). LMQ merges are not included in this count. i.e. if a load instruction misses on an address that is already allocated on the LMQ, this event will not increment for that load). Note that this count is per slice, so if a load spans multiple slices this event will increment multiple times for a single load."
},
{, {,
"EventCode": "0x2E01A", "EventCode": "0x2E01A",
"EventName": "PM_CMPLU_STALL_LSU_FLUSH_NEXT", "EventName": "PM_CMPLU_STALL_LSU_FLUSH_NEXT",
...@@ -374,4 +369,4 @@ ...@@ -374,4 +369,4 @@
"EventName": "PM_IPTEG_FROM_L31_ECO_MOD", "EventName": "PM_IPTEG_FROM_L31_ECO_MOD",
"BriefDescription": "A Page Table Entry was loaded into the TLB with Modified (M) data from another core's ECO L3 on the same chip due to a instruction side request" "BriefDescription": "A Page Table Entry was loaded into the TLB with Modified (M) data from another core's ECO L3 on the same chip due to a instruction side request"
} }
] ]
\ No newline at end of file
...@@ -604,11 +604,6 @@ ...@@ -604,11 +604,6 @@
"EventName": "PM_L2_RTY_LD", "EventName": "PM_L2_RTY_LD",
"BriefDescription": "RC retries on PB for any load from core (excludes DCBFs)" "BriefDescription": "RC retries on PB for any load from core (excludes DCBFs)"
}, },
{,
"EventCode": "0x3689E",
"EventName": "PM_L2_RTY_LD",
"BriefDescription": "RC retries on PB for any load from core (excludes DCBFs)"
},
{, {,
"EventCode": "0xE08C", "EventCode": "0xE08C",
"EventName": "PM_LSU0_ERAT_HIT", "EventName": "PM_LSU0_ERAT_HIT",
...@@ -714,11 +709,6 @@ ...@@ -714,11 +709,6 @@
"EventName": "PM_L3_RD0_BUSY", "EventName": "PM_L3_RD0_BUSY",
"BriefDescription": "Lifetime, sample of RD machine 0 valid" "BriefDescription": "Lifetime, sample of RD machine 0 valid"
}, },
{,
"EventCode": "0x468B4",
"EventName": "PM_L3_RD0_BUSY",
"BriefDescription": "Lifetime, sample of RD machine 0 valid"
},
{, {,
"EventCode": "0x46080", "EventCode": "0x46080",
"EventName": "PM_L2_DISP_ALL_L2MISS", "EventName": "PM_L2_DISP_ALL_L2MISS",
...@@ -849,21 +839,11 @@ ...@@ -849,21 +839,11 @@
"EventName": "PM_RC0_BUSY", "EventName": "PM_RC0_BUSY",
"BriefDescription": "RC mach 0 Busy. Used by PMU to sample ave RC lifetime (mach0 used as sample point)" "BriefDescription": "RC mach 0 Busy. Used by PMU to sample ave RC lifetime (mach0 used as sample point)"
}, },
{,
"EventCode": "0x2608C",
"EventName": "PM_RC0_BUSY",
"BriefDescription": "RC mach 0 Busy. Used by PMU to sample ave RC lifetime (mach0 used as sample point)"
},
{, {,
"EventCode": "0x36082", "EventCode": "0x36082",
"EventName": "PM_L2_LD_DISP", "EventName": "PM_L2_LD_DISP",
"BriefDescription": "All successful I-or-D side load dispatches for this thread (excludes i_l2mru_tch_reqs)." "BriefDescription": "All successful I-or-D side load dispatches for this thread (excludes i_l2mru_tch_reqs)."
}, },
{,
"EventCode": "0x1609E",
"EventName": "PM_L2_LD_DISP",
"BriefDescription": "All successful D side load dispatches for this thread (L2 miss + L2 hits)"
},
{, {,
"EventCode": "0xF8B0", "EventCode": "0xF8B0",
"EventName": "PM_L3_SW_PREF", "EventName": "PM_L3_SW_PREF",
...@@ -1039,11 +1019,6 @@ ...@@ -1039,11 +1019,6 @@
"EventName": "PM_L3_CO_MEPF", "EventName": "PM_L3_CO_MEPF",
"BriefDescription": "L3 castouts in Mepf state for this thread" "BriefDescription": "L3 castouts in Mepf state for this thread"
}, },
{,
"EventCode": "0x168A0",
"EventName": "PM_L3_CO_MEPF",
"BriefDescription": "L3 CO of line in Mep state (includes casthrough to memory). The Mepf state indicates that a line was brought in to satisfy an L3 prefetch request"
},
{, {,
"EventCode": "0x460A2", "EventCode": "0x460A2",
"EventName": "PM_L3_LAT_CI_HIT", "EventName": "PM_L3_LAT_CI_HIT",
...@@ -1149,11 +1124,6 @@ ...@@ -1149,11 +1124,6 @@
"EventName": "PM_L2_RTY_ST", "EventName": "PM_L2_RTY_ST",
"BriefDescription": "RC retries on PB for any store from core (excludes DCBFs)" "BriefDescription": "RC retries on PB for any store from core (excludes DCBFs)"
}, },
{,
"EventCode": "0x4689E",
"EventName": "PM_L2_RTY_ST",
"BriefDescription": "RC retries on PB for any store from core (excludes DCBFs)"
},
{, {,
"EventCode": "0x24040", "EventCode": "0x24040",
"EventName": "PM_INST_FROM_L2_MEPF", "EventName": "PM_INST_FROM_L2_MEPF",
...@@ -1254,11 +1224,6 @@ ...@@ -1254,11 +1224,6 @@
"EventName": "PM_CO0_BUSY", "EventName": "PM_CO0_BUSY",
"BriefDescription": "CO mach 0 Busy. Used by PMU to sample ave CO lifetime (mach0 used as sample point)" "BriefDescription": "CO mach 0 Busy. Used by PMU to sample ave CO lifetime (mach0 used as sample point)"
}, },
{,
"EventCode": "0x4608C",
"EventName": "PM_CO0_BUSY",
"BriefDescription": "CO mach 0 Busy. Used by PMU to sample ave CO lifetime (mach0 used as sample point)"
},
{, {,
"EventCode": "0x2C122", "EventCode": "0x2C122",
"EventName": "PM_MRK_DATA_FROM_L3_DISP_CONFLICT_CYC", "EventName": "PM_MRK_DATA_FROM_L3_DISP_CONFLICT_CYC",
...@@ -1394,11 +1359,6 @@ ...@@ -1394,11 +1359,6 @@
"EventName": "PM_IPTEG_FROM_LMEM", "EventName": "PM_IPTEG_FROM_LMEM",
"BriefDescription": "A Page Table Entry was loaded into the TLB from the local chip's Memory due to a instruction side request" "BriefDescription": "A Page Table Entry was loaded into the TLB from the local chip's Memory due to a instruction side request"
}, },
{,
"EventCode": "0x40006",
"EventName": "PM_ISLB_MISS",
"BriefDescription": "Number of ISLB misses for this thread"
},
{, {,
"EventCode": "0xD8A8", "EventCode": "0xD8A8",
"EventName": "PM_ISLB_MISS", "EventName": "PM_ISLB_MISS",
...@@ -1514,11 +1474,6 @@ ...@@ -1514,11 +1474,6 @@
"EventName": "PM_L2_INST", "EventName": "PM_L2_INST",
"BriefDescription": "All successful I-side dispatches for this thread (excludes i_l2mru_tch reqs)." "BriefDescription": "All successful I-side dispatches for this thread (excludes i_l2mru_tch reqs)."
}, },
{,
"EventCode": "0x3609E",
"EventName": "PM_L2_INST",
"BriefDescription": "All successful I-side dispatches that were an L2 miss for this thread (excludes i_l2mru_tch reqs)"
},
{, {,
"EventCode": "0x3504C", "EventCode": "0x3504C",
"EventName": "PM_IPTEG_FROM_DL4", "EventName": "PM_IPTEG_FROM_DL4",
...@@ -1689,11 +1644,6 @@ ...@@ -1689,11 +1644,6 @@
"EventName": "PM_L2_LD_HIT", "EventName": "PM_L2_LD_HIT",
"BriefDescription": "All successful I-or-D side load dispatches for this thread that were L2 hits (excludes i_l2mru_tch_reqs)" "BriefDescription": "All successful I-or-D side load dispatches for this thread that were L2 hits (excludes i_l2mru_tch_reqs)"
}, },
{,
"EventCode": "0x2609E",
"EventName": "PM_L2_LD_HIT",
"BriefDescription": "All successful D side load dispatches for this thread that were L2 hits for this thread"
},
{, {,
"EventCode": "0x168AC", "EventCode": "0x168AC",
"EventName": "PM_L3_CI_USAGE", "EventName": "PM_L3_CI_USAGE",
...@@ -1794,21 +1744,11 @@ ...@@ -1794,21 +1744,11 @@
"EventName": "PM_L3_WI0_BUSY", "EventName": "PM_L3_WI0_BUSY",
"BriefDescription": "Rotating sample of 8 WI valid" "BriefDescription": "Rotating sample of 8 WI valid"
}, },
{,
"EventCode": "0x260B6",
"EventName": "PM_L3_WI0_BUSY",
"BriefDescription": "Rotating sample of 8 WI valid (duplicate)"
},
{, {,
"EventCode": "0x368AC", "EventCode": "0x368AC",
"EventName": "PM_L3_CO0_BUSY", "EventName": "PM_L3_CO0_BUSY",
"BriefDescription": "Lifetime, sample of CO machine 0 valid" "BriefDescription": "Lifetime, sample of CO machine 0 valid"
}, },
{,
"EventCode": "0x468AC",
"EventName": "PM_L3_CO0_BUSY",
"BriefDescription": "Lifetime, sample of CO machine 0 valid"
},
{, {,
"EventCode": "0x2E040", "EventCode": "0x2E040",
"EventName": "PM_DPTEG_FROM_L2_MEPF", "EventName": "PM_DPTEG_FROM_L2_MEPF",
...@@ -1839,11 +1779,6 @@ ...@@ -1839,11 +1779,6 @@
"EventName": "PM_L3_P0_PF_RTY", "EventName": "PM_L3_P0_PF_RTY",
"BriefDescription": "L3 PF received retry port 0, every retry counted" "BriefDescription": "L3 PF received retry port 0, every retry counted"
}, },
{,
"EventCode": "0x260AE",
"EventName": "PM_L3_P0_PF_RTY",
"BriefDescription": "L3 PF received retry port 0, every retry counted"
},
{, {,
"EventCode": "0x268B2", "EventCode": "0x268B2",
"EventName": "PM_L3_LOC_GUESS_WRONG", "EventName": "PM_L3_LOC_GUESS_WRONG",
...@@ -1894,11 +1829,6 @@ ...@@ -1894,11 +1829,6 @@
"EventName": "PM_L3_SN0_BUSY", "EventName": "PM_L3_SN0_BUSY",
"BriefDescription": "Lifetime, sample of snooper machine 0 valid" "BriefDescription": "Lifetime, sample of snooper machine 0 valid"
}, },
{,
"EventCode": "0x460AC",
"EventName": "PM_L3_SN0_BUSY",
"BriefDescription": "Lifetime, sample of snooper machine 0 valid"
},
{, {,
"EventCode": "0x3005C", "EventCode": "0x3005C",
"EventName": "PM_BFU_BUSY", "EventName": "PM_BFU_BUSY",
...@@ -1934,11 +1864,6 @@ ...@@ -1934,11 +1864,6 @@
"EventName": "PM_L3_PF0_BUSY", "EventName": "PM_L3_PF0_BUSY",
"BriefDescription": "Lifetime, sample of PF machine 0 valid" "BriefDescription": "Lifetime, sample of PF machine 0 valid"
}, },
{,
"EventCode": "0x460B4",
"EventName": "PM_L3_PF0_BUSY",
"BriefDescription": "Lifetime, sample of PF machine 0 valid"
},
{, {,
"EventCode": "0xC0B0", "EventCode": "0xC0B0",
"EventName": "PM_LSU_FLUSH_UE", "EventName": "PM_LSU_FLUSH_UE",
...@@ -2084,11 +2009,6 @@ ...@@ -2084,11 +2009,6 @@
"EventName": "PM_L3_P1_CO_RTY", "EventName": "PM_L3_P1_CO_RTY",
"BriefDescription": "L3 CO received retry port 1 (memory only), every retry counted" "BriefDescription": "L3 CO received retry port 1 (memory only), every retry counted"
}, },
{,
"EventCode": "0x468AE",
"EventName": "PM_L3_P1_CO_RTY",
"BriefDescription": "L3 CO received retry port 3 (memory only), every retry counted"
},
{, {,
"EventCode": "0xC0AC", "EventCode": "0xC0AC",
"EventName": "PM_LSU_FLUSH_EMSH", "EventName": "PM_LSU_FLUSH_EMSH",
...@@ -2194,11 +2114,6 @@ ...@@ -2194,11 +2114,6 @@
"EventName": "PM_L2_SN_M_WR_DONE", "EventName": "PM_L2_SN_M_WR_DONE",
"BriefDescription": "SNP dispatched for a write and was M (true M); for DMA cacheinj this will pulse if rty/push is required (won't pulse if cacheinj is accepted)" "BriefDescription": "SNP dispatched for a write and was M (true M); for DMA cacheinj this will pulse if rty/push is required (won't pulse if cacheinj is accepted)"
}, },
{,
"EventCode": "0x46886",
"EventName": "PM_L2_SN_M_WR_DONE",
"BriefDescription": "SNP dispatched for a write and was M (true M); for DMA cacheinj this will pulse if rty/push is required (won't pulse if cacheinj is accepted)"
},
{, {,
"EventCode": "0x489C", "EventCode": "0x489C",
"EventName": "PM_BR_CORECT_PRED_TAKEN_CMPL", "EventName": "PM_BR_CORECT_PRED_TAKEN_CMPL",
...@@ -2289,21 +2204,11 @@ ...@@ -2289,21 +2204,11 @@
"EventName": "PM_SN0_BUSY", "EventName": "PM_SN0_BUSY",
"BriefDescription": "SN mach 0 Busy. Used by PMU to sample ave SN lifetime (mach0 used as sample point)" "BriefDescription": "SN mach 0 Busy. Used by PMU to sample ave SN lifetime (mach0 used as sample point)"
}, },
{,
"EventCode": "0x26090",
"EventName": "PM_SN0_BUSY",
"BriefDescription": "SN mach 0 Busy. Used by PMU to sample ave SN lifetime (mach0 used as sample point)"
},
{, {,
"EventCode": "0x360AE", "EventCode": "0x360AE",
"EventName": "PM_L3_P0_CO_RTY", "EventName": "PM_L3_P0_CO_RTY",
"BriefDescription": "L3 CO received retry port 0 (memory only), every retry counted" "BriefDescription": "L3 CO received retry port 0 (memory only), every retry counted"
}, },
{,
"EventCode": "0x460AE",
"EventName": "PM_L3_P0_CO_RTY",
"BriefDescription": "L3 CO received retry port 0 (memory only), every retry counted"
},
{, {,
"EventCode": "0x168A8", "EventCode": "0x168A8",
"EventName": "PM_L3_WI_USAGE", "EventName": "PM_L3_WI_USAGE",
...@@ -2339,26 +2244,11 @@ ...@@ -2339,26 +2244,11 @@
"EventName": "PM_L3_P1_PF_RTY", "EventName": "PM_L3_P1_PF_RTY",
"BriefDescription": "L3 PF received retry port 1, every retry counted" "BriefDescription": "L3 PF received retry port 1, every retry counted"
}, },
{,
"EventCode": "0x268AE",
"EventName": "PM_L3_P1_PF_RTY",
"BriefDescription": "L3 PF received retry port 3, every retry counted"
},
{, {,
"EventCode": "0x46082", "EventCode": "0x46082",
"EventName": "PM_L2_ST_DISP", "EventName": "PM_L2_ST_DISP",
"BriefDescription": "All successful D-side store dispatches for this thread " "BriefDescription": "All successful D-side store dispatches for this thread "
}, },
{,
"EventCode": "0x1689E",
"EventName": "PM_L2_ST_DISP",
"BriefDescription": "All successful D-side store dispatches for this thread (L2 miss + L2 hits)"
},
{,
"EventCode": "0x36880",
"EventName": "PM_L2_INST_MISS",
"BriefDescription": "All successful I-side dispatches that were an L2 miss for this thread (excludes i_l2mru_tch reqs)"
},
{, {,
"EventCode": "0x4609E", "EventCode": "0x4609E",
"EventName": "PM_L2_INST_MISS", "EventName": "PM_L2_INST_MISS",
...@@ -2429,11 +2319,6 @@ ...@@ -2429,11 +2319,6 @@
"EventName": "PM_INST_DISP", "EventName": "PM_INST_DISP",
"BriefDescription": "# PPC Dispatched" "BriefDescription": "# PPC Dispatched"
}, },
{,
"EventCode": "0x300F2",
"EventName": "PM_INST_DISP",
"BriefDescription": "# PPC Dispatched"
},
{, {,
"EventCode": "0x4E05E", "EventCode": "0x4E05E",
"EventName": "PM_TM_OUTER_TBEGIN_DISP", "EventName": "PM_TM_OUTER_TBEGIN_DISP",
...@@ -2459,11 +2344,6 @@ ...@@ -2459,11 +2344,6 @@
"EventName": "PM_L2_ST_HIT", "EventName": "PM_L2_ST_HIT",
"BriefDescription": "All successful D-side store dispatches for this thread that were L2 hits" "BriefDescription": "All successful D-side store dispatches for this thread that were L2 hits"
}, },
{,
"EventCode": "0x2689E",
"EventName": "PM_L2_ST_HIT",
"BriefDescription": "All successful D-side store dispatches that were L2 hits for this thread"
},
{, {,
"EventCode": "0x360A8", "EventCode": "0x360A8",
"EventName": "PM_L3_CO", "EventName": "PM_L3_CO",
......
...@@ -419,11 +419,6 @@ ...@@ -419,11 +419,6 @@
"EventName": "PM_INST_GRP_PUMP_MPRED_RTY", "EventName": "PM_INST_GRP_PUMP_MPRED_RTY",
"BriefDescription": "Final Pump Scope (Group) ended up larger than Initial Pump Scope (Chip) for an instruction fetch" "BriefDescription": "Final Pump Scope (Group) ended up larger than Initial Pump Scope (Chip) for an instruction fetch"
}, },
{,
"EventCode": "0x10016",
"EventName": "PM_DSLB_MISS",
"BriefDescription": "Data SLB Miss - Total of all segment sizes"
},
{, {,
"EventCode": "0xD0A8", "EventCode": "0xD0A8",
"EventName": "PM_DSLB_MISS", "EventName": "PM_DSLB_MISS",
...@@ -554,4 +549,4 @@ ...@@ -554,4 +549,4 @@
"EventName": "PM_MRK_DATA_FROM_L21_SHR_CYC", "EventName": "PM_MRK_DATA_FROM_L21_SHR_CYC",
"BriefDescription": "Duration in cycles to reload with Shared (S) data from another core's L2 on the same chip due to a marked load" "BriefDescription": "Duration in cycles to reload with Shared (S) data from another core's L2 on the same chip due to a marked load"
} }
] ]
\ No newline at end of file
...@@ -4,11 +4,6 @@ ...@@ -4,11 +4,6 @@
"EventName": "PM_BR_2PATH", "EventName": "PM_BR_2PATH",
"BriefDescription": "Branches that are not strongly biased" "BriefDescription": "Branches that are not strongly biased"
}, },
{,
"EventCode": "0x40036",
"EventName": "PM_BR_2PATH",
"BriefDescription": "Branches that are not strongly biased"
},
{, {,
"EventCode": "0x40056", "EventCode": "0x40056",
"EventName": "PM_MEM_LOC_THRESH_LSU_HIGH", "EventName": "PM_MEM_LOC_THRESH_LSU_HIGH",
...@@ -124,4 +119,4 @@ ...@@ -124,4 +119,4 @@
"EventName": "PM_1FLOP_CMPL", "EventName": "PM_1FLOP_CMPL",
"BriefDescription": "one flop (fadd, fmul, fsub, fcmp, fsel, fabs, fnabs, fres, fsqrte, fneg) operation completed" "BriefDescription": "one flop (fadd, fmul, fsub, fcmp, fsel, fabs, fnabs, fres, fsqrte, fneg) operation completed"
} }
] ]
\ No newline at end of file
...@@ -237,6 +237,11 @@ static int read_object_code(u64 addr, size_t len, u8 cpumode, ...@@ -237,6 +237,11 @@ static int read_object_code(u64 addr, size_t len, u8 cpumode,
thread__find_addr_map(thread, cpumode, MAP__FUNCTION, addr, &al); thread__find_addr_map(thread, cpumode, MAP__FUNCTION, addr, &al);
if (!al.map || !al.map->dso) { if (!al.map || !al.map->dso) {
if (cpumode == PERF_RECORD_MISC_HYPERVISOR) {
pr_debug("Hypervisor address can not be resolved - skipping\n");
return 0;
}
pr_debug("thread__find_addr_map failed\n"); pr_debug("thread__find_addr_map failed\n");
return -1; return -1;
} }
......
...@@ -141,6 +141,9 @@ static bool samples_same(const struct perf_sample *s1, ...@@ -141,6 +141,9 @@ static bool samples_same(const struct perf_sample *s1,
} }
} }
if (type & PERF_SAMPLE_PHYS_ADDR)
COMP(phys_addr);
return true; return true;
} }
...@@ -206,6 +209,7 @@ static int do_test(u64 sample_type, u64 sample_regs, u64 read_format) ...@@ -206,6 +209,7 @@ static int do_test(u64 sample_type, u64 sample_regs, u64 read_format)
.mask = sample_regs, .mask = sample_regs,
.regs = regs, .regs = regs,
}, },
.phys_addr = 113,
}; };
struct sample_read_value values[] = {{1, 5}, {9, 3}, {2, 7}, {6, 4},}; struct sample_read_value values[] = {{1, 5}, {9, 3}, {2, 7}, {6, 4},};
struct perf_sample sample_out; struct perf_sample sample_out;
...@@ -305,7 +309,7 @@ int test__sample_parsing(struct test *test __maybe_unused, int subtest __maybe_u ...@@ -305,7 +309,7 @@ int test__sample_parsing(struct test *test __maybe_unused, int subtest __maybe_u
* were added. Please actually update the test rather than just change * were added. Please actually update the test rather than just change
* the condition below. * the condition below.
*/ */
if (PERF_SAMPLE_MAX > PERF_SAMPLE_REGS_INTR << 1) { if (PERF_SAMPLE_MAX > PERF_SAMPLE_PHYS_ADDR << 1) {
pr_debug("sample format has changed, some new PERF_SAMPLE_ bit was introduced - test needs updating\n"); pr_debug("sample format has changed, some new PERF_SAMPLE_ bit was introduced - test needs updating\n");
return -1; return -1;
} }
......
...@@ -829,7 +829,8 @@ static int annotate_browser__run(struct annotate_browser *browser, ...@@ -829,7 +829,8 @@ static int annotate_browser__run(struct annotate_browser *browser,
"q/ESC/CTRL+C Exit\n\n" "q/ESC/CTRL+C Exit\n\n"
"ENTER Go to target\n" "ENTER Go to target\n"
"ESC Exit\n" "ESC Exit\n"
"H Cycle thru hottest instructions\n" "H Go to hottest instruction\n"
"TAB/shift+TAB Cycle thru hottest instructions\n"
"j Toggle showing jump to target arrows\n" "j Toggle showing jump to target arrows\n"
"J Toggle showing number of jump sources on targets\n" "J Toggle showing number of jump sources on targets\n"
"n Search next string\n" "n Search next string\n"
......
...@@ -931,12 +931,8 @@ static int hist_browser__show_callchain_list(struct hist_browser *browser, ...@@ -931,12 +931,8 @@ static int hist_browser__show_callchain_list(struct hist_browser *browser,
browser->show_dso); browser->show_dso);
if (symbol_conf.show_branchflag_count) { if (symbol_conf.show_branchflag_count) {
if (need_percent) callchain_list_counts__printf_value(chain, NULL,
callchain_list_counts__printf_value(node, chain, NULL, buf, sizeof(buf));
buf, sizeof(buf));
else
callchain_list_counts__printf_value(NULL, chain, NULL,
buf, sizeof(buf));
if (asprintf(&alloc_str2, "%s%s", str, buf) < 0) if (asprintf(&alloc_str2, "%s%s", str, buf) < 0)
str = "Not enough memory!"; str = "Not enough memory!";
......
...@@ -124,12 +124,8 @@ static size_t ipchain__fprintf_graph(FILE *fp, struct callchain_node *node, ...@@ -124,12 +124,8 @@ static size_t ipchain__fprintf_graph(FILE *fp, struct callchain_node *node,
str = callchain_list__sym_name(chain, bf, sizeof(bf), false); str = callchain_list__sym_name(chain, bf, sizeof(bf), false);
if (symbol_conf.show_branchflag_count) { if (symbol_conf.show_branchflag_count) {
if (!period) callchain_list_counts__printf_value(chain, NULL,
callchain_list_counts__printf_value(node, chain, NULL, buf, sizeof(buf));
buf, sizeof(buf));
else
callchain_list_counts__printf_value(NULL, chain, NULL,
buf, sizeof(buf));
if (asprintf(&alloc_str, "%s%s", str, buf) < 0) if (asprintf(&alloc_str, "%s%s", str, buf) < 0)
str = "Not enough memory!"; str = "Not enough memory!";
...@@ -313,7 +309,7 @@ static size_t callchain__fprintf_graph(FILE *fp, struct rb_root *root, ...@@ -313,7 +309,7 @@ static size_t callchain__fprintf_graph(FILE *fp, struct rb_root *root,
if (symbol_conf.show_branchflag_count) if (symbol_conf.show_branchflag_count)
ret += callchain_list_counts__printf_value( ret += callchain_list_counts__printf_value(
NULL, chain, fp, NULL, 0); chain, fp, NULL, 0);
ret += fprintf(fp, "\n"); ret += fprintf(fp, "\n");
if (++entries_printed == callchain_param.print_limit) if (++entries_printed == callchain_param.print_limit)
......
...@@ -588,7 +588,7 @@ fill_node(struct callchain_node *node, struct callchain_cursor *cursor) ...@@ -588,7 +588,7 @@ fill_node(struct callchain_node *node, struct callchain_cursor *cursor)
call->cycles_count = call->cycles_count =
cursor_node->branch_flags.cycles; cursor_node->branch_flags.cycles;
call->iter_count = cursor_node->nr_loop_iter; call->iter_count = cursor_node->nr_loop_iter;
call->samples_count = cursor_node->samples; call->iter_cycles = cursor_node->iter_cycles;
} }
} }
...@@ -722,7 +722,7 @@ static enum match_result match_chain(struct callchain_cursor_node *node, ...@@ -722,7 +722,7 @@ static enum match_result match_chain(struct callchain_cursor_node *node,
cnode->cycles_count += cnode->cycles_count +=
node->branch_flags.cycles; node->branch_flags.cycles;
cnode->iter_count += node->nr_loop_iter; cnode->iter_count += node->nr_loop_iter;
cnode->samples_count += node->samples; cnode->iter_cycles += node->iter_cycles;
} }
} }
...@@ -998,7 +998,7 @@ int callchain_merge(struct callchain_cursor *cursor, ...@@ -998,7 +998,7 @@ int callchain_merge(struct callchain_cursor *cursor,
int callchain_cursor_append(struct callchain_cursor *cursor, int callchain_cursor_append(struct callchain_cursor *cursor,
u64 ip, struct map *map, struct symbol *sym, u64 ip, struct map *map, struct symbol *sym,
bool branch, struct branch_flags *flags, bool branch, struct branch_flags *flags,
int nr_loop_iter, int samples, u64 branch_from) int nr_loop_iter, u64 iter_cycles, u64 branch_from)
{ {
struct callchain_cursor_node *node = *cursor->last; struct callchain_cursor_node *node = *cursor->last;
...@@ -1016,7 +1016,7 @@ int callchain_cursor_append(struct callchain_cursor *cursor, ...@@ -1016,7 +1016,7 @@ int callchain_cursor_append(struct callchain_cursor *cursor,
node->sym = sym; node->sym = sym;
node->branch = branch; node->branch = branch;
node->nr_loop_iter = nr_loop_iter; node->nr_loop_iter = nr_loop_iter;
node->samples = samples; node->iter_cycles = iter_cycles;
if (flags) if (flags)
memcpy(&node->branch_flags, flags, memcpy(&node->branch_flags, flags,
...@@ -1306,7 +1306,7 @@ static int branch_to_str(char *bf, int bfsize, ...@@ -1306,7 +1306,7 @@ static int branch_to_str(char *bf, int bfsize,
static int branch_from_str(char *bf, int bfsize, static int branch_from_str(char *bf, int bfsize,
u64 branch_count, u64 branch_count,
u64 cycles_count, u64 iter_count, u64 cycles_count, u64 iter_count,
u64 samples_count) u64 iter_cycles)
{ {
int printed = 0, i = 0; int printed = 0, i = 0;
u64 cycles; u64 cycles;
...@@ -1318,9 +1318,13 @@ static int branch_from_str(char *bf, int bfsize, ...@@ -1318,9 +1318,13 @@ static int branch_from_str(char *bf, int bfsize,
bf + printed, bfsize - printed); bf + printed, bfsize - printed);
} }
if (iter_count && samples_count) { if (iter_count) {
printed += count_pri64_printf(i++, "iterations", printed += count_pri64_printf(i++, "iter",
iter_count / samples_count, iter_count,
bf + printed, bfsize - printed);
printed += count_pri64_printf(i++, "avg_cycles",
iter_cycles / iter_count,
bf + printed, bfsize - printed); bf + printed, bfsize - printed);
} }
...@@ -1333,7 +1337,7 @@ static int branch_from_str(char *bf, int bfsize, ...@@ -1333,7 +1337,7 @@ static int branch_from_str(char *bf, int bfsize,
static int counts_str_build(char *bf, int bfsize, static int counts_str_build(char *bf, int bfsize,
u64 branch_count, u64 predicted_count, u64 branch_count, u64 predicted_count,
u64 abort_count, u64 cycles_count, u64 abort_count, u64 cycles_count,
u64 iter_count, u64 samples_count, u64 iter_count, u64 iter_cycles,
struct branch_type_stat *brtype_stat) struct branch_type_stat *brtype_stat)
{ {
int printed; int printed;
...@@ -1346,7 +1350,7 @@ static int counts_str_build(char *bf, int bfsize, ...@@ -1346,7 +1350,7 @@ static int counts_str_build(char *bf, int bfsize,
predicted_count, abort_count, brtype_stat); predicted_count, abort_count, brtype_stat);
} else { } else {
printed = branch_from_str(bf, bfsize, branch_count, printed = branch_from_str(bf, bfsize, branch_count,
cycles_count, iter_count, samples_count); cycles_count, iter_count, iter_cycles);
} }
if (!printed) if (!printed)
...@@ -1358,14 +1362,14 @@ static int counts_str_build(char *bf, int bfsize, ...@@ -1358,14 +1362,14 @@ static int counts_str_build(char *bf, int bfsize,
static int callchain_counts_printf(FILE *fp, char *bf, int bfsize, static int callchain_counts_printf(FILE *fp, char *bf, int bfsize,
u64 branch_count, u64 predicted_count, u64 branch_count, u64 predicted_count,
u64 abort_count, u64 cycles_count, u64 abort_count, u64 cycles_count,
u64 iter_count, u64 samples_count, u64 iter_count, u64 iter_cycles,
struct branch_type_stat *brtype_stat) struct branch_type_stat *brtype_stat)
{ {
char str[256]; char str[256];
counts_str_build(str, sizeof(str), branch_count, counts_str_build(str, sizeof(str), branch_count,
predicted_count, abort_count, cycles_count, predicted_count, abort_count, cycles_count,
iter_count, samples_count, brtype_stat); iter_count, iter_cycles, brtype_stat);
if (fp) if (fp)
return fprintf(fp, "%s", str); return fprintf(fp, "%s", str);
...@@ -1373,31 +1377,23 @@ static int callchain_counts_printf(FILE *fp, char *bf, int bfsize, ...@@ -1373,31 +1377,23 @@ static int callchain_counts_printf(FILE *fp, char *bf, int bfsize,
return scnprintf(bf, bfsize, "%s", str); return scnprintf(bf, bfsize, "%s", str);
} }
int callchain_list_counts__printf_value(struct callchain_node *node, int callchain_list_counts__printf_value(struct callchain_list *clist,
struct callchain_list *clist,
FILE *fp, char *bf, int bfsize) FILE *fp, char *bf, int bfsize)
{ {
u64 branch_count, predicted_count; u64 branch_count, predicted_count;
u64 abort_count, cycles_count; u64 abort_count, cycles_count;
u64 iter_count = 0, samples_count = 0; u64 iter_count, iter_cycles;
branch_count = clist->branch_count; branch_count = clist->branch_count;
predicted_count = clist->predicted_count; predicted_count = clist->predicted_count;
abort_count = clist->abort_count; abort_count = clist->abort_count;
cycles_count = clist->cycles_count; cycles_count = clist->cycles_count;
iter_count = clist->iter_count;
if (node) { iter_cycles = clist->iter_cycles;
struct callchain_list *call;
list_for_each_entry(call, &node->val, list) {
iter_count += call->iter_count;
samples_count += call->samples_count;
}
}
return callchain_counts_printf(fp, bf, bfsize, branch_count, return callchain_counts_printf(fp, bf, bfsize, branch_count,
predicted_count, abort_count, predicted_count, abort_count,
cycles_count, iter_count, samples_count, cycles_count, iter_count, iter_cycles,
&clist->brtype_stat); &clist->brtype_stat);
} }
...@@ -1523,7 +1519,8 @@ int callchain_cursor__copy(struct callchain_cursor *dst, ...@@ -1523,7 +1519,8 @@ int callchain_cursor__copy(struct callchain_cursor *dst,
rc = callchain_cursor_append(dst, node->ip, node->map, node->sym, rc = callchain_cursor_append(dst, node->ip, node->map, node->sym,
node->branch, &node->branch_flags, node->branch, &node->branch_flags,
node->nr_loop_iter, node->samples, node->nr_loop_iter,
node->iter_cycles,
node->branch_from); node->branch_from);
if (rc) if (rc)
break; break;
......
...@@ -119,7 +119,7 @@ struct callchain_list { ...@@ -119,7 +119,7 @@ struct callchain_list {
u64 abort_count; u64 abort_count;
u64 cycles_count; u64 cycles_count;
u64 iter_count; u64 iter_count;
u64 samples_count; u64 iter_cycles;
struct branch_type_stat brtype_stat; struct branch_type_stat brtype_stat;
char *srcline; char *srcline;
struct list_head list; struct list_head list;
...@@ -139,7 +139,7 @@ struct callchain_cursor_node { ...@@ -139,7 +139,7 @@ struct callchain_cursor_node {
struct branch_flags branch_flags; struct branch_flags branch_flags;
u64 branch_from; u64 branch_from;
int nr_loop_iter; int nr_loop_iter;
int samples; u64 iter_cycles;
struct callchain_cursor_node *next; struct callchain_cursor_node *next;
}; };
...@@ -201,7 +201,7 @@ static inline void callchain_cursor_reset(struct callchain_cursor *cursor) ...@@ -201,7 +201,7 @@ static inline void callchain_cursor_reset(struct callchain_cursor *cursor)
int callchain_cursor_append(struct callchain_cursor *cursor, u64 ip, int callchain_cursor_append(struct callchain_cursor *cursor, u64 ip,
struct map *map, struct symbol *sym, struct map *map, struct symbol *sym,
bool branch, struct branch_flags *flags, bool branch, struct branch_flags *flags,
int nr_loop_iter, int samples, u64 branch_from); int nr_loop_iter, u64 iter_cycles, u64 branch_from);
/* Close a cursor writing session. Initialize for the reader */ /* Close a cursor writing session. Initialize for the reader */
static inline void callchain_cursor_commit(struct callchain_cursor *cursor) static inline void callchain_cursor_commit(struct callchain_cursor *cursor)
...@@ -282,8 +282,7 @@ char *callchain_node__scnprintf_value(struct callchain_node *node, ...@@ -282,8 +282,7 @@ char *callchain_node__scnprintf_value(struct callchain_node *node,
int callchain_node__fprintf_value(struct callchain_node *node, int callchain_node__fprintf_value(struct callchain_node *node,
FILE *fp, u64 total); FILE *fp, u64 total);
int callchain_list_counts__printf_value(struct callchain_node *node, int callchain_list_counts__printf_value(struct callchain_list *clist,
struct callchain_list *clist,
FILE *fp, char *bf, int bfsize); FILE *fp, char *bf, int bfsize);
void free_callchain(struct callchain_root *root); void free_callchain(struct callchain_root *root);
......
...@@ -200,6 +200,7 @@ struct perf_sample { ...@@ -200,6 +200,7 @@ struct perf_sample {
u32 cpu; u32 cpu;
u32 raw_size; u32 raw_size;
u64 data_src; u64 data_src;
u64 phys_addr;
u32 flags; u32 flags;
u16 insn_len; u16 insn_len;
u8 cpumode; u8 cpumode;
......
...@@ -955,6 +955,9 @@ void perf_evsel__config(struct perf_evsel *evsel, struct record_opts *opts, ...@@ -955,6 +955,9 @@ void perf_evsel__config(struct perf_evsel *evsel, struct record_opts *opts,
if (opts->sample_address) if (opts->sample_address)
perf_evsel__set_sample_bit(evsel, DATA_SRC); perf_evsel__set_sample_bit(evsel, DATA_SRC);
if (opts->sample_phys_addr)
perf_evsel__set_sample_bit(evsel, PHYS_ADDR);
if (opts->no_buffering) { if (opts->no_buffering) {
attr->watermark = 0; attr->watermark = 0;
attr->wakeup_events = 1; attr->wakeup_events = 1;
...@@ -1464,7 +1467,7 @@ static void __p_sample_type(char *buf, size_t size, u64 value) ...@@ -1464,7 +1467,7 @@ static void __p_sample_type(char *buf, size_t size, u64 value)
bit_name(PERIOD), bit_name(STREAM_ID), bit_name(RAW), bit_name(PERIOD), bit_name(STREAM_ID), bit_name(RAW),
bit_name(BRANCH_STACK), bit_name(REGS_USER), bit_name(STACK_USER), bit_name(BRANCH_STACK), bit_name(REGS_USER), bit_name(STACK_USER),
bit_name(IDENTIFIER), bit_name(REGS_INTR), bit_name(DATA_SRC), bit_name(IDENTIFIER), bit_name(REGS_INTR), bit_name(DATA_SRC),
bit_name(WEIGHT), bit_name(WEIGHT), bit_name(PHYS_ADDR),
{ .name = NULL, } { .name = NULL, }
}; };
#undef bit_name #undef bit_name
...@@ -2206,6 +2209,12 @@ int perf_evsel__parse_sample(struct perf_evsel *evsel, union perf_event *event, ...@@ -2206,6 +2209,12 @@ int perf_evsel__parse_sample(struct perf_evsel *evsel, union perf_event *event,
} }
} }
data->phys_addr = 0;
if (type & PERF_SAMPLE_PHYS_ADDR) {
data->phys_addr = *array;
array++;
}
return 0; return 0;
} }
...@@ -2311,6 +2320,9 @@ size_t perf_event__sample_event_size(const struct perf_sample *sample, u64 type, ...@@ -2311,6 +2320,9 @@ size_t perf_event__sample_event_size(const struct perf_sample *sample, u64 type,
} }
} }
if (type & PERF_SAMPLE_PHYS_ADDR)
result += sizeof(u64);
return result; return result;
} }
...@@ -2500,6 +2512,11 @@ int perf_event__synthesize_sample(union perf_event *event, u64 type, ...@@ -2500,6 +2512,11 @@ int perf_event__synthesize_sample(union perf_event *event, u64 type,
} }
} }
if (type & PERF_SAMPLE_PHYS_ADDR) {
*array = sample->phys_addr;
array++;
}
return 0; return 0;
} }
......
...@@ -131,6 +131,7 @@ struct perf_evsel { ...@@ -131,6 +131,7 @@ struct perf_evsel {
bool cmdline_group_boundary; bool cmdline_group_boundary;
struct list_head config_terms; struct list_head config_terms;
int bpf_fd; int bpf_fd;
bool auto_merge_stats;
bool merged_stat; bool merged_stat;
const char * metric_expr; const char * metric_expr;
const char * metric_name; const char * metric_name;
......
...@@ -167,6 +167,10 @@ void hists__calc_col_len(struct hists *hists, struct hist_entry *h) ...@@ -167,6 +167,10 @@ void hists__calc_col_len(struct hists *hists, struct hist_entry *h)
symlen = unresolved_col_width + 4 + 2; symlen = unresolved_col_width + 4 + 2;
hists__set_unres_dso_col_len(hists, HISTC_MEM_DADDR_DSO); hists__set_unres_dso_col_len(hists, HISTC_MEM_DADDR_DSO);
} }
hists__new_col_len(hists, HISTC_MEM_PHYS_DADDR,
unresolved_col_width + 4 + 2);
} else { } else {
symlen = unresolved_col_width + 4 + 2; symlen = unresolved_col_width + 4 + 2;
hists__new_col_len(hists, HISTC_MEM_DADDR_SYMBOL, symlen); hists__new_col_len(hists, HISTC_MEM_DADDR_SYMBOL, symlen);
......
...@@ -47,6 +47,7 @@ enum hist_column { ...@@ -47,6 +47,7 @@ enum hist_column {
HISTC_GLOBAL_WEIGHT, HISTC_GLOBAL_WEIGHT,
HISTC_MEM_DADDR_SYMBOL, HISTC_MEM_DADDR_SYMBOL,
HISTC_MEM_DADDR_DSO, HISTC_MEM_DADDR_DSO,
HISTC_MEM_PHYS_DADDR,
HISTC_MEM_LOCKED, HISTC_MEM_LOCKED,
HISTC_MEM_TLB, HISTC_MEM_TLB,
HISTC_MEM_LVL, HISTC_MEM_LVL,
......
...@@ -1635,10 +1635,12 @@ static void ip__resolve_ams(struct thread *thread, ...@@ -1635,10 +1635,12 @@ static void ip__resolve_ams(struct thread *thread,
ams->al_addr = al.addr; ams->al_addr = al.addr;
ams->sym = al.sym; ams->sym = al.sym;
ams->map = al.map; ams->map = al.map;
ams->phys_addr = 0;
} }
static void ip__resolve_data(struct thread *thread, static void ip__resolve_data(struct thread *thread,
u8 m, struct addr_map_symbol *ams, u64 addr) u8 m, struct addr_map_symbol *ams,
u64 addr, u64 phys_addr)
{ {
struct addr_location al; struct addr_location al;
...@@ -1658,6 +1660,7 @@ static void ip__resolve_data(struct thread *thread, ...@@ -1658,6 +1660,7 @@ static void ip__resolve_data(struct thread *thread,
ams->al_addr = al.addr; ams->al_addr = al.addr;
ams->sym = al.sym; ams->sym = al.sym;
ams->map = al.map; ams->map = al.map;
ams->phys_addr = phys_addr;
} }
struct mem_info *sample__resolve_mem(struct perf_sample *sample, struct mem_info *sample__resolve_mem(struct perf_sample *sample,
...@@ -1669,12 +1672,18 @@ struct mem_info *sample__resolve_mem(struct perf_sample *sample, ...@@ -1669,12 +1672,18 @@ struct mem_info *sample__resolve_mem(struct perf_sample *sample,
return NULL; return NULL;
ip__resolve_ams(al->thread, &mi->iaddr, sample->ip); ip__resolve_ams(al->thread, &mi->iaddr, sample->ip);
ip__resolve_data(al->thread, al->cpumode, &mi->daddr, sample->addr); ip__resolve_data(al->thread, al->cpumode, &mi->daddr,
sample->addr, sample->phys_addr);
mi->data_src.val = sample->data_src; mi->data_src.val = sample->data_src;
return mi; return mi;
} }
/*
 * Per-branch-entry loop bookkeeping used while removing loops from a
 * branch stack. Filled in by save_iterations() and read back by
 * add_callchain_ip(), which forwards both fields to
 * callchain_cursor_append().
 */
struct iterations {
/* Number of branch entries handed to save_iterations() for this loop. */
int nr_loop_iter;
/* Sum of flags.cycles over those branch entries. */
u64 cycles;
};
static int add_callchain_ip(struct thread *thread, static int add_callchain_ip(struct thread *thread,
struct callchain_cursor *cursor, struct callchain_cursor *cursor,
struct symbol **parent, struct symbol **parent,
...@@ -1683,11 +1692,12 @@ static int add_callchain_ip(struct thread *thread, ...@@ -1683,11 +1692,12 @@ static int add_callchain_ip(struct thread *thread,
u64 ip, u64 ip,
bool branch, bool branch,
struct branch_flags *flags, struct branch_flags *flags,
int nr_loop_iter, struct iterations *iter,
int samples,
u64 branch_from) u64 branch_from)
{ {
struct addr_location al; struct addr_location al;
int nr_loop_iter = 0;
u64 iter_cycles = 0;
al.filtered = 0; al.filtered = 0;
al.sym = NULL; al.sym = NULL;
...@@ -1737,9 +1747,15 @@ static int add_callchain_ip(struct thread *thread, ...@@ -1737,9 +1747,15 @@ static int add_callchain_ip(struct thread *thread,
if (symbol_conf.hide_unresolved && al.sym == NULL) if (symbol_conf.hide_unresolved && al.sym == NULL)
return 0; return 0;
if (iter) {
nr_loop_iter = iter->nr_loop_iter;
iter_cycles = iter->cycles;
}
return callchain_cursor_append(cursor, al.addr, al.map, al.sym, return callchain_cursor_append(cursor, al.addr, al.map, al.sym,
branch, flags, nr_loop_iter, samples, branch, flags, nr_loop_iter,
branch_from); iter_cycles, branch_from);
} }
struct branch_info *sample__resolve_bstack(struct perf_sample *sample, struct branch_info *sample__resolve_bstack(struct perf_sample *sample,
...@@ -1760,6 +1776,18 @@ struct branch_info *sample__resolve_bstack(struct perf_sample *sample, ...@@ -1760,6 +1776,18 @@ struct branch_info *sample__resolve_bstack(struct perf_sample *sample,
return bi; return bi;
} }
static void save_iterations(struct iterations *iter,
struct branch_entry *be, int nr)
{
int i;
iter->nr_loop_iter = nr;
iter->cycles = 0;
for (i = 0; i < nr; i++)
iter->cycles += be[i].flags.cycles;
}
#define CHASHSZ 127 #define CHASHSZ 127
#define CHASHBITS 7 #define CHASHBITS 7
#define NO_ENTRY 0xff #define NO_ENTRY 0xff
...@@ -1767,7 +1795,8 @@ struct branch_info *sample__resolve_bstack(struct perf_sample *sample, ...@@ -1767,7 +1795,8 @@ struct branch_info *sample__resolve_bstack(struct perf_sample *sample,
#define PERF_MAX_BRANCH_DEPTH 127 #define PERF_MAX_BRANCH_DEPTH 127
/* Remove loops. */ /* Remove loops. */
static int remove_loops(struct branch_entry *l, int nr) static int remove_loops(struct branch_entry *l, int nr,
struct iterations *iter)
{ {
int i, j, off; int i, j, off;
unsigned char chash[CHASHSZ]; unsigned char chash[CHASHSZ];
...@@ -1792,8 +1821,18 @@ static int remove_loops(struct branch_entry *l, int nr) ...@@ -1792,8 +1821,18 @@ static int remove_loops(struct branch_entry *l, int nr)
break; break;
} }
if (is_loop) { if (is_loop) {
memmove(l + i, l + i + off, j = nr - (i + off);
(nr - (i + off)) * sizeof(*l)); if (j > 0) {
save_iterations(iter + i + off,
l + i, off);
memmove(iter + i, iter + i + off,
j * sizeof(*iter));
memmove(l + i, l + i + off,
j * sizeof(*l));
}
nr -= off; nr -= off;
} }
} }
...@@ -1883,7 +1922,7 @@ static int resolve_lbr_callchain_sample(struct thread *thread, ...@@ -1883,7 +1922,7 @@ static int resolve_lbr_callchain_sample(struct thread *thread,
err = add_callchain_ip(thread, cursor, parent, err = add_callchain_ip(thread, cursor, parent,
root_al, &cpumode, ip, root_al, &cpumode, ip,
branch, flags, 0, 0, branch, flags, NULL,
branch_from); branch_from);
if (err) if (err)
return (err < 0) ? err : 0; return (err < 0) ? err : 0;
...@@ -1909,7 +1948,6 @@ static int thread__resolve_callchain_sample(struct thread *thread, ...@@ -1909,7 +1948,6 @@ static int thread__resolve_callchain_sample(struct thread *thread,
int i, j, err, nr_entries; int i, j, err, nr_entries;
int skip_idx = -1; int skip_idx = -1;
int first_call = 0; int first_call = 0;
int nr_loop_iter;
if (chain) if (chain)
chain_nr = chain->nr; chain_nr = chain->nr;
...@@ -1942,6 +1980,7 @@ static int thread__resolve_callchain_sample(struct thread *thread, ...@@ -1942,6 +1980,7 @@ static int thread__resolve_callchain_sample(struct thread *thread,
if (branch && callchain_param.branch_callstack) { if (branch && callchain_param.branch_callstack) {
int nr = min(max_stack, (int)branch->nr); int nr = min(max_stack, (int)branch->nr);
struct branch_entry be[nr]; struct branch_entry be[nr];
struct iterations iter[nr];
if (branch->nr > PERF_MAX_BRANCH_DEPTH) { if (branch->nr > PERF_MAX_BRANCH_DEPTH) {
pr_warning("corrupted branch chain. skipping...\n"); pr_warning("corrupted branch chain. skipping...\n");
...@@ -1972,38 +2011,21 @@ static int thread__resolve_callchain_sample(struct thread *thread, ...@@ -1972,38 +2011,21 @@ static int thread__resolve_callchain_sample(struct thread *thread,
be[i] = branch->entries[branch->nr - i - 1]; be[i] = branch->entries[branch->nr - i - 1];
} }
nr_loop_iter = nr; memset(iter, 0, sizeof(struct iterations) * nr);
nr = remove_loops(be, nr); nr = remove_loops(be, nr, iter);
/*
* Get the number of iterations.
* It's only approximation, but good enough in practice.
*/
if (nr_loop_iter > nr)
nr_loop_iter = nr_loop_iter - nr + 1;
else
nr_loop_iter = 0;
for (i = 0; i < nr; i++) { for (i = 0; i < nr; i++) {
if (i == nr - 1) err = add_callchain_ip(thread, cursor, parent,
err = add_callchain_ip(thread, cursor, parent, root_al,
root_al, NULL, be[i].to,
NULL, be[i].to, true, &be[i].flags,
true, &be[i].flags, NULL, be[i].from);
nr_loop_iter, 1,
be[i].from);
else
err = add_callchain_ip(thread, cursor, parent,
root_al,
NULL, be[i].to,
true, &be[i].flags,
0, 0, be[i].from);
if (!err) if (!err)
err = add_callchain_ip(thread, cursor, parent, root_al, err = add_callchain_ip(thread, cursor, parent, root_al,
NULL, be[i].from, NULL, be[i].from,
true, &be[i].flags, true, &be[i].flags,
0, 0, 0); &iter[i], 0);
if (err == -EINVAL) if (err == -EINVAL)
break; break;
if (err) if (err)
...@@ -2037,7 +2059,7 @@ static int thread__resolve_callchain_sample(struct thread *thread, ...@@ -2037,7 +2059,7 @@ static int thread__resolve_callchain_sample(struct thread *thread,
err = add_callchain_ip(thread, cursor, parent, err = add_callchain_ip(thread, cursor, parent,
root_al, &cpumode, ip, root_al, &cpumode, ip,
false, NULL, 0, 0, 0); false, NULL, NULL, 0);
if (err) if (err)
return (err < 0) ? err : 0; return (err < 0) ? err : 0;
......
...@@ -310,7 +310,7 @@ static struct perf_evsel * ...@@ -310,7 +310,7 @@ static struct perf_evsel *
__add_event(struct list_head *list, int *idx, __add_event(struct list_head *list, int *idx,
struct perf_event_attr *attr, struct perf_event_attr *attr,
char *name, struct cpu_map *cpus, char *name, struct cpu_map *cpus,
struct list_head *config_terms) struct list_head *config_terms, bool auto_merge_stats)
{ {
struct perf_evsel *evsel; struct perf_evsel *evsel;
...@@ -324,6 +324,7 @@ __add_event(struct list_head *list, int *idx, ...@@ -324,6 +324,7 @@ __add_event(struct list_head *list, int *idx,
evsel->cpus = cpu_map__get(cpus); evsel->cpus = cpu_map__get(cpus);
evsel->own_cpus = cpu_map__get(cpus); evsel->own_cpus = cpu_map__get(cpus);
evsel->system_wide = !!cpus; evsel->system_wide = !!cpus;
evsel->auto_merge_stats = auto_merge_stats;
if (name) if (name)
evsel->name = strdup(name); evsel->name = strdup(name);
...@@ -339,7 +340,7 @@ static int add_event(struct list_head *list, int *idx, ...@@ -339,7 +340,7 @@ static int add_event(struct list_head *list, int *idx,
struct perf_event_attr *attr, char *name, struct perf_event_attr *attr, char *name,
struct list_head *config_terms) struct list_head *config_terms)
{ {
return __add_event(list, idx, attr, name, NULL, config_terms) ? 0 : -ENOMEM; return __add_event(list, idx, attr, name, NULL, config_terms, false) ? 0 : -ENOMEM;
} }
static int parse_aliases(char *str, const char *names[][PERF_EVSEL__MAX_ALIASES], int size) static int parse_aliases(char *str, const char *names[][PERF_EVSEL__MAX_ALIASES], int size)
...@@ -1209,9 +1210,9 @@ int parse_events_add_numeric(struct parse_events_state *parse_state, ...@@ -1209,9 +1210,9 @@ int parse_events_add_numeric(struct parse_events_state *parse_state,
get_config_name(head_config), &config_terms); get_config_name(head_config), &config_terms);
} }
int parse_events_add_pmu(struct parse_events_state *parse_state, static int __parse_events_add_pmu(struct parse_events_state *parse_state,
struct list_head *list, char *name, struct list_head *list, char *name,
struct list_head *head_config) struct list_head *head_config, bool auto_merge_stats)
{ {
struct perf_event_attr attr; struct perf_event_attr attr;
struct perf_pmu_info info; struct perf_pmu_info info;
...@@ -1232,7 +1233,7 @@ int parse_events_add_pmu(struct parse_events_state *parse_state, ...@@ -1232,7 +1233,7 @@ int parse_events_add_pmu(struct parse_events_state *parse_state,
if (!head_config) { if (!head_config) {
attr.type = pmu->type; attr.type = pmu->type;
evsel = __add_event(list, &parse_state->idx, &attr, NULL, pmu->cpus, NULL); evsel = __add_event(list, &parse_state->idx, &attr, NULL, pmu->cpus, NULL, auto_merge_stats);
return evsel ? 0 : -ENOMEM; return evsel ? 0 : -ENOMEM;
} }
...@@ -1254,7 +1255,7 @@ int parse_events_add_pmu(struct parse_events_state *parse_state, ...@@ -1254,7 +1255,7 @@ int parse_events_add_pmu(struct parse_events_state *parse_state,
evsel = __add_event(list, &parse_state->idx, &attr, evsel = __add_event(list, &parse_state->idx, &attr,
get_config_name(head_config), pmu->cpus, get_config_name(head_config), pmu->cpus,
&config_terms); &config_terms, auto_merge_stats);
if (evsel) { if (evsel) {
evsel->unit = info.unit; evsel->unit = info.unit;
evsel->scale = info.scale; evsel->scale = info.scale;
...@@ -1267,6 +1268,13 @@ int parse_events_add_pmu(struct parse_events_state *parse_state, ...@@ -1267,6 +1268,13 @@ int parse_events_add_pmu(struct parse_events_state *parse_state,
return evsel ? 0 : -ENOMEM; return evsel ? 0 : -ENOMEM;
} }
/*
 * Add an event for the PMU called @name to @list.
 *
 * Thin wrapper around __parse_events_add_pmu() that always passes false for
 * auto_merge_stats; parse_events_multi_pmu_add() calls the __ variant
 * directly with true instead.
 *
 * Returns whatever __parse_events_add_pmu() returns.
 */
int parse_events_add_pmu(struct parse_events_state *parse_state,
struct list_head *list, char *name,
struct list_head *head_config)
{
return __parse_events_add_pmu(parse_state, list, name, head_config, false);
}
int parse_events_multi_pmu_add(struct parse_events_state *parse_state, int parse_events_multi_pmu_add(struct parse_events_state *parse_state,
char *str, struct list_head **listp) char *str, struct list_head **listp)
{ {
...@@ -1296,8 +1304,8 @@ int parse_events_multi_pmu_add(struct parse_events_state *parse_state, ...@@ -1296,8 +1304,8 @@ int parse_events_multi_pmu_add(struct parse_events_state *parse_state,
return -1; return -1;
list_add_tail(&term->list, head); list_add_tail(&term->list, head);
if (!parse_events_add_pmu(parse_state, list, if (!__parse_events_add_pmu(parse_state, list,
pmu->name, head)) { pmu->name, head, true)) {
pr_debug("%s -> %s/%s/\n", str, pr_debug("%s -> %s/%s/\n", str,
pmu->name, alias->str); pmu->name, alias->str);
ok++; ok++;
......
...@@ -1120,6 +1120,9 @@ static void dump_sample(struct perf_evsel *evsel, union perf_event *event, ...@@ -1120,6 +1120,9 @@ static void dump_sample(struct perf_evsel *evsel, union perf_event *event,
if (sample_type & PERF_SAMPLE_DATA_SRC) if (sample_type & PERF_SAMPLE_DATA_SRC)
printf(" . data_src: 0x%"PRIx64"\n", sample->data_src); printf(" . data_src: 0x%"PRIx64"\n", sample->data_src);
if (sample_type & PERF_SAMPLE_PHYS_ADDR)
printf(" .. phys_addr: 0x%"PRIx64"\n", sample->phys_addr);
if (sample_type & PERF_SAMPLE_TRANSACTION) if (sample_type & PERF_SAMPLE_TRANSACTION)
printf("... transaction: %" PRIx64 "\n", sample->transaction); printf("... transaction: %" PRIx64 "\n", sample->transaction);
......
...@@ -1315,6 +1315,47 @@ struct sort_entry sort_mem_dcacheline = { ...@@ -1315,6 +1315,47 @@ struct sort_entry sort_mem_dcacheline = {
.se_width_idx = HISTC_MEM_DCACHELINE, .se_width_idx = HISTC_MEM_DCACHELINE,
}; };
/*
 * Compare two hist entries by the physical address of the sampled data.
 *
 * An entry that has no mem_info is treated as having physical address 0.
 *
 * Returns a positive value when right's address is above left's, a negative
 * value when it is below, and 0 when they are equal.  The comparison is done
 * explicitly rather than as (int64_t)(r - l): that subtraction yields the
 * wrong sign once the two addresses differ by 2^63 or more.
 */
static int64_t
sort__phys_daddr_cmp(struct hist_entry *left, struct hist_entry *right)
{
	uint64_t l = 0, r = 0;

	if (left->mem_info)
		l = left->mem_info->daddr.phys_addr;
	if (right->mem_info)
		r = right->mem_info->daddr.phys_addr;

	return (int64_t)(r > l) - (int64_t)(r < l);
}
/*
 * Format the "Data Physical Address" column for @he into @bf.
 *
 * Output is "[<level>] 0x<addr>", with the address zero-padded to
 * BITS_PER_LONG / 4 hex digits, then space-padded on the right up to @width
 * columns and cut off at @width if it came out longer.
 *
 * @he:    entry to print; an entry without mem_info prints address 0,
 *         matching how sort__phys_daddr_cmp() treats such entries.
 * @bf:    destination buffer.
 * @size:  size of @bf in bytes.
 * @width: column width.
 *
 * Always returns @width.
 */
static int hist_entry__phys_daddr_snprintf(struct hist_entry *he, char *bf,
					   size_t size, unsigned int width)
{
	uint64_t addr = 0;
	size_t ret = 0;
	/* printf's '*' precision/width arguments must be of type int. */
	int len = BITS_PER_LONG / 4;

	if (he->mem_info)
		addr = he->mem_info->daddr.phys_addr;

	ret += repsep_snprintf(bf + ret, size - ret, "[%c] ", he->level);

	/* %llx expects unsigned long long, which uint64_t need not be. */
	ret += repsep_snprintf(bf + ret, size - ret, "%-#.*llx", len,
			       (unsigned long long)addr);

	/*
	 * Pad only when there is room left: width - ret would wrap around
	 * if the text is already wider than the column.
	 */
	if (ret < width)
		ret += repsep_snprintf(bf + ret, size - ret, "%-*s",
				       (int)(width - ret), "");

	if (ret > width)
		bf[width] = '\0';

	return width;
}
/*
 * Sort entry for the data physical address: compares entries with
 * sort__phys_daddr_cmp() and prints them with
 * hist_entry__phys_daddr_snprintf() under the "Data Physical Address" header.
 */
struct sort_entry sort_mem_phys_daddr = {
.se_header = "Data Physical Address",
.se_cmp = sort__phys_daddr_cmp,
.se_snprintf = hist_entry__phys_daddr_snprintf,
.se_width_idx = HISTC_MEM_PHYS_DADDR,
};
static int64_t static int64_t
sort__abort_cmp(struct hist_entry *left, struct hist_entry *right) sort__abort_cmp(struct hist_entry *left, struct hist_entry *right)
{ {
...@@ -1547,6 +1588,7 @@ static struct sort_dimension memory_sort_dimensions[] = { ...@@ -1547,6 +1588,7 @@ static struct sort_dimension memory_sort_dimensions[] = {
DIM(SORT_MEM_LVL, "mem", sort_mem_lvl), DIM(SORT_MEM_LVL, "mem", sort_mem_lvl),
DIM(SORT_MEM_SNOOP, "snoop", sort_mem_snoop), DIM(SORT_MEM_SNOOP, "snoop", sort_mem_snoop),
DIM(SORT_MEM_DCACHELINE, "dcacheline", sort_mem_dcacheline), DIM(SORT_MEM_DCACHELINE, "dcacheline", sort_mem_dcacheline),
DIM(SORT_MEM_PHYS_DADDR, "phys_daddr", sort_mem_phys_daddr),
}; };
#undef DIM #undef DIM
......
...@@ -245,6 +245,7 @@ enum sort_type { ...@@ -245,6 +245,7 @@ enum sort_type {
SORT_MEM_SNOOP, SORT_MEM_SNOOP,
SORT_MEM_DCACHELINE, SORT_MEM_DCACHELINE,
SORT_MEM_IADDR_SYMBOL, SORT_MEM_IADDR_SYMBOL,
SORT_MEM_PHYS_DADDR,
}; };
/* /*
......
...@@ -186,6 +186,7 @@ struct addr_map_symbol { ...@@ -186,6 +186,7 @@ struct addr_map_symbol {
struct symbol *sym; struct symbol *sym;
u64 addr; u64 addr;
u64 al_addr; u64 al_addr;
u64 phys_addr;
}; };
struct branch_info { struct branch_info {
......
...@@ -19,6 +19,7 @@ ...@@ -19,6 +19,7 @@
#ifdef HAVE_SYSCALL_TABLE #ifdef HAVE_SYSCALL_TABLE
#include <linux/compiler.h> #include <linux/compiler.h>
#include <string.h> #include <string.h>
#include "string2.h"
#include "util.h" #include "util.h"
#if defined(__x86_64__) #if defined(__x86_64__)
...@@ -105,6 +106,27 @@ int syscalltbl__id(struct syscalltbl *tbl, const char *name) ...@@ -105,6 +106,27 @@ int syscalltbl__id(struct syscalltbl *tbl, const char *name)
return sc ? sc->id : -1; return sc ? sc->id : -1;
} }
/*
 * Find the next syscall table entry, after position *idx, whose name matches
 * the glob pattern @syscall_glob.
 *
 * @tbl:          syscall table to scan.
 * @syscall_glob: glob pattern handed to strglobmatch().
 * @idx:          in: position of the previous match (scanning resumes at
 *                *idx + 1); out: position of the new match.  Left untouched
 *                when nothing matches.
 *
 * Returns the id of the matching entry, or -1 when no further entry matches.
 */
int syscalltbl__strglobmatch_next(struct syscalltbl *tbl, const char *syscall_glob, int *idx)
{
	struct syscall *table = tbl->syscalls.entries;
	int pos = *idx;

	while (++pos < tbl->syscalls.nr_entries) {
		if (!strglobmatch(table[pos].name, syscall_glob))
			continue;

		*idx = pos;
		return table[pos].id;
	}

	return -1;
}
/*
 * Find the first syscall table entry whose name matches @syscall_glob.
 *
 * Resets *idx to -1 so that syscalltbl__strglobmatch_next() starts scanning
 * at entry 0, then returns whatever that call returns (the matching id, or
 * -1 when nothing matches).
 */
int syscalltbl__strglobmatch_first(struct syscalltbl *tbl, const char *syscall_glob, int *idx)
{
*idx = -1;
return syscalltbl__strglobmatch_next(tbl, syscall_glob, idx);
}
#else /* HAVE_SYSCALL_TABLE */ #else /* HAVE_SYSCALL_TABLE */
#include <libaudit.h> #include <libaudit.h>
...@@ -131,4 +153,15 @@ int syscalltbl__id(struct syscalltbl *tbl, const char *name) ...@@ -131,4 +153,15 @@ int syscalltbl__id(struct syscalltbl *tbl, const char *name)
{ {
return audit_name_to_syscall(name, tbl->audit_machine); return audit_name_to_syscall(name, tbl->audit_machine);
} }
/*
 * Variant built when HAVE_SYSCALL_TABLE is not defined: no glob matching is
 * performed here, so every argument is ignored and -1 (no match) is always
 * returned.
 */
int syscalltbl__strglobmatch_next(struct syscalltbl *tbl __maybe_unused,
const char *syscall_glob __maybe_unused, int *idx __maybe_unused)
{
return -1;
}
/*
 * Variant built when HAVE_SYSCALL_TABLE is not defined.
 *
 * Resets *idx to -1, exactly like the table-backed implementation does, so
 * the out-parameter has the same well-defined value with either variant, and
 * then defers to syscalltbl__strglobmatch_next().
 *
 * Returns whatever syscalltbl__strglobmatch_next() returns.
 */
int syscalltbl__strglobmatch_first(struct syscalltbl *tbl, const char *syscall_glob, int *idx)
{
	*idx = -1;
	return syscalltbl__strglobmatch_next(tbl, syscall_glob, idx);
}
#endif /* HAVE_SYSCALL_TABLE */ #endif /* HAVE_SYSCALL_TABLE */
...@@ -17,4 +17,7 @@ void syscalltbl__delete(struct syscalltbl *tbl); ...@@ -17,4 +17,7 @@ void syscalltbl__delete(struct syscalltbl *tbl);
const char *syscalltbl__name(const struct syscalltbl *tbl, int id); const char *syscalltbl__name(const struct syscalltbl *tbl, int id);
int syscalltbl__id(struct syscalltbl *tbl, const char *name); int syscalltbl__id(struct syscalltbl *tbl, const char *name);
int syscalltbl__strglobmatch_first(struct syscalltbl *tbl, const char *syscall_glob, int *idx);
int syscalltbl__strglobmatch_next(struct syscalltbl *tbl, const char *syscall_glob, int *idx);
#endif /* __PERF_SYSCALLTBL_H */ #endif /* __PERF_SYSCALLTBL_H */
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment