Commit 53f3feeb authored by Ingo Molnar's avatar Ingo Molnar

Merge tag 'perf-core-for-mingo-5.6-20200106' of...

Merge tag 'perf-core-for-mingo-5.6-20200106' of git://git.kernel.org/pub/scm/linux/kernel/git/acme/linux into perf/core

Pull perf/core improvements and fixes from Arnaldo Carvalho de Melo:

perf record:

  Alexey Budankov:

  - Adapt affinity for machines with #CPUs > 1K to overcome current 1024 CPUs
    mask size limitation of cpu_set_t type.

perf report/top TUI:

  Arnaldo Carvalho de Melo:

  - Make ENTER consistently present the pop up menu with and without call
    chains, to eliminate confusion. The menu continues available at all times
    use 'm' and '+' can be used to toggle just one call chain level, 'e' for all
    the call chains for a top level histogram entry and 'E' to expand all call
    chains in all top level entries. Extra info about these options was added to
    the pop up menu entries. Pressing 'k' serves as special hotkey to go straight
    to the main vmlinux entries, to avoid having to press enter and then select
    "Zoom into the kernel DSO".

perf sched timehist:

  David Ahern:

  - Add support for filtering on CPU.

perf tests:

  Arnaldo Carvalho de Melo:

  - Show expected versus obtained values in bp_signal test.

libperf:

  Jiri Olsa:

  - Move to tools/lib/perf.

  - Add man pages.

libapi:

  Andrey Zhizhikin:

  - Fix gcc9 stringop-truncation compilation error.

tools lib:

  Vitaly Chikunov:

  - Fix builds when glibc contains strlcpy(), which is the case for ALT Linux.
Signed-off-by: default avatarArnaldo Carvalho de Melo <acme@redhat.com>
Signed-off-by: default avatarIngo Molnar <mingo@kernel.org>
parents 1f676247 6c4798d3
......@@ -15,6 +15,8 @@ void __bitmap_or(unsigned long *dst, const unsigned long *bitmap1,
const unsigned long *bitmap2, int bits);
int __bitmap_and(unsigned long *dst, const unsigned long *bitmap1,
const unsigned long *bitmap2, unsigned int bits);
int __bitmap_equal(const unsigned long *bitmap1,
const unsigned long *bitmap2, unsigned int bits);
void bitmap_clear(unsigned long *map, unsigned int start, int len);
#define BITMAP_FIRST_WORD_MASK(start) (~0UL << ((start) & (BITS_PER_LONG - 1)))
......@@ -123,6 +125,15 @@ static inline unsigned long *bitmap_alloc(int nbits)
return calloc(1, BITS_TO_LONGS(nbits) * sizeof(unsigned long));
}
/*
* bitmap_free - Free bitmap
* @bitmap: pointer to bitmap
*/
static inline void bitmap_free(unsigned long *bitmap)
{
free(bitmap);
}
/*
* bitmap_scnprintf - print bitmap list into buffer
* @bitmap: bitmap
......@@ -148,4 +159,23 @@ static inline int bitmap_and(unsigned long *dst, const unsigned long *src1,
return __bitmap_and(dst, src1, src2, nbits);
}
#ifdef __LITTLE_ENDIAN
#define BITMAP_MEM_ALIGNMENT 8
#else
#define BITMAP_MEM_ALIGNMENT (8 * sizeof(unsigned long))
#endif
#define BITMAP_MEM_MASK (BITMAP_MEM_ALIGNMENT - 1)
#define IS_ALIGNED(x, a) (((x) & ((typeof(x))(a) - 1)) == 0)
static inline int bitmap_equal(const unsigned long *src1,
const unsigned long *src2, unsigned int nbits)
{
if (small_const_nbits(nbits))
return !((*src1 ^ *src2) & BITMAP_LAST_WORD_MASK(nbits));
if (__builtin_constant_p(nbits & BITMAP_MEM_MASK) &&
IS_ALIGNED(nbits, BITMAP_MEM_ALIGNMENT))
return !memcmp(src1, src2, nbits / 8);
return __bitmap_equal(src1, src2, nbits);
}
#endif /* _PERF_BITOPS_H */
......@@ -17,7 +17,15 @@ int strtobool(const char *s, bool *res);
* However uClibc headers also define __GLIBC__ hence the hack below
*/
#if defined(__GLIBC__) && !defined(__UCLIBC__)
// pragma diagnostic was introduced in gcc 4.6
#if __GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 6)
#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wredundant-decls"
#endif
extern size_t strlcpy(char *dest, const char *src, size_t size);
#if __GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 6)
#pragma GCC diagnostic pop
#endif
#endif
char *str_error_r(int errnum, char *buf, size_t buflen);
......
......@@ -210,6 +210,7 @@ static bool fs__env_override(struct fs *fs)
size_t name_len = strlen(fs->name);
/* name + "_PATH" + '\0' */
char upper_name[name_len + 5 + 1];
memcpy(upper_name, fs->name, name_len);
mem_toupper(upper_name, name_len);
strcpy(&upper_name[name_len], "_PATH");
......@@ -219,7 +220,8 @@ static bool fs__env_override(struct fs *fs)
return false;
fs->found = true;
strncpy(fs->path, override_path, sizeof(fs->path));
strncpy(fs->path, override_path, sizeof(fs->path) - 1);
fs->path[sizeof(fs->path) - 1] = '\0';
return true;
}
......
......@@ -71,3 +71,18 @@ int __bitmap_and(unsigned long *dst, const unsigned long *bitmap1,
BITMAP_LAST_WORD_MASK(bits));
return result != 0;
}
int __bitmap_equal(const unsigned long *bitmap1,
const unsigned long *bitmap2, unsigned int bits)
{
unsigned int k, lim = bits/BITS_PER_LONG;
for (k = 0; k < lim; ++k)
if (bitmap1[k] != bitmap2[k])
return 0;
if (bits % BITS_PER_LONG)
if ((bitmap1[k] ^ bitmap2[k]) & BITMAP_LAST_WORD_MASK(bits))
return 0;
return 1;
}
# SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause)
# Most of this file is copied from tools/perf/Documentation/Makefile
include ../../../scripts/Makefile.include
include ../../../scripts/utilities.mak
MAN3_TXT = libperf.txt
MAN7_TXT = libperf-counting.txt libperf-sampling.txt
MAN_EX = examples/*.c
MAN_TXT = $(MAN3_TXT) $(MAN7_TXT)
_MAN_XML = $(patsubst %.txt,%.xml,$(MAN_TXT))
_MAN_HTML = $(patsubst %.txt,%.html,$(MAN_TXT))
_MAN_3 = $(patsubst %.txt,%.3,$(MAN3_TXT))
_MAN_7 = $(patsubst %.txt,%.7,$(MAN7_TXT))
MAN_XML = $(addprefix $(OUTPUT),$(_MAN_XML))
MAN_HTML = $(addprefix $(OUTPUT),$(_MAN_HTML))
MAN_3 = $(addprefix $(OUTPUT),$(_MAN_3))
MAN_7 = $(addprefix $(OUTPUT),$(_MAN_7))
MAN_X = $(MAN_3) $(MAN_7)
# Make the path relative to DESTDIR, not prefix
ifndef DESTDIR
prefix ?=$(HOME)
endif
mandir ?= $(prefix)/share/man
man3dir = $(mandir)/man3
man7dir = $(mandir)/man7
docdir ?= $(prefix)/share/doc/libperf
htmldir = $(docdir)/html
exdir = $(docdir)/examples
ASCIIDOC = asciidoc
ASCIIDOC_EXTRA = --unsafe -f asciidoc.conf
ASCIIDOC_HTML = xhtml11
MANPAGE_XSL = manpage-normal.xsl
XMLTO_EXTRA =
XMLTO =xmlto
INSTALL ?= install
RM ?= rm -f
# For asciidoc ...
# -7.1.2, no extra settings are needed.
# 8.0-, set ASCIIDOC8.
#
# For docbook-xsl ...
# -1.68.1, set ASCIIDOC_NO_ROFF? (based on changelog from 1.73.0)
# 1.69.0, no extra settings are needed?
# 1.69.1-1.71.0, set DOCBOOK_SUPPRESS_SP?
# 1.71.1, no extra settings are needed?
# 1.72.0, set DOCBOOK_XSL_172.
# 1.73.0-, set ASCIIDOC_NO_ROFF
# If you had been using DOCBOOK_XSL_172 in an attempt to get rid
# of 'the ".ft C" problem' in your generated manpages, and you
# instead ended up with weird characters around callouts, try
# using ASCIIDOC_NO_ROFF instead (it works fine with ASCIIDOC8).
ifdef ASCIIDOC8
ASCIIDOC_EXTRA += -a asciidoc7compatible
endif
ifdef DOCBOOK_XSL_172
ASCIIDOC_EXTRA += -a libperf-asciidoc-no-roff
MANPAGE_XSL = manpage-1.72.xsl
else
ifdef ASCIIDOC_NO_ROFF
# docbook-xsl after 1.72 needs the regular XSL, but will not
# pass-thru raw roff codes from asciidoc.conf, so turn them off.
ASCIIDOC_EXTRA += -a libperf-asciidoc-no-roff
endif
endif
ifdef MAN_BOLD_LITERAL
XMLTO_EXTRA += -m manpage-bold-literal.xsl
endif
ifdef DOCBOOK_SUPPRESS_SP
XMLTO_EXTRA += -m manpage-suppress-sp.xsl
endif
DESTDIR ?=
DESTDIR_SQ = '$(subst ','\'',$(DESTDIR))'
export DESTDIR DESTDIR_SQ
# Please note that there is a minor bug in asciidoc.
# The version after 6.0.3 _will_ include the patch found here:
# http://marc.theaimsgroup.com/?l=libtraceevent&m=111558757202243&w=2
#
# Until that version is released you may have to apply the patch
# yourself - yes, all 6 characters of it!
QUIET_SUBDIR0 = +$(MAKE) -C # space to separate -C and subdir
QUIET_SUBDIR1 =
ifneq ($(findstring $(MAKEFLAGS),w),w)
PRINT_DIR = --no-print-directory
else # "make -w"
NO_SUBDIR = :
endif
ifneq ($(findstring $(MAKEFLAGS),s),s)
ifneq ($(V),1)
QUIET_ASCIIDOC = @echo ' ASCIIDOC '$@;
QUIET_XMLTO = @echo ' XMLTO '$@;
endif
endif
all: $(MAN_X) $(MAN_HTML)
$(MAN_HTML) $(MAN_X): asciidoc.conf
install-man: all
$(call QUIET_INSTALL, man) \
$(INSTALL) -d -m 755 $(DESTDIR)$(man3dir); \
$(INSTALL) -m 644 $(MAN_3) $(DESTDIR)$(man3dir); \
$(INSTALL) -d -m 755 $(DESTDIR)$(man7dir); \
$(INSTALL) -m 644 $(MAN_7) $(DESTDIR)$(man7dir);
install-html:
$(call QUIET_INSTALL, html) \
$(INSTALL) -d -m 755 $(DESTDIR)$(htmldir); \
$(INSTALL) -m 644 $(MAN_HTML) $(DESTDIR)$(htmldir); \
install-examples:
$(call QUIET_INSTALL, examples) \
$(INSTALL) -d -m 755 $(DESTDIR)$(exdir); \
$(INSTALL) -m 644 $(MAN_EX) $(DESTDIR)$(exdir); \
CLEAN_FILES = \
$(MAN_XML) $(addsuffix +,$(MAN_XML)) \
$(MAN_HTML) $(addsuffix +,$(MAN_HTML)) \
$(MAN_X)
clean:
$(call QUIET_CLEAN, Documentation) $(RM) $(CLEAN_FILES)
$(MAN_3): $(OUTPUT)%.3: %.xml
$(QUIET_XMLTO)$(XMLTO) -o $(OUTPUT). -m $(MANPAGE_XSL) $(XMLTO_EXTRA) man $<
$(MAN_7): $(OUTPUT)%.7: %.xml
$(QUIET_XMLTO)$(XMLTO) -o $(OUTPUT). -m $(MANPAGE_XSL) $(XMLTO_EXTRA) man $<
$(MAN_XML): $(OUTPUT)%.xml: %.txt
$(QUIET_ASCIIDOC)$(ASCIIDOC) -b docbook -d manpage \
$(ASCIIDOC_EXTRA) -alibperf_version=$(EVENT_PARSE_VERSION) -o $@+ $< && \
mv $@+ $@
$(MAN_HTML): $(OUTPUT)%.html: %.txt
$(QUIET_ASCIIDOC)$(ASCIIDOC) -b $(ASCIIDOC_HTML) -d manpage \
$(ASCIIDOC_EXTRA) -aperf_version=$(EVENT_PARSE_VERSION) -o $@+ $< && \
mv $@+ $@
## linktep: macro
#
# Usage: linktep:command[manpage-section]
#
# Note, {0} is the manpage section, while {target} is the command.
#
# Show TEP link as: <command>(<section>); if section is defined, else just show
# the command.
[macros]
(?su)[\\]?(?P<name>linktep):(?P<target>\S*?)\[(?P<attrlist>.*?)\]=
[attributes]
asterisk=&#42;
plus=&#43;
caret=&#94;
startsb=&#91;
endsb=&#93;
tilde=&#126;
ifdef::backend-docbook[]
[linktep-inlinemacro]
{0%{target}}
{0#<citerefentry>}
{0#<refentrytitle>{target}</refentrytitle><manvolnum>{0}</manvolnum>}
{0#</citerefentry>}
endif::backend-docbook[]
ifdef::backend-docbook[]
ifndef::tep-asciidoc-no-roff[]
# "unbreak" docbook-xsl v1.68 for manpages. v1.69 works with or without this.
# v1.72 breaks with this because it replaces dots not in roff requests.
[listingblock]
<example><title>{title}</title>
<literallayout>
ifdef::doctype-manpage[]
&#10;.ft C&#10;
endif::doctype-manpage[]
|
ifdef::doctype-manpage[]
&#10;.ft&#10;
endif::doctype-manpage[]
</literallayout>
{title#}</example>
endif::tep-asciidoc-no-roff[]
ifdef::tep-asciidoc-no-roff[]
ifdef::doctype-manpage[]
# The following two small workarounds insert a simple paragraph after screen
[listingblock]
<example><title>{title}</title>
<literallayout>
|
</literallayout><simpara></simpara>
{title#}</example>
[verseblock]
<formalpara{id? id="{id}"}><title>{title}</title><para>
{title%}<literallayout{id? id="{id}"}>
{title#}<literallayout>
|
</literallayout>
{title#}</para></formalpara>
{title%}<simpara></simpara>
endif::doctype-manpage[]
endif::tep-asciidoc-no-roff[]
endif::backend-docbook[]
ifdef::doctype-manpage[]
ifdef::backend-docbook[]
[header]
template::[header-declarations]
<refentry>
<refmeta>
<refentrytitle>{mantitle}</refentrytitle>
<manvolnum>{manvolnum}</manvolnum>
<refmiscinfo class="source">libperf</refmiscinfo>
<refmiscinfo class="version">{libperf_version}</refmiscinfo>
<refmiscinfo class="manual">libperf Manual</refmiscinfo>
</refmeta>
<refnamediv>
<refname>{manname1}</refname>
<refname>{manname2}</refname>
<refname>{manname3}</refname>
<refname>{manname4}</refname>
<refname>{manname5}</refname>
<refname>{manname6}</refname>
<refname>{manname7}</refname>
<refname>{manname8}</refname>
<refname>{manname9}</refname>
<refname>{manname10}</refname>
<refname>{manname11}</refname>
<refname>{manname12}</refname>
<refname>{manname13}</refname>
<refname>{manname14}</refname>
<refname>{manname15}</refname>
<refname>{manname16}</refname>
<refname>{manname17}</refname>
<refname>{manname18}</refname>
<refname>{manname19}</refname>
<refname>{manname20}</refname>
<refname>{manname21}</refname>
<refname>{manname22}</refname>
<refname>{manname23}</refname>
<refname>{manname24}</refname>
<refname>{manname25}</refname>
<refname>{manname26}</refname>
<refname>{manname27}</refname>
<refname>{manname28}</refname>
<refname>{manname29}</refname>
<refname>{manname30}</refname>
<refpurpose>{manpurpose}</refpurpose>
</refnamediv>
endif::backend-docbook[]
endif::doctype-manpage[]
ifdef::backend-xhtml11[]
[linktep-inlinemacro]
<a href="{target}.html">{target}{0?({0})}</a>
endif::backend-xhtml11[]
#include <linux/perf_event.h>
#include <perf/evlist.h>
#include <perf/evsel.h>
#include <perf/cpumap.h>
#include <perf/threadmap.h>
#include <perf/mmap.h>
#include <perf/core.h>
#include <perf/event.h>
#include <stdio.h>
#include <unistd.h>
static int libperf_print(enum libperf_print_level level,
const char *fmt, va_list ap)
{
return vfprintf(stderr, fmt, ap);
}
union u64_swap {
__u64 val64;
__u32 val32[2];
};
int main(int argc, char **argv)
{
struct perf_evlist *evlist;
struct perf_evsel *evsel;
struct perf_mmap *map;
struct perf_cpu_map *cpus;
struct perf_event_attr attr = {
.type = PERF_TYPE_HARDWARE,
.config = PERF_COUNT_HW_CPU_CYCLES,
.disabled = 1,
.freq = 1,
.sample_freq = 10,
.sample_type = PERF_SAMPLE_IP|PERF_SAMPLE_TID|PERF_SAMPLE_CPU|PERF_SAMPLE_PERIOD,
};
int err = -1;
union perf_event *event;
libperf_init(libperf_print);
cpus = perf_cpu_map__new(NULL);
if (!cpus) {
fprintf(stderr, "failed to create cpus\n");
return -1;
}
evlist = perf_evlist__new();
if (!evlist) {
fprintf(stderr, "failed to create evlist\n");
goto out_cpus;
}
evsel = perf_evsel__new(&attr);
if (!evsel) {
fprintf(stderr, "failed to create cycles\n");
goto out_cpus;
}
perf_evlist__add(evlist, evsel);
perf_evlist__set_maps(evlist, cpus, NULL);
err = perf_evlist__open(evlist);
if (err) {
fprintf(stderr, "failed to open evlist\n");
goto out_evlist;
}
err = perf_evlist__mmap(evlist, 4);
if (err) {
fprintf(stderr, "failed to mmap evlist\n");
goto out_evlist;
}
perf_evlist__enable(evlist);
sleep(3);
perf_evlist__disable(evlist);
perf_evlist__for_each_mmap(evlist, map, false) {
if (perf_mmap__read_init(map) < 0)
continue;
while ((event = perf_mmap__read_event(map)) != NULL) {
int cpu, pid, tid;
__u64 ip, period, *array;
union u64_swap u;
array = event->sample.array;
ip = *array;
array++;
u.val64 = *array;
pid = u.val32[0];
tid = u.val32[1];
array++;
u.val64 = *array;
cpu = u.val32[0];
array++;
period = *array;
fprintf(stdout, "cpu %3d, pid %6d, tid %6d, ip %20llx, period %20llu\n",
cpu, pid, tid, ip, period);
perf_mmap__consume(map);
}
perf_mmap__read_done(map);
}
out_evlist:
perf_evlist__delete(evlist);
out_cpus:
perf_cpu_map__put(cpus);
return err;
}
libperf-counting(7)
===================
NAME
----
libperf-counting - counting interface
DESCRIPTION
-----------
The counting interface provides API to meassure and get count for specific perf events.
The following test tries to explain count on `counting.c` example.
It is by no means complete guide to counting, but shows libperf basic API for counting.
The `counting.c` comes with libbperf package and can be compiled and run like:
[source,bash]
--
$ gcc -o counting counting.c -lperf
$ sudo ./counting
count 176792, enabled 176944, run 176944
count 176242, enabled 176242, run 176242
--
It requires root access, because of the `PERF_COUNT_SW_CPU_CLOCK` event,
which is available only for root.
The `counting.c` example monitors two events on the current process and displays their count, in a nutshel it:
* creates events
* adds them to the event list
* opens and enables events through the event list
* does some workload
* disables events
* reads and displays event counts
* destroys the event list
The first thing you need to do before using libperf is to call init function:
[source,c]
--
8 static int libperf_print(enum libperf_print_level level,
9 const char *fmt, va_list ap)
10 {
11 return vfprintf(stderr, fmt, ap);
12 }
14 int main(int argc, char **argv)
15 {
...
35 libperf_init(libperf_print);
--
It will setup the library and sets function for debug output from library.
The `libperf_print` callback will receive any message with its debug level,
defined as:
[source,c]
--
enum libperf_print_level {
LIBPERF_ERR,
LIBPERF_WARN,
LIBPERF_INFO,
LIBPERF_DEBUG,
LIBPERF_DEBUG2,
LIBPERF_DEBUG3,
};
--
Once the setup is complete we start by defining specific events using the `struct perf_event_attr`.
We create software events for cpu and task:
[source,c]
--
20 struct perf_event_attr attr1 = {
21 .type = PERF_TYPE_SOFTWARE,
22 .config = PERF_COUNT_SW_CPU_CLOCK,
23 .read_format = PERF_FORMAT_TOTAL_TIME_ENABLED|PERF_FORMAT_TOTAL_TIME_RUNNING,
24 .disabled = 1,
25 };
26 struct perf_event_attr attr2 = {
27 .type = PERF_TYPE_SOFTWARE,
28 .config = PERF_COUNT_SW_TASK_CLOCK,
29 .read_format = PERF_FORMAT_TOTAL_TIME_ENABLED|PERF_FORMAT_TOTAL_TIME_RUNNING,
30 .disabled = 1,
31 };
--
The `read_format` setup tells perf to include timing details together with each count.
Next step is to prepare threads map.
In this case we will monitor current process, so we create threads map with single pid (0):
[source,c]
--
37 threads = perf_thread_map__new_dummy();
38 if (!threads) {
39 fprintf(stderr, "failed to create threads\n");
40 return -1;
41 }
42
43 perf_thread_map__set_pid(threads, 0, 0);
--
Now we create libperf's event list, which will serve as holder for the events we want:
[source,c]
--
45 evlist = perf_evlist__new();
46 if (!evlist) {
47 fprintf(stderr, "failed to create evlist\n");
48 goto out_threads;
49 }
--
We create libperf's events for the attributes we defined earlier and add them to the list:
[source,c]
--
51 evsel = perf_evsel__new(&attr1);
52 if (!evsel) {
53 fprintf(stderr, "failed to create evsel1\n");
54 goto out_evlist;
55 }
56
57 perf_evlist__add(evlist, evsel);
58
59 evsel = perf_evsel__new(&attr2);
60 if (!evsel) {
61 fprintf(stderr, "failed to create evsel2\n");
62 goto out_evlist;
63 }
64
65 perf_evlist__add(evlist, evsel);
--
Configure event list with the thread map and open events:
[source,c]
--
67 perf_evlist__set_maps(evlist, NULL, threads);
68
69 err = perf_evlist__open(evlist);
70 if (err) {
71 fprintf(stderr, "failed to open evsel\n");
72 goto out_evlist;
73 }
--
Both events are created as disabled (note the `disabled = 1` assignment above),
so we need to enable the whole list explicitely (both events).
From this moment events are counting and we can do our workload.
When we are done we disable the events list.
[source,c]
--
75 perf_evlist__enable(evlist);
76
77 while (count--);
78
79 perf_evlist__disable(evlist);
--
Now we need to get the counts from events, following code iterates throught the events list and read counts:
[source,c]
--
81 perf_evlist__for_each_evsel(evlist, evsel) {
82 perf_evsel__read(evsel, 0, 0, &counts);
83 fprintf(stdout, "count %llu, enabled %llu, run %llu\n",
84 counts.val, counts.ena, counts.run);
85 }
--
And finaly cleanup.
We close the whole events list (both events) and remove it together with the threads map:
[source,c]
--
87 perf_evlist__close(evlist);
88
89 out_evlist:
90 perf_evlist__delete(evlist);
91 out_threads:
92 perf_thread_map__put(threads);
93 return err;
94 }
--
REPORTING BUGS
--------------
Report bugs to <linux-perf-users@vger.kernel.org>.
LICENSE
-------
libperf is Free Software licensed under the GNU LGPL 2.1
RESOURCES
---------
https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
SEE ALSO
--------
libperf(3), libperf-sampling(7)
libperf-sampling(7)
===================
NAME
----
libperf-sampling - sampling interface
DESCRIPTION
-----------
The sampling interface provides API to meassure and get count for specific perf events.
The following test tries to explain count on `sampling.c` example.
It is by no means complete guide to sampling, but shows libperf basic API for sampling.
The `sampling.c` comes with libbperf package and can be compiled and run like:
[source,bash]
--
$ gcc -o sampling sampling.c -lperf
$ sudo ./sampling
cpu 0, pid 0, tid 0, ip ffffffffad06c4e6, period 1
cpu 0, pid 4465, tid 4469, ip ffffffffad118748, period 18322959
cpu 0, pid 0, tid 0, ip ffffffffad115722, period 33544846
cpu 0, pid 4465, tid 4470, ip 7f84fe0cdad6, period 23687474
cpu 0, pid 0, tid 0, ip ffffffffad9e0349, period 34255790
cpu 0, pid 4465, tid 4469, ip ffffffffad136581, period 38664069
cpu 0, pid 0, tid 0, ip ffffffffad9e55e2, period 21922384
cpu 0, pid 4465, tid 4470, ip 7f84fe0ebebf, period 17655175
...
--
It requires root access, because it uses hardware cycles event.
The `sampling.c` example profiles/samples all CPUs with hardware cycles, in a nutshel it:
- creates events
- adds them to the event list
- opens and enables events through the event list
- sleeps for 3 seconds
- disables events
- reads and displays recorded samples
- destroys the event list
The first thing you need to do before using libperf is to call init function:
[source,c]
--
12 static int libperf_print(enum libperf_print_level level,
13 const char *fmt, va_list ap)
14 {
15 return vfprintf(stderr, fmt, ap);
16 }
23 int main(int argc, char **argv)
24 {
...
40 libperf_init(libperf_print);
--
It will setup the library and sets function for debug output from library.
The `libperf_print` callback will receive any message with its debug level,
defined as:
[source,c]
--
enum libperf_print_level {
LIBPERF_ERR,
LIBPERF_WARN,
LIBPERF_INFO,
LIBPERF_DEBUG,
LIBPERF_DEBUG2,
LIBPERF_DEBUG3,
};
--
Once the setup is complete we start by defining cycles event using the `struct perf_event_attr`:
[source,c]
--
29 struct perf_event_attr attr = {
30 .type = PERF_TYPE_HARDWARE,
31 .config = PERF_COUNT_HW_CPU_CYCLES,
32 .disabled = 1,
33 .freq = 1,
34 .sample_freq = 10,
35 .sample_type = PERF_SAMPLE_IP|PERF_SAMPLE_TID|PERF_SAMPLE_CPU|PERF_SAMPLE_PERIOD,
36 };
--
Next step is to prepare cpus map.
In this case we will monitor all the available CPUs:
[source,c]
--
42 cpus = perf_cpu_map__new(NULL);
43 if (!cpus) {
44 fprintf(stderr, "failed to create cpus\n");
45 return -1;
46 }
--
Now we create libperf's event list, which will serve as holder for the cycles event:
[source,c]
--
48 evlist = perf_evlist__new();
49 if (!evlist) {
50 fprintf(stderr, "failed to create evlist\n");
51 goto out_cpus;
52 }
--
We create libperf's event for the cycles attribute we defined earlier and add it to the list:
[source,c]
--
54 evsel = perf_evsel__new(&attr);
55 if (!evsel) {
56 fprintf(stderr, "failed to create cycles\n");
57 goto out_cpus;
58 }
59
60 perf_evlist__add(evlist, evsel);
--
Configure event list with the cpus map and open event:
[source,c]
--
62 perf_evlist__set_maps(evlist, cpus, NULL);
63
64 err = perf_evlist__open(evlist);
65 if (err) {
66 fprintf(stderr, "failed to open evlist\n");
67 goto out_evlist;
68 }
--
Once the events list is open, we can create memory maps AKA perf ring buffers:
[source,c]
--
70 err = perf_evlist__mmap(evlist, 4);
71 if (err) {
72 fprintf(stderr, "failed to mmap evlist\n");
73 goto out_evlist;
74 }
--
The event is created as disabled (note the `disabled = 1` assignment above),
so we need to enable the events list explicitely.
From this moment the cycles event is sampling.
We will sleep for 3 seconds while the ring buffers get data from all CPUs, then we disable the events list.
[source,c]
--
76 perf_evlist__enable(evlist);
77 sleep(3);
78 perf_evlist__disable(evlist);
--
Following code walks through the ring buffers and reads stored events/samples:
[source,c]
--
80 perf_evlist__for_each_mmap(evlist, map, false) {
81 if (perf_mmap__read_init(map) < 0)
82 continue;
83
84 while ((event = perf_mmap__read_event(map)) != NULL) {
/* process event */
108 perf_mmap__consume(map);
109 }
110 perf_mmap__read_done(map);
111 }
--
Each sample needs to get parsed:
[source,c]
--
85 int cpu, pid, tid;
86 __u64 ip, period, *array;
87 union u64_swap u;
88
89 array = event->sample.array;
90
91 ip = *array;
92 array++;
93
94 u.val64 = *array;
95 pid = u.val32[0];
96 tid = u.val32[1];
97 array++;
98
99 u.val64 = *array;
100 cpu = u.val32[0];
101 array++;
102
103 period = *array;
104
105 fprintf(stdout, "cpu %3d, pid %6d, tid %6d, ip %20llx, period %20llu\n",
106 cpu, pid, tid, ip, period);
--
And finaly cleanup.
We close the whole events list (both events) and remove it together with the threads map:
[source,c]
--
113 out_evlist:
114 perf_evlist__delete(evlist);
115 out_cpus:
116 perf_cpu_map__put(cpus);
117 return err;
118 }
--
REPORTING BUGS
--------------
Report bugs to <linux-perf-users@vger.kernel.org>.
LICENSE
-------
libperf is Free Software licensed under the GNU LGPL 2.1
RESOURCES
---------
https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
SEE ALSO
--------
libperf(3), libperf-counting(7)
libperf(3)
==========
NAME
----
libperf - Linux kernel perf event library
SYNOPSIS
--------
*Generic API:*
[source,c]
--
#include <perf/core.h>
enum libperf_print_level {
LIBPERF_ERR,
LIBPERF_WARN,
LIBPERF_INFO,
LIBPERF_DEBUG,
LIBPERF_DEBUG2,
LIBPERF_DEBUG3,
};
typedef int (*libperf_print_fn_t)(enum libperf_print_level level,
const char *, va_list ap);
void libperf_init(libperf_print_fn_t fn);
--
*API to handle cpu maps:*
[source,c]
--
#include <perf/cpumap.h>
struct perf_cpu_map;
struct perf_cpu_map *perf_cpu_map__dummy_new(void);
struct perf_cpu_map *perf_cpu_map__new(const char *cpu_list);
struct perf_cpu_map *perf_cpu_map__read(FILE *file);
struct perf_cpu_map *perf_cpu_map__get(struct perf_cpu_map *map);
struct perf_cpu_map *perf_cpu_map__merge(struct perf_cpu_map *orig,
struct perf_cpu_map *other);
void perf_cpu_map__put(struct perf_cpu_map *map);
int perf_cpu_map__cpu(const struct perf_cpu_map *cpus, int idx);
int perf_cpu_map__nr(const struct perf_cpu_map *cpus);
bool perf_cpu_map__empty(const struct perf_cpu_map *map);
int perf_cpu_map__max(struct perf_cpu_map *map);
#define perf_cpu_map__for_each_cpu(cpu, idx, cpus)
--
*API to handle thread maps:*
[source,c]
--
#include <perf/threadmap.h>
struct perf_thread_map;
struct perf_thread_map *perf_thread_map__new_dummy(void);
void perf_thread_map__set_pid(struct perf_thread_map *map, int thread, pid_t pid);
char *perf_thread_map__comm(struct perf_thread_map *map, int thread);
int perf_thread_map__nr(struct perf_thread_map *threads);
pid_t perf_thread_map__pid(struct perf_thread_map *map, int thread);
struct perf_thread_map *perf_thread_map__get(struct perf_thread_map *map);
void perf_thread_map__put(struct perf_thread_map *map);
--
*API to handle event lists:*
[source,c]
--
#include <perf/evlist.h>
struct perf_evlist;
void perf_evlist__add(struct perf_evlist *evlist,
struct perf_evsel *evsel);
void perf_evlist__remove(struct perf_evlist *evlist,
struct perf_evsel *evsel);
struct perf_evlist *perf_evlist__new(void);
void perf_evlist__delete(struct perf_evlist *evlist);
struct perf_evsel* perf_evlist__next(struct perf_evlist *evlist,
struct perf_evsel *evsel);
int perf_evlist__open(struct perf_evlist *evlist);
void perf_evlist__close(struct perf_evlist *evlist);
void perf_evlist__enable(struct perf_evlist *evlist);
void perf_evlist__disable(struct perf_evlist *evlist);
#define perf_evlist__for_each_evsel(evlist, pos)
void perf_evlist__set_maps(struct perf_evlist *evlist,
struct perf_cpu_map *cpus,
struct perf_thread_map *threads);
int perf_evlist__poll(struct perf_evlist *evlist, int timeout);
int perf_evlist__filter_pollfd(struct perf_evlist *evlist,
short revents_and_mask);
int perf_evlist__mmap(struct perf_evlist *evlist, int pages);
void perf_evlist__munmap(struct perf_evlist *evlist);
struct perf_mmap *perf_evlist__next_mmap(struct perf_evlist *evlist,
struct perf_mmap *map,
bool overwrite);
#define perf_evlist__for_each_mmap(evlist, pos, overwrite)
--
*API to handle events:*
[source,c]
--
#include <perf/evsel.h>*
struct perf_evsel;
struct perf_counts_values {
union {
struct {
uint64_t val;
uint64_t ena;
uint64_t run;
};
uint64_t values[3];
};
};
struct perf_evsel *perf_evsel__new(struct perf_event_attr *attr);
void perf_evsel__delete(struct perf_evsel *evsel);
int perf_evsel__open(struct perf_evsel *evsel, struct perf_cpu_map *cpus,
struct perf_thread_map *threads);
void perf_evsel__close(struct perf_evsel *evsel);
void perf_evsel__close_cpu(struct perf_evsel *evsel, int cpu);
int perf_evsel__read(struct perf_evsel *evsel, int cpu, int thread,
struct perf_counts_values *count);
int perf_evsel__enable(struct perf_evsel *evsel);
int perf_evsel__enable_cpu(struct perf_evsel *evsel, int cpu);
int perf_evsel__disable(struct perf_evsel *evsel);
int perf_evsel__disable_cpu(struct perf_evsel *evsel, int cpu);
struct perf_cpu_map *perf_evsel__cpus(struct perf_evsel *evsel);
struct perf_thread_map *perf_evsel__threads(struct perf_evsel *evsel);
struct perf_event_attr *perf_evsel__attr(struct perf_evsel *evsel);
--
*API to handle maps (perf ring buffers):*
[source,c]
--
#include <perf/mmap.h>
struct perf_mmap;
void perf_mmap__consume(struct perf_mmap *map);
int perf_mmap__read_init(struct perf_mmap *map);
void perf_mmap__read_done(struct perf_mmap *map);
union perf_event *perf_mmap__read_event(struct perf_mmap *map);
--
*Structures to access perf API events:*
[source,c]
--
#include <perf/event.h>
struct perf_record_mmap;
struct perf_record_mmap2;
struct perf_record_comm;
struct perf_record_namespaces;
struct perf_record_fork;
struct perf_record_lost;
struct perf_record_lost_samples;
struct perf_record_read;
struct perf_record_throttle;
struct perf_record_ksymbol;
struct perf_record_bpf_event;
struct perf_record_sample;
struct perf_record_switch;
struct perf_record_header_attr;
struct perf_record_record_cpu_map;
struct perf_record_cpu_map_data;
struct perf_record_cpu_map;
struct perf_record_event_update_cpus;
struct perf_record_event_update_scale;
struct perf_record_event_update;
struct perf_trace_event_type;
struct perf_record_header_event_type;
struct perf_record_header_tracing_data;
struct perf_record_header_build_id;
struct perf_record_id_index;
struct perf_record_auxtrace_info;
struct perf_record_auxtrace;
struct perf_record_auxtrace_error;
struct perf_record_aux;
struct perf_record_itrace_start;
struct perf_record_thread_map_entry;
struct perf_record_thread_map;
struct perf_record_stat_config_entry;
struct perf_record_stat_config;
struct perf_record_stat;
struct perf_record_stat_round;
struct perf_record_time_conv;
struct perf_record_header_feature;
struct perf_record_compressed;
--
DESCRIPTION
-----------
The libperf library provides an API to access the linux kernel perf
events subsystem.
Following objects are key to the libperf interface:
[horizontal]
struct perf_cpu_map:: Provides a cpu list abstraction.
struct perf_thread_map:: Provides a thread list abstraction.
struct perf_evsel:: Provides an abstraction for single a perf event.
struct perf_evlist:: Gathers several struct perf_evsel object and performs functions on all of them.
struct perf_mmap:: Provides an abstraction for accessing perf ring buffer.
The exported API functions bind these objects together.
REPORTING BUGS
--------------
Report bugs to <linux-perf-users@vger.kernel.org>.
LICENSE
-------
libperf is Free Software licensed under the GNU LGPL 2.1
RESOURCES
---------
https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
SEE ALSO
--------
libperf-sampling(7), libperf-counting(7)
<!-- manpage-1.72.xsl:
special settings for manpages rendered from asciidoc+docbook
handles peculiarities in docbook-xsl 1.72.0 -->
<xsl:stylesheet xmlns:xsl="http://www.w3.org/1999/XSL/Transform"
version="1.0">
<xsl:import href="manpage-base.xsl"/>
<!-- these are the special values for the roff control characters
needed for docbook-xsl 1.72.0 -->
<xsl:param name="git.docbook.backslash">&#x2593;</xsl:param>
<xsl:param name="git.docbook.dot" >&#x2302;</xsl:param>
</xsl:stylesheet>
<!-- manpage-base.xsl:
special formatting for manpages rendered from asciidoc+docbook -->
<xsl:stylesheet xmlns:xsl="http://www.w3.org/1999/XSL/Transform"
version="1.0">
<!-- these params silence some output from xmlto -->
<xsl:param name="man.output.quietly" select="1"/>
<xsl:param name="refentry.meta.get.quietly" select="1"/>
<!-- convert asciidoc callouts to man page format;
git.docbook.backslash and git.docbook.dot params
must be supplied by another XSL file or other means -->
<xsl:template match="co">
<xsl:value-of select="concat(
$git.docbook.backslash,'fB(',
substring-after(@id,'-'),')',
$git.docbook.backslash,'fR')"/>
</xsl:template>
<xsl:template match="calloutlist">
<xsl:value-of select="$git.docbook.dot"/>
<xsl:text>sp&#10;</xsl:text>
<xsl:apply-templates/>
<xsl:text>&#10;</xsl:text>
</xsl:template>
<xsl:template match="callout">
<xsl:value-of select="concat(
$git.docbook.backslash,'fB',
substring-after(@arearefs,'-'),
'. ',$git.docbook.backslash,'fR')"/>
<xsl:apply-templates/>
<xsl:value-of select="$git.docbook.dot"/>
<xsl:text>br&#10;</xsl:text>
</xsl:template>
</xsl:stylesheet>
<!-- manpage-bold-literal.xsl:
special formatting for manpages rendered from asciidoc+docbook -->
<xsl:stylesheet xmlns:xsl="http://www.w3.org/1999/XSL/Transform"
version="1.0">
<!-- render literal text as bold (instead of plain or monospace);
this makes literal text easier to distinguish in manpages
viewed on a tty -->
<xsl:template match="literal">
<xsl:value-of select="$git.docbook.backslash"/>
<xsl:text>fB</xsl:text>
<xsl:apply-templates/>
<xsl:value-of select="$git.docbook.backslash"/>
<xsl:text>fR</xsl:text>
</xsl:template>
</xsl:stylesheet>
<!-- manpage-normal.xsl:
special settings for manpages rendered from asciidoc+docbook
handles anything we want to keep away from docbook-xsl 1.72.0 -->
<xsl:stylesheet xmlns:xsl="http://www.w3.org/1999/XSL/Transform"
version="1.0">
<xsl:import href="manpage-base.xsl"/>
<!-- these are the normal values for the roff control characters -->
<xsl:param name="git.docbook.backslash">\</xsl:param>
<xsl:param name="git.docbook.dot" >.</xsl:param>
</xsl:stylesheet>
<!-- manpage-suppress-sp.xsl:
special settings for manpages rendered from asciidoc+docbook
handles erroneous, inline .sp in manpage output of some
versions of docbook-xsl -->
<xsl:stylesheet xmlns:xsl="http://www.w3.org/1999/XSL/Transform"
version="1.0">
<!-- attempt to work around spurious .sp at the tail of the line
that some versions of docbook stylesheets seem to add -->
<xsl:template match="simpara">
<xsl:variable name="content">
<xsl:apply-templates/>
</xsl:variable>
<xsl:value-of select="normalize-space($content)"/>
<xsl:if test="not(ancestor::authorblurb) and
not(ancestor::personblurb)">
<xsl:text>&#10;&#10;</xsl:text>
</xsl:if>
</xsl:template>
</xsl:stylesheet>
......@@ -60,7 +60,7 @@ else
endif
INCLUDES = \
-I$(srctree)/tools/perf/lib/include \
-I$(srctree)/tools/lib/perf/include \
-I$(srctree)/tools/lib/ \
-I$(srctree)/tools/include \
-I$(srctree)/tools/arch/$(SRCARCH)/include/ \
......@@ -181,7 +181,10 @@ install_pkgconfig: $(LIBPERF_PC)
$(call QUIET_INSTALL, $(LIBPERF_PC)) \
$(call do_install,$(LIBPERF_PC),$(libdir_SQ)/pkgconfig,644)
install: install_lib install_headers install_pkgconfig
install_doc:
$(Q)$(MAKE) -C Documentation install-man install-html install-examples
install: install_lib install_headers install_pkgconfig install_doc
FORCE:
......
......@@ -16,7 +16,7 @@ all:
include $(srctree)/tools/scripts/Makefile.include
INCLUDE = -I$(srctree)/tools/perf/lib/include -I$(srctree)/tools/include -I$(srctree)/tools/lib
INCLUDE = -I$(srctree)/tools/lib/perf/include -I$(srctree)/tools/include -I$(srctree)/tools/lib
$(TESTS_A): FORCE
$(QUIET_LINK)$(CC) $(INCLUDE) $(CFLAGS) -o $@ $(subst -a,.c,$@) ../libperf.a $(LIBAPI)
......
......@@ -96,6 +96,10 @@ int strtobool(const char *s, bool *res)
* If libc has strlcpy() then that version will override this
* implementation:
*/
#ifdef __clang__
#pragma clang diagnostic push
#pragma clang diagnostic ignored "-Wignored-attributes"
#endif
size_t __weak strlcpy(char *dest, const char *src, size_t size)
{
size_t ret = strlen(src);
......@@ -107,6 +111,9 @@ size_t __weak strlcpy(char *dest, const char *src, size_t size)
}
return ret;
}
#ifdef __clang__
#pragma clang diagnostic pop
#endif
/**
* skip_spaces - Removes leading whitespace from @str.
......
......@@ -1228,8 +1228,10 @@ filter_event(struct tep_event_filter *filter, struct tep_event *event,
}
filter_type = add_filter_type(filter, event->id);
if (filter_type == NULL)
if (filter_type == NULL) {
free_arg(arg);
return TEP_ERRNO__MEM_ALLOC_FAILED;
}
if (filter_type->filter)
free_arg(filter_type->filter);
......
......@@ -110,6 +110,10 @@ OPTIONS for 'perf sched timehist'
--max-stack::
Maximum number of functions to display in backtrace, default 5.
-C=::
--cpu=::
Only show events for the given CPU(s) (comma separated list).
-p=::
--pid=::
Only show events for given process ID (comma separated list).
......
......@@ -7,6 +7,7 @@ tools/lib/traceevent
tools/lib/api
tools/lib/bpf
tools/lib/subcmd
tools/lib/perf
tools/lib/argv_split.c
tools/lib/ctype.c
tools/lib/hweight.c
......
......@@ -286,7 +286,7 @@ ifeq ($(DEBUG),0)
endif
endif
INC_FLAGS += -I$(src-perf)/lib/include
INC_FLAGS += -I$(srctree)/tools/lib/perf/include
INC_FLAGS += -I$(src-perf)/util/include
INC_FLAGS += -I$(src-perf)/arch/$(SRCARCH)/include
INC_FLAGS += -I$(srctree)/tools/include/
......
......@@ -230,7 +230,7 @@ LIB_DIR = $(srctree)/tools/lib/api/
TRACE_EVENT_DIR = $(srctree)/tools/lib/traceevent/
BPF_DIR = $(srctree)/tools/lib/bpf/
SUBCMD_DIR = $(srctree)/tools/lib/subcmd/
LIBPERF_DIR = $(srctree)/tools/perf/lib/
LIBPERF_DIR = $(srctree)/tools/lib/perf/
# Set FEATURE_TESTS to 'all' so all possible feature checkers are executed.
# Without this setting the output feature dump file misses some features, for
......
......@@ -2384,7 +2384,7 @@ static int perf_c2c__browse_cacheline(struct hist_entry *he)
c2c_browser__update_nr_entries(browser);
while (1) {
key = hist_browser__run(browser, "? - help", true);
key = hist_browser__run(browser, "? - help", true, 0);
switch (key) {
case 's':
......@@ -2453,7 +2453,7 @@ static int perf_c2c__hists_browse(struct hists *hists)
c2c_browser__update_nr_entries(browser);
while (1) {
key = hist_browser__run(browser, "? - help", true);
key = hist_browser__run(browser, "? - help", true, 0);
switch (key) {
case 'q':
......
......@@ -62,6 +62,7 @@
#include <linux/string.h>
#include <linux/time64.h>
#include <linux/zalloc.h>
#include <linux/bitmap.h>
struct switch_output {
bool enabled;
......@@ -93,7 +94,7 @@ struct record {
bool timestamp_boundary;
struct switch_output switch_output;
unsigned long long samples;
cpu_set_t affinity_mask;
struct mmap_cpu_mask affinity_mask;
unsigned long output_max_size; /* = 0: unlimited */
};
......@@ -961,10 +962,15 @@ static struct perf_event_header finished_round_event = {
static void record__adjust_affinity(struct record *rec, struct mmap *map)
{
if (rec->opts.affinity != PERF_AFFINITY_SYS &&
!CPU_EQUAL(&rec->affinity_mask, &map->affinity_mask)) {
CPU_ZERO(&rec->affinity_mask);
CPU_OR(&rec->affinity_mask, &rec->affinity_mask, &map->affinity_mask);
sched_setaffinity(0, sizeof(rec->affinity_mask), &rec->affinity_mask);
!bitmap_equal(rec->affinity_mask.bits, map->affinity_mask.bits,
rec->affinity_mask.nbits)) {
bitmap_zero(rec->affinity_mask.bits, rec->affinity_mask.nbits);
bitmap_or(rec->affinity_mask.bits, rec->affinity_mask.bits,
map->affinity_mask.bits, rec->affinity_mask.nbits);
sched_setaffinity(0, MMAP_CPU_MASK_BYTES(&rec->affinity_mask),
(cpu_set_t *)rec->affinity_mask.bits);
if (verbose == 2)
mmap_cpu_mask__scnprintf(&rec->affinity_mask, "thread");
}
}
......@@ -2433,7 +2439,6 @@ int cmd_record(int argc, const char **argv)
# undef REASON
#endif
CPU_ZERO(&rec->affinity_mask);
rec->opts.affinity = PERF_AFFINITY_SYS;
rec->evlist = evlist__new();
......@@ -2499,6 +2504,16 @@ int cmd_record(int argc, const char **argv)
symbol__init(NULL);
if (rec->opts.affinity != PERF_AFFINITY_SYS) {
rec->affinity_mask.nbits = cpu__max_cpu();
rec->affinity_mask.bits = bitmap_alloc(rec->affinity_mask.nbits);
if (!rec->affinity_mask.bits) {
pr_err("Failed to allocate thread mask for %zd cpus\n", rec->affinity_mask.nbits);
return -ENOMEM;
}
pr_debug2("thread mask[%zd]: empty\n", rec->affinity_mask.nbits);
}
err = record__auxtrace_init(rec);
if (err)
goto out;
......@@ -2613,6 +2628,7 @@ int cmd_record(int argc, const char **argv)
err = __cmd_record(&record, argc, argv);
out:
bitmap_free(rec->affinity_mask.bits);
evlist__delete(rec->evlist);
symbol__exit();
auxtrace_record__free(rec->itr);
......
......@@ -1076,6 +1076,7 @@ int cmd_report(int argc, const char **argv)
struct stat st;
bool has_br_stack = false;
int branch_mode = -1;
int last_key = 0;
bool branch_call_mode = false;
#define CALLCHAIN_DEFAULT_OPT "graph,0.5,caller,function,percent"
static const char report_callchain_help[] = "Display call graph (stack chain/backtrace):\n\n"
......@@ -1450,7 +1451,8 @@ int cmd_report(int argc, const char **argv)
sort_order = sort_tmp;
}
if (setup_sorting(session->evlist) < 0) {
if ((last_key != K_SWITCH_INPUT_DATA) &&
(setup_sorting(session->evlist) < 0)) {
if (sort_order)
parse_options_usage(report_usage, options, "s", 1);
if (field_order)
......@@ -1530,6 +1532,7 @@ int cmd_report(int argc, const char **argv)
ret = __cmd_report(&report);
if (ret == K_SWITCH_INPUT_DATA) {
perf_session__delete(session);
last_key = K_SWITCH_INPUT_DATA;
goto repeat;
} else
ret = 0;
......
......@@ -51,6 +51,9 @@
#define SYM_LEN 129
#define MAX_PID 1024000
static const char *cpu_list;
static DECLARE_BITMAP(cpu_bitmap, MAX_NR_CPUS);
struct sched_atom;
struct task_desc {
......@@ -2008,6 +2011,9 @@ static void timehist_print_sample(struct perf_sched *sched,
char nstr[30];
u64 wait_time;
if (cpu_list && !test_bit(sample->cpu, cpu_bitmap))
return;
timestamp__scnprintf_usec(t, tstr, sizeof(tstr));
printf("%15s [%04d] ", tstr, sample->cpu);
......@@ -2994,6 +3000,12 @@ static int perf_sched__timehist(struct perf_sched *sched)
if (IS_ERR(session))
return PTR_ERR(session);
if (cpu_list) {
err = perf_session__cpu_bitmap(session, cpu_list, cpu_bitmap);
if (err < 0)
goto out;
}
evlist = session->evlist;
symbol__init(&session->header.env);
......@@ -3429,6 +3441,7 @@ int cmd_sched(int argc, const char **argv)
"analyze events only for given process id(s)"),
OPT_STRING('t', "tid", &symbol_conf.tid_list_str, "tid[,tid...]",
"analyze events only for given thread id(s)"),
OPT_STRING('C', "cpu", &cpu_list, "cpu", "list of cpus to profile"),
OPT_PARENT(sched_options)
};
......
all:
rst2man man/libperf.rst > man/libperf.7
rst2pdf tutorial/tutorial.rst
clean:
rm -f man/libperf.7
rm -f tutorial/tutorial.pdf
.. SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause)
libperf
The libperf library provides an API to access the linux kernel perf
events subsystem. It provides the following high level objects:
- struct perf_cpu_map
- struct perf_thread_map
- struct perf_evlist
- struct perf_evsel
reference
=========
Function reference by header files:
perf/core.h
-----------
.. code-block:: c
typedef int (\*libperf_print_fn_t)(enum libperf_print_level level,
const char \*, va_list ap);
void libperf_set_print(libperf_print_fn_t fn);
perf/cpumap.h
-------------
.. code-block:: c
struct perf_cpu_map \*perf_cpu_map__dummy_new(void);
struct perf_cpu_map \*perf_cpu_map__new(const char \*cpu_list);
struct perf_cpu_map \*perf_cpu_map__read(FILE \*file);
struct perf_cpu_map \*perf_cpu_map__get(struct perf_cpu_map \*map);
void perf_cpu_map__put(struct perf_cpu_map \*map);
int perf_cpu_map__cpu(const struct perf_cpu_map \*cpus, int idx);
int perf_cpu_map__nr(const struct perf_cpu_map \*cpus);
perf_cpu_map__for_each_cpu(cpu, idx, cpus)
perf/threadmap.h
----------------
.. code-block:: c
struct perf_thread_map \*perf_thread_map__new_dummy(void);
void perf_thread_map__set_pid(struct perf_thread_map \*map, int thread, pid_t pid);
char \*perf_thread_map__comm(struct perf_thread_map \*map, int thread);
struct perf_thread_map \*perf_thread_map__get(struct perf_thread_map \*map);
void perf_thread_map__put(struct perf_thread_map \*map);
perf/evlist.h
-------------
.. code-block::
void perf_evlist__init(struct perf_evlist \*evlist);
void perf_evlist__add(struct perf_evlist \*evlist,
struct perf_evsel \*evsel);
void perf_evlist__remove(struct perf_evlist \*evlist,
struct perf_evsel \*evsel);
struct perf_evlist \*perf_evlist__new(void);
void perf_evlist__delete(struct perf_evlist \*evlist);
struct perf_evsel\* perf_evlist__next(struct perf_evlist \*evlist,
struct perf_evsel \*evsel);
int perf_evlist__open(struct perf_evlist \*evlist);
void perf_evlist__close(struct perf_evlist \*evlist);
void perf_evlist__enable(struct perf_evlist \*evlist);
void perf_evlist__disable(struct perf_evlist \*evlist);
perf_evlist__for_each_evsel(evlist, pos)
void perf_evlist__set_maps(struct perf_evlist \*evlist,
struct perf_cpu_map \*cpus,
struct perf_thread_map \*threads);
perf/evsel.h
------------
.. code-block:: c
struct perf_counts_values {
union {
struct {
uint64_t val;
uint64_t ena;
uint64_t run;
};
uint64_t values[3];
};
};
void perf_evsel__init(struct perf_evsel \*evsel,
struct perf_event_attr \*attr);
struct perf_evsel \*perf_evsel__new(struct perf_event_attr \*attr);
void perf_evsel__delete(struct perf_evsel \*evsel);
int perf_evsel__open(struct perf_evsel \*evsel, struct perf_cpu_map \*cpus,
struct perf_thread_map \*threads);
void perf_evsel__close(struct perf_evsel \*evsel);
int perf_evsel__read(struct perf_evsel \*evsel, int cpu, int thread,
struct perf_counts_values \*count);
int perf_evsel__enable(struct perf_evsel \*evsel);
int perf_evsel__disable(struct perf_evsel \*evsel);
int perf_evsel__apply_filter(struct perf_evsel \*evsel, const char \*filter);
struct perf_cpu_map \*perf_evsel__cpus(struct perf_evsel \*evsel);
struct perf_thread_map \*perf_evsel__threads(struct perf_evsel \*evsel);
struct perf_event_attr \*perf_evsel__attr(struct perf_evsel \*evsel);
.. SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause)
libperf tutorial
================
Compile and install libperf from kernel sources
===============================================
.. code-block:: bash
git clone git://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
cd linux/tools/perf/lib
make
sudo make install prefix=/usr
Libperf object
==============
The libperf library provides several high level objects:
struct perf_cpu_map
Provides a cpu list abstraction.
struct perf_thread_map
Provides a thread list abstraction.
struct perf_evsel
Provides an abstraction for single a perf event.
struct perf_evlist
Gathers several struct perf_evsel object and performs functions on all of them.
The exported API binds these objects together,
for full reference see the libperf.7 man page.
Examples
========
Examples aim to explain libperf functionality on simple use cases.
They are based in on a checked out linux kernel git tree:
.. code-block:: bash
$ cd tools/perf/lib/Documentation/tutorial/
$ ls -d ex-*
ex-1-compile ex-2-evsel-stat ex-3-evlist-stat
ex-1-compile example
====================
This example shows the basic usage of *struct perf_cpu_map*,
how to create it and display its cpus:
.. code-block:: bash
$ cd ex-1-compile/
$ make
gcc -o test test.c -lperf
$ ./test
0 1 2 3 4 5 6 7
The full code listing is here:
.. code-block:: c
1 #include <perf/cpumap.h>
2
3 int main(int argc, char **Argv)
4 {
5 struct perf_cpu_map *cpus;
6 int cpu, tmp;
7
8 cpus = perf_cpu_map__new(NULL);
9
10 perf_cpu_map__for_each_cpu(cpu, tmp, cpus)
11 fprintf(stdout, "%d ", cpu);
12
13 fprintf(stdout, "\n");
14
15 perf_cpu_map__put(cpus);
16 return 0;
17 }
First you need to include the proper header to have *struct perf_cpumap*
declaration and functions:
.. code-block:: c
1 #include <perf/cpumap.h>
The *struct perf_cpumap* object is created by *perf_cpu_map__new* call.
The *NULL* argument asks it to populate the object with the current online CPUs list:
.. code-block:: c
8 cpus = perf_cpu_map__new(NULL);
This is paired with a *perf_cpu_map__put*, that drops its reference at the end, possibly deleting it.
.. code-block:: c
15 perf_cpu_map__put(cpus);
The iteration through the *struct perf_cpumap* CPUs is done using the *perf_cpu_map__for_each_cpu*
macro which requires 3 arguments:
- cpu - the cpu numer
- tmp - iteration helper variable
- cpus - the *struct perf_cpumap* object
.. code-block:: c
10 perf_cpu_map__for_each_cpu(cpu, tmp, cpus)
11 fprintf(stdout, "%d ", cpu);
ex-2-evsel-stat example
=======================
TBD
ex-3-evlist-stat example
========================
TBD
......@@ -263,20 +263,20 @@ int test__bp_signal(struct test *test __maybe_unused, int subtest __maybe_unused
if (count1 == 11)
pr_debug("failed: RF EFLAG recursion issue detected\n");
else
pr_debug("failed: wrong count for bp1%lld\n", count1);
pr_debug("failed: wrong count for bp1: %lld, expected 1\n", count1);
}
if (overflows != 3)
pr_debug("failed: wrong overflow hit\n");
pr_debug("failed: wrong overflow (%d) hit, expected 3\n", overflows);
if (overflows_2 != 3)
pr_debug("failed: wrong overflow_2 hit\n");
pr_debug("failed: wrong overflow_2 (%d) hit, expected 3\n", overflows_2);
if (count2 != 3)
pr_debug("failed: wrong count for bp2\n");
pr_debug("failed: wrong count for bp2 (%lld), expected 3\n", count2);
if (count3 != 2)
pr_debug("failed: wrong count for bp3\n");
pr_debug("failed: wrong count for bp3 (%lld), expected 2\n", count3);
return count1 == 1 && overflows == 3 && count2 == 3 && overflows_2 == 3 && count3 == 2 ?
TEST_OK : TEST_FAIL;
......
This diff is collapsed.
......@@ -34,7 +34,7 @@ struct hist_browser {
struct hist_browser *hist_browser__new(struct hists *hists);
void hist_browser__delete(struct hist_browser *browser);
int hist_browser__run(struct hist_browser *browser, const char *help,
bool warn_lost_event);
bool warn_lost_event, int key);
void hist_browser__init(struct hist_browser *browser,
struct hists *hists);
#endif /* _PERF_UI_BROWSER_HISTS_H_ */
......@@ -56,7 +56,7 @@ int res_sample_browse(struct res_sample *res_samples, int num_res,
return -1;
}
}
choice = ui__popup_menu(num_res, names);
choice = ui__popup_menu(num_res, names, NULL);
for (i = 0; i < num_res; i++)
zfree(&names[i]);
free(names);
......
......@@ -126,7 +126,7 @@ static int list_scripts(char *script_name, bool *custom,
SCRIPT_FULLPATH_LEN);
if (num < 0)
num = 0;
choice = ui__popup_menu(num + max_std, (char * const *)names);
choice = ui__popup_menu(num + max_std, (char * const *)names, NULL);
if (choice < 0) {
ret = -1;
goto out;
......
......@@ -23,7 +23,7 @@ static void ui_browser__argv_write(struct ui_browser *browser,
ui_browser__write_nstring(browser, *arg, browser->width);
}
static int popup_menu__run(struct ui_browser *menu)
static int popup_menu__run(struct ui_browser *menu, int *keyp)
{
int key;
......@@ -45,6 +45,11 @@ static int popup_menu__run(struct ui_browser *menu)
key = -1;
break;
default:
if (keyp) {
*keyp = key;
key = menu->nr_entries;
break;
}
continue;
}
......@@ -55,7 +60,7 @@ static int popup_menu__run(struct ui_browser *menu)
return key;
}
int ui__popup_menu(int argc, char * const argv[])
int ui__popup_menu(int argc, char * const argv[], int *keyp)
{
struct ui_browser menu = {
.entries = (void *)argv,
......@@ -64,8 +69,7 @@ int ui__popup_menu(int argc, char * const argv[])
.write = ui_browser__argv_write,
.nr_entries = argc,
};
return popup_menu__run(&menu);
return popup_menu__run(&menu, keyp);
}
int ui_browser__input_window(const char *title, const char *text, char *input,
......
......@@ -5,7 +5,7 @@
#include <stdarg.h>
int ui__getch(int delay_secs);
int ui__popup_menu(int argc, char * const argv[]);
int ui__popup_menu(int argc, char * const argv[], int *keyp);
int ui__help_window(const char *text);
int ui__dialog_yesno(const char *msg);
void __ui__info_window(const char *title, const char *text, const char *exit_msg);
......
......@@ -339,10 +339,10 @@ static inline void perf_hpp__prepend_sort_field(struct perf_hpp_fmt *format)
list_for_each_entry_safe(format, tmp, &(_list)->sorts, sort_list)
#define hists__for_each_format(hists, format) \
perf_hpp_list__for_each_format((hists)->hpp_list, fmt)
perf_hpp_list__for_each_format((hists)->hpp_list, format)
#define hists__for_each_sort_list(hists, format) \
perf_hpp_list__for_each_sort_list((hists)->hpp_list, fmt)
perf_hpp_list__for_each_sort_list((hists)->hpp_list, format)
extern struct perf_hpp_fmt perf_hpp__format[];
......
......@@ -23,6 +23,18 @@
#include "mmap.h"
#include "../perf.h"
#include <internal/lib.h> /* page_size */
#include <linux/bitmap.h>
#define MASK_SIZE 1023
void mmap_cpu_mask__scnprintf(struct mmap_cpu_mask *mask, const char *tag)
{
char buf[MASK_SIZE + 1];
size_t len;
len = bitmap_scnprintf(mask->bits, mask->nbits, buf, MASK_SIZE);
buf[len] = '\0';
pr_debug("%p: %s mask[%zd]: %s\n", mask, tag, mask->nbits, buf);
}
size_t mmap__mmap_len(struct mmap *map)
{
......@@ -207,6 +219,8 @@ static void perf_mmap__aio_munmap(struct mmap *map __maybe_unused)
void mmap__munmap(struct mmap *map)
{
bitmap_free(map->affinity_mask.bits);
perf_mmap__aio_munmap(map);
if (map->data != NULL) {
munmap(map->data, mmap__mmap_len(map));
......@@ -215,7 +229,7 @@ void mmap__munmap(struct mmap *map)
auxtrace_mmap__munmap(&map->auxtrace_mmap);
}
static void build_node_mask(int node, cpu_set_t *mask)
static void build_node_mask(int node, struct mmap_cpu_mask *mask)
{
int c, cpu, nr_cpus;
const struct perf_cpu_map *cpu_map = NULL;
......@@ -228,17 +242,23 @@ static void build_node_mask(int node, cpu_set_t *mask)
for (c = 0; c < nr_cpus; c++) {
cpu = cpu_map->map[c]; /* map c index to online cpu index */
if (cpu__get_node(cpu) == node)
CPU_SET(cpu, mask);
set_bit(cpu, mask->bits);
}
}
static void perf_mmap__setup_affinity_mask(struct mmap *map, struct mmap_params *mp)
static int perf_mmap__setup_affinity_mask(struct mmap *map, struct mmap_params *mp)
{
CPU_ZERO(&map->affinity_mask);
map->affinity_mask.nbits = cpu__max_cpu();
map->affinity_mask.bits = bitmap_alloc(map->affinity_mask.nbits);
if (!map->affinity_mask.bits)
return -1;
if (mp->affinity == PERF_AFFINITY_NODE && cpu__max_node() > 1)
build_node_mask(cpu__get_node(map->core.cpu), &map->affinity_mask);
else if (mp->affinity == PERF_AFFINITY_CPU)
CPU_SET(map->core.cpu, &map->affinity_mask);
set_bit(map->core.cpu, map->affinity_mask.bits);
return 0;
}
int mmap__mmap(struct mmap *map, struct mmap_params *mp, int fd, int cpu)
......@@ -249,7 +269,15 @@ int mmap__mmap(struct mmap *map, struct mmap_params *mp, int fd, int cpu)
return -1;
}
perf_mmap__setup_affinity_mask(map, mp);
if (mp->affinity != PERF_AFFINITY_SYS &&
perf_mmap__setup_affinity_mask(map, mp)) {
pr_debug2("failed to alloc mmap affinity mask, error %d\n",
errno);
return -1;
}
if (verbose == 2)
mmap_cpu_mask__scnprintf(&map->affinity_mask, "mmap");
map->core.flush = mp->flush;
......
......@@ -15,6 +15,15 @@
#include "event.h"
struct aiocb;
struct mmap_cpu_mask {
unsigned long *bits;
size_t nbits;
};
#define MMAP_CPU_MASK_BYTES(m) \
(BITS_TO_LONGS(((struct mmap_cpu_mask *)m)->nbits) * sizeof(unsigned long))
/**
* struct mmap - perf's ring buffer mmap details
*
......@@ -31,7 +40,7 @@ struct mmap {
int nr_cblocks;
} aio;
#endif
cpu_set_t affinity_mask;
struct mmap_cpu_mask affinity_mask;
void *data;
int comp_level;
};
......@@ -52,4 +61,6 @@ int perf_mmap__push(struct mmap *md, void *to,
size_t mmap__mmap_len(struct mmap *map);
void mmap_cpu_mask__scnprintf(struct mmap_cpu_mask *mask, const char *tag);
#endif /*__PERF_MMAP_H */
......@@ -324,8 +324,7 @@ static int _hist_entry__sym_snprintf(struct map_symbol *ms,
return ret;
}
static int hist_entry__sym_snprintf(struct hist_entry *he, char *bf,
size_t size, unsigned int width)
int hist_entry__sym_snprintf(struct hist_entry *he, char *bf, size_t size, unsigned int width)
{
return _hist_entry__sym_snprintf(&he->ms, he->ip,
he->level, bf, size, width);
......
......@@ -164,6 +164,8 @@ static __pure inline bool hist_entry__has_callchains(struct hist_entry *he)
return he->callchain_size != 0;
}
int hist_entry__sym_snprintf(struct hist_entry *he, char *bf, size_t size, unsigned int width);
static inline bool hist_entry__has_pairs(struct hist_entry *he)
{
return !list_empty(&he->pairs.node);
......
......@@ -920,6 +920,9 @@ static int dso__process_kernel_symbol(struct dso *dso, struct map *map,
if (curr_map == NULL)
return -1;
if (curr_dso->kernel)
map__kmap(curr_map)->kmaps = kmaps;
if (adjust_kernel_syms) {
curr_map->start = shdr->sh_addr + ref_reloc(kmap);
curr_map->end = curr_map->start + shdr->sh_size;
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment