Commit dd2281be authored by Ingo Molnar's avatar Ingo Molnar

Merge tag 'perf-core-for-mingo' of...

Merge tag 'perf-core-for-mingo' of git://git.kernel.org/pub/scm/linux/kernel/git/acme/linux into perf/core

Pull perf/core improvements and fixes from Arnaldo Carvalho de Melo:

  - Support Intel PT in several tools, enabling the use of the processor trace
    feature introduced in Intel Broadwell processors: (Adrian Hunter)

	 # dmesg | grep Performance
	 # [0.188477] Performance Events: PEBS fmt2+, 16-deep LBR, Broadwell events, full-width counters, Intel PMU driver.
	 # perf record -e intel_pt//u -a sleep 1
	 [ perf record: Woken up 1 times to write data ]
	 [ perf record: Captured and wrote 0.216 MB perf.data ]
	 # perf script # then navigate in the tool output to some area, like this one:
	 184 1030 dl_main (/usr/lib64/ld-2.17.so) => 7f21ba661440 dl_main (/usr/lib64/ld-2.17.so)
	 185 1457 dl_main (/usr/lib64/ld-2.17.so) => 7f21ba669f10 _dl_new_object (/usr/lib64/ld-2.17.so)
	 186 9f37 _dl_new_object (/usr/lib64/ld-2.17.so) => 7f21ba677b90 strlen (/usr/lib64/ld-2.17.so)
	 187 7ba3 strlen (/usr/lib64/ld-2.17.so) => 7f21ba677c75 strlen (/usr/lib64/ld-2.17.so)
	 188 7c78 strlen (/usr/lib64/ld-2.17.so) => 7f21ba669f3c _dl_new_object (/usr/lib64/ld-2.17.so)
	 189 9f8a _dl_new_object (/usr/lib64/ld-2.17.so) => 7f21ba65fab0 calloc@plt (/usr/lib64/ld-2.17.so)
	 190 fab0 calloc@plt (/usr/lib64/ld-2.17.so) => 7f21ba675e70 calloc (/usr/lib64/ld-2.17.so)
	 191 5e87 calloc (/usr/lib64/ld-2.17.so) => 7f21ba65fa90 malloc@plt (/usr/lib64/ld-2.17.so)
	 192 fa90 malloc@plt (/usr/lib64/ld-2.17.so) => 7f21ba675e60 malloc (/usr/lib64/ld-2.17.so)
	 193 5e68 malloc (/usr/lib64/ld-2.17.so) => 7f21ba65fa80 __libc_memalign@plt (/usr/lib64/ld-2.17.so)
	 194 fa80 __libc_memalign@plt (/usr/lib64/ld-2.17.so) => 7f21ba675d50 __libc_memalign (/usr/lib64/ld-2.17.so)
	 195 5d63 __libc_memalign (/usr/lib64/ld-2.17.so) => 7f21ba675e20 __libc_memalign (/usr/lib64/ld-2.17.so)
	 196 5e40 __libc_memalign (/usr/lib64/ld-2.17.so) => 7f21ba675d73 __libc_memalign (/usr/lib64/ld-2.17.so)
	 197 5d97 __libc_memalign (/usr/lib64/ld-2.17.so) => 7f21ba675e18 __libc_memalign (/usr/lib64/ld-2.17.so)
	 198 5e1e __libc_memalign (/usr/lib64/ld-2.17.so) => 7f21ba675df9 __libc_memalign (/usr/lib64/ld-2.17.so)
	 199 5e10 __libc_memalign (/usr/lib64/ld-2.17.so) => 7f21ba669f8f _dl_new_object (/usr/lib64/ld-2.17.so)
	 200 9fc2 _dl_new_object (/usr/lib64/ld-2.17.so) =>  7f21ba678e70 memcpy (/usr/lib64/ld-2.17.so)
	 201 8e8c memcpy (/usr/lib64/ld-2.17.so) => 7f21ba678ea0 memcpy (/usr/lib64/ld-2.17.so)

  - Fix annotation of vdso (Adrian Hunter)

  - Fix DWARF callchains in 'perf script' (Jiri Olsa)

  - Fix adding probes in kernel syscalls and listing which variables can be
    collected at kernel syscall function lines (Masami Hiramatsu)

Build Fixes:

  - Fix 32-bit compilation error in util/annotate.c (Adrian Hunter)

  - Support static linking with libdw on Fedora 22 (Andi Kleen)

Infrastructure changes:

  - Add a helper function to probe whether cpu-wide tracing is possible (Adrian Hunter)

  - Move vfs_getname storage to per thread area in 'perf trace' (Arnaldo Carvalho de Melo)
Signed-off-by: default avatarArnaldo Carvalho de Melo <acme@redhat.com>
Signed-off-by: default avatarIngo Molnar <mingo@kernel.org>
parents 40a2ea1b 5efb1d54
...@@ -57,6 +57,8 @@ quiet_cmd_cc_i_c = CPP $@ ...@@ -57,6 +57,8 @@ quiet_cmd_cc_i_c = CPP $@
quiet_cmd_cc_s_c = AS $@ quiet_cmd_cc_s_c = AS $@
cmd_cc_s_c = $(CC) $(c_flags) -S -o $@ $< cmd_cc_s_c = $(CC) $(c_flags) -S -o $@ $<
quiet_cmd_gen = GEN $@
# Link agregate command # Link agregate command
# If there's nothing to link, create empty $@ object. # If there's nothing to link, create empty $@ object.
quiet_cmd_ld_multi = LD $@ quiet_cmd_ld_multi = LD $@
......
...@@ -70,8 +70,13 @@ test-libelf.bin: ...@@ -70,8 +70,13 @@ test-libelf.bin:
test-glibc.bin: test-glibc.bin:
$(BUILD) $(BUILD)
DWARFLIBS := -ldw
ifeq ($(findstring -static,${LDFLAGS}),-static)
DWARFLIBS += -lelf -lebl -lz -llzma -lbz2
endif
test-dwarf.bin: test-dwarf.bin:
$(BUILD) -ldw $(BUILD) $(DWARFLIBS)
test-libelf-mmap.bin: test-libelf-mmap.bin:
$(BUILD) -lelf $(BUILD) -lelf
......
...@@ -29,3 +29,4 @@ config.mak.autogen ...@@ -29,3 +29,4 @@ config.mak.autogen
*.pyc *.pyc
*.pyo *.pyo
.config-detected .config-detected
util/intel-pt-decoder/inat-tables.c
Intel Processor Trace
=====================
Overview
========
Intel Processor Trace (Intel PT) is an extension of Intel Architecture that
collects information about software execution such as control flow, execution
modes and timings and formats it into highly compressed binary packets.
Technical details are documented in the Intel 64 and IA-32 Architectures
Software Developer Manuals, Chapter 36 Intel Processor Trace.
Intel PT is first supported in Intel Core M and 5th generation Intel Core
processors that are based on the Intel micro-architecture code name Broadwell.
Trace data is collected by 'perf record' and stored within the perf.data file.
See below for options to 'perf record'.
Trace data must be 'decoded' which involves walking the object code and matching
the trace data packets. For example a TNT packet only tells whether a
conditional branch was taken or not taken, so to make use of that packet the
decoder must know precisely which instruction was being executed.
Decoding is done on-the-fly. The decoder outputs samples in the same format as
samples output by perf hardware events, for example as though the "instructions"
or "branches" events had been recorded. Presently 3 tools support this:
'perf script', 'perf report' and 'perf inject'. See below for more information
on using those tools.
The main distinguishing feature of Intel PT is that the decoder can determine
the exact flow of software execution. Intel PT can be used to understand why
and how did software get to a certain point, or behave a certain way. The
software does not have to be recompiled, so Intel PT works with debug or release
builds, however the executed images are needed - which makes use in JIT-compiled
environments, or with self-modified code, a challenge. Also symbols need to be
provided to make sense of addresses.
A limitation of Intel PT is that it produces huge amounts of trace data
(hundreds of megabytes per second per core) which takes a long time to decode,
for example two or three orders of magnitude longer than it took to collect.
Another limitation is the performance impact of tracing, something that will
vary depending on the use-case and architecture.
Quickstart
==========
It is important to start small. That is because it is easy to capture vastly
more data than can possibly be processed.
The simplest thing to do with Intel PT is userspace profiling of small programs.
Data is captured with 'perf record' e.g. to trace 'ls' userspace-only:
perf record -e intel_pt//u ls
And profiled with 'perf report' e.g.
perf report
To also trace kernel space presents a problem, namely kernel self-modifying
code. A fairly good kernel image is available in /proc/kcore but to get an
accurate image a copy of /proc/kcore needs to be made under the same conditions
as the data capture. A script perf-with-kcore can do that, but beware that the
script makes use of 'sudo' to copy /proc/kcore. If you have perf installed
locally from the source tree you can do:
~/libexec/perf-core/perf-with-kcore record pt_ls -e intel_pt// -- ls
which will create a directory named 'pt_ls' and put the perf.data file and
copies of /proc/kcore, /proc/kallsyms and /proc/modules into it. Then to use
'perf report' becomes:
~/libexec/perf-core/perf-with-kcore report pt_ls
Because samples are synthesized after-the-fact, the sampling period can be
selected for reporting. e.g. sample every microsecond
~/libexec/perf-core/perf-with-kcore report pt_ls --itrace=i1usge
See the sections below for more information about the --itrace option.
Beware the smaller the period, the more samples that are produced, and the
longer it takes to process them.
Also note that the coarseness of Intel PT timing information will start to
distort the statistical value of the sampling as the sampling period becomes
smaller.
To represent software control flow, "branches" samples are produced. By default
a branch sample is synthesized for every single branch. To get an idea what
data is available you can use the 'perf script' tool with no parameters, which
will list all the samples.
perf record -e intel_pt//u ls
perf script
An interesting field that is not printed by default is 'flags' which can be
displayed as follows:
perf script -Fcomm,tid,pid,time,cpu,event,trace,ip,sym,dso,addr,symoff,flags
The flags are "bcrosyiABEx" which stand for branch, call, return, conditional,
system, asynchronous, interrupt, transaction abort, trace begin, trace end, and
in transaction, respectively.
While it is possible to create scripts to analyze the data, an alternative
approach is available to export the data to a postgresql database. Refer to
script export-to-postgresql.py for more details, and to script
call-graph-from-postgresql.py for an example of using the database.
As mentioned above, it is easy to capture too much data. One way to limit the
data captured is to use 'snapshot' mode which is explained further below.
Refer to 'new snapshot option' and 'Intel PT modes of operation' further below.
Another problem that will be experienced is decoder errors. They can be caused
by inability to access the executed image, self-modified or JIT-ed code, or the
inability to match side-band information (such as context switches and mmaps)
which results in the decoder not knowing what code was executed.
There is also the problem of perf not being able to copy the data fast enough,
resulting in data lost because the buffer was full. See 'Buffer handling' below
for more details.
perf record
===========
new event
---------
The Intel PT kernel driver creates a new PMU for Intel PT. PMU events are
selected by providing the PMU name followed by the "config" separated by slashes.
An enhancement has been made to allow default "config" e.g. the option
-e intel_pt//
will use a default config value. Currently that is the same as
-e intel_pt/tsc,noretcomp=0/
which is the same as
-e intel_pt/tsc=1,noretcomp=0/
The config terms are listed in /sys/devices/intel_pt/format. They are bit
fields within the config member of the struct perf_event_attr which is
passed to the kernel by the perf_event_open system call. They correspond to bit
fields in the IA32_RTIT_CTL MSR. Here is a list of them and their definitions:
$ for f in `ls /sys/devices/intel_pt/format`;do
> echo $f
> cat /sys/devices/intel_pt/format/$f
> done
noretcomp
config:11
tsc
config:10
Note that the default config must be overridden for each term i.e.
-e intel_pt/noretcomp=0/
is the same as:
-e intel_pt/tsc=1,noretcomp=0/
So, to disable TSC packets use:
-e intel_pt/tsc=0/
It is also possible to specify the config value explicitly:
-e intel_pt/config=0x400/
Note that, as with all events, the event is suffixed with event modifiers:
u userspace
k kernel
h hypervisor
G guest
H host
p precise ip
'h', 'G' and 'H' are for virtualization which is not supported by Intel PT.
'p' is also not relevant to Intel PT. So only options 'u' and 'k' are
meaningful for Intel PT.
perf_event_attr is displayed if the -vv option is used e.g.
------------------------------------------------------------
perf_event_attr:
type 6
size 112
config 0x400
{ sample_period, sample_freq } 1
sample_type IP|TID|TIME|CPU|IDENTIFIER
read_format ID
disabled 1
inherit 1
exclude_kernel 1
exclude_hv 1
enable_on_exec 1
sample_id_all 1
------------------------------------------------------------
sys_perf_event_open: pid 31104 cpu 0 group_fd -1 flags 0x8
sys_perf_event_open: pid 31104 cpu 1 group_fd -1 flags 0x8
sys_perf_event_open: pid 31104 cpu 2 group_fd -1 flags 0x8
sys_perf_event_open: pid 31104 cpu 3 group_fd -1 flags 0x8
------------------------------------------------------------
new snapshot option
-------------------
To select snapshot mode a new option has been added:
-S
Optionally it can be followed by the snapshot size e.g.
-S0x100000
The default snapshot size is the auxtrace mmap size. If neither auxtrace mmap size
nor snapshot size is specified, then the default is 4MiB for privileged users
(or if /proc/sys/kernel/perf_event_paranoid < 0), 128KiB for unprivileged users.
If an unprivileged user does not specify mmap pages, the mmap pages will be
reduced as described in the 'new auxtrace mmap size option' section below.
The snapshot size is displayed if the option -vv is used e.g.
Intel PT snapshot size: %zu
new auxtrace mmap size option
---------------------------
Intel PT buffer size is specified by an addition to the -m option e.g.
-m,16
selects a buffer size of 16 pages i.e. 64KiB.
Note that the existing functionality of -m is unchanged. The auxtrace mmap size
is specified by the optional addition of a comma and the value.
The default auxtrace mmap size for Intel PT is 4MiB/page_size for privileged users
(or if /proc/sys/kernel/perf_event_paranoid < 0), 128KiB for unprivileged users.
If an unprivileged user does not specify mmap pages, the mmap pages will be
reduced from the default 512KiB/page_size to 256KiB/page_size, otherwise the
user is likely to get an error as they exceed their mlock limit (Max locked
memory as shown in /proc/self/limits). Note that perf does not count the first
512KiB (actually /proc/sys/kernel/perf_event_mlock_kb minus 1 page) per cpu
against the mlock limit so an unprivileged user is allowed 512KiB per cpu plus
their mlock limit (which defaults to 64KiB but is not multiplied by the number
of cpus).
In full-trace mode, powers of two are allowed for buffer size, with a minimum
size of 2 pages. In snapshot mode, it is the same but the minimum size is
1 page.
The mmap size and auxtrace mmap size are displayed if the -vv option is used e.g.
mmap length 528384
auxtrace mmap length 4198400
Intel PT modes of operation
---------------------------
Intel PT can be used in 2 modes:
full-trace mode
snapshot mode
Full-trace mode traces continuously e.g.
perf record -e intel_pt//u uname
Snapshot mode captures the available data when a signal is sent e.g.
perf record -v -e intel_pt//u -S ./loopy 1000000000 &
[1] 11435
kill -USR2 11435
Recording AUX area tracing snapshot
Note that the signal sent is SIGUSR2.
Note that "Recording AUX area tracing snapshot" is displayed because the -v
option is used.
The 2 modes cannot be used together.
Buffer handling
---------------
There may be buffer limitations (i.e. single ToPa entry) which means that actual
buffer sizes are limited to powers of 2 up to 4MiB (MAX_ORDER). In order to
provide other sizes, and in particular an arbitrarily large size, multiple
buffers are logically concatenated. However an interrupt must be used to switch
between buffers. That has two potential problems:
a) the interrupt may not be handled in time so that the current buffer
becomes full and some trace data is lost.
b) the interrupts may slow the system and affect the performance
results.
If trace data is lost, the driver sets 'truncated' in the PERF_RECORD_AUX event
which the tools report as an error.
In full-trace mode, the driver waits for data to be copied out before allowing
the (logical) buffer to wrap-around. If data is not copied out quickly enough,
again 'truncated' is set in the PERF_RECORD_AUX event. If the driver has to
wait, the intel_pt event gets disabled. Because it is difficult to know when
that happens, perf tools always re-enable the intel_pt event after copying out
data.
Intel PT and build ids
----------------------
By default "perf record" post-processes the event stream to find all build ids
for executables for all addresses sampled. Deliberately, Intel PT is not
decoded for that purpose (it would take too long). Instead the build ids for
all executables encountered (due to mmap, comm or task events) are included
in the perf.data file.
To see buildids included in the perf.data file use the command:
perf buildid-list
If the perf.data file contains Intel PT data, that is the same as:
perf buildid-list --with-hits
Snapshot mode and event disabling
---------------------------------
In order to make a snapshot, the intel_pt event is disabled using an IOCTL,
namely PERF_EVENT_IOC_DISABLE. However doing that can also disable the
collection of side-band information. In order to prevent that, a dummy
software event has been introduced that permits tracking events (like mmaps) to
continue to be recorded while intel_pt is disabled. That is important to ensure
there is complete side-band information to allow the decoding of subsequent
snapshots.
A test has been created for that. To find the test:
perf test list
...
23: Test using a dummy software event to keep tracking
To run the test:
perf test 23
23: Test using a dummy software event to keep tracking : Ok
perf record modes (nothing new here)
------------------------------------
perf record essentially operates in one of three modes:
per thread
per cpu
workload only
"per thread" mode is selected by -t or by --per-thread (with -p or -u or just a
workload).
"per cpu" is selected by -C or -a.
"workload only" mode is selected by not using the other options but providing a
command to run (i.e. the workload).
In per-thread mode an exact list of threads is traced. There is no inheritance.
Each thread has its own event buffer.
In per-cpu mode all processes (or processes from the selected cgroup i.e. -G
option, or processes selected with -p or -u) are traced. Each cpu has its own
buffer. Inheritance is allowed.
In workload-only mode, the workload is traced but with per-cpu buffers.
Inheritance is allowed. Note that you can now trace a workload in per-thread
mode by using the --per-thread option.
Privileged vs non-privileged users
----------------------------------
Unless /proc/sys/kernel/perf_event_paranoid is set to -1, unprivileged users
have memory limits imposed upon them. That affects what buffer sizes they can
have as outlined above.
Unless /proc/sys/kernel/perf_event_paranoid is set to -1, unprivileged users are
not permitted to use tracepoints which means there is insufficient side-band
information to decode Intel PT in per-cpu mode, and potentially workload-only
mode too if the workload creates new processes.
Note also, that to use tracepoints, read-access to debugfs is required. So if
debugfs is not mounted or the user does not have read-access, it will again not
be possible to decode Intel PT in per-cpu mode.
sched_switch tracepoint
-----------------------
The sched_switch tracepoint is used to provide side-band data for Intel PT
decoding. sched_switch events are automatically added. e.g. the second event
shown below
$ perf record -vv -e intel_pt//u uname
------------------------------------------------------------
perf_event_attr:
type 6
size 112
config 0x400
{ sample_period, sample_freq } 1
sample_type IP|TID|TIME|CPU|IDENTIFIER
read_format ID
disabled 1
inherit 1
exclude_kernel 1
exclude_hv 1
enable_on_exec 1
sample_id_all 1
------------------------------------------------------------
sys_perf_event_open: pid 31104 cpu 0 group_fd -1 flags 0x8
sys_perf_event_open: pid 31104 cpu 1 group_fd -1 flags 0x8
sys_perf_event_open: pid 31104 cpu 2 group_fd -1 flags 0x8
sys_perf_event_open: pid 31104 cpu 3 group_fd -1 flags 0x8
------------------------------------------------------------
perf_event_attr:
type 2
size 112
config 0x108
{ sample_period, sample_freq } 1
sample_type IP|TID|TIME|CPU|PERIOD|RAW|IDENTIFIER
read_format ID
inherit 1
sample_id_all 1
exclude_guest 1
------------------------------------------------------------
sys_perf_event_open: pid -1 cpu 0 group_fd -1 flags 0x8
sys_perf_event_open: pid -1 cpu 1 group_fd -1 flags 0x8
sys_perf_event_open: pid -1 cpu 2 group_fd -1 flags 0x8
sys_perf_event_open: pid -1 cpu 3 group_fd -1 flags 0x8
------------------------------------------------------------
perf_event_attr:
type 1
size 112
config 0x9
{ sample_period, sample_freq } 1
sample_type IP|TID|TIME|IDENTIFIER
read_format ID
disabled 1
inherit 1
exclude_kernel 1
exclude_hv 1
mmap 1
comm 1
enable_on_exec 1
task 1
sample_id_all 1
mmap2 1
comm_exec 1
------------------------------------------------------------
sys_perf_event_open: pid 31104 cpu 0 group_fd -1 flags 0x8
sys_perf_event_open: pid 31104 cpu 1 group_fd -1 flags 0x8
sys_perf_event_open: pid 31104 cpu 2 group_fd -1 flags 0x8
sys_perf_event_open: pid 31104 cpu 3 group_fd -1 flags 0x8
mmap size 528384B
AUX area mmap length 4194304
perf event ring buffer mmapped per cpu
Synthesizing auxtrace information
Linux
[ perf record: Woken up 1 times to write data ]
[ perf record: Captured and wrote 0.042 MB perf.data ]
Note, the sched_switch event is only added if the user is permitted to use it
and only in per-cpu mode.
Note also, the sched_switch event is only added if TSC packets are requested.
That is because, in the absence of timing information, the sched_switch events
cannot be matched against the Intel PT trace.
perf script
===========
By default, perf script will decode trace data found in the perf.data file.
This can be further controlled by new option --itrace.
New --itrace option
-------------------
Having no option is the same as
--itrace
which, in turn, is the same as
--itrace=ibxe
The letters are:
i synthesize "instructions" events
b synthesize "branches" events
x synthesize "transactions" events
c synthesize branches events (calls only)
r synthesize branches events (returns only)
e synthesize tracing error events
d create a debug log
g synthesize a call chain (use with i or x)
"Instructions" events look like they were recorded by "perf record -e
instructions".
"Branches" events look like they were recorded by "perf record -e branches". "c"
and "r" can be combined to get calls and returns.
"Transactions" events correspond to the start or end of transactions. The
'flags' field can be used in perf script to determine whether the event is a
tranasaction start, commit or abort.
Error events are new. They show where the decoder lost the trace. Error events
are quite important. Users must know if what they are seeing is a complete
picture or not.
The "d" option will cause the creation of a file "intel_pt.log" containing all
decoded packets and instructions. Note that this option slows down the decoder
and that the resulting file may be very large.
In addition, the period of the "instructions" event can be specified. e.g.
--itrace=i10us
sets the period to 10us i.e. one instruction sample is synthesized for each 10
microseconds of trace. Alternatives to "us" are "ms" (milliseconds),
"ns" (nanoseconds), "t" (TSC ticks) or "i" (instructions).
"ms", "us" and "ns" are converted to TSC ticks.
The timing information included with Intel PT does not give the time of every
instruction. Consequently, for the purpose of sampling, the decoder estimates
the time since the last timing packet based on 1 tick per instruction. The time
on the sample is *not* adjusted and reflects the last known value of TSC.
For Intel PT, the default period is 100us.
Also the call chain size (default 16, max. 1024) for instructions or
transactions events can be specified. e.g.
--itrace=ig32
--itrace=xg32
To disable trace decoding entirely, use the option --no-itrace.
dump option
-----------
perf script has an option (-D) to "dump" the events i.e. display the binary
data.
When -D is used, Intel PT packets are displayed. The packet decoder does not
pay attention to PSB packets, but just decodes the bytes - so the packets seen
by the actual decoder may not be identical in places where the data is corrupt.
One example of that would be when the buffer-switching interrupt has been too
slow, and the buffer has been filled completely. In that case, the last packet
in the buffer might be truncated and immediately followed by a PSB as the trace
continues in the next buffer.
To disable the display of Intel PT packets, combine the -D option with
--no-itrace.
perf report
===========
By default, perf report will decode trace data found in the perf.data file.
This can be further controlled by new option --itrace exactly the same as
perf script, with the exception that the default is --itrace=igxe.
perf inject
===========
perf inject also accepts the --itrace option in which case tracing data is
removed and replaced with the synthesized events. e.g.
perf inject --itrace -i perf.data -o perf.data.new
...@@ -76,6 +76,12 @@ include config/utilities.mak ...@@ -76,6 +76,12 @@ include config/utilities.mak
# #
# Define NO_AUXTRACE if you do not want AUX area tracing support # Define NO_AUXTRACE if you do not want AUX area tracing support
# As per kernel Makefile, avoid funny character set dependencies
unexport LC_ALL
LC_COLLATE=C
LC_NUMERIC=C
export LC_COLLATE LC_NUMERIC
ifeq ($(srctree),) ifeq ($(srctree),)
srctree := $(patsubst %/,%,$(dir $(shell pwd))) srctree := $(patsubst %/,%,$(dir $(shell pwd)))
srctree := $(patsubst %/,%,$(dir $(srctree))) srctree := $(patsubst %/,%,$(dir $(srctree)))
...@@ -135,6 +141,7 @@ INSTALL = install ...@@ -135,6 +141,7 @@ INSTALL = install
FLEX = flex FLEX = flex
BISON = bison BISON = bison
STRIP = strip STRIP = strip
AWK = awk
LIB_DIR = $(srctree)/tools/lib/api/ LIB_DIR = $(srctree)/tools/lib/api/
TRACE_EVENT_DIR = $(srctree)/tools/lib/traceevent/ TRACE_EVENT_DIR = $(srctree)/tools/lib/traceevent/
...@@ -289,7 +296,7 @@ strip: $(PROGRAMS) $(OUTPUT)perf ...@@ -289,7 +296,7 @@ strip: $(PROGRAMS) $(OUTPUT)perf
PERF_IN := $(OUTPUT)perf-in.o PERF_IN := $(OUTPUT)perf-in.o
export srctree OUTPUT RM CC LD AR CFLAGS V BISON FLEX export srctree OUTPUT RM CC LD AR CFLAGS V BISON FLEX AWK
build := -f $(srctree)/tools/build/Makefile.build dir=. obj build := -f $(srctree)/tools/build/Makefile.build dir=. obj
$(PERF_IN): $(OUTPUT)PERF-VERSION-FILE $(OUTPUT)common-cmds.h FORCE $(PERF_IN): $(OUTPUT)PERF-VERSION-FILE $(OUTPUT)common-cmds.h FORCE
...@@ -565,7 +572,8 @@ clean: $(LIBTRACEEVENT)-clean $(LIBAPI)-clean config-clean ...@@ -565,7 +572,8 @@ clean: $(LIBTRACEEVENT)-clean $(LIBAPI)-clean config-clean
$(Q)find . -name '*.o' -delete -o -name '\.*.cmd' -delete -o -name '\.*.d' -delete $(Q)find . -name '*.o' -delete -o -name '\.*.cmd' -delete -o -name '\.*.d' -delete
$(Q)$(RM) $(OUTPUT).config-detected $(Q)$(RM) $(OUTPUT).config-detected
$(call QUIET_CLEAN, core-progs) $(RM) $(ALL_PROGRAMS) perf perf-read-vdso32 perf-read-vdsox32 $(call QUIET_CLEAN, core-progs) $(RM) $(ALL_PROGRAMS) perf perf-read-vdso32 perf-read-vdsox32
$(call QUIET_CLEAN, core-gen) $(RM) *.spec *.pyc *.pyo */*.pyc */*.pyo $(OUTPUT)common-cmds.h TAGS tags cscope* $(OUTPUT)PERF-VERSION-FILE $(OUTPUT)FEATURE-DUMP $(OUTPUT)util/*-bison* $(OUTPUT)util/*-flex* $(call QUIET_CLEAN, core-gen) $(RM) *.spec *.pyc *.pyo */*.pyc */*.pyo $(OUTPUT)common-cmds.h TAGS tags cscope* $(OUTPUT)PERF-VERSION-FILE $(OUTPUT)FEATURE-DUMP $(OUTPUT)util/*-bison* $(OUTPUT)util/*-flex* \
$(OUTPUT)util/intel-pt-decoder/inat-tables.c
$(QUIET_SUBDIR0)Documentation $(QUIET_SUBDIR1) clean $(QUIET_SUBDIR0)Documentation $(QUIET_SUBDIR1) clean
$(python-clean) $(python-clean)
......
libperf-y += header.o libperf-y += header.o
libperf-y += tsc.o libperf-y += tsc.o
libperf-y += pmu.o
libperf-y += kvm-stat.o libperf-y += kvm-stat.o
libperf-$(CONFIG_DWARF) += dwarf-regs.o libperf-$(CONFIG_DWARF) += dwarf-regs.o
libperf-$(CONFIG_LIBUNWIND) += unwind-libunwind.o libperf-$(CONFIG_LIBUNWIND) += unwind-libunwind.o
libperf-$(CONFIG_LIBDW_DWARF_UNWIND) += unwind-libdw.o libperf-$(CONFIG_LIBDW_DWARF_UNWIND) += unwind-libdw.o
libperf-$(CONFIG_AUXTRACE) += auxtrace.o
libperf-$(CONFIG_AUXTRACE) += intel-pt.o
/*
* auxtrace.c: AUX area tracing support
* Copyright (c) 2013-2014, Intel Corporation.
*
* This program is free software; you can redistribute it and/or modify it
* under the terms and conditions of the GNU General Public License,
* version 2, as published by the Free Software Foundation.
*
* This program is distributed in the hope it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
* more details.
*
*/
#include "../../util/header.h"
#include "../../util/auxtrace.h"
#include "../../util/intel-pt.h"
struct auxtrace_record *auxtrace_record__init(struct perf_evlist *evlist __maybe_unused,
int *err)
{
char buffer[64];
int ret;
*err = 0;
ret = get_cpuid(buffer, sizeof(buffer));
if (ret) {
*err = ret;
return NULL;
}
if (!strncmp(buffer, "GenuineIntel,", 13))
return intel_pt_recording_init(err);
return NULL;
}
/*
* intel_pt.c: Intel Processor Trace support
* Copyright (c) 2013-2015, Intel Corporation.
*
* This program is free software; you can redistribute it and/or modify it
* under the terms and conditions of the GNU General Public License,
* version 2, as published by the Free Software Foundation.
*
* This program is distributed in the hope it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
* more details.
*
*/
#include <stdbool.h>
#include <linux/kernel.h>
#include <linux/types.h>
#include <linux/bitops.h>
#include <linux/log2.h>
#include "../../perf.h"
#include "../../util/session.h"
#include "../../util/event.h"
#include "../../util/evlist.h"
#include "../../util/evsel.h"
#include "../../util/cpumap.h"
#include "../../util/parse-options.h"
#include "../../util/parse-events.h"
#include "../../util/pmu.h"
#include "../../util/debug.h"
#include "../../util/auxtrace.h"
#include "../../util/tsc.h"
#include "../../util/intel-pt.h"
#define KiB(x) ((x) * 1024)
#define MiB(x) ((x) * 1024 * 1024)
#define KiB_MASK(x) (KiB(x) - 1)
#define MiB_MASK(x) (MiB(x) - 1)
#define INTEL_PT_DEFAULT_SAMPLE_SIZE KiB(4)
#define INTEL_PT_MAX_SAMPLE_SIZE KiB(60)
#define INTEL_PT_PSB_PERIOD_NEAR 256
struct intel_pt_snapshot_ref {
void *ref_buf;
size_t ref_offset;
bool wrapped;
};
struct intel_pt_recording {
struct auxtrace_record itr;
struct perf_pmu *intel_pt_pmu;
int have_sched_switch;
struct perf_evlist *evlist;
bool snapshot_mode;
bool snapshot_init_done;
size_t snapshot_size;
size_t snapshot_ref_buf_size;
int snapshot_ref_cnt;
struct intel_pt_snapshot_ref *snapshot_refs;
};
static int intel_pt_parse_terms_with_default(struct list_head *formats,
const char *str,
u64 *config)
{
struct list_head *terms;
struct perf_event_attr attr = { .size = 0, };
int err;
terms = malloc(sizeof(struct list_head));
if (!terms)
return -ENOMEM;
INIT_LIST_HEAD(terms);
err = parse_events_terms(terms, str);
if (err)
goto out_free;
attr.config = *config;
err = perf_pmu__config_terms(formats, &attr, terms, true, NULL);
if (err)
goto out_free;
*config = attr.config;
out_free:
parse_events__free_terms(terms);
return err;
}
static int intel_pt_parse_terms(struct list_head *formats, const char *str,
u64 *config)
{
*config = 0;
return intel_pt_parse_terms_with_default(formats, str, config);
}
static size_t intel_pt_psb_period(struct perf_pmu *intel_pt_pmu __maybe_unused,
struct perf_evlist *evlist __maybe_unused)
{
return 256;
}
static u64 intel_pt_default_config(struct perf_pmu *intel_pt_pmu)
{
u64 config;
intel_pt_parse_terms(&intel_pt_pmu->format, "tsc", &config);
return config;
}
static int intel_pt_parse_snapshot_options(struct auxtrace_record *itr,
struct record_opts *opts,
const char *str)
{
struct intel_pt_recording *ptr =
container_of(itr, struct intel_pt_recording, itr);
unsigned long long snapshot_size = 0;
char *endptr;
if (str) {
snapshot_size = strtoull(str, &endptr, 0);
if (*endptr || snapshot_size > SIZE_MAX)
return -1;
}
opts->auxtrace_snapshot_mode = true;
opts->auxtrace_snapshot_size = snapshot_size;
ptr->snapshot_size = snapshot_size;
return 0;
}
struct perf_event_attr *
intel_pt_pmu_default_config(struct perf_pmu *intel_pt_pmu)
{
struct perf_event_attr *attr;
attr = zalloc(sizeof(struct perf_event_attr));
if (!attr)
return NULL;
attr->config = intel_pt_default_config(intel_pt_pmu);
intel_pt_pmu->selectable = true;
return attr;
}
static size_t intel_pt_info_priv_size(struct auxtrace_record *itr __maybe_unused)
{
return INTEL_PT_AUXTRACE_PRIV_SIZE;
}
static int intel_pt_info_fill(struct auxtrace_record *itr,
struct perf_session *session,
struct auxtrace_info_event *auxtrace_info,
size_t priv_size)
{
struct intel_pt_recording *ptr =
container_of(itr, struct intel_pt_recording, itr);
struct perf_pmu *intel_pt_pmu = ptr->intel_pt_pmu;
struct perf_event_mmap_page *pc;
struct perf_tsc_conversion tc = { .time_mult = 0, };
bool cap_user_time_zero = false, per_cpu_mmaps;
u64 tsc_bit, noretcomp_bit;
int err;
if (priv_size != INTEL_PT_AUXTRACE_PRIV_SIZE)
return -EINVAL;
intel_pt_parse_terms(&intel_pt_pmu->format, "tsc", &tsc_bit);
intel_pt_parse_terms(&intel_pt_pmu->format, "noretcomp",
&noretcomp_bit);
if (!session->evlist->nr_mmaps)
return -EINVAL;
pc = session->evlist->mmap[0].base;
if (pc) {
err = perf_read_tsc_conversion(pc, &tc);
if (err) {
if (err != -EOPNOTSUPP)
return err;
} else {
cap_user_time_zero = tc.time_mult != 0;
}
if (!cap_user_time_zero)
ui__warning("Intel Processor Trace: TSC not available\n");
}
per_cpu_mmaps = !cpu_map__empty(session->evlist->cpus);
auxtrace_info->type = PERF_AUXTRACE_INTEL_PT;
auxtrace_info->priv[INTEL_PT_PMU_TYPE] = intel_pt_pmu->type;
auxtrace_info->priv[INTEL_PT_TIME_SHIFT] = tc.time_shift;
auxtrace_info->priv[INTEL_PT_TIME_MULT] = tc.time_mult;
auxtrace_info->priv[INTEL_PT_TIME_ZERO] = tc.time_zero;
auxtrace_info->priv[INTEL_PT_CAP_USER_TIME_ZERO] = cap_user_time_zero;
auxtrace_info->priv[INTEL_PT_TSC_BIT] = tsc_bit;
auxtrace_info->priv[INTEL_PT_NORETCOMP_BIT] = noretcomp_bit;
auxtrace_info->priv[INTEL_PT_HAVE_SCHED_SWITCH] = ptr->have_sched_switch;
auxtrace_info->priv[INTEL_PT_SNAPSHOT_MODE] = ptr->snapshot_mode;
auxtrace_info->priv[INTEL_PT_PER_CPU_MMAPS] = per_cpu_mmaps;
return 0;
}
static int intel_pt_track_switches(struct perf_evlist *evlist)
{
const char *sched_switch = "sched:sched_switch";
struct perf_evsel *evsel;
int err;
if (!perf_evlist__can_select_event(evlist, sched_switch))
return -EPERM;
err = parse_events(evlist, sched_switch, NULL);
if (err) {
pr_debug2("%s: failed to parse %s, error %d\n",
__func__, sched_switch, err);
return err;
}
evsel = perf_evlist__last(evlist);
perf_evsel__set_sample_bit(evsel, CPU);
perf_evsel__set_sample_bit(evsel, TIME);
evsel->system_wide = true;
evsel->no_aux_samples = true;
evsel->immediate = true;
return 0;
}
static int intel_pt_recording_options(struct auxtrace_record *itr,
struct perf_evlist *evlist,
struct record_opts *opts)
{
struct intel_pt_recording *ptr =
container_of(itr, struct intel_pt_recording, itr);
struct perf_pmu *intel_pt_pmu = ptr->intel_pt_pmu;
bool have_timing_info;
struct perf_evsel *evsel, *intel_pt_evsel = NULL;
const struct cpu_map *cpus = evlist->cpus;
bool privileged = geteuid() == 0 || perf_event_paranoid() < 0;
u64 tsc_bit;
ptr->evlist = evlist;
ptr->snapshot_mode = opts->auxtrace_snapshot_mode;
evlist__for_each(evlist, evsel) {
if (evsel->attr.type == intel_pt_pmu->type) {
if (intel_pt_evsel) {
pr_err("There may be only one " INTEL_PT_PMU_NAME " event\n");
return -EINVAL;
}
evsel->attr.freq = 0;
evsel->attr.sample_period = 1;
intel_pt_evsel = evsel;
opts->full_auxtrace = true;
}
}
if (opts->auxtrace_snapshot_mode && !opts->full_auxtrace) {
pr_err("Snapshot mode (-S option) requires " INTEL_PT_PMU_NAME " PMU event (-e " INTEL_PT_PMU_NAME ")\n");
return -EINVAL;
}
if (opts->use_clockid) {
pr_err("Cannot use clockid (-k option) with " INTEL_PT_PMU_NAME "\n");
return -EINVAL;
}
if (!opts->full_auxtrace)
return 0;
/* Set default sizes for snapshot mode */
if (opts->auxtrace_snapshot_mode) {
size_t psb_period = intel_pt_psb_period(intel_pt_pmu, evlist);
if (!opts->auxtrace_snapshot_size && !opts->auxtrace_mmap_pages) {
if (privileged) {
opts->auxtrace_mmap_pages = MiB(4) / page_size;
} else {
opts->auxtrace_mmap_pages = KiB(128) / page_size;
if (opts->mmap_pages == UINT_MAX)
opts->mmap_pages = KiB(256) / page_size;
}
} else if (!opts->auxtrace_mmap_pages && !privileged &&
opts->mmap_pages == UINT_MAX) {
opts->mmap_pages = KiB(256) / page_size;
}
if (!opts->auxtrace_snapshot_size)
opts->auxtrace_snapshot_size =
opts->auxtrace_mmap_pages * (size_t)page_size;
if (!opts->auxtrace_mmap_pages) {
size_t sz = opts->auxtrace_snapshot_size;
sz = round_up(sz, page_size) / page_size;
opts->auxtrace_mmap_pages = roundup_pow_of_two(sz);
}
if (opts->auxtrace_snapshot_size >
opts->auxtrace_mmap_pages * (size_t)page_size) {
pr_err("Snapshot size %zu must not be greater than AUX area tracing mmap size %zu\n",
opts->auxtrace_snapshot_size,
opts->auxtrace_mmap_pages * (size_t)page_size);
return -EINVAL;
}
if (!opts->auxtrace_snapshot_size || !opts->auxtrace_mmap_pages) {
pr_err("Failed to calculate default snapshot size and/or AUX area tracing mmap pages\n");
return -EINVAL;
}
pr_debug2("Intel PT snapshot size: %zu\n",
opts->auxtrace_snapshot_size);
if (psb_period &&
opts->auxtrace_snapshot_size <= psb_period +
INTEL_PT_PSB_PERIOD_NEAR)
ui__warning("Intel PT snapshot size (%zu) may be too small for PSB period (%zu)\n",
opts->auxtrace_snapshot_size, psb_period);
}
/* Set default sizes for full trace mode */
if (opts->full_auxtrace && !opts->auxtrace_mmap_pages) {
if (privileged) {
opts->auxtrace_mmap_pages = MiB(4) / page_size;
} else {
opts->auxtrace_mmap_pages = KiB(128) / page_size;
if (opts->mmap_pages == UINT_MAX)
opts->mmap_pages = KiB(256) / page_size;
}
}
/* Validate auxtrace_mmap_pages */
if (opts->auxtrace_mmap_pages) {
size_t sz = opts->auxtrace_mmap_pages * (size_t)page_size;
size_t min_sz;
if (opts->auxtrace_snapshot_mode)
min_sz = KiB(4);
else
min_sz = KiB(8);
if (sz < min_sz || !is_power_of_2(sz)) {
pr_err("Invalid mmap size for Intel Processor Trace: must be at least %zuKiB and a power of 2\n",
min_sz / 1024);
return -EINVAL;
}
}
intel_pt_parse_terms(&intel_pt_pmu->format, "tsc", &tsc_bit);
if (opts->full_auxtrace && (intel_pt_evsel->attr.config & tsc_bit))
have_timing_info = true;
else
have_timing_info = false;
/*
* Per-cpu recording needs sched_switch events to distinguish different
* threads.
*/
if (have_timing_info && !cpu_map__empty(cpus)) {
int err;
err = intel_pt_track_switches(evlist);
if (err == -EPERM)
pr_debug2("Unable to select sched:sched_switch\n");
else if (err)
return err;
else
ptr->have_sched_switch = 1;
}
if (intel_pt_evsel) {
/*
* To obtain the auxtrace buffer file descriptor, the auxtrace
* event must come first.
*/
perf_evlist__to_front(evlist, intel_pt_evsel);
/*
* In the case of per-cpu mmaps, we need the CPU on the
* AUX event.
*/
if (!cpu_map__empty(cpus))
perf_evsel__set_sample_bit(intel_pt_evsel, CPU);
}
/* Add dummy event to keep tracking */
if (opts->full_auxtrace) {
struct perf_evsel *tracking_evsel;
int err;
err = parse_events(evlist, "dummy:u", NULL);
if (err)
return err;
tracking_evsel = perf_evlist__last(evlist);
perf_evlist__set_tracking_event(evlist, tracking_evsel);
tracking_evsel->attr.freq = 0;
tracking_evsel->attr.sample_period = 1;
/* In per-cpu case, always need the time of mmap events etc */
if (!cpu_map__empty(cpus))
perf_evsel__set_sample_bit(tracking_evsel, TIME);
}
/*
* Warn the user when we do not have enough information to decode i.e.
* per-cpu with no sched_switch (except workload-only).
*/
if (!ptr->have_sched_switch && !cpu_map__empty(cpus) &&
!target__none(&opts->target))
ui__warning("Intel Processor Trace decoding will not be possible except for kernel tracing!\n");
return 0;
}
static int intel_pt_snapshot_start(struct auxtrace_record *itr)
{
struct intel_pt_recording *ptr =
container_of(itr, struct intel_pt_recording, itr);
struct perf_evsel *evsel;
evlist__for_each(ptr->evlist, evsel) {
if (evsel->attr.type == ptr->intel_pt_pmu->type)
return perf_evlist__disable_event(ptr->evlist, evsel);
}
return -EINVAL;
}
static int intel_pt_snapshot_finish(struct auxtrace_record *itr)
{
struct intel_pt_recording *ptr =
container_of(itr, struct intel_pt_recording, itr);
struct perf_evsel *evsel;
evlist__for_each(ptr->evlist, evsel) {
if (evsel->attr.type == ptr->intel_pt_pmu->type)
return perf_evlist__enable_event(ptr->evlist, evsel);
}
return -EINVAL;
}
static int intel_pt_alloc_snapshot_refs(struct intel_pt_recording *ptr, int idx)
{
const size_t sz = sizeof(struct intel_pt_snapshot_ref);
int cnt = ptr->snapshot_ref_cnt, new_cnt = cnt * 2;
struct intel_pt_snapshot_ref *refs;
if (!new_cnt)
new_cnt = 16;
while (new_cnt <= idx)
new_cnt *= 2;
refs = calloc(new_cnt, sz);
if (!refs)
return -ENOMEM;
memcpy(refs, ptr->snapshot_refs, cnt * sz);
ptr->snapshot_refs = refs;
ptr->snapshot_ref_cnt = new_cnt;
return 0;
}
static void intel_pt_free_snapshot_refs(struct intel_pt_recording *ptr)
{
int i;
for (i = 0; i < ptr->snapshot_ref_cnt; i++)
zfree(&ptr->snapshot_refs[i].ref_buf);
zfree(&ptr->snapshot_refs);
}
static void intel_pt_recording_free(struct auxtrace_record *itr)
{
struct intel_pt_recording *ptr =
container_of(itr, struct intel_pt_recording, itr);
intel_pt_free_snapshot_refs(ptr);
free(ptr);
}
static int intel_pt_alloc_snapshot_ref(struct intel_pt_recording *ptr, int idx,
size_t snapshot_buf_size)
{
size_t ref_buf_size = ptr->snapshot_ref_buf_size;
void *ref_buf;
ref_buf = zalloc(ref_buf_size);
if (!ref_buf)
return -ENOMEM;
ptr->snapshot_refs[idx].ref_buf = ref_buf;
ptr->snapshot_refs[idx].ref_offset = snapshot_buf_size - ref_buf_size;
return 0;
}
static size_t intel_pt_snapshot_ref_buf_size(struct intel_pt_recording *ptr,
size_t snapshot_buf_size)
{
const size_t max_size = 256 * 1024;
size_t buf_size = 0, psb_period;
if (ptr->snapshot_size <= 64 * 1024)
return 0;
psb_period = intel_pt_psb_period(ptr->intel_pt_pmu, ptr->evlist);
if (psb_period)
buf_size = psb_period * 2;
if (!buf_size || buf_size > max_size)
buf_size = max_size;
if (buf_size >= snapshot_buf_size)
return 0;
if (buf_size >= ptr->snapshot_size / 2)
return 0;
return buf_size;
}
static int intel_pt_snapshot_init(struct intel_pt_recording *ptr,
size_t snapshot_buf_size)
{
if (ptr->snapshot_init_done)
return 0;
ptr->snapshot_init_done = true;
ptr->snapshot_ref_buf_size = intel_pt_snapshot_ref_buf_size(ptr,
snapshot_buf_size);
return 0;
}
/**
* intel_pt_compare_buffers - compare bytes in a buffer to a circular buffer.
* @buf1: first buffer
* @compare_size: number of bytes to compare
* @buf2: second buffer (a circular buffer)
* @offs2: offset in second buffer
* @buf2_size: size of second buffer
*
* The comparison allows for the possibility that the bytes to compare in the
* circular buffer are not contiguous. It is assumed that @compare_size <=
* @buf2_size. This function returns %false if the bytes are identical, %true
* otherwise.
*/
static bool intel_pt_compare_buffers(void *buf1, size_t compare_size,
void *buf2, size_t offs2, size_t buf2_size)
{
size_t end2 = offs2 + compare_size, part_size;
if (end2 <= buf2_size)
return memcmp(buf1, buf2 + offs2, compare_size);
part_size = end2 - buf2_size;
if (memcmp(buf1, buf2 + offs2, part_size))
return true;
compare_size -= part_size;
return memcmp(buf1 + part_size, buf2, compare_size);
}
static bool intel_pt_compare_ref(void *ref_buf, size_t ref_offset,
size_t ref_size, size_t buf_size,
void *data, size_t head)
{
size_t ref_end = ref_offset + ref_size;
if (ref_end > buf_size) {
if (head > ref_offset || head < ref_end - buf_size)
return true;
} else if (head > ref_offset && head < ref_end) {
return true;
}
return intel_pt_compare_buffers(ref_buf, ref_size, data, ref_offset,
buf_size);
}
static void intel_pt_copy_ref(void *ref_buf, size_t ref_size, size_t buf_size,
void *data, size_t head)
{
if (head >= ref_size) {
memcpy(ref_buf, data + head - ref_size, ref_size);
} else {
memcpy(ref_buf, data, head);
ref_size -= head;
memcpy(ref_buf + head, data + buf_size - ref_size, ref_size);
}
}
static bool intel_pt_wrapped(struct intel_pt_recording *ptr, int idx,
struct auxtrace_mmap *mm, unsigned char *data,
u64 head)
{
struct intel_pt_snapshot_ref *ref = &ptr->snapshot_refs[idx];
bool wrapped;
wrapped = intel_pt_compare_ref(ref->ref_buf, ref->ref_offset,
ptr->snapshot_ref_buf_size, mm->len,
data, head);
intel_pt_copy_ref(ref->ref_buf, ptr->snapshot_ref_buf_size, mm->len,
data, head);
return wrapped;
}
static bool intel_pt_first_wrap(u64 *data, size_t buf_size)
{
int i, a, b;
b = buf_size >> 3;
a = b - 512;
if (a < 0)
a = 0;
for (i = a; i < b; i++) {
if (data[i])
return true;
}
return false;
}
static int intel_pt_find_snapshot(struct auxtrace_record *itr, int idx,
struct auxtrace_mmap *mm, unsigned char *data,
u64 *head, u64 *old)
{
struct intel_pt_recording *ptr =
container_of(itr, struct intel_pt_recording, itr);
bool wrapped;
int err;
pr_debug3("%s: mmap index %d old head %zu new head %zu\n",
__func__, idx, (size_t)*old, (size_t)*head);
err = intel_pt_snapshot_init(ptr, mm->len);
if (err)
goto out_err;
if (idx >= ptr->snapshot_ref_cnt) {
err = intel_pt_alloc_snapshot_refs(ptr, idx);
if (err)
goto out_err;
}
if (ptr->snapshot_ref_buf_size) {
if (!ptr->snapshot_refs[idx].ref_buf) {
err = intel_pt_alloc_snapshot_ref(ptr, idx, mm->len);
if (err)
goto out_err;
}
wrapped = intel_pt_wrapped(ptr, idx, mm, data, *head);
} else {
wrapped = ptr->snapshot_refs[idx].wrapped;
if (!wrapped && intel_pt_first_wrap((u64 *)data, mm->len)) {
ptr->snapshot_refs[idx].wrapped = true;
wrapped = true;
}
}
/*
* In full trace mode 'head' continually increases. However in snapshot
* mode 'head' is an offset within the buffer. Here 'old' and 'head'
* are adjusted to match the full trace case which expects that 'old' is
* always less than 'head'.
*/
if (wrapped) {
*old = *head;
*head += mm->len;
} else {
if (mm->mask)
*old &= mm->mask;
else
*old %= mm->len;
if (*old > *head)
*head += mm->len;
}
pr_debug3("%s: wrap-around %sdetected, adjusted old head %zu adjusted new head %zu\n",
__func__, wrapped ? "" : "not ", (size_t)*old, (size_t)*head);
return 0;
out_err:
pr_err("%s: failed, error %d\n", __func__, err);
return err;
}
static u64 intel_pt_reference(struct auxtrace_record *itr __maybe_unused)
{
return rdtsc();
}
static int intel_pt_read_finish(struct auxtrace_record *itr, int idx)
{
struct intel_pt_recording *ptr =
container_of(itr, struct intel_pt_recording, itr);
struct perf_evsel *evsel;
evlist__for_each(ptr->evlist, evsel) {
if (evsel->attr.type == ptr->intel_pt_pmu->type)
return perf_evlist__enable_event_idx(ptr->evlist, evsel,
idx);
}
return -EINVAL;
}
struct auxtrace_record *intel_pt_recording_init(int *err)
{
struct perf_pmu *intel_pt_pmu = perf_pmu__find(INTEL_PT_PMU_NAME);
struct intel_pt_recording *ptr;
if (!intel_pt_pmu)
return NULL;
ptr = zalloc(sizeof(struct intel_pt_recording));
if (!ptr) {
*err = -ENOMEM;
return NULL;
}
ptr->intel_pt_pmu = intel_pt_pmu;
ptr->itr.recording_options = intel_pt_recording_options;
ptr->itr.info_priv_size = intel_pt_info_priv_size;
ptr->itr.info_fill = intel_pt_info_fill;
ptr->itr.free = intel_pt_recording_free;
ptr->itr.snapshot_start = intel_pt_snapshot_start;
ptr->itr.snapshot_finish = intel_pt_snapshot_finish;
ptr->itr.find_snapshot = intel_pt_find_snapshot;
ptr->itr.parse_snapshot_options = intel_pt_parse_snapshot_options;
ptr->itr.reference = intel_pt_reference;
ptr->itr.read_finish = intel_pt_read_finish;
return &ptr->itr;
}
#include <string.h>
#include <linux/perf_event.h>
#include "../../util/intel-pt.h"
#include "../../util/pmu.h"
struct perf_event_attr *perf_pmu__get_default_config(struct perf_pmu *pmu __maybe_unused)
{
#ifdef HAVE_AUXTRACE_SUPPORT
if (!strcmp(pmu->name, INTEL_PT_PMU_NAME))
return intel_pt_pmu_default_config(pmu);
#endif
return NULL;
}
...@@ -1561,6 +1561,22 @@ static int have_cmd(int argc, const char **argv) ...@@ -1561,6 +1561,22 @@ static int have_cmd(int argc, const char **argv)
return 0; return 0;
} }
static void script__setup_sample_type(struct perf_script *script)
{
struct perf_session *session = script->session;
u64 sample_type = perf_evlist__combined_sample_type(session->evlist);
if (symbol_conf.use_callchain || symbol_conf.cumulate_callchain) {
if ((sample_type & PERF_SAMPLE_REGS_USER) &&
(sample_type & PERF_SAMPLE_STACK_USER))
callchain_param.record_mode = CALLCHAIN_DWARF;
else if (sample_type & PERF_SAMPLE_BRANCH_STACK)
callchain_param.record_mode = CALLCHAIN_LBR;
else
callchain_param.record_mode = CALLCHAIN_FP;
}
}
int cmd_script(int argc, const char **argv, const char *prefix __maybe_unused) int cmd_script(int argc, const char **argv, const char *prefix __maybe_unused)
{ {
bool show_full_info = false; bool show_full_info = false;
...@@ -1849,6 +1865,7 @@ int cmd_script(int argc, const char **argv, const char *prefix __maybe_unused) ...@@ -1849,6 +1865,7 @@ int cmd_script(int argc, const char **argv, const char *prefix __maybe_unused)
goto out_delete; goto out_delete;
script.session = session; script.session = session;
script__setup_sample_type(&script);
session->itrace_synth_opts = &itrace_synth_opts; session->itrace_synth_opts = &itrace_synth_opts;
......
...@@ -1315,7 +1315,10 @@ struct thread_trace { ...@@ -1315,7 +1315,10 @@ struct thread_trace {
double runtime_ms; double runtime_ms;
struct { struct {
unsigned long ptr; unsigned long ptr;
int entry_str_pos; short int entry_str_pos;
bool pending_open;
unsigned int namelen;
char *name;
} filename; } filename;
struct { struct {
int max; int max;
...@@ -1391,7 +1394,6 @@ struct trace { ...@@ -1391,7 +1394,6 @@ struct trace {
size_t nr; size_t nr;
int *entries; int *entries;
} ev_qualifier_ids; } ev_qualifier_ids;
const char *last_vfs_getname;
struct intlist *tid_list; struct intlist *tid_list;
struct intlist *pid_list; struct intlist *pid_list;
struct { struct {
...@@ -1966,8 +1968,11 @@ static int trace__sys_enter(struct trace *trace, struct perf_evsel *evsel, ...@@ -1966,8 +1968,11 @@ static int trace__sys_enter(struct trace *trace, struct perf_evsel *evsel,
trace__fprintf_entry_head(trace, thread, 1, sample->time, trace->output); trace__fprintf_entry_head(trace, thread, 1, sample->time, trace->output);
fprintf(trace->output, "%-70s\n", ttrace->entry_str); fprintf(trace->output, "%-70s\n", ttrace->entry_str);
} }
} else } else {
ttrace->entry_pending = true; ttrace->entry_pending = true;
/* See trace__vfs_getname & trace__sys_exit */
ttrace->filename.pending_open = false;
}
if (trace->current != thread) { if (trace->current != thread) {
thread__put(trace->current); thread__put(trace->current);
...@@ -2003,9 +2008,9 @@ static int trace__sys_exit(struct trace *trace, struct perf_evsel *evsel, ...@@ -2003,9 +2008,9 @@ static int trace__sys_exit(struct trace *trace, struct perf_evsel *evsel,
ret = perf_evsel__sc_tp_uint(evsel, ret, sample); ret = perf_evsel__sc_tp_uint(evsel, ret, sample);
if (id == trace->audit.open_id && ret >= 0 && trace->last_vfs_getname) { if (id == trace->audit.open_id && ret >= 0 && ttrace->filename.pending_open) {
trace__set_fd_pathname(thread, ret, trace->last_vfs_getname); trace__set_fd_pathname(thread, ret, ttrace->filename.name);
trace->last_vfs_getname = NULL; ttrace->filename.pending_open = false;
++trace->stats.vfs_getname; ++trace->stats.vfs_getname;
} }
...@@ -2065,9 +2070,7 @@ static int trace__vfs_getname(struct trace *trace, struct perf_evsel *evsel, ...@@ -2065,9 +2070,7 @@ static int trace__vfs_getname(struct trace *trace, struct perf_evsel *evsel,
size_t filename_len, entry_str_len, to_move; size_t filename_len, entry_str_len, to_move;
ssize_t remaining_space; ssize_t remaining_space;
char *pos; char *pos;
const char *filename; const char *filename = perf_evsel__rawptr(evsel, sample, "pathname");
trace->last_vfs_getname = perf_evsel__rawptr(evsel, sample, "pathname");
if (!thread) if (!thread)
goto out; goto out;
...@@ -2076,6 +2079,21 @@ static int trace__vfs_getname(struct trace *trace, struct perf_evsel *evsel, ...@@ -2076,6 +2079,21 @@ static int trace__vfs_getname(struct trace *trace, struct perf_evsel *evsel,
if (!ttrace) if (!ttrace)
goto out; goto out;
filename_len = strlen(filename);
if (ttrace->filename.namelen < filename_len) {
char *f = realloc(ttrace->filename.name, filename_len + 1);
if (f == NULL)
goto out;
ttrace->filename.namelen = filename_len;
ttrace->filename.name = f;
}
strcpy(ttrace->filename.name, filename);
ttrace->filename.pending_open = true;
if (!ttrace->filename.ptr) if (!ttrace->filename.ptr)
goto out; goto out;
...@@ -2084,8 +2102,6 @@ static int trace__vfs_getname(struct trace *trace, struct perf_evsel *evsel, ...@@ -2084,8 +2102,6 @@ static int trace__vfs_getname(struct trace *trace, struct perf_evsel *evsel,
if (remaining_space <= 0) if (remaining_space <= 0)
goto out; goto out;
filename = trace->last_vfs_getname;
filename_len = strlen(filename);
if (filename_len > (size_t)remaining_space) { if (filename_len > (size_t)remaining_space) {
filename += filename_len - remaining_space; filename += filename_len - remaining_space;
filename_len = remaining_space; filename_len = remaining_space;
......
...@@ -297,7 +297,11 @@ ifndef NO_LIBELF ...@@ -297,7 +297,11 @@ ifndef NO_LIBELF
else else
CFLAGS += -DHAVE_DWARF_SUPPORT $(LIBDW_CFLAGS) CFLAGS += -DHAVE_DWARF_SUPPORT $(LIBDW_CFLAGS)
LDFLAGS += $(LIBDW_LDFLAGS) LDFLAGS += $(LIBDW_LDFLAGS)
EXTLIBS += -ldw DWARFLIBS := -ldw
ifeq ($(findstring -static,${LDFLAGS}),-static)
DWARFLIBS += -lelf -lebl -lz -llzma -lbz2
endif
EXTLIBS += ${DWARFLIBS}
$(call detected,CONFIG_DWARF) $(call detected,CONFIG_DWARF)
endif # PERF_HAVE_DWARF_REGS endif # PERF_HAVE_DWARF_REGS
endif # NO_DWARF endif # NO_DWARF
......
...@@ -78,6 +78,8 @@ libperf-$(CONFIG_X86) += tsc.o ...@@ -78,6 +78,8 @@ libperf-$(CONFIG_X86) += tsc.o
libperf-y += cloexec.o libperf-y += cloexec.o
libperf-y += thread-stack.o libperf-y += thread-stack.o
libperf-$(CONFIG_AUXTRACE) += auxtrace.o libperf-$(CONFIG_AUXTRACE) += auxtrace.o
libperf-$(CONFIG_AUXTRACE) += intel-pt-decoder/
libperf-$(CONFIG_AUXTRACE) += intel-pt.o
libperf-y += parse-branch-options.o libperf-y += parse-branch-options.o
libperf-$(CONFIG_LIBELF) += symbol-elf.o libperf-$(CONFIG_LIBELF) += symbol-elf.o
......
...@@ -621,7 +621,7 @@ int addr_map_symbol__account_cycles(struct addr_map_symbol *ams, ...@@ -621,7 +621,7 @@ int addr_map_symbol__account_cycles(struct addr_map_symbol *ams,
struct addr_map_symbol *start, struct addr_map_symbol *start,
unsigned cycles) unsigned cycles)
{ {
unsigned long saddr = 0; u64 saddr = 0;
int err; int err;
if (!cycles) if (!cycles)
...@@ -640,7 +640,7 @@ int addr_map_symbol__account_cycles(struct addr_map_symbol *ams, ...@@ -640,7 +640,7 @@ int addr_map_symbol__account_cycles(struct addr_map_symbol *ams,
start->addr == ams->sym->start + ams->map->start))) start->addr == ams->sym->start + ams->map->start)))
saddr = start->al_addr; saddr = start->al_addr;
if (saddr == 0) if (saddr == 0)
pr_debug2("BB with bad start: addr %lx start %lx sym %lx saddr %lx\n", pr_debug2("BB with bad start: addr %"PRIx64" start %"PRIx64" sym %"PRIx64" saddr %"PRIx64"\n",
ams->addr, ams->addr,
start ? start->addr : 0, start ? start->addr : 0,
ams->sym ? ams->sym->start + ams->map->start : 0, ams->sym ? ams->sym->start + ams->map->start : 0,
......
...@@ -47,6 +47,8 @@ ...@@ -47,6 +47,8 @@
#include "debug.h" #include "debug.h"
#include "parse-options.h" #include "parse-options.h"
#include "intel-pt.h"
int auxtrace_mmap__mmap(struct auxtrace_mmap *mm, int auxtrace_mmap__mmap(struct auxtrace_mmap *mm,
struct auxtrace_mmap_params *mp, struct auxtrace_mmap_params *mp,
void *userpg, int fd) void *userpg, int fd)
...@@ -876,7 +878,7 @@ static bool auxtrace__dont_decode(struct perf_session *session) ...@@ -876,7 +878,7 @@ static bool auxtrace__dont_decode(struct perf_session *session)
int perf_event__process_auxtrace_info(struct perf_tool *tool __maybe_unused, int perf_event__process_auxtrace_info(struct perf_tool *tool __maybe_unused,
union perf_event *event, union perf_event *event,
struct perf_session *session __maybe_unused) struct perf_session *session)
{ {
enum auxtrace_type type = event->auxtrace_info.type; enum auxtrace_type type = event->auxtrace_info.type;
...@@ -884,6 +886,8 @@ int perf_event__process_auxtrace_info(struct perf_tool *tool __maybe_unused, ...@@ -884,6 +886,8 @@ int perf_event__process_auxtrace_info(struct perf_tool *tool __maybe_unused,
fprintf(stdout, " type: %u\n", type); fprintf(stdout, " type: %u\n", type);
switch (type) { switch (type) {
case PERF_AUXTRACE_INTEL_PT:
return intel_pt_process_auxtrace_info(event, session);
case PERF_AUXTRACE_UNKNOWN: case PERF_AUXTRACE_UNKNOWN:
default: default:
return -EINVAL; return -EINVAL;
......
...@@ -39,6 +39,7 @@ struct events_stats; ...@@ -39,6 +39,7 @@ struct events_stats;
enum auxtrace_type { enum auxtrace_type {
PERF_AUXTRACE_UNKNOWN, PERF_AUXTRACE_UNKNOWN,
PERF_AUXTRACE_INTEL_PT,
}; };
enum itrace_period_type { enum itrace_period_type {
......
...@@ -770,7 +770,7 @@ int die_walk_lines(Dwarf_Die *rt_die, line_walk_callback_t callback, void *data) ...@@ -770,7 +770,7 @@ int die_walk_lines(Dwarf_Die *rt_die, line_walk_callback_t callback, void *data)
continue; continue;
} }
/* Filter lines based on address */ /* Filter lines based on address */
if (rt_die != cu_die) if (rt_die != cu_die) {
/* /*
* Address filtering * Address filtering
* The line is included in given function, and * The line is included in given function, and
...@@ -784,6 +784,7 @@ int die_walk_lines(Dwarf_Die *rt_die, line_walk_callback_t callback, void *data) ...@@ -784,6 +784,7 @@ int die_walk_lines(Dwarf_Die *rt_die, line_walk_callback_t callback, void *data)
decf != dwarf_decl_file(&die_mem)) decf != dwarf_decl_file(&die_mem))
continue; continue;
} }
}
/* Get source line */ /* Get source line */
fname = dwarf_linesrc(line, NULL, NULL); fname = dwarf_linesrc(line, NULL, NULL);
......
...@@ -115,6 +115,7 @@ void perf_evlist__close(struct perf_evlist *evlist); ...@@ -115,6 +115,7 @@ void perf_evlist__close(struct perf_evlist *evlist);
void perf_evlist__set_id_pos(struct perf_evlist *evlist); void perf_evlist__set_id_pos(struct perf_evlist *evlist);
bool perf_can_sample_identifier(void); bool perf_can_sample_identifier(void);
bool perf_can_record_switch_events(void); bool perf_can_record_switch_events(void);
bool perf_can_record_cpu_wide(void);
void perf_evlist__config(struct perf_evlist *evlist, struct record_opts *opts); void perf_evlist__config(struct perf_evlist *evlist, struct record_opts *opts);
int record_opts__config(struct record_opts *opts); int record_opts__config(struct record_opts *opts);
......
libperf-$(CONFIG_AUXTRACE) += intel-pt-pkt-decoder.o intel-pt-insn-decoder.o intel-pt-log.o intel-pt-decoder.o
inat_tables_script = util/intel-pt-decoder/gen-insn-attr-x86.awk
inat_tables_maps = util/intel-pt-decoder/x86-opcode-map.txt
$(OUTPUT)util/intel-pt-decoder/inat-tables.c: $(inat_tables_script) $(inat_tables_maps)
@$(call echo-cmd,gen)$(AWK) -f $(inat_tables_script) $(inat_tables_maps) > $@ || rm -f $@
$(OUTPUT)util/intel-pt-decoder/intel-pt-insn-decoder.o: util/intel-pt-decoder/inat.c $(OUTPUT)util/intel-pt-decoder/inat-tables.c
CFLAGS_intel-pt-insn-decoder.o += -I$(OUTPUT)util/intel-pt-decoder -Wno-override-init
#!/bin/awk -f
# gen-insn-attr-x86.awk: Instruction attribute table generator
# Written by Masami Hiramatsu <mhiramat@redhat.com>
#
# Usage: awk -f gen-insn-attr-x86.awk x86-opcode-map.txt > inat-tables.c
# Awk implementation sanity check
function check_awk_implement() {
if (sprintf("%x", 0) != "0")
return "Your awk has a printf-format problem."
return ""
}
# Clear working vars
function clear_vars() {
delete table
delete lptable2
delete lptable1
delete lptable3
eid = -1 # escape id
gid = -1 # group id
aid = -1 # AVX id
tname = ""
}
BEGIN {
# Implementation error checking
awkchecked = check_awk_implement()
if (awkchecked != "") {
print "Error: " awkchecked > "/dev/stderr"
print "Please try to use gawk." > "/dev/stderr"
exit 1
}
# Setup generating tables
print "/* x86 opcode map generated from x86-opcode-map.txt */"
print "/* Do not change this code. */\n"
ggid = 1
geid = 1
gaid = 0
delete etable
delete gtable
delete atable
opnd_expr = "^[A-Za-z/]"
ext_expr = "^\\("
sep_expr = "^\\|$"
group_expr = "^Grp[0-9A-Za-z]+"
imm_expr = "^[IJAOL][a-z]"
imm_flag["Ib"] = "INAT_MAKE_IMM(INAT_IMM_BYTE)"
imm_flag["Jb"] = "INAT_MAKE_IMM(INAT_IMM_BYTE)"
imm_flag["Iw"] = "INAT_MAKE_IMM(INAT_IMM_WORD)"
imm_flag["Id"] = "INAT_MAKE_IMM(INAT_IMM_DWORD)"
imm_flag["Iq"] = "INAT_MAKE_IMM(INAT_IMM_QWORD)"
imm_flag["Ap"] = "INAT_MAKE_IMM(INAT_IMM_PTR)"
imm_flag["Iz"] = "INAT_MAKE_IMM(INAT_IMM_VWORD32)"
imm_flag["Jz"] = "INAT_MAKE_IMM(INAT_IMM_VWORD32)"
imm_flag["Iv"] = "INAT_MAKE_IMM(INAT_IMM_VWORD)"
imm_flag["Ob"] = "INAT_MOFFSET"
imm_flag["Ov"] = "INAT_MOFFSET"
imm_flag["Lx"] = "INAT_MAKE_IMM(INAT_IMM_BYTE)"
modrm_expr = "^([CDEGMNPQRSUVW/][a-z]+|NTA|T[012])"
force64_expr = "\\([df]64\\)"
rex_expr = "^REX(\\.[XRWB]+)*"
fpu_expr = "^ESC" # TODO
lprefix1_expr = "\\((66|!F3)\\)"
lprefix2_expr = "\\(F3\\)"
lprefix3_expr = "\\((F2|!F3|66\\&F2)\\)"
lprefix_expr = "\\((66|F2|F3)\\)"
max_lprefix = 4
# All opcodes starting with lower-case 'v' or with (v1) superscript
# accepts VEX prefix
vexok_opcode_expr = "^v.*"
vexok_expr = "\\(v1\\)"
# All opcodes with (v) superscript supports *only* VEX prefix
vexonly_expr = "\\(v\\)"
prefix_expr = "\\(Prefix\\)"
prefix_num["Operand-Size"] = "INAT_PFX_OPNDSZ"
prefix_num["REPNE"] = "INAT_PFX_REPNE"
prefix_num["REP/REPE"] = "INAT_PFX_REPE"
prefix_num["XACQUIRE"] = "INAT_PFX_REPNE"
prefix_num["XRELEASE"] = "INAT_PFX_REPE"
prefix_num["LOCK"] = "INAT_PFX_LOCK"
prefix_num["SEG=CS"] = "INAT_PFX_CS"
prefix_num["SEG=DS"] = "INAT_PFX_DS"
prefix_num["SEG=ES"] = "INAT_PFX_ES"
prefix_num["SEG=FS"] = "INAT_PFX_FS"
prefix_num["SEG=GS"] = "INAT_PFX_GS"
prefix_num["SEG=SS"] = "INAT_PFX_SS"
prefix_num["Address-Size"] = "INAT_PFX_ADDRSZ"
prefix_num["VEX+1byte"] = "INAT_PFX_VEX2"
prefix_num["VEX+2byte"] = "INAT_PFX_VEX3"
clear_vars()
}
function semantic_error(msg) {
print "Semantic error at " NR ": " msg > "/dev/stderr"
exit 1
}
function debug(msg) {
print "DEBUG: " msg
}
function array_size(arr, i,c) {
c = 0
for (i in arr)
c++
return c
}
/^Table:/ {
print "/* " $0 " */"
if (tname != "")
semantic_error("Hit Table: before EndTable:.");
}
/^Referrer:/ {
if (NF != 1) {
# escape opcode table
ref = ""
for (i = 2; i <= NF; i++)
ref = ref $i
eid = escape[ref]
tname = sprintf("inat_escape_table_%d", eid)
}
}
/^AVXcode:/ {
if (NF != 1) {
# AVX/escape opcode table
aid = $2
if (gaid <= aid)
gaid = aid + 1
if (tname == "") # AVX only opcode table
tname = sprintf("inat_avx_table_%d", $2)
}
if (aid == -1 && eid == -1) # primary opcode table
tname = "inat_primary_table"
}
/^GrpTable:/ {
print "/* " $0 " */"
if (!($2 in group))
semantic_error("No group: " $2 )
gid = group[$2]
tname = "inat_group_table_" gid
}
function print_table(tbl,name,fmt,n)
{
print "const insn_attr_t " name " = {"
for (i = 0; i < n; i++) {
id = sprintf(fmt, i)
if (tbl[id])
print " [" id "] = " tbl[id] ","
}
print "};"
}
/^EndTable/ {
if (gid != -1) {
# print group tables
if (array_size(table) != 0) {
print_table(table, tname "[INAT_GROUP_TABLE_SIZE]",
"0x%x", 8)
gtable[gid,0] = tname
}
if (array_size(lptable1) != 0) {
print_table(lptable1, tname "_1[INAT_GROUP_TABLE_SIZE]",
"0x%x", 8)
gtable[gid,1] = tname "_1"
}
if (array_size(lptable2) != 0) {
print_table(lptable2, tname "_2[INAT_GROUP_TABLE_SIZE]",
"0x%x", 8)
gtable[gid,2] = tname "_2"
}
if (array_size(lptable3) != 0) {
print_table(lptable3, tname "_3[INAT_GROUP_TABLE_SIZE]",
"0x%x", 8)
gtable[gid,3] = tname "_3"
}
} else {
# print primary/escaped tables
if (array_size(table) != 0) {
print_table(table, tname "[INAT_OPCODE_TABLE_SIZE]",
"0x%02x", 256)
etable[eid,0] = tname
if (aid >= 0)
atable[aid,0] = tname
}
if (array_size(lptable1) != 0) {
print_table(lptable1,tname "_1[INAT_OPCODE_TABLE_SIZE]",
"0x%02x", 256)
etable[eid,1] = tname "_1"
if (aid >= 0)
atable[aid,1] = tname "_1"
}
if (array_size(lptable2) != 0) {
print_table(lptable2,tname "_2[INAT_OPCODE_TABLE_SIZE]",
"0x%02x", 256)
etable[eid,2] = tname "_2"
if (aid >= 0)
atable[aid,2] = tname "_2"
}
if (array_size(lptable3) != 0) {
print_table(lptable3,tname "_3[INAT_OPCODE_TABLE_SIZE]",
"0x%02x", 256)
etable[eid,3] = tname "_3"
if (aid >= 0)
atable[aid,3] = tname "_3"
}
}
print ""
clear_vars()
}
function add_flags(old,new) {
if (old && new)
return old " | " new
else if (old)
return old
else
return new
}
# convert operands to flags.
function convert_operands(count,opnd, i,j,imm,mod)
{
imm = null
mod = null
for (j = 1; j <= count; j++) {
i = opnd[j]
if (match(i, imm_expr) == 1) {
if (!imm_flag[i])
semantic_error("Unknown imm opnd: " i)
if (imm) {
if (i != "Ib")
semantic_error("Second IMM error")
imm = add_flags(imm, "INAT_SCNDIMM")
} else
imm = imm_flag[i]
} else if (match(i, modrm_expr))
mod = "INAT_MODRM"
}
return add_flags(imm, mod)
}
/^[0-9a-f]+\:/ {
if (NR == 1)
next
# get index
idx = "0x" substr($1, 1, index($1,":") - 1)
if (idx in table)
semantic_error("Redefine " idx " in " tname)
# check if escaped opcode
if ("escape" == $2) {
if ($3 != "#")
semantic_error("No escaped name")
ref = ""
for (i = 4; i <= NF; i++)
ref = ref $i
if (ref in escape)
semantic_error("Redefine escape (" ref ")")
escape[ref] = geid
geid++
table[idx] = "INAT_MAKE_ESCAPE(" escape[ref] ")"
next
}
variant = null
# converts
i = 2
while (i <= NF) {
opcode = $(i++)
delete opnds
ext = null
flags = null
opnd = null
# parse one opcode
if (match($i, opnd_expr)) {
opnd = $i
count = split($(i++), opnds, ",")
flags = convert_operands(count, opnds)
}
if (match($i, ext_expr))
ext = $(i++)
if (match($i, sep_expr))
i++
else if (i < NF)
semantic_error($i " is not a separator")
# check if group opcode
if (match(opcode, group_expr)) {
if (!(opcode in group)) {
group[opcode] = ggid
ggid++
}
flags = add_flags(flags, "INAT_MAKE_GROUP(" group[opcode] ")")
}
# check force(or default) 64bit
if (match(ext, force64_expr))
flags = add_flags(flags, "INAT_FORCE64")
# check REX prefix
if (match(opcode, rex_expr))
flags = add_flags(flags, "INAT_MAKE_PREFIX(INAT_PFX_REX)")
# check coprocessor escape : TODO
if (match(opcode, fpu_expr))
flags = add_flags(flags, "INAT_MODRM")
# check VEX codes
if (match(ext, vexonly_expr))
flags = add_flags(flags, "INAT_VEXOK | INAT_VEXONLY")
else if (match(ext, vexok_expr) || match(opcode, vexok_opcode_expr))
flags = add_flags(flags, "INAT_VEXOK")
# check prefixes
if (match(ext, prefix_expr)) {
if (!prefix_num[opcode])
semantic_error("Unknown prefix: " opcode)
flags = add_flags(flags, "INAT_MAKE_PREFIX(" prefix_num[opcode] ")")
}
if (length(flags) == 0)
continue
# check if last prefix
if (match(ext, lprefix1_expr)) {
lptable1[idx] = add_flags(lptable1[idx],flags)
variant = "INAT_VARIANT"
}
if (match(ext, lprefix2_expr)) {
lptable2[idx] = add_flags(lptable2[idx],flags)
variant = "INAT_VARIANT"
}
if (match(ext, lprefix3_expr)) {
lptable3[idx] = add_flags(lptable3[idx],flags)
variant = "INAT_VARIANT"
}
if (!match(ext, lprefix_expr)){
table[idx] = add_flags(table[idx],flags)
}
}
if (variant)
table[idx] = add_flags(table[idx],variant)
}
END {
if (awkchecked != "")
exit 1
# print escape opcode map's array
print "/* Escape opcode map array */"
print "const insn_attr_t * const inat_escape_tables[INAT_ESC_MAX + 1]" \
"[INAT_LSTPFX_MAX + 1] = {"
for (i = 0; i < geid; i++)
for (j = 0; j < max_lprefix; j++)
if (etable[i,j])
print " ["i"]["j"] = "etable[i,j]","
print "};\n"
# print group opcode map's array
print "/* Group opcode map array */"
print "const insn_attr_t * const inat_group_tables[INAT_GRP_MAX + 1]"\
"[INAT_LSTPFX_MAX + 1] = {"
for (i = 0; i < ggid; i++)
for (j = 0; j < max_lprefix; j++)
if (gtable[i,j])
print " ["i"]["j"] = "gtable[i,j]","
print "};\n"
# print AVX opcode map's array
print "/* AVX opcode map array */"
print "const insn_attr_t * const inat_avx_tables[X86_VEX_M_MAX + 1]"\
"[INAT_LSTPFX_MAX + 1] = {"
for (i = 0; i < gaid; i++)
for (j = 0; j < max_lprefix; j++)
if (atable[i,j])
print " ["i"]["j"] = "atable[i,j]","
print "};"
}
/*
* x86 instruction attribute tables
*
* Written by Masami Hiramatsu <mhiramat@redhat.com>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
*
*/
#include <asm/insn.h>
/* Attribute tables are generated from opcode map */
#include "inat-tables.c"
/* Attribute search APIs */
insn_attr_t inat_get_opcode_attribute(insn_byte_t opcode)
{
return inat_primary_table[opcode];
}
int inat_get_last_prefix_id(insn_byte_t last_pfx)
{
insn_attr_t lpfx_attr;
lpfx_attr = inat_get_opcode_attribute(last_pfx);
return inat_last_prefix_id(lpfx_attr);
}
insn_attr_t inat_get_escape_attribute(insn_byte_t opcode, int lpfx_id,
insn_attr_t esc_attr)
{
const insn_attr_t *table;
int n;
n = inat_escape_id(esc_attr);
table = inat_escape_tables[n][0];
if (!table)
return 0;
if (inat_has_variant(table[opcode]) && lpfx_id) {
table = inat_escape_tables[n][lpfx_id];
if (!table)
return 0;
}
return table[opcode];
}
insn_attr_t inat_get_group_attribute(insn_byte_t modrm, int lpfx_id,
insn_attr_t grp_attr)
{
const insn_attr_t *table;
int n;
n = inat_group_id(grp_attr);
table = inat_group_tables[n][0];
if (!table)
return inat_group_common_attribute(grp_attr);
if (inat_has_variant(table[X86_MODRM_REG(modrm)]) && lpfx_id) {
table = inat_group_tables[n][lpfx_id];
if (!table)
return inat_group_common_attribute(grp_attr);
}
return table[X86_MODRM_REG(modrm)] |
inat_group_common_attribute(grp_attr);
}
insn_attr_t inat_get_avx_attribute(insn_byte_t opcode, insn_byte_t vex_m,
insn_byte_t vex_p)
{
const insn_attr_t *table;
if (vex_m > X86_VEX_M_MAX || vex_p > INAT_LSTPFX_MAX)
return 0;
/* At first, this checks the master table */
table = inat_avx_tables[vex_m][0];
if (!table)
return 0;
if (!inat_is_group(table[opcode]) && vex_p) {
/* If this is not a group, get attribute directly */
table = inat_avx_tables[vex_m][vex_p];
if (!table)
return 0;
}
return table[opcode];
}
#ifndef _ASM_X86_INAT_H
#define _ASM_X86_INAT_H
/*
* x86 instruction attributes
*
* Written by Masami Hiramatsu <mhiramat@redhat.com>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
*
*/
#include <asm/inat_types.h>
/*
* Internal bits. Don't use bitmasks directly, because these bits are
* unstable. You should use checking functions.
*/
#define INAT_OPCODE_TABLE_SIZE 256
#define INAT_GROUP_TABLE_SIZE 8
/* Legacy last prefixes */
#define INAT_PFX_OPNDSZ 1 /* 0x66 */ /* LPFX1 */
#define INAT_PFX_REPE 2 /* 0xF3 */ /* LPFX2 */
#define INAT_PFX_REPNE 3 /* 0xF2 */ /* LPFX3 */
/* Other Legacy prefixes */
#define INAT_PFX_LOCK 4 /* 0xF0 */
#define INAT_PFX_CS 5 /* 0x2E */
#define INAT_PFX_DS 6 /* 0x3E */
#define INAT_PFX_ES 7 /* 0x26 */
#define INAT_PFX_FS 8 /* 0x64 */
#define INAT_PFX_GS 9 /* 0x65 */
#define INAT_PFX_SS 10 /* 0x36 */
#define INAT_PFX_ADDRSZ 11 /* 0x67 */
/* x86-64 REX prefix */
#define INAT_PFX_REX 12 /* 0x4X */
/* AVX VEX prefixes */
#define INAT_PFX_VEX2 13 /* 2-bytes VEX prefix */
#define INAT_PFX_VEX3 14 /* 3-bytes VEX prefix */
#define INAT_LSTPFX_MAX 3
#define INAT_LGCPFX_MAX 11
/* Immediate size */
#define INAT_IMM_BYTE 1
#define INAT_IMM_WORD 2
#define INAT_IMM_DWORD 3
#define INAT_IMM_QWORD 4
#define INAT_IMM_PTR 5
#define INAT_IMM_VWORD32 6
#define INAT_IMM_VWORD 7
/* Legacy prefix */
#define INAT_PFX_OFFS 0
#define INAT_PFX_BITS 4
#define INAT_PFX_MAX ((1 << INAT_PFX_BITS) - 1)
#define INAT_PFX_MASK (INAT_PFX_MAX << INAT_PFX_OFFS)
/* Escape opcodes */
#define INAT_ESC_OFFS (INAT_PFX_OFFS + INAT_PFX_BITS)
#define INAT_ESC_BITS 2
#define INAT_ESC_MAX ((1 << INAT_ESC_BITS) - 1)
#define INAT_ESC_MASK (INAT_ESC_MAX << INAT_ESC_OFFS)
/* Group opcodes (1-16) */
#define INAT_GRP_OFFS (INAT_ESC_OFFS + INAT_ESC_BITS)
#define INAT_GRP_BITS 5
#define INAT_GRP_MAX ((1 << INAT_GRP_BITS) - 1)
#define INAT_GRP_MASK (INAT_GRP_MAX << INAT_GRP_OFFS)
/* Immediates */
#define INAT_IMM_OFFS (INAT_GRP_OFFS + INAT_GRP_BITS)
#define INAT_IMM_BITS 3
#define INAT_IMM_MASK (((1 << INAT_IMM_BITS) - 1) << INAT_IMM_OFFS)
/* Flags */
#define INAT_FLAG_OFFS (INAT_IMM_OFFS + INAT_IMM_BITS)
#define INAT_MODRM (1 << (INAT_FLAG_OFFS))
#define INAT_FORCE64 (1 << (INAT_FLAG_OFFS + 1))
#define INAT_SCNDIMM (1 << (INAT_FLAG_OFFS + 2))
#define INAT_MOFFSET (1 << (INAT_FLAG_OFFS + 3))
#define INAT_VARIANT (1 << (INAT_FLAG_OFFS + 4))
#define INAT_VEXOK (1 << (INAT_FLAG_OFFS + 5))
#define INAT_VEXONLY (1 << (INAT_FLAG_OFFS + 6))
/* Attribute making macros for attribute tables */
#define INAT_MAKE_PREFIX(pfx) (pfx << INAT_PFX_OFFS)
#define INAT_MAKE_ESCAPE(esc) (esc << INAT_ESC_OFFS)
#define INAT_MAKE_GROUP(grp) ((grp << INAT_GRP_OFFS) | INAT_MODRM)
#define INAT_MAKE_IMM(imm) (imm << INAT_IMM_OFFS)
/* Attribute search APIs */
extern insn_attr_t inat_get_opcode_attribute(insn_byte_t opcode);
extern int inat_get_last_prefix_id(insn_byte_t last_pfx);
extern insn_attr_t inat_get_escape_attribute(insn_byte_t opcode,
int lpfx_id,
insn_attr_t esc_attr);
extern insn_attr_t inat_get_group_attribute(insn_byte_t modrm,
int lpfx_id,
insn_attr_t esc_attr);
extern insn_attr_t inat_get_avx_attribute(insn_byte_t opcode,
insn_byte_t vex_m,
insn_byte_t vex_pp);
/* Attribute checking functions */
static inline int inat_is_legacy_prefix(insn_attr_t attr)
{
attr &= INAT_PFX_MASK;
return attr && attr <= INAT_LGCPFX_MAX;
}
static inline int inat_is_address_size_prefix(insn_attr_t attr)
{
return (attr & INAT_PFX_MASK) == INAT_PFX_ADDRSZ;
}
static inline int inat_is_operand_size_prefix(insn_attr_t attr)
{
return (attr & INAT_PFX_MASK) == INAT_PFX_OPNDSZ;
}
static inline int inat_is_rex_prefix(insn_attr_t attr)
{
return (attr & INAT_PFX_MASK) == INAT_PFX_REX;
}
static inline int inat_last_prefix_id(insn_attr_t attr)
{
if ((attr & INAT_PFX_MASK) > INAT_LSTPFX_MAX)
return 0;
else
return attr & INAT_PFX_MASK;
}
static inline int inat_is_vex_prefix(insn_attr_t attr)
{
attr &= INAT_PFX_MASK;
return attr == INAT_PFX_VEX2 || attr == INAT_PFX_VEX3;
}
static inline int inat_is_vex3_prefix(insn_attr_t attr)
{
return (attr & INAT_PFX_MASK) == INAT_PFX_VEX3;
}
static inline int inat_is_escape(insn_attr_t attr)
{
return attr & INAT_ESC_MASK;
}
static inline int inat_escape_id(insn_attr_t attr)
{
return (attr & INAT_ESC_MASK) >> INAT_ESC_OFFS;
}
static inline int inat_is_group(insn_attr_t attr)
{
return attr & INAT_GRP_MASK;
}
static inline int inat_group_id(insn_attr_t attr)
{
return (attr & INAT_GRP_MASK) >> INAT_GRP_OFFS;
}
static inline int inat_group_common_attribute(insn_attr_t attr)
{
return attr & ~INAT_GRP_MASK;
}
static inline int inat_has_immediate(insn_attr_t attr)
{
return attr & INAT_IMM_MASK;
}
static inline int inat_immediate_size(insn_attr_t attr)
{
return (attr & INAT_IMM_MASK) >> INAT_IMM_OFFS;
}
static inline int inat_has_modrm(insn_attr_t attr)
{
return attr & INAT_MODRM;
}
static inline int inat_is_force64(insn_attr_t attr)
{
return attr & INAT_FORCE64;
}
static inline int inat_has_second_immediate(insn_attr_t attr)
{
return attr & INAT_SCNDIMM;
}
static inline int inat_has_moffset(insn_attr_t attr)
{
return attr & INAT_MOFFSET;
}
static inline int inat_has_variant(insn_attr_t attr)
{
return attr & INAT_VARIANT;
}
static inline int inat_accept_vex(insn_attr_t attr)
{
return attr & INAT_VEXOK;
}
static inline int inat_must_vex(insn_attr_t attr)
{
return attr & INAT_VEXONLY;
}
#endif
/*
* x86 instruction analysis
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
*
* Copyright (C) IBM Corporation, 2002, 2004, 2009
*/
#ifdef __KERNEL__
#include <linux/string.h>
#else
#include <string.h>
#endif
#include <asm/inat.h>
#include <asm/insn.h>
/* Verify next sizeof(t) bytes can be on the same instruction */
#define validate_next(t, insn, n) \
((insn)->next_byte + sizeof(t) + n <= (insn)->end_kaddr)
#define __get_next(t, insn) \
({ t r = *(t*)insn->next_byte; insn->next_byte += sizeof(t); r; })
#define __peek_nbyte_next(t, insn, n) \
({ t r = *(t*)((insn)->next_byte + n); r; })
#define get_next(t, insn) \
({ if (unlikely(!validate_next(t, insn, 0))) goto err_out; __get_next(t, insn); })
#define peek_nbyte_next(t, insn, n) \
({ if (unlikely(!validate_next(t, insn, n))) goto err_out; __peek_nbyte_next(t, insn, n); })
#define peek_next(t, insn) peek_nbyte_next(t, insn, 0)
/**
* insn_init() - initialize struct insn
* @insn: &struct insn to be initialized
* @kaddr: address (in kernel memory) of instruction (or copy thereof)
* @x86_64: !0 for 64-bit kernel or 64-bit app
*/
void insn_init(struct insn *insn, const void *kaddr, int buf_len, int x86_64)
{
/*
* Instructions longer than MAX_INSN_SIZE (15 bytes) are invalid
* even if the input buffer is long enough to hold them.
*/
if (buf_len > MAX_INSN_SIZE)
buf_len = MAX_INSN_SIZE;
memset(insn, 0, sizeof(*insn));
insn->kaddr = kaddr;
insn->end_kaddr = kaddr + buf_len;
insn->next_byte = kaddr;
insn->x86_64 = x86_64 ? 1 : 0;
insn->opnd_bytes = 4;
if (x86_64)
insn->addr_bytes = 8;
else
insn->addr_bytes = 4;
}
/**
* insn_get_prefixes - scan x86 instruction prefix bytes
* @insn: &struct insn containing instruction
*
* Populates the @insn->prefixes bitmap, and updates @insn->next_byte
* to point to the (first) opcode. No effect if @insn->prefixes.got
* is already set.
*/
void insn_get_prefixes(struct insn *insn)
{
struct insn_field *prefixes = &insn->prefixes;
insn_attr_t attr;
insn_byte_t b, lb;
int i, nb;
if (prefixes->got)
return;
nb = 0;
lb = 0;
b = peek_next(insn_byte_t, insn);
attr = inat_get_opcode_attribute(b);
while (inat_is_legacy_prefix(attr)) {
/* Skip if same prefix */
for (i = 0; i < nb; i++)
if (prefixes->bytes[i] == b)
goto found;
if (nb == 4)
/* Invalid instruction */
break;
prefixes->bytes[nb++] = b;
if (inat_is_address_size_prefix(attr)) {
/* address size switches 2/4 or 4/8 */
if (insn->x86_64)
insn->addr_bytes ^= 12;
else
insn->addr_bytes ^= 6;
} else if (inat_is_operand_size_prefix(attr)) {
/* oprand size switches 2/4 */
insn->opnd_bytes ^= 6;
}
found:
prefixes->nbytes++;
insn->next_byte++;
lb = b;
b = peek_next(insn_byte_t, insn);
attr = inat_get_opcode_attribute(b);
}
/* Set the last prefix */
if (lb && lb != insn->prefixes.bytes[3]) {
if (unlikely(insn->prefixes.bytes[3])) {
/* Swap the last prefix */
b = insn->prefixes.bytes[3];
for (i = 0; i < nb; i++)
if (prefixes->bytes[i] == lb)
prefixes->bytes[i] = b;
}
insn->prefixes.bytes[3] = lb;
}
/* Decode REX prefix */
if (insn->x86_64) {
b = peek_next(insn_byte_t, insn);
attr = inat_get_opcode_attribute(b);
if (inat_is_rex_prefix(attr)) {
insn->rex_prefix.value = b;
insn->rex_prefix.nbytes = 1;
insn->next_byte++;
if (X86_REX_W(b))
/* REX.W overrides opnd_size */
insn->opnd_bytes = 8;
}
}
insn->rex_prefix.got = 1;
/* Decode VEX prefix */
b = peek_next(insn_byte_t, insn);
attr = inat_get_opcode_attribute(b);
if (inat_is_vex_prefix(attr)) {
insn_byte_t b2 = peek_nbyte_next(insn_byte_t, insn, 1);
if (!insn->x86_64) {
/*
* In 32-bits mode, if the [7:6] bits (mod bits of
* ModRM) on the second byte are not 11b, it is
* LDS or LES.
*/
if (X86_MODRM_MOD(b2) != 3)
goto vex_end;
}
insn->vex_prefix.bytes[0] = b;
insn->vex_prefix.bytes[1] = b2;
if (inat_is_vex3_prefix(attr)) {
b2 = peek_nbyte_next(insn_byte_t, insn, 2);
insn->vex_prefix.bytes[2] = b2;
insn->vex_prefix.nbytes = 3;
insn->next_byte += 3;
if (insn->x86_64 && X86_VEX_W(b2))
/* VEX.W overrides opnd_size */
insn->opnd_bytes = 8;
} else {
/*
* For VEX2, fake VEX3-like byte#2.
* Makes it easier to decode vex.W, vex.vvvv,
* vex.L and vex.pp. Masking with 0x7f sets vex.W == 0.
*/
insn->vex_prefix.bytes[2] = b2 & 0x7f;
insn->vex_prefix.nbytes = 2;
insn->next_byte += 2;
}
}
vex_end:
insn->vex_prefix.got = 1;
prefixes->got = 1;
err_out:
return;
}
/**
* insn_get_opcode - collect opcode(s)
* @insn: &struct insn containing instruction
*
* Populates @insn->opcode, updates @insn->next_byte to point past the
* opcode byte(s), and set @insn->attr (except for groups).
* If necessary, first collects any preceding (prefix) bytes.
* Sets @insn->opcode.value = opcode1. No effect if @insn->opcode.got
* is already 1.
*/
void insn_get_opcode(struct insn *insn)
{
struct insn_field *opcode = &insn->opcode;
insn_byte_t op;
int pfx_id;
if (opcode->got)
return;
if (!insn->prefixes.got)
insn_get_prefixes(insn);
/* Get first opcode */
op = get_next(insn_byte_t, insn);
opcode->bytes[0] = op;
opcode->nbytes = 1;
/* Check if there is VEX prefix or not */
if (insn_is_avx(insn)) {
insn_byte_t m, p;
m = insn_vex_m_bits(insn);
p = insn_vex_p_bits(insn);
insn->attr = inat_get_avx_attribute(op, m, p);
if (!inat_accept_vex(insn->attr) && !inat_is_group(insn->attr))
insn->attr = 0; /* This instruction is bad */
goto end; /* VEX has only 1 byte for opcode */
}
insn->attr = inat_get_opcode_attribute(op);
while (inat_is_escape(insn->attr)) {
/* Get escaped opcode */
op = get_next(insn_byte_t, insn);
opcode->bytes[opcode->nbytes++] = op;
pfx_id = insn_last_prefix_id(insn);
insn->attr = inat_get_escape_attribute(op, pfx_id, insn->attr);
}
if (inat_must_vex(insn->attr))
insn->attr = 0; /* This instruction is bad */
end:
opcode->got = 1;
err_out:
return;
}
/**
* insn_get_modrm - collect ModRM byte, if any
* @insn: &struct insn containing instruction
*
* Populates @insn->modrm and updates @insn->next_byte to point past the
* ModRM byte, if any. If necessary, first collects the preceding bytes
* (prefixes and opcode(s)). No effect if @insn->modrm.got is already 1.
*/
void insn_get_modrm(struct insn *insn)
{
struct insn_field *modrm = &insn->modrm;
insn_byte_t pfx_id, mod;
if (modrm->got)
return;
if (!insn->opcode.got)
insn_get_opcode(insn);
if (inat_has_modrm(insn->attr)) {
mod = get_next(insn_byte_t, insn);
modrm->value = mod;
modrm->nbytes = 1;
if (inat_is_group(insn->attr)) {
pfx_id = insn_last_prefix_id(insn);
insn->attr = inat_get_group_attribute(mod, pfx_id,
insn->attr);
if (insn_is_avx(insn) && !inat_accept_vex(insn->attr))
insn->attr = 0; /* This is bad */
}
}
if (insn->x86_64 && inat_is_force64(insn->attr))
insn->opnd_bytes = 8;
modrm->got = 1;
err_out:
return;
}
/**
* insn_rip_relative() - Does instruction use RIP-relative addressing mode?
* @insn: &struct insn containing instruction
*
* If necessary, first collects the instruction up to and including the
* ModRM byte. No effect if @insn->x86_64 is 0.
*/
int insn_rip_relative(struct insn *insn)
{
struct insn_field *modrm = &insn->modrm;
if (!insn->x86_64)
return 0;
if (!modrm->got)
insn_get_modrm(insn);
/*
* For rip-relative instructions, the mod field (top 2 bits)
* is zero and the r/m field (bottom 3 bits) is 0x5.
*/
return (modrm->nbytes && (modrm->value & 0xc7) == 0x5);
}
/**
* insn_get_sib() - Get the SIB byte of instruction
* @insn: &struct insn containing instruction
*
* If necessary, first collects the instruction up to and including the
* ModRM byte.
*/
void insn_get_sib(struct insn *insn)
{
insn_byte_t modrm;
if (insn->sib.got)
return;
if (!insn->modrm.got)
insn_get_modrm(insn);
if (insn->modrm.nbytes) {
modrm = (insn_byte_t)insn->modrm.value;
if (insn->addr_bytes != 2 &&
X86_MODRM_MOD(modrm) != 3 && X86_MODRM_RM(modrm) == 4) {
insn->sib.value = get_next(insn_byte_t, insn);
insn->sib.nbytes = 1;
}
}
insn->sib.got = 1;
err_out:
return;
}
/**
* insn_get_displacement() - Get the displacement of instruction
* @insn: &struct insn containing instruction
*
* If necessary, first collects the instruction up to and including the
* SIB byte.
* Displacement value is sign-expanded.
*/
void insn_get_displacement(struct insn *insn)
{
insn_byte_t mod, rm, base;
if (insn->displacement.got)
return;
if (!insn->sib.got)
insn_get_sib(insn);
if (insn->modrm.nbytes) {
/*
* Interpreting the modrm byte:
* mod = 00 - no displacement fields (exceptions below)
* mod = 01 - 1-byte displacement field
* mod = 10 - displacement field is 4 bytes, or 2 bytes if
* address size = 2 (0x67 prefix in 32-bit mode)
* mod = 11 - no memory operand
*
* If address size = 2...
* mod = 00, r/m = 110 - displacement field is 2 bytes
*
* If address size != 2...
* mod != 11, r/m = 100 - SIB byte exists
* mod = 00, SIB base = 101 - displacement field is 4 bytes
* mod = 00, r/m = 101 - rip-relative addressing, displacement
* field is 4 bytes
*/
mod = X86_MODRM_MOD(insn->modrm.value);
rm = X86_MODRM_RM(insn->modrm.value);
base = X86_SIB_BASE(insn->sib.value);
if (mod == 3)
goto out;
if (mod == 1) {
insn->displacement.value = get_next(char, insn);
insn->displacement.nbytes = 1;
} else if (insn->addr_bytes == 2) {
if ((mod == 0 && rm == 6) || mod == 2) {
insn->displacement.value =
get_next(short, insn);
insn->displacement.nbytes = 2;
}
} else {
if ((mod == 0 && rm == 5) || mod == 2 ||
(mod == 0 && base == 5)) {
insn->displacement.value = get_next(int, insn);
insn->displacement.nbytes = 4;
}
}
}
out:
insn->displacement.got = 1;
err_out:
return;
}
/* Decode moffset16/32/64. Return 0 if failed */
static int __get_moffset(struct insn *insn)
{
switch (insn->addr_bytes) {
case 2:
insn->moffset1.value = get_next(short, insn);
insn->moffset1.nbytes = 2;
break;
case 4:
insn->moffset1.value = get_next(int, insn);
insn->moffset1.nbytes = 4;
break;
case 8:
insn->moffset1.value = get_next(int, insn);
insn->moffset1.nbytes = 4;
insn->moffset2.value = get_next(int, insn);
insn->moffset2.nbytes = 4;
break;
default: /* opnd_bytes must be modified manually */
goto err_out;
}
insn->moffset1.got = insn->moffset2.got = 1;
return 1;
err_out:
return 0;
}
/* Decode imm v32(Iz). Return 0 if failed */
static int __get_immv32(struct insn *insn)
{
switch (insn->opnd_bytes) {
case 2:
insn->immediate.value = get_next(short, insn);
insn->immediate.nbytes = 2;
break;
case 4:
case 8:
insn->immediate.value = get_next(int, insn);
insn->immediate.nbytes = 4;
break;
default: /* opnd_bytes must be modified manually */
goto err_out;
}
return 1;
err_out:
return 0;
}
/* Decode imm v64(Iv/Ov), Return 0 if failed */
static int __get_immv(struct insn *insn)
{
switch (insn->opnd_bytes) {
case 2:
insn->immediate1.value = get_next(short, insn);
insn->immediate1.nbytes = 2;
break;
case 4:
insn->immediate1.value = get_next(int, insn);
insn->immediate1.nbytes = 4;
break;
case 8:
insn->immediate1.value = get_next(int, insn);
insn->immediate1.nbytes = 4;
insn->immediate2.value = get_next(int, insn);
insn->immediate2.nbytes = 4;
break;
default: /* opnd_bytes must be modified manually */
goto err_out;
}
insn->immediate1.got = insn->immediate2.got = 1;
return 1;
err_out:
return 0;
}
/* Decode ptr16:16/32(Ap) */
static int __get_immptr(struct insn *insn)
{
switch (insn->opnd_bytes) {
case 2:
insn->immediate1.value = get_next(short, insn);
insn->immediate1.nbytes = 2;
break;
case 4:
insn->immediate1.value = get_next(int, insn);
insn->immediate1.nbytes = 4;
break;
case 8:
/* ptr16:64 is not exist (no segment) */
return 0;
default: /* opnd_bytes must be modified manually */
goto err_out;
}
insn->immediate2.value = get_next(unsigned short, insn);
insn->immediate2.nbytes = 2;
insn->immediate1.got = insn->immediate2.got = 1;
return 1;
err_out:
return 0;
}
/**
* insn_get_immediate() - Get the immediates of instruction
* @insn: &struct insn containing instruction
*
* If necessary, first collects the instruction up to and including the
* displacement bytes.
* Basically, most of immediates are sign-expanded. Unsigned-value can be
* get by bit masking with ((1 << (nbytes * 8)) - 1)
*/
void insn_get_immediate(struct insn *insn)
{
if (insn->immediate.got)
return;
if (!insn->displacement.got)
insn_get_displacement(insn);
if (inat_has_moffset(insn->attr)) {
if (!__get_moffset(insn))
goto err_out;
goto done;
}
if (!inat_has_immediate(insn->attr))
/* no immediates */
goto done;
switch (inat_immediate_size(insn->attr)) {
case INAT_IMM_BYTE:
insn->immediate.value = get_next(char, insn);
insn->immediate.nbytes = 1;
break;
case INAT_IMM_WORD:
insn->immediate.value = get_next(short, insn);
insn->immediate.nbytes = 2;
break;
case INAT_IMM_DWORD:
insn->immediate.value = get_next(int, insn);
insn->immediate.nbytes = 4;
break;
case INAT_IMM_QWORD:
insn->immediate1.value = get_next(int, insn);
insn->immediate1.nbytes = 4;
insn->immediate2.value = get_next(int, insn);
insn->immediate2.nbytes = 4;
break;
case INAT_IMM_PTR:
if (!__get_immptr(insn))
goto err_out;
break;
case INAT_IMM_VWORD32:
if (!__get_immv32(insn))
goto err_out;
break;
case INAT_IMM_VWORD:
if (!__get_immv(insn))
goto err_out;
break;
default:
/* Here, insn must have an immediate, but failed */
goto err_out;
}
if (inat_has_second_immediate(insn->attr)) {
insn->immediate2.value = get_next(char, insn);
insn->immediate2.nbytes = 1;
}
done:
insn->immediate.got = 1;
err_out:
return;
}
/**
* insn_get_length() - Get the length of instruction
* @insn: &struct insn containing instruction
*
* If necessary, first collects the instruction up to and including the
* immediates bytes.
*/
void insn_get_length(struct insn *insn)
{
if (insn->length)
return;
if (!insn->immediate.got)
insn_get_immediate(insn);
insn->length = (unsigned char)((unsigned long)insn->next_byte
- (unsigned long)insn->kaddr);
}
#ifndef _ASM_X86_INSN_H
#define _ASM_X86_INSN_H
/*
* x86 instruction analysis
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
*
* Copyright (C) IBM Corporation, 2009
*/
/* insn_attr_t is defined in inat.h */
#include <asm/inat.h>
struct insn_field {
union {
insn_value_t value;
insn_byte_t bytes[4];
};
/* !0 if we've run insn_get_xxx() for this field */
unsigned char got;
unsigned char nbytes;
};
struct insn {
struct insn_field prefixes; /*
* Prefixes
* prefixes.bytes[3]: last prefix
*/
struct insn_field rex_prefix; /* REX prefix */
struct insn_field vex_prefix; /* VEX prefix */
struct insn_field opcode; /*
* opcode.bytes[0]: opcode1
* opcode.bytes[1]: opcode2
* opcode.bytes[2]: opcode3
*/
struct insn_field modrm;
struct insn_field sib;
struct insn_field displacement;
union {
struct insn_field immediate;
struct insn_field moffset1; /* for 64bit MOV */
struct insn_field immediate1; /* for 64bit imm or off16/32 */
};
union {
struct insn_field moffset2; /* for 64bit MOV */
struct insn_field immediate2; /* for 64bit imm or seg16 */
};
insn_attr_t attr;
unsigned char opnd_bytes;
unsigned char addr_bytes;
unsigned char length;
unsigned char x86_64;
const insn_byte_t *kaddr; /* kernel address of insn to analyze */
const insn_byte_t *end_kaddr; /* kernel address of last insn in buffer */
const insn_byte_t *next_byte;
};
#define MAX_INSN_SIZE 15
#define X86_MODRM_MOD(modrm) (((modrm) & 0xc0) >> 6)
#define X86_MODRM_REG(modrm) (((modrm) & 0x38) >> 3)
#define X86_MODRM_RM(modrm) ((modrm) & 0x07)
#define X86_SIB_SCALE(sib) (((sib) & 0xc0) >> 6)
#define X86_SIB_INDEX(sib) (((sib) & 0x38) >> 3)
#define X86_SIB_BASE(sib) ((sib) & 0x07)
#define X86_REX_W(rex) ((rex) & 8)
#define X86_REX_R(rex) ((rex) & 4)
#define X86_REX_X(rex) ((rex) & 2)
#define X86_REX_B(rex) ((rex) & 1)
/* VEX bit flags */
#define X86_VEX_W(vex) ((vex) & 0x80) /* VEX3 Byte2 */
#define X86_VEX_R(vex) ((vex) & 0x80) /* VEX2/3 Byte1 */
#define X86_VEX_X(vex) ((vex) & 0x40) /* VEX3 Byte1 */
#define X86_VEX_B(vex) ((vex) & 0x20) /* VEX3 Byte1 */
#define X86_VEX_L(vex) ((vex) & 0x04) /* VEX3 Byte2, VEX2 Byte1 */
/* VEX bit fields */
#define X86_VEX3_M(vex) ((vex) & 0x1f) /* VEX3 Byte1 */
#define X86_VEX2_M 1 /* VEX2.M always 1 */
#define X86_VEX_V(vex) (((vex) & 0x78) >> 3) /* VEX3 Byte2, VEX2 Byte1 */
#define X86_VEX_P(vex) ((vex) & 0x03) /* VEX3 Byte2, VEX2 Byte1 */
#define X86_VEX_M_MAX 0x1f /* VEX3.M Maximum value */
extern void insn_init(struct insn *insn, const void *kaddr, int buf_len, int x86_64);
extern void insn_get_prefixes(struct insn *insn);
extern void insn_get_opcode(struct insn *insn);
extern void insn_get_modrm(struct insn *insn);
extern void insn_get_sib(struct insn *insn);
extern void insn_get_displacement(struct insn *insn);
extern void insn_get_immediate(struct insn *insn);
extern void insn_get_length(struct insn *insn);
/* Attribute will be determined after getting ModRM (for opcode groups) */
static inline void insn_get_attribute(struct insn *insn)
{
insn_get_modrm(insn);
}
/* Instruction uses RIP-relative addressing */
extern int insn_rip_relative(struct insn *insn);
/* Init insn for kernel text */
static inline void kernel_insn_init(struct insn *insn,
const void *kaddr, int buf_len)
{
#ifdef CONFIG_X86_64
insn_init(insn, kaddr, buf_len, 1);
#else /* CONFIG_X86_32 */
insn_init(insn, kaddr, buf_len, 0);
#endif
}
static inline int insn_is_avx(struct insn *insn)
{
if (!insn->prefixes.got)
insn_get_prefixes(insn);
return (insn->vex_prefix.value != 0);
}
/* Ensure this instruction is decoded completely */
static inline int insn_complete(struct insn *insn)
{
return insn->opcode.got && insn->modrm.got && insn->sib.got &&
insn->displacement.got && insn->immediate.got;
}
static inline insn_byte_t insn_vex_m_bits(struct insn *insn)
{
if (insn->vex_prefix.nbytes == 2) /* 2 bytes VEX */
return X86_VEX2_M;
else
return X86_VEX3_M(insn->vex_prefix.bytes[1]);
}
static inline insn_byte_t insn_vex_p_bits(struct insn *insn)
{
if (insn->vex_prefix.nbytes == 2) /* 2 bytes VEX */
return X86_VEX_P(insn->vex_prefix.bytes[1]);
else
return X86_VEX_P(insn->vex_prefix.bytes[2]);
}
/* Get the last prefix id from last prefix or VEX prefix */
static inline int insn_last_prefix_id(struct insn *insn)
{
if (insn_is_avx(insn))
return insn_vex_p_bits(insn); /* VEX_p is a SIMD prefix id */
if (insn->prefixes.bytes[3])
return inat_get_last_prefix_id(insn->prefixes.bytes[3]);
return 0;
}
/* Offset of each field from kaddr */
static inline int insn_offset_rex_prefix(struct insn *insn)
{
return insn->prefixes.nbytes;
}
static inline int insn_offset_vex_prefix(struct insn *insn)
{
return insn_offset_rex_prefix(insn) + insn->rex_prefix.nbytes;
}
static inline int insn_offset_opcode(struct insn *insn)
{
return insn_offset_vex_prefix(insn) + insn->vex_prefix.nbytes;
}
static inline int insn_offset_modrm(struct insn *insn)
{
return insn_offset_opcode(insn) + insn->opcode.nbytes;
}
static inline int insn_offset_sib(struct insn *insn)
{
return insn_offset_modrm(insn) + insn->modrm.nbytes;
}
static inline int insn_offset_displacement(struct insn *insn)
{
return insn_offset_sib(insn) + insn->sib.nbytes;
}
static inline int insn_offset_immediate(struct insn *insn)
{
return insn_offset_displacement(insn) + insn->displacement.nbytes;
}
#endif /* _ASM_X86_INSN_H */
/*
* intel_pt_decoder.c: Intel Processor Trace support
* Copyright (c) 2013-2014, Intel Corporation.
*
* This program is free software; you can redistribute it and/or modify it
* under the terms and conditions of the GNU General Public License,
* version 2, as published by the Free Software Foundation.
*
* This program is distributed in the hope it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
* more details.
*
*/
#ifndef _GNU_SOURCE
#define _GNU_SOURCE
#endif
#include <stdlib.h>
#include <stdbool.h>
#include <string.h>
#include <errno.h>
#include <stdint.h>
#include <inttypes.h>
#include "../cache.h"
#include "../util.h"
#include "intel-pt-insn-decoder.h"
#include "intel-pt-pkt-decoder.h"
#include "intel-pt-decoder.h"
#include "intel-pt-log.h"
#define INTEL_PT_BLK_SIZE 1024
#define BIT63 (((uint64_t)1 << 63))
#define INTEL_PT_RETURN 1
/* Maximum number of loops with no packets consumed i.e. stuck in a loop */
#define INTEL_PT_MAX_LOOPS 10000
struct intel_pt_blk {
struct intel_pt_blk *prev;
uint64_t ip[INTEL_PT_BLK_SIZE];
};
struct intel_pt_stack {
struct intel_pt_blk *blk;
struct intel_pt_blk *spare;
int pos;
};
enum intel_pt_pkt_state {
INTEL_PT_STATE_NO_PSB,
INTEL_PT_STATE_NO_IP,
INTEL_PT_STATE_ERR_RESYNC,
INTEL_PT_STATE_IN_SYNC,
INTEL_PT_STATE_TNT,
INTEL_PT_STATE_TIP,
INTEL_PT_STATE_TIP_PGD,
INTEL_PT_STATE_FUP,
INTEL_PT_STATE_FUP_NO_TIP,
};
#ifdef INTEL_PT_STRICT
#define INTEL_PT_STATE_ERR1 INTEL_PT_STATE_NO_PSB
#define INTEL_PT_STATE_ERR2 INTEL_PT_STATE_NO_PSB
#define INTEL_PT_STATE_ERR3 INTEL_PT_STATE_NO_PSB
#define INTEL_PT_STATE_ERR4 INTEL_PT_STATE_NO_PSB
#else
#define INTEL_PT_STATE_ERR1 (decoder->pkt_state)
#define INTEL_PT_STATE_ERR2 INTEL_PT_STATE_NO_IP
#define INTEL_PT_STATE_ERR3 INTEL_PT_STATE_ERR_RESYNC
#define INTEL_PT_STATE_ERR4 INTEL_PT_STATE_IN_SYNC
#endif
struct intel_pt_decoder {
int (*get_trace)(struct intel_pt_buffer *buffer, void *data);
int (*walk_insn)(struct intel_pt_insn *intel_pt_insn,
uint64_t *insn_cnt_ptr, uint64_t *ip, uint64_t to_ip,
uint64_t max_insn_cnt, void *data);
void *data;
struct intel_pt_state state;
const unsigned char *buf;
size_t len;
bool return_compression;
bool pge;
uint64_t pos;
uint64_t last_ip;
uint64_t ip;
uint64_t cr3;
uint64_t timestamp;
uint64_t tsc_timestamp;
uint64_t ref_timestamp;
uint64_t ret_addr;
struct intel_pt_stack stack;
enum intel_pt_pkt_state pkt_state;
struct intel_pt_pkt packet;
struct intel_pt_pkt tnt;
int pkt_step;
int pkt_len;
unsigned int cbr;
unsigned int max_non_turbo_ratio;
int exec_mode;
unsigned int insn_bytes;
uint64_t sign_bit;
uint64_t sign_bits;
uint64_t period;
enum intel_pt_period_type period_type;
uint64_t period_insn_cnt;
uint64_t period_mask;
uint64_t period_ticks;
uint64_t last_masked_timestamp;
bool continuous_period;
bool overflow;
bool set_fup_tx_flags;
unsigned int fup_tx_flags;
unsigned int tx_flags;
uint64_t timestamp_insn_cnt;
uint64_t stuck_ip;
int no_progress;
int stuck_ip_prd;
int stuck_ip_cnt;
const unsigned char *next_buf;
size_t next_len;
unsigned char temp_buf[INTEL_PT_PKT_MAX_SZ];
};
static uint64_t intel_pt_lower_power_of_2(uint64_t x)
{
int i;
for (i = 0; x != 1; i++)
x >>= 1;
return x << i;
}
static void intel_pt_setup_period(struct intel_pt_decoder *decoder)
{
if (decoder->period_type == INTEL_PT_PERIOD_TICKS) {
uint64_t period;
period = intel_pt_lower_power_of_2(decoder->period);
decoder->period_mask = ~(period - 1);
decoder->period_ticks = period;
}
}
struct intel_pt_decoder *intel_pt_decoder_new(struct intel_pt_params *params)
{
struct intel_pt_decoder *decoder;
if (!params->get_trace || !params->walk_insn)
return NULL;
decoder = zalloc(sizeof(struct intel_pt_decoder));
if (!decoder)
return NULL;
decoder->get_trace = params->get_trace;
decoder->walk_insn = params->walk_insn;
decoder->data = params->data;
decoder->return_compression = params->return_compression;
decoder->sign_bit = (uint64_t)1 << 47;
decoder->sign_bits = ~(((uint64_t)1 << 48) - 1);
decoder->period = params->period;
decoder->period_type = params->period_type;
decoder->max_non_turbo_ratio = params->max_non_turbo_ratio;
intel_pt_setup_period(decoder);
return decoder;
}
static void intel_pt_pop_blk(struct intel_pt_stack *stack)
{
struct intel_pt_blk *blk = stack->blk;
stack->blk = blk->prev;
if (!stack->spare)
stack->spare = blk;
else
free(blk);
}
static uint64_t intel_pt_pop(struct intel_pt_stack *stack)
{
if (!stack->pos) {
if (!stack->blk)
return 0;
intel_pt_pop_blk(stack);
if (!stack->blk)
return 0;
stack->pos = INTEL_PT_BLK_SIZE;
}
return stack->blk->ip[--stack->pos];
}
static int intel_pt_alloc_blk(struct intel_pt_stack *stack)
{
struct intel_pt_blk *blk;
if (stack->spare) {
blk = stack->spare;
stack->spare = NULL;
} else {
blk = malloc(sizeof(struct intel_pt_blk));
if (!blk)
return -ENOMEM;
}
blk->prev = stack->blk;
stack->blk = blk;
stack->pos = 0;
return 0;
}
static int intel_pt_push(struct intel_pt_stack *stack, uint64_t ip)
{
int err;
if (!stack->blk || stack->pos == INTEL_PT_BLK_SIZE) {
err = intel_pt_alloc_blk(stack);
if (err)
return err;
}
stack->blk->ip[stack->pos++] = ip;
return 0;
}
static void intel_pt_clear_stack(struct intel_pt_stack *stack)
{
while (stack->blk)
intel_pt_pop_blk(stack);
stack->pos = 0;
}
static void intel_pt_free_stack(struct intel_pt_stack *stack)
{
intel_pt_clear_stack(stack);
zfree(&stack->blk);
zfree(&stack->spare);
}
void intel_pt_decoder_free(struct intel_pt_decoder *decoder)
{
intel_pt_free_stack(&decoder->stack);
free(decoder);
}
static int intel_pt_ext_err(int code)
{
switch (code) {
case -ENOMEM:
return INTEL_PT_ERR_NOMEM;
case -ENOSYS:
return INTEL_PT_ERR_INTERN;
case -EBADMSG:
return INTEL_PT_ERR_BADPKT;
case -ENODATA:
return INTEL_PT_ERR_NODATA;
case -EILSEQ:
return INTEL_PT_ERR_NOINSN;
case -ENOENT:
return INTEL_PT_ERR_MISMAT;
case -EOVERFLOW:
return INTEL_PT_ERR_OVR;
case -ENOSPC:
return INTEL_PT_ERR_LOST;
case -ELOOP:
return INTEL_PT_ERR_NELOOP;
default:
return INTEL_PT_ERR_UNK;
}
}
static const char *intel_pt_err_msgs[] = {
[INTEL_PT_ERR_NOMEM] = "Memory allocation failed",
[INTEL_PT_ERR_INTERN] = "Internal error",
[INTEL_PT_ERR_BADPKT] = "Bad packet",
[INTEL_PT_ERR_NODATA] = "No more data",
[INTEL_PT_ERR_NOINSN] = "Failed to get instruction",
[INTEL_PT_ERR_MISMAT] = "Trace doesn't match instruction",
[INTEL_PT_ERR_OVR] = "Overflow packet",
[INTEL_PT_ERR_LOST] = "Lost trace data",
[INTEL_PT_ERR_UNK] = "Unknown error!",
[INTEL_PT_ERR_NELOOP] = "Never-ending loop",
};
int intel_pt__strerror(int code, char *buf, size_t buflen)
{
if (code < 1 || code > INTEL_PT_ERR_MAX)
code = INTEL_PT_ERR_UNK;
strlcpy(buf, intel_pt_err_msgs[code], buflen);
return 0;
}
static uint64_t intel_pt_calc_ip(struct intel_pt_decoder *decoder,
const struct intel_pt_pkt *packet,
uint64_t last_ip)
{
uint64_t ip;
switch (packet->count) {
case 2:
ip = (last_ip & (uint64_t)0xffffffffffff0000ULL) |
packet->payload;
break;
case 4:
ip = (last_ip & (uint64_t)0xffffffff00000000ULL) |
packet->payload;
break;
case 6:
ip = packet->payload;
break;
default:
return 0;
}
if (ip & decoder->sign_bit)
return ip | decoder->sign_bits;
return ip;
}
static inline void intel_pt_set_last_ip(struct intel_pt_decoder *decoder)
{
decoder->last_ip = intel_pt_calc_ip(decoder, &decoder->packet,
decoder->last_ip);
}
static inline void intel_pt_set_ip(struct intel_pt_decoder *decoder)
{
intel_pt_set_last_ip(decoder);
decoder->ip = decoder->last_ip;
}
static void intel_pt_decoder_log_packet(struct intel_pt_decoder *decoder)
{
intel_pt_log_packet(&decoder->packet, decoder->pkt_len, decoder->pos,
decoder->buf);
}
static int intel_pt_bug(struct intel_pt_decoder *decoder)
{
intel_pt_log("ERROR: Internal error\n");
decoder->pkt_state = INTEL_PT_STATE_NO_PSB;
return -ENOSYS;
}
static inline void intel_pt_clear_tx_flags(struct intel_pt_decoder *decoder)
{
decoder->tx_flags = 0;
}
static inline void intel_pt_update_in_tx(struct intel_pt_decoder *decoder)
{
decoder->tx_flags = decoder->packet.payload & INTEL_PT_IN_TX;
}
static int intel_pt_bad_packet(struct intel_pt_decoder *decoder)
{
intel_pt_clear_tx_flags(decoder);
decoder->pkt_len = 1;
decoder->pkt_step = 1;
intel_pt_decoder_log_packet(decoder);
if (decoder->pkt_state != INTEL_PT_STATE_NO_PSB) {
intel_pt_log("ERROR: Bad packet\n");
decoder->pkt_state = INTEL_PT_STATE_ERR1;
}
return -EBADMSG;
}
static int intel_pt_get_data(struct intel_pt_decoder *decoder)
{
struct intel_pt_buffer buffer = { .buf = 0, };
int ret;
decoder->pkt_step = 0;
intel_pt_log("Getting more data\n");
ret = decoder->get_trace(&buffer, decoder->data);
if (ret)
return ret;
decoder->buf = buffer.buf;
decoder->len = buffer.len;
if (!decoder->len) {
intel_pt_log("No more data\n");
return -ENODATA;
}
if (!buffer.consecutive) {
decoder->ip = 0;
decoder->pkt_state = INTEL_PT_STATE_NO_PSB;
decoder->ref_timestamp = buffer.ref_timestamp;
decoder->timestamp = 0;
decoder->state.trace_nr = buffer.trace_nr;
intel_pt_log("Reference timestamp 0x%" PRIx64 "\n",
decoder->ref_timestamp);
return -ENOLINK;
}
return 0;
}
static int intel_pt_get_next_data(struct intel_pt_decoder *decoder)
{
if (!decoder->next_buf)
return intel_pt_get_data(decoder);
decoder->buf = decoder->next_buf;
decoder->len = decoder->next_len;
decoder->next_buf = 0;
decoder->next_len = 0;
return 0;
}
static int intel_pt_get_split_packet(struct intel_pt_decoder *decoder)
{
unsigned char *buf = decoder->temp_buf;
size_t old_len, len, n;
int ret;
old_len = decoder->len;
len = decoder->len;
memcpy(buf, decoder->buf, len);
ret = intel_pt_get_data(decoder);
if (ret) {
decoder->pos += old_len;
return ret < 0 ? ret : -EINVAL;
}
n = INTEL_PT_PKT_MAX_SZ - len;
if (n > decoder->len)
n = decoder->len;
memcpy(buf + len, decoder->buf, n);
len += n;
ret = intel_pt_get_packet(buf, len, &decoder->packet);
if (ret < (int)old_len) {
decoder->next_buf = decoder->buf;
decoder->next_len = decoder->len;
decoder->buf = buf;
decoder->len = old_len;
return intel_pt_bad_packet(decoder);
}
decoder->next_buf = decoder->buf + (ret - old_len);
decoder->next_len = decoder->len - (ret - old_len);
decoder->buf = buf;
decoder->len = ret;
return ret;
}
static int intel_pt_get_next_packet(struct intel_pt_decoder *decoder)
{
int ret;
do {
decoder->pos += decoder->pkt_step;
decoder->buf += decoder->pkt_step;
decoder->len -= decoder->pkt_step;
if (!decoder->len) {
ret = intel_pt_get_next_data(decoder);
if (ret)
return ret;
}
ret = intel_pt_get_packet(decoder->buf, decoder->len,
&decoder->packet);
if (ret == INTEL_PT_NEED_MORE_BYTES &&
decoder->len < INTEL_PT_PKT_MAX_SZ && !decoder->next_buf) {
ret = intel_pt_get_split_packet(decoder);
if (ret < 0)
return ret;
}
if (ret <= 0)
return intel_pt_bad_packet(decoder);
decoder->pkt_len = ret;
decoder->pkt_step = ret;
intel_pt_decoder_log_packet(decoder);
} while (decoder->packet.type == INTEL_PT_PAD);
return 0;
}
static uint64_t intel_pt_next_period(struct intel_pt_decoder *decoder)
{
uint64_t timestamp, masked_timestamp;
timestamp = decoder->timestamp + decoder->timestamp_insn_cnt;
masked_timestamp = timestamp & decoder->period_mask;
if (decoder->continuous_period) {
if (masked_timestamp != decoder->last_masked_timestamp)
return 1;
} else {
timestamp += 1;
masked_timestamp = timestamp & decoder->period_mask;
if (masked_timestamp != decoder->last_masked_timestamp) {
decoder->last_masked_timestamp = masked_timestamp;
decoder->continuous_period = true;
}
}
return decoder->period_ticks - (timestamp - masked_timestamp);
}
static uint64_t intel_pt_next_sample(struct intel_pt_decoder *decoder)
{
switch (decoder->period_type) {
case INTEL_PT_PERIOD_INSTRUCTIONS:
return decoder->period - decoder->period_insn_cnt;
case INTEL_PT_PERIOD_TICKS:
return intel_pt_next_period(decoder);
case INTEL_PT_PERIOD_NONE:
default:
return 0;
}
}
static void intel_pt_sample_insn(struct intel_pt_decoder *decoder)
{
uint64_t timestamp, masked_timestamp;
switch (decoder->period_type) {
case INTEL_PT_PERIOD_INSTRUCTIONS:
decoder->period_insn_cnt = 0;
break;
case INTEL_PT_PERIOD_TICKS:
timestamp = decoder->timestamp + decoder->timestamp_insn_cnt;
masked_timestamp = timestamp & decoder->period_mask;
decoder->last_masked_timestamp = masked_timestamp;
break;
case INTEL_PT_PERIOD_NONE:
default:
break;
}
decoder->state.type |= INTEL_PT_INSTRUCTION;
}
static int intel_pt_walk_insn(struct intel_pt_decoder *decoder,
struct intel_pt_insn *intel_pt_insn, uint64_t ip)
{
uint64_t max_insn_cnt, insn_cnt = 0;
int err;
max_insn_cnt = intel_pt_next_sample(decoder);
err = decoder->walk_insn(intel_pt_insn, &insn_cnt, &decoder->ip, ip,
max_insn_cnt, decoder->data);
decoder->timestamp_insn_cnt += insn_cnt;
decoder->period_insn_cnt += insn_cnt;
if (err) {
decoder->no_progress = 0;
decoder->pkt_state = INTEL_PT_STATE_ERR2;
intel_pt_log_at("ERROR: Failed to get instruction",
decoder->ip);
if (err == -ENOENT)
return -ENOLINK;
return -EILSEQ;
}
if (ip && decoder->ip == ip) {
err = -EAGAIN;
goto out;
}
if (max_insn_cnt && insn_cnt >= max_insn_cnt)
intel_pt_sample_insn(decoder);
if (intel_pt_insn->branch == INTEL_PT_BR_NO_BRANCH) {
decoder->state.type = INTEL_PT_INSTRUCTION;
decoder->state.from_ip = decoder->ip;
decoder->state.to_ip = 0;
decoder->ip += intel_pt_insn->length;
err = INTEL_PT_RETURN;
goto out;
}
if (intel_pt_insn->op == INTEL_PT_OP_CALL) {
/* Zero-length calls are excluded */
if (intel_pt_insn->branch != INTEL_PT_BR_UNCONDITIONAL ||
intel_pt_insn->rel) {
err = intel_pt_push(&decoder->stack, decoder->ip +
intel_pt_insn->length);
if (err)
goto out;
}
} else if (intel_pt_insn->op == INTEL_PT_OP_RET) {
decoder->ret_addr = intel_pt_pop(&decoder->stack);
}
if (intel_pt_insn->branch == INTEL_PT_BR_UNCONDITIONAL) {
int cnt = decoder->no_progress++;
decoder->state.from_ip = decoder->ip;
decoder->ip += intel_pt_insn->length +
intel_pt_insn->rel;
decoder->state.to_ip = decoder->ip;
err = INTEL_PT_RETURN;
/*
* Check for being stuck in a loop. This can happen if a
* decoder error results in the decoder erroneously setting the
* ip to an address that is itself in an infinite loop that
* consumes no packets. When that happens, there must be an
* unconditional branch.
*/
if (cnt) {
if (cnt == 1) {
decoder->stuck_ip = decoder->state.to_ip;
decoder->stuck_ip_prd = 1;
decoder->stuck_ip_cnt = 1;
} else if (cnt > INTEL_PT_MAX_LOOPS ||
decoder->state.to_ip == decoder->stuck_ip) {
intel_pt_log_at("ERROR: Never-ending loop",
decoder->state.to_ip);
decoder->pkt_state = INTEL_PT_STATE_ERR_RESYNC;
err = -ELOOP;
goto out;
} else if (!--decoder->stuck_ip_cnt) {
decoder->stuck_ip_prd += 1;
decoder->stuck_ip_cnt = decoder->stuck_ip_prd;
decoder->stuck_ip = decoder->state.to_ip;
}
}
goto out_no_progress;
}
out:
decoder->no_progress = 0;
out_no_progress:
decoder->state.insn_op = intel_pt_insn->op;
decoder->state.insn_len = intel_pt_insn->length;
if (decoder->tx_flags & INTEL_PT_IN_TX)
decoder->state.flags |= INTEL_PT_IN_TX;
return err;
}
static int intel_pt_walk_fup(struct intel_pt_decoder *decoder)
{
struct intel_pt_insn intel_pt_insn;
uint64_t ip;
int err;
ip = decoder->last_ip;
while (1) {
err = intel_pt_walk_insn(decoder, &intel_pt_insn, ip);
if (err == INTEL_PT_RETURN)
return 0;
if (err == -EAGAIN) {
if (decoder->set_fup_tx_flags) {
decoder->set_fup_tx_flags = false;
decoder->tx_flags = decoder->fup_tx_flags;
decoder->state.type = INTEL_PT_TRANSACTION;
decoder->state.from_ip = decoder->ip;
decoder->state.to_ip = 0;
decoder->state.flags = decoder->fup_tx_flags;
return 0;
}
return err;
}
decoder->set_fup_tx_flags = false;
if (err)
return err;
if (intel_pt_insn.branch == INTEL_PT_BR_INDIRECT) {
intel_pt_log_at("ERROR: Unexpected indirect branch",
decoder->ip);
decoder->pkt_state = INTEL_PT_STATE_ERR_RESYNC;
return -ENOENT;
}
if (intel_pt_insn.branch == INTEL_PT_BR_CONDITIONAL) {
intel_pt_log_at("ERROR: Unexpected conditional branch",
decoder->ip);
decoder->pkt_state = INTEL_PT_STATE_ERR_RESYNC;
return -ENOENT;
}
intel_pt_bug(decoder);
}
}
static int intel_pt_walk_tip(struct intel_pt_decoder *decoder)
{
struct intel_pt_insn intel_pt_insn;
int err;
err = intel_pt_walk_insn(decoder, &intel_pt_insn, 0);
if (err == INTEL_PT_RETURN)
return 0;
if (err)
return err;
if (intel_pt_insn.branch == INTEL_PT_BR_INDIRECT) {
if (decoder->pkt_state == INTEL_PT_STATE_TIP_PGD) {
decoder->pge = false;
decoder->continuous_period = false;
decoder->pkt_state = INTEL_PT_STATE_IN_SYNC;
decoder->state.from_ip = decoder->ip;
decoder->state.to_ip = 0;
if (decoder->packet.count != 0)
decoder->ip = decoder->last_ip;
} else {
decoder->pkt_state = INTEL_PT_STATE_IN_SYNC;
decoder->state.from_ip = decoder->ip;
if (decoder->packet.count == 0) {
decoder->state.to_ip = 0;
} else {
decoder->state.to_ip = decoder->last_ip;
decoder->ip = decoder->last_ip;
}
}
return 0;
}
if (intel_pt_insn.branch == INTEL_PT_BR_CONDITIONAL) {
intel_pt_log_at("ERROR: Conditional branch when expecting indirect branch",
decoder->ip);
decoder->pkt_state = INTEL_PT_STATE_ERR_RESYNC;
return -ENOENT;
}
return intel_pt_bug(decoder);
}
static int intel_pt_walk_tnt(struct intel_pt_decoder *decoder)
{
struct intel_pt_insn intel_pt_insn;
int err;
while (1) {
err = intel_pt_walk_insn(decoder, &intel_pt_insn, 0);
if (err == INTEL_PT_RETURN)
return 0;
if (err)
return err;
if (intel_pt_insn.op == INTEL_PT_OP_RET) {
if (!decoder->return_compression) {
intel_pt_log_at("ERROR: RET when expecting conditional branch",
decoder->ip);
decoder->pkt_state = INTEL_PT_STATE_ERR3;
return -ENOENT;
}
if (!decoder->ret_addr) {
intel_pt_log_at("ERROR: Bad RET compression (stack empty)",
decoder->ip);
decoder->pkt_state = INTEL_PT_STATE_ERR3;
return -ENOENT;
}
if (!(decoder->tnt.payload & BIT63)) {
intel_pt_log_at("ERROR: Bad RET compression (TNT=N)",
decoder->ip);
decoder->pkt_state = INTEL_PT_STATE_ERR3;
return -ENOENT;
}
decoder->tnt.count -= 1;
if (!decoder->tnt.count)
decoder->pkt_state = INTEL_PT_STATE_IN_SYNC;
decoder->tnt.payload <<= 1;
decoder->state.from_ip = decoder->ip;
decoder->ip = decoder->ret_addr;
decoder->state.to_ip = decoder->ip;
return 0;
}
if (intel_pt_insn.branch == INTEL_PT_BR_INDIRECT) {
/* Handle deferred TIPs */
err = intel_pt_get_next_packet(decoder);
if (err)
return err;
if (decoder->packet.type != INTEL_PT_TIP ||
decoder->packet.count == 0) {
intel_pt_log_at("ERROR: Missing deferred TIP for indirect branch",
decoder->ip);
decoder->pkt_state = INTEL_PT_STATE_ERR3;
decoder->pkt_step = 0;
return -ENOENT;
}
intel_pt_set_last_ip(decoder);
decoder->state.from_ip = decoder->ip;
decoder->state.to_ip = decoder->last_ip;
decoder->ip = decoder->last_ip;
return 0;
}
if (intel_pt_insn.branch == INTEL_PT_BR_CONDITIONAL) {
decoder->tnt.count -= 1;
if (!decoder->tnt.count)
decoder->pkt_state = INTEL_PT_STATE_IN_SYNC;
if (decoder->tnt.payload & BIT63) {
decoder->tnt.payload <<= 1;
decoder->state.from_ip = decoder->ip;
decoder->ip += intel_pt_insn.length +
intel_pt_insn.rel;
decoder->state.to_ip = decoder->ip;
return 0;
}
/* Instruction sample for a non-taken branch */
if (decoder->state.type & INTEL_PT_INSTRUCTION) {
decoder->tnt.payload <<= 1;
decoder->state.type = INTEL_PT_INSTRUCTION;
decoder->state.from_ip = decoder->ip;
decoder->state.to_ip = 0;
decoder->ip += intel_pt_insn.length;
return 0;
}
decoder->ip += intel_pt_insn.length;
if (!decoder->tnt.count)
return -EAGAIN;
decoder->tnt.payload <<= 1;
continue;
}
return intel_pt_bug(decoder);
}
}
static int intel_pt_mode_tsx(struct intel_pt_decoder *decoder, bool *no_tip)
{
unsigned int fup_tx_flags;
int err;
fup_tx_flags = decoder->packet.payload &
(INTEL_PT_IN_TX | INTEL_PT_ABORT_TX);
err = intel_pt_get_next_packet(decoder);
if (err)
return err;
if (decoder->packet.type == INTEL_PT_FUP) {
decoder->fup_tx_flags = fup_tx_flags;
decoder->set_fup_tx_flags = true;
if (!(decoder->fup_tx_flags & INTEL_PT_ABORT_TX))
*no_tip = true;
} else {
intel_pt_log_at("ERROR: Missing FUP after MODE.TSX",
decoder->pos);
intel_pt_update_in_tx(decoder);
}
return 0;
}
static void intel_pt_calc_tsc_timestamp(struct intel_pt_decoder *decoder)
{
uint64_t timestamp;
if (decoder->ref_timestamp) {
timestamp = decoder->packet.payload |
(decoder->ref_timestamp & (0xffULL << 56));
if (timestamp < decoder->ref_timestamp) {
if (decoder->ref_timestamp - timestamp > (1ULL << 55))
timestamp += (1ULL << 56);
} else {
if (timestamp - decoder->ref_timestamp > (1ULL << 55))
timestamp -= (1ULL << 56);
}
decoder->tsc_timestamp = timestamp;
decoder->timestamp = timestamp;
decoder->ref_timestamp = 0;
decoder->timestamp_insn_cnt = 0;
} else if (decoder->timestamp) {
timestamp = decoder->packet.payload |
(decoder->timestamp & (0xffULL << 56));
if (timestamp < decoder->timestamp &&
decoder->timestamp - timestamp < 0x100) {
intel_pt_log_to("ERROR: Suppressing backwards timestamp",
timestamp);
timestamp = decoder->timestamp;
}
while (timestamp < decoder->timestamp) {
intel_pt_log_to("Wraparound timestamp", timestamp);
timestamp += (1ULL << 56);
}
decoder->tsc_timestamp = timestamp;
decoder->timestamp = timestamp;
decoder->timestamp_insn_cnt = 0;
}
intel_pt_log_to("Setting timestamp", decoder->timestamp);
}
static int intel_pt_overflow(struct intel_pt_decoder *decoder)
{
intel_pt_log("ERROR: Buffer overflow\n");
intel_pt_clear_tx_flags(decoder);
decoder->pkt_state = INTEL_PT_STATE_ERR_RESYNC;
decoder->overflow = true;
return -EOVERFLOW;
}
/* Walk PSB+ packets when already in sync. */
static int intel_pt_walk_psbend(struct intel_pt_decoder *decoder)
{
int err;
while (1) {
err = intel_pt_get_next_packet(decoder);
if (err)
return err;
switch (decoder->packet.type) {
case INTEL_PT_PSBEND:
return 0;
case INTEL_PT_TIP_PGD:
case INTEL_PT_TIP_PGE:
case INTEL_PT_TIP:
case INTEL_PT_TNT:
case INTEL_PT_BAD:
case INTEL_PT_PSB:
intel_pt_log("ERROR: Unexpected packet\n");
return -EAGAIN;
case INTEL_PT_OVF:
return intel_pt_overflow(decoder);
case INTEL_PT_TSC:
intel_pt_calc_tsc_timestamp(decoder);
break;
case INTEL_PT_CBR:
decoder->cbr = decoder->packet.payload;
break;
case INTEL_PT_MODE_EXEC:
decoder->exec_mode = decoder->packet.payload;
break;
case INTEL_PT_PIP:
decoder->cr3 = decoder->packet.payload;
break;
case INTEL_PT_FUP:
decoder->pge = true;
intel_pt_set_last_ip(decoder);
break;
case INTEL_PT_MODE_TSX:
intel_pt_update_in_tx(decoder);
break;
case INTEL_PT_PAD:
default:
break;
}
}
}
static int intel_pt_walk_fup_tip(struct intel_pt_decoder *decoder)
{
int err;
if (decoder->tx_flags & INTEL_PT_ABORT_TX) {
decoder->tx_flags = 0;
decoder->state.flags &= ~INTEL_PT_IN_TX;
decoder->state.flags |= INTEL_PT_ABORT_TX;
} else {
decoder->state.flags |= INTEL_PT_ASYNC;
}
while (1) {
err = intel_pt_get_next_packet(decoder);
if (err)
return err;
switch (decoder->packet.type) {
case INTEL_PT_TNT:
case INTEL_PT_FUP:
case INTEL_PT_PSB:
case INTEL_PT_TSC:
case INTEL_PT_CBR:
case INTEL_PT_MODE_TSX:
case INTEL_PT_BAD:
case INTEL_PT_PSBEND:
intel_pt_log("ERROR: Missing TIP after FUP\n");
decoder->pkt_state = INTEL_PT_STATE_ERR3;
return -ENOENT;
case INTEL_PT_OVF:
return intel_pt_overflow(decoder);
case INTEL_PT_TIP_PGD:
decoder->state.from_ip = decoder->ip;
decoder->state.to_ip = 0;
if (decoder->packet.count != 0) {
intel_pt_set_ip(decoder);
intel_pt_log("Omitting PGD ip " x64_fmt "\n",
decoder->ip);
}
decoder->pge = false;
decoder->continuous_period = false;
return 0;
case INTEL_PT_TIP_PGE:
decoder->pge = true;
intel_pt_log("Omitting PGE ip " x64_fmt "\n",
decoder->ip);
decoder->state.from_ip = 0;
if (decoder->packet.count == 0) {
decoder->state.to_ip = 0;
} else {
intel_pt_set_ip(decoder);
decoder->state.to_ip = decoder->ip;
}
return 0;
case INTEL_PT_TIP:
decoder->state.from_ip = decoder->ip;
if (decoder->packet.count == 0) {
decoder->state.to_ip = 0;
} else {
intel_pt_set_ip(decoder);
decoder->state.to_ip = decoder->ip;
}
return 0;
case INTEL_PT_PIP:
decoder->cr3 = decoder->packet.payload;
break;
case INTEL_PT_MODE_EXEC:
decoder->exec_mode = decoder->packet.payload;
break;
case INTEL_PT_PAD:
break;
default:
return intel_pt_bug(decoder);
}
}
}
static int intel_pt_walk_trace(struct intel_pt_decoder *decoder)
{
bool no_tip = false;
int err;
while (1) {
err = intel_pt_get_next_packet(decoder);
if (err)
return err;
next:
switch (decoder->packet.type) {
case INTEL_PT_TNT:
if (!decoder->packet.count)
break;
decoder->tnt = decoder->packet;
decoder->pkt_state = INTEL_PT_STATE_TNT;
err = intel_pt_walk_tnt(decoder);
if (err == -EAGAIN)
break;
return err;
case INTEL_PT_TIP_PGD:
if (decoder->packet.count != 0)
intel_pt_set_last_ip(decoder);
decoder->pkt_state = INTEL_PT_STATE_TIP_PGD;
return intel_pt_walk_tip(decoder);
case INTEL_PT_TIP_PGE: {
decoder->pge = true;
if (decoder->packet.count == 0) {
intel_pt_log_at("Skipping zero TIP.PGE",
decoder->pos);
break;
}
intel_pt_set_ip(decoder);
decoder->state.from_ip = 0;
decoder->state.to_ip = decoder->ip;
return 0;
}
case INTEL_PT_OVF:
return intel_pt_overflow(decoder);
case INTEL_PT_TIP:
if (decoder->packet.count != 0)
intel_pt_set_last_ip(decoder);
decoder->pkt_state = INTEL_PT_STATE_TIP;
return intel_pt_walk_tip(decoder);
case INTEL_PT_FUP:
if (decoder->packet.count == 0) {
intel_pt_log_at("Skipping zero FUP",
decoder->pos);
no_tip = false;
break;
}
intel_pt_set_last_ip(decoder);
err = intel_pt_walk_fup(decoder);
if (err != -EAGAIN) {
if (err)
return err;
if (no_tip)
decoder->pkt_state =
INTEL_PT_STATE_FUP_NO_TIP;
else
decoder->pkt_state = INTEL_PT_STATE_FUP;
return 0;
}
if (no_tip) {
no_tip = false;
break;
}
return intel_pt_walk_fup_tip(decoder);
case INTEL_PT_PSB:
intel_pt_clear_stack(&decoder->stack);
err = intel_pt_walk_psbend(decoder);
if (err == -EAGAIN)
goto next;
if (err)
return err;
break;
case INTEL_PT_PIP:
decoder->cr3 = decoder->packet.payload;
break;
case INTEL_PT_TSC:
intel_pt_calc_tsc_timestamp(decoder);
break;
case INTEL_PT_CBR:
decoder->cbr = decoder->packet.payload;
break;
case INTEL_PT_MODE_EXEC:
decoder->exec_mode = decoder->packet.payload;
break;
case INTEL_PT_MODE_TSX:
/* MODE_TSX need not be followed by FUP */
if (!decoder->pge) {
intel_pt_update_in_tx(decoder);
break;
}
err = intel_pt_mode_tsx(decoder, &no_tip);
if (err)
return err;
goto next;
case INTEL_PT_BAD: /* Does not happen */
return intel_pt_bug(decoder);
case INTEL_PT_PSBEND:
case INTEL_PT_PAD:
break;
default:
return intel_pt_bug(decoder);
}
}
}
/* Walk PSB+ packets to get in sync. */
static int intel_pt_walk_psb(struct intel_pt_decoder *decoder)
{
int err;
while (1) {
err = intel_pt_get_next_packet(decoder);
if (err)
return err;
switch (decoder->packet.type) {
case INTEL_PT_TIP_PGD:
decoder->continuous_period = false;
case INTEL_PT_TIP_PGE:
case INTEL_PT_TIP:
intel_pt_log("ERROR: Unexpected packet\n");
return -ENOENT;
case INTEL_PT_FUP:
decoder->pge = true;
if (decoder->last_ip || decoder->packet.count == 6 ||
decoder->packet.count == 0) {
uint64_t current_ip = decoder->ip;
intel_pt_set_ip(decoder);
if (current_ip)
intel_pt_log_to("Setting IP",
decoder->ip);
}
break;
case INTEL_PT_TSC:
intel_pt_calc_tsc_timestamp(decoder);
break;
case INTEL_PT_CBR:
decoder->cbr = decoder->packet.payload;
break;
case INTEL_PT_PIP:
decoder->cr3 = decoder->packet.payload;
break;
case INTEL_PT_MODE_EXEC:
decoder->exec_mode = decoder->packet.payload;
break;
case INTEL_PT_MODE_TSX:
intel_pt_update_in_tx(decoder);
break;
case INTEL_PT_TNT:
intel_pt_log("ERROR: Unexpected packet\n");
if (decoder->ip)
decoder->pkt_state = INTEL_PT_STATE_ERR4;
else
decoder->pkt_state = INTEL_PT_STATE_ERR3;
return -ENOENT;
case INTEL_PT_BAD: /* Does not happen */
return intel_pt_bug(decoder);
case INTEL_PT_OVF:
return intel_pt_overflow(decoder);
case INTEL_PT_PSBEND:
return 0;
case INTEL_PT_PSB:
case INTEL_PT_PAD:
default:
break;
}
}
}
static int intel_pt_walk_to_ip(struct intel_pt_decoder *decoder)
{
int err;
while (1) {
err = intel_pt_get_next_packet(decoder);
if (err)
return err;
switch (decoder->packet.type) {
case INTEL_PT_TIP_PGD:
decoder->continuous_period = false;
case INTEL_PT_TIP_PGE:
case INTEL_PT_TIP:
decoder->pge = decoder->packet.type != INTEL_PT_TIP_PGD;
if (decoder->last_ip || decoder->packet.count == 6 ||
decoder->packet.count == 0)
intel_pt_set_ip(decoder);
if (decoder->ip)
return 0;
break;
case INTEL_PT_FUP:
if (decoder->overflow) {
if (decoder->last_ip ||
decoder->packet.count == 6 ||
decoder->packet.count == 0)
intel_pt_set_ip(decoder);
if (decoder->ip)
return 0;
}
if (decoder->packet.count)
intel_pt_set_last_ip(decoder);
break;
case INTEL_PT_TSC:
intel_pt_calc_tsc_timestamp(decoder);
break;
case INTEL_PT_CBR:
decoder->cbr = decoder->packet.payload;
break;
case INTEL_PT_PIP:
decoder->cr3 = decoder->packet.payload;
break;
case INTEL_PT_MODE_EXEC:
decoder->exec_mode = decoder->packet.payload;
break;
case INTEL_PT_MODE_TSX:
intel_pt_update_in_tx(decoder);
break;
case INTEL_PT_OVF:
return intel_pt_overflow(decoder);
case INTEL_PT_BAD: /* Does not happen */
return intel_pt_bug(decoder);
case INTEL_PT_PSB:
err = intel_pt_walk_psb(decoder);
if (err)
return err;
if (decoder->ip) {
/* Do not have a sample */
decoder->state.type = 0;
return 0;
}
break;
case INTEL_PT_TNT:
case INTEL_PT_PSBEND:
case INTEL_PT_PAD:
default:
break;
}
}
}
static int intel_pt_sync_ip(struct intel_pt_decoder *decoder)
{
int err;
intel_pt_log("Scanning for full IP\n");
err = intel_pt_walk_to_ip(decoder);
if (err)
return err;
decoder->pkt_state = INTEL_PT_STATE_IN_SYNC;
decoder->overflow = false;
decoder->state.from_ip = 0;
decoder->state.to_ip = decoder->ip;
intel_pt_log_to("Setting IP", decoder->ip);
return 0;
}
static int intel_pt_part_psb(struct intel_pt_decoder *decoder)
{
const unsigned char *end = decoder->buf + decoder->len;
size_t i;
for (i = INTEL_PT_PSB_LEN - 1; i; i--) {
if (i > decoder->len)
continue;
if (!memcmp(end - i, INTEL_PT_PSB_STR, i))
return i;
}
return 0;
}
static int intel_pt_rest_psb(struct intel_pt_decoder *decoder, int part_psb)
{
size_t rest_psb = INTEL_PT_PSB_LEN - part_psb;
const char *psb = INTEL_PT_PSB_STR;
if (rest_psb > decoder->len ||
memcmp(decoder->buf, psb + part_psb, rest_psb))
return 0;
return rest_psb;
}
static int intel_pt_get_split_psb(struct intel_pt_decoder *decoder,
int part_psb)
{
int rest_psb, ret;
decoder->pos += decoder->len;
decoder->len = 0;
ret = intel_pt_get_next_data(decoder);
if (ret)
return ret;
rest_psb = intel_pt_rest_psb(decoder, part_psb);
if (!rest_psb)
return 0;
decoder->pos -= part_psb;
decoder->next_buf = decoder->buf + rest_psb;
decoder->next_len = decoder->len - rest_psb;
memcpy(decoder->temp_buf, INTEL_PT_PSB_STR, INTEL_PT_PSB_LEN);
decoder->buf = decoder->temp_buf;
decoder->len = INTEL_PT_PSB_LEN;
return 0;
}
static int intel_pt_scan_for_psb(struct intel_pt_decoder *decoder)
{
unsigned char *next;
int ret;
intel_pt_log("Scanning for PSB\n");
while (1) {
if (!decoder->len) {
ret = intel_pt_get_next_data(decoder);
if (ret)
return ret;
}
next = memmem(decoder->buf, decoder->len, INTEL_PT_PSB_STR,
INTEL_PT_PSB_LEN);
if (!next) {
int part_psb;
part_psb = intel_pt_part_psb(decoder);
if (part_psb) {
ret = intel_pt_get_split_psb(decoder, part_psb);
if (ret)
return ret;
} else {
decoder->pos += decoder->len;
decoder->len = 0;
}
continue;
}
decoder->pkt_step = next - decoder->buf;
return intel_pt_get_next_packet(decoder);
}
}
static int intel_pt_sync(struct intel_pt_decoder *decoder)
{
int err;
decoder->pge = false;
decoder->continuous_period = false;
decoder->last_ip = 0;
decoder->ip = 0;
intel_pt_clear_stack(&decoder->stack);
err = intel_pt_scan_for_psb(decoder);
if (err)
return err;
decoder->pkt_state = INTEL_PT_STATE_NO_IP;
err = intel_pt_walk_psb(decoder);
if (err)
return err;
if (decoder->ip) {
decoder->state.type = 0; /* Do not have a sample */
decoder->pkt_state = INTEL_PT_STATE_IN_SYNC;
} else {
return intel_pt_sync_ip(decoder);
}
return 0;
}
static uint64_t intel_pt_est_timestamp(struct intel_pt_decoder *decoder)
{
uint64_t est = decoder->timestamp_insn_cnt << 1;
if (!decoder->cbr || !decoder->max_non_turbo_ratio)
goto out;
est *= decoder->max_non_turbo_ratio;
est /= decoder->cbr;
out:
return decoder->timestamp + est;
}
const struct intel_pt_state *intel_pt_decode(struct intel_pt_decoder *decoder)
{
int err;
do {
decoder->state.type = INTEL_PT_BRANCH;
decoder->state.flags = 0;
switch (decoder->pkt_state) {
case INTEL_PT_STATE_NO_PSB:
err = intel_pt_sync(decoder);
break;
case INTEL_PT_STATE_NO_IP:
decoder->last_ip = 0;
/* Fall through */
case INTEL_PT_STATE_ERR_RESYNC:
err = intel_pt_sync_ip(decoder);
break;
case INTEL_PT_STATE_IN_SYNC:
err = intel_pt_walk_trace(decoder);
break;
case INTEL_PT_STATE_TNT:
err = intel_pt_walk_tnt(decoder);
if (err == -EAGAIN)
err = intel_pt_walk_trace(decoder);
break;
case INTEL_PT_STATE_TIP:
case INTEL_PT_STATE_TIP_PGD:
err = intel_pt_walk_tip(decoder);
break;
case INTEL_PT_STATE_FUP:
decoder->pkt_state = INTEL_PT_STATE_IN_SYNC;
err = intel_pt_walk_fup(decoder);
if (err == -EAGAIN)
err = intel_pt_walk_fup_tip(decoder);
else if (!err)
decoder->pkt_state = INTEL_PT_STATE_FUP;
break;
case INTEL_PT_STATE_FUP_NO_TIP:
decoder->pkt_state = INTEL_PT_STATE_IN_SYNC;
err = intel_pt_walk_fup(decoder);
if (err == -EAGAIN)
err = intel_pt_walk_trace(decoder);
break;
default:
err = intel_pt_bug(decoder);
break;
}
} while (err == -ENOLINK);
decoder->state.err = err ? intel_pt_ext_err(err) : 0;
decoder->state.timestamp = decoder->timestamp;
decoder->state.est_timestamp = intel_pt_est_timestamp(decoder);
decoder->state.cr3 = decoder->cr3;
if (err)
decoder->state.from_ip = decoder->ip;
return &decoder->state;
}
static bool intel_pt_at_psb(unsigned char *buf, size_t len)
{
if (len < INTEL_PT_PSB_LEN)
return false;
return memmem(buf, INTEL_PT_PSB_LEN, INTEL_PT_PSB_STR,
INTEL_PT_PSB_LEN);
}
/**
* intel_pt_next_psb - move buffer pointer to the start of the next PSB packet.
* @buf: pointer to buffer pointer
* @len: size of buffer
*
* Updates the buffer pointer to point to the start of the next PSB packet if
* there is one, otherwise the buffer pointer is unchanged. If @buf is updated,
* @len is adjusted accordingly.
*
* Return: %true if a PSB packet is found, %false otherwise.
*/
static bool intel_pt_next_psb(unsigned char **buf, size_t *len)
{
unsigned char *next;
next = memmem(*buf, *len, INTEL_PT_PSB_STR, INTEL_PT_PSB_LEN);
if (next) {
*len -= next - *buf;
*buf = next;
return true;
}
return false;
}
/**
* intel_pt_step_psb - move buffer pointer to the start of the following PSB
* packet.
* @buf: pointer to buffer pointer
* @len: size of buffer
*
* Updates the buffer pointer to point to the start of the following PSB packet
* (skipping the PSB at @buf itself) if there is one, otherwise the buffer
* pointer is unchanged. If @buf is updated, @len is adjusted accordingly.
*
* Return: %true if a PSB packet is found, %false otherwise.
*/
static bool intel_pt_step_psb(unsigned char **buf, size_t *len)
{
unsigned char *next;
if (!*len)
return false;
next = memmem(*buf + 1, *len - 1, INTEL_PT_PSB_STR, INTEL_PT_PSB_LEN);
if (next) {
*len -= next - *buf;
*buf = next;
return true;
}
return false;
}
/**
* intel_pt_last_psb - find the last PSB packet in a buffer.
* @buf: buffer
* @len: size of buffer
*
* This function finds the last PSB in a buffer.
*
* Return: A pointer to the last PSB in @buf if found, %NULL otherwise.
*/
static unsigned char *intel_pt_last_psb(unsigned char *buf, size_t len)
{
const char *n = INTEL_PT_PSB_STR;
unsigned char *p;
size_t k;
if (len < INTEL_PT_PSB_LEN)
return NULL;
k = len - INTEL_PT_PSB_LEN + 1;
while (1) {
p = memrchr(buf, n[0], k);
if (!p)
return NULL;
if (!memcmp(p + 1, n + 1, INTEL_PT_PSB_LEN - 1))
return p;
k = p - buf;
if (!k)
return NULL;
}
}
/**
* intel_pt_next_tsc - find and return next TSC.
* @buf: buffer
* @len: size of buffer
* @tsc: TSC value returned
*
* Find a TSC packet in @buf and return the TSC value. This function assumes
* that @buf starts at a PSB and that PSB+ will contain TSC and so stops if a
* PSBEND packet is found.
*
* Return: %true if TSC is found, false otherwise.
*/
static bool intel_pt_next_tsc(unsigned char *buf, size_t len, uint64_t *tsc)
{
struct intel_pt_pkt packet;
int ret;
while (len) {
ret = intel_pt_get_packet(buf, len, &packet);
if (ret <= 0)
return false;
if (packet.type == INTEL_PT_TSC) {
*tsc = packet.payload;
return true;
}
if (packet.type == INTEL_PT_PSBEND)
return false;
buf += ret;
len -= ret;
}
return false;
}
/**
* intel_pt_tsc_cmp - compare 7-byte TSCs.
* @tsc1: first TSC to compare
* @tsc2: second TSC to compare
*
* This function compares 7-byte TSC values allowing for the possibility that
* TSC wrapped around. Generally it is not possible to know if TSC has wrapped
* around so for that purpose this function assumes the absolute difference is
* less than half the maximum difference.
*
* Return: %-1 if @tsc1 is before @tsc2, %0 if @tsc1 == @tsc2, %1 if @tsc1 is
* after @tsc2.
*/
static int intel_pt_tsc_cmp(uint64_t tsc1, uint64_t tsc2)
{
const uint64_t halfway = (1ULL << 55);
if (tsc1 == tsc2)
return 0;
if (tsc1 < tsc2) {
if (tsc2 - tsc1 < halfway)
return -1;
else
return 1;
} else {
if (tsc1 - tsc2 < halfway)
return 1;
else
return -1;
}
}
/**
* intel_pt_find_overlap_tsc - determine start of non-overlapped trace data
* using TSC.
* @buf_a: first buffer
* @len_a: size of first buffer
* @buf_b: second buffer
* @len_b: size of second buffer
*
* If the trace contains TSC we can look at the last TSC of @buf_a and the
* first TSC of @buf_b in order to determine if the buffers overlap, and then
* walk forward in @buf_b until a later TSC is found. A precondition is that
* @buf_a and @buf_b are positioned at a PSB.
*
* Return: A pointer into @buf_b from where non-overlapped data starts, or
* @buf_b + @len_b if there is no non-overlapped data.
*/
static unsigned char *intel_pt_find_overlap_tsc(unsigned char *buf_a,
size_t len_a,
unsigned char *buf_b,
size_t len_b)
{
uint64_t tsc_a, tsc_b;
unsigned char *p;
size_t len;
p = intel_pt_last_psb(buf_a, len_a);
if (!p)
return buf_b; /* No PSB in buf_a => no overlap */
len = len_a - (p - buf_a);
if (!intel_pt_next_tsc(p, len, &tsc_a)) {
/* The last PSB+ in buf_a is incomplete, so go back one more */
len_a -= len;
p = intel_pt_last_psb(buf_a, len_a);
if (!p)
return buf_b; /* No full PSB+ => assume no overlap */
len = len_a - (p - buf_a);
if (!intel_pt_next_tsc(p, len, &tsc_a))
return buf_b; /* No TSC in buf_a => assume no overlap */
}
while (1) {
/* Ignore PSB+ with no TSC */
if (intel_pt_next_tsc(buf_b, len_b, &tsc_b) &&
intel_pt_tsc_cmp(tsc_a, tsc_b) < 0)
return buf_b; /* tsc_a < tsc_b => no overlap */
if (!intel_pt_step_psb(&buf_b, &len_b))
return buf_b + len_b; /* No PSB in buf_b => no data */
}
}
/**
* intel_pt_find_overlap - determine start of non-overlapped trace data.
* @buf_a: first buffer
* @len_a: size of first buffer
* @buf_b: second buffer
* @len_b: size of second buffer
* @have_tsc: can use TSC packets to detect overlap
*
* When trace samples or snapshots are recorded there is the possibility that
* the data overlaps. Note that, for the purposes of decoding, data is only
* useful if it begins with a PSB packet.
*
* Return: A pointer into @buf_b from where non-overlapped data starts, or
* @buf_b + @len_b if there is no non-overlapped data.
*/
unsigned char *intel_pt_find_overlap(unsigned char *buf_a, size_t len_a,
unsigned char *buf_b, size_t len_b,
bool have_tsc)
{
unsigned char *found;
/* Buffer 'b' must start at PSB so throw away everything before that */
if (!intel_pt_next_psb(&buf_b, &len_b))
return buf_b + len_b; /* No PSB */
if (!intel_pt_next_psb(&buf_a, &len_a))
return buf_b; /* No overlap */
if (have_tsc) {
found = intel_pt_find_overlap_tsc(buf_a, len_a, buf_b, len_b);
if (found)
return found;
}
/*
* Buffer 'b' cannot end within buffer 'a' so, for comparison purposes,
* we can ignore the first part of buffer 'a'.
*/
while (len_b < len_a) {
if (!intel_pt_step_psb(&buf_a, &len_a))
return buf_b; /* No overlap */
}
/* Now len_b >= len_a */
if (len_b > len_a) {
/* The leftover buffer 'b' must start at a PSB */
while (!intel_pt_at_psb(buf_b + len_a, len_b - len_a)) {
if (!intel_pt_step_psb(&buf_a, &len_a))
return buf_b; /* No overlap */
}
}
while (1) {
/* Potential overlap so check the bytes */
found = memmem(buf_a, len_a, buf_b, len_a);
if (found)
return buf_b + len_a;
/* Try again at next PSB in buffer 'a' */
if (!intel_pt_step_psb(&buf_a, &len_a))
return buf_b; /* No overlap */
/* The leftover buffer 'b' must start at a PSB */
while (!intel_pt_at_psb(buf_b + len_a, len_b - len_a)) {
if (!intel_pt_step_psb(&buf_a, &len_a))
return buf_b; /* No overlap */
}
}
}
/*
* intel_pt_decoder.h: Intel Processor Trace support
* Copyright (c) 2013-2014, Intel Corporation.
*
* This program is free software; you can redistribute it and/or modify it
* under the terms and conditions of the GNU General Public License,
* version 2, as published by the Free Software Foundation.
*
* This program is distributed in the hope it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
* more details.
*
*/
#ifndef INCLUDE__INTEL_PT_DECODER_H__
#define INCLUDE__INTEL_PT_DECODER_H__
#include <stdint.h>
#include <stddef.h>
#include <stdbool.h>
#include "intel-pt-insn-decoder.h"
#define INTEL_PT_IN_TX (1 << 0)
#define INTEL_PT_ABORT_TX (1 << 1)
#define INTEL_PT_ASYNC (1 << 2)
enum intel_pt_sample_type {
INTEL_PT_BRANCH = 1 << 0,
INTEL_PT_INSTRUCTION = 1 << 1,
INTEL_PT_TRANSACTION = 1 << 2,
};
enum intel_pt_period_type {
INTEL_PT_PERIOD_NONE,
INTEL_PT_PERIOD_INSTRUCTIONS,
INTEL_PT_PERIOD_TICKS,
};
enum {
INTEL_PT_ERR_NOMEM = 1,
INTEL_PT_ERR_INTERN,
INTEL_PT_ERR_BADPKT,
INTEL_PT_ERR_NODATA,
INTEL_PT_ERR_NOINSN,
INTEL_PT_ERR_MISMAT,
INTEL_PT_ERR_OVR,
INTEL_PT_ERR_LOST,
INTEL_PT_ERR_UNK,
INTEL_PT_ERR_NELOOP,
INTEL_PT_ERR_MAX,
};
struct intel_pt_state {
enum intel_pt_sample_type type;
int err;
uint64_t from_ip;
uint64_t to_ip;
uint64_t cr3;
uint64_t timestamp;
uint64_t est_timestamp;
uint64_t trace_nr;
uint32_t flags;
enum intel_pt_insn_op insn_op;
int insn_len;
};
struct intel_pt_insn;
struct intel_pt_buffer {
const unsigned char *buf;
size_t len;
bool consecutive;
uint64_t ref_timestamp;
uint64_t trace_nr;
};
struct intel_pt_params {
int (*get_trace)(struct intel_pt_buffer *buffer, void *data);
int (*walk_insn)(struct intel_pt_insn *intel_pt_insn,
uint64_t *insn_cnt_ptr, uint64_t *ip, uint64_t to_ip,
uint64_t max_insn_cnt, void *data);
void *data;
bool return_compression;
uint64_t period;
enum intel_pt_period_type period_type;
unsigned max_non_turbo_ratio;
};
struct intel_pt_decoder;
struct intel_pt_decoder *intel_pt_decoder_new(struct intel_pt_params *params);
void intel_pt_decoder_free(struct intel_pt_decoder *decoder);
const struct intel_pt_state *intel_pt_decode(struct intel_pt_decoder *decoder);
unsigned char *intel_pt_find_overlap(unsigned char *buf_a, size_t len_a,
unsigned char *buf_b, size_t len_b,
bool have_tsc);
int intel_pt__strerror(int code, char *buf, size_t buflen);
#endif
/*
* intel_pt_insn_decoder.c: Intel Processor Trace support
* Copyright (c) 2013-2014, Intel Corporation.
*
* This program is free software; you can redistribute it and/or modify it
* under the terms and conditions of the GNU General Public License,
* version 2, as published by the Free Software Foundation.
*
* This program is distributed in the hope it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
* more details.
*
*/
#include <stdio.h>
#include <string.h>
#include <endian.h>
#include <byteswap.h>
#include "event.h"
#include <asm/insn.h>
#include "inat.c"
#include "insn.c"
#include "intel-pt-insn-decoder.h"
/* Based on branch_type() from perf_event_intel_lbr.c */
static void intel_pt_insn_decoder(struct insn *insn,
struct intel_pt_insn *intel_pt_insn)
{
enum intel_pt_insn_op op = INTEL_PT_OP_OTHER;
enum intel_pt_insn_branch branch = INTEL_PT_BR_NO_BRANCH;
int ext;
if (insn_is_avx(insn)) {
intel_pt_insn->op = INTEL_PT_OP_OTHER;
intel_pt_insn->branch = INTEL_PT_BR_NO_BRANCH;
intel_pt_insn->length = insn->length;
return;
}
switch (insn->opcode.bytes[0]) {
case 0xf:
switch (insn->opcode.bytes[1]) {
case 0x05: /* syscall */
case 0x34: /* sysenter */
op = INTEL_PT_OP_SYSCALL;
branch = INTEL_PT_BR_INDIRECT;
break;
case 0x07: /* sysret */
case 0x35: /* sysexit */
op = INTEL_PT_OP_SYSRET;
branch = INTEL_PT_BR_INDIRECT;
break;
case 0x80 ... 0x8f: /* jcc */
op = INTEL_PT_OP_JCC;
branch = INTEL_PT_BR_CONDITIONAL;
break;
default:
break;
}
break;
case 0x70 ... 0x7f: /* jcc */
op = INTEL_PT_OP_JCC;
branch = INTEL_PT_BR_CONDITIONAL;
break;
case 0xc2: /* near ret */
case 0xc3: /* near ret */
case 0xca: /* far ret */
case 0xcb: /* far ret */
op = INTEL_PT_OP_RET;
branch = INTEL_PT_BR_INDIRECT;
break;
case 0xcf: /* iret */
op = INTEL_PT_OP_IRET;
branch = INTEL_PT_BR_INDIRECT;
break;
case 0xcc ... 0xce: /* int */
op = INTEL_PT_OP_INT;
branch = INTEL_PT_BR_INDIRECT;
break;
case 0xe8: /* call near rel */
op = INTEL_PT_OP_CALL;
branch = INTEL_PT_BR_UNCONDITIONAL;
break;
case 0x9a: /* call far absolute */
op = INTEL_PT_OP_CALL;
branch = INTEL_PT_BR_INDIRECT;
break;
case 0xe0 ... 0xe2: /* loop */
op = INTEL_PT_OP_LOOP;
branch = INTEL_PT_BR_CONDITIONAL;
break;
case 0xe3: /* jcc */
op = INTEL_PT_OP_JCC;
branch = INTEL_PT_BR_CONDITIONAL;
break;
case 0xe9: /* jmp */
case 0xeb: /* jmp */
op = INTEL_PT_OP_JMP;
branch = INTEL_PT_BR_UNCONDITIONAL;
break;
case 0xea: /* far jmp */
op = INTEL_PT_OP_JMP;
branch = INTEL_PT_BR_INDIRECT;
break;
case 0xff: /* call near absolute, call far absolute ind */
ext = (insn->modrm.bytes[0] >> 3) & 0x7;
switch (ext) {
case 2: /* near ind call */
case 3: /* far ind call */
op = INTEL_PT_OP_CALL;
branch = INTEL_PT_BR_INDIRECT;
break;
case 4:
case 5:
op = INTEL_PT_OP_JMP;
branch = INTEL_PT_BR_INDIRECT;
break;
default:
break;
}
break;
default:
break;
}
intel_pt_insn->op = op;
intel_pt_insn->branch = branch;
intel_pt_insn->length = insn->length;
if (branch == INTEL_PT_BR_CONDITIONAL ||
branch == INTEL_PT_BR_UNCONDITIONAL) {
#if __BYTE_ORDER == __BIG_ENDIAN
switch (insn->immediate.nbytes) {
case 1:
intel_pt_insn->rel = insn->immediate.value;
break;
case 2:
intel_pt_insn->rel =
bswap_16((short)insn->immediate.value);
break;
case 4:
intel_pt_insn->rel = bswap_32(insn->immediate.value);
break;
}
#else
intel_pt_insn->rel = insn->immediate.value;
#endif
}
}
int intel_pt_get_insn(const unsigned char *buf, size_t len, int x86_64,
struct intel_pt_insn *intel_pt_insn)
{
struct insn insn;
insn_init(&insn, buf, len, x86_64);
insn_get_length(&insn);
if (!insn_complete(&insn) || insn.length > len)
return -1;
intel_pt_insn_decoder(&insn, intel_pt_insn);
if (insn.length < INTEL_PT_INSN_DBG_BUF_SZ)
memcpy(intel_pt_insn->buf, buf, insn.length);
else
memcpy(intel_pt_insn->buf, buf, INTEL_PT_INSN_DBG_BUF_SZ);
return 0;
}
const char *branch_name[] = {
[INTEL_PT_OP_OTHER] = "Other",
[INTEL_PT_OP_CALL] = "Call",
[INTEL_PT_OP_RET] = "Ret",
[INTEL_PT_OP_JCC] = "Jcc",
[INTEL_PT_OP_JMP] = "Jmp",
[INTEL_PT_OP_LOOP] = "Loop",
[INTEL_PT_OP_IRET] = "IRet",
[INTEL_PT_OP_INT] = "Int",
[INTEL_PT_OP_SYSCALL] = "Syscall",
[INTEL_PT_OP_SYSRET] = "Sysret",
};
const char *intel_pt_insn_name(enum intel_pt_insn_op op)
{
return branch_name[op];
}
int intel_pt_insn_desc(const struct intel_pt_insn *intel_pt_insn, char *buf,
size_t buf_len)
{
switch (intel_pt_insn->branch) {
case INTEL_PT_BR_CONDITIONAL:
case INTEL_PT_BR_UNCONDITIONAL:
return snprintf(buf, buf_len, "%s %s%d",
intel_pt_insn_name(intel_pt_insn->op),
intel_pt_insn->rel > 0 ? "+" : "",
intel_pt_insn->rel);
case INTEL_PT_BR_NO_BRANCH:
case INTEL_PT_BR_INDIRECT:
return snprintf(buf, buf_len, "%s",
intel_pt_insn_name(intel_pt_insn->op));
default:
break;
}
return 0;
}
size_t intel_pt_insn_max_size(void)
{
return MAX_INSN_SIZE;
}
int intel_pt_insn_type(enum intel_pt_insn_op op)
{
switch (op) {
case INTEL_PT_OP_OTHER:
return 0;
case INTEL_PT_OP_CALL:
return PERF_IP_FLAG_BRANCH | PERF_IP_FLAG_CALL;
case INTEL_PT_OP_RET:
return PERF_IP_FLAG_BRANCH | PERF_IP_FLAG_RETURN;
case INTEL_PT_OP_JCC:
return PERF_IP_FLAG_BRANCH | PERF_IP_FLAG_CONDITIONAL;
case INTEL_PT_OP_JMP:
return PERF_IP_FLAG_BRANCH;
case INTEL_PT_OP_LOOP:
return PERF_IP_FLAG_BRANCH | PERF_IP_FLAG_CONDITIONAL;
case INTEL_PT_OP_IRET:
return PERF_IP_FLAG_BRANCH | PERF_IP_FLAG_RETURN |
PERF_IP_FLAG_INTERRUPT;
case INTEL_PT_OP_INT:
return PERF_IP_FLAG_BRANCH | PERF_IP_FLAG_CALL |
PERF_IP_FLAG_INTERRUPT;
case INTEL_PT_OP_SYSCALL:
return PERF_IP_FLAG_BRANCH | PERF_IP_FLAG_CALL |
PERF_IP_FLAG_SYSCALLRET;
case INTEL_PT_OP_SYSRET:
return PERF_IP_FLAG_BRANCH | PERF_IP_FLAG_RETURN |
PERF_IP_FLAG_SYSCALLRET;
default:
return 0;
}
}
/*
* intel_pt_insn_decoder.h: Intel Processor Trace support
* Copyright (c) 2013-2014, Intel Corporation.
*
* This program is free software; you can redistribute it and/or modify it
* under the terms and conditions of the GNU General Public License,
* version 2, as published by the Free Software Foundation.
*
* This program is distributed in the hope it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
* more details.
*
*/
#ifndef INCLUDE__INTEL_PT_INSN_DECODER_H__
#define INCLUDE__INTEL_PT_INSN_DECODER_H__
#include <stddef.h>
#include <stdint.h>
#define INTEL_PT_INSN_DESC_MAX 32
#define INTEL_PT_INSN_DBG_BUF_SZ 16
enum intel_pt_insn_op {
INTEL_PT_OP_OTHER,
INTEL_PT_OP_CALL,
INTEL_PT_OP_RET,
INTEL_PT_OP_JCC,
INTEL_PT_OP_JMP,
INTEL_PT_OP_LOOP,
INTEL_PT_OP_IRET,
INTEL_PT_OP_INT,
INTEL_PT_OP_SYSCALL,
INTEL_PT_OP_SYSRET,
};
enum intel_pt_insn_branch {
INTEL_PT_BR_NO_BRANCH,
INTEL_PT_BR_INDIRECT,
INTEL_PT_BR_CONDITIONAL,
INTEL_PT_BR_UNCONDITIONAL,
};
struct intel_pt_insn {
enum intel_pt_insn_op op;
enum intel_pt_insn_branch branch;
int length;
int32_t rel;
unsigned char buf[INTEL_PT_INSN_DBG_BUF_SZ];
};
int intel_pt_get_insn(const unsigned char *buf, size_t len, int x86_64,
struct intel_pt_insn *intel_pt_insn);
const char *intel_pt_insn_name(enum intel_pt_insn_op op);
int intel_pt_insn_desc(const struct intel_pt_insn *intel_pt_insn, char *buf,
size_t buf_len);
size_t intel_pt_insn_max_size(void);
int intel_pt_insn_type(enum intel_pt_insn_op op);
#endif
/*
* intel_pt_log.c: Intel Processor Trace support
* Copyright (c) 2013-2014, Intel Corporation.
*
* This program is free software; you can redistribute it and/or modify it
* under the terms and conditions of the GNU General Public License,
* version 2, as published by the Free Software Foundation.
*
* This program is distributed in the hope it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
* more details.
*
*/
#include <stdio.h>
#include <stdint.h>
#include <inttypes.h>
#include <stdarg.h>
#include <stdbool.h>
#include <string.h>
#include "intel-pt-log.h"
#include "intel-pt-insn-decoder.h"
#include "intel-pt-pkt-decoder.h"
#define MAX_LOG_NAME 256
static FILE *f;
static char log_name[MAX_LOG_NAME];
static bool enable_logging;
void intel_pt_log_enable(void)
{
enable_logging = true;
}
void intel_pt_log_disable(void)
{
if (f)
fflush(f);
enable_logging = false;
}
void intel_pt_log_set_name(const char *name)
{
strncpy(log_name, name, MAX_LOG_NAME - 5);
strcat(log_name, ".log");
}
static void intel_pt_print_data(const unsigned char *buf, int len, uint64_t pos,
int indent)
{
int i;
for (i = 0; i < indent; i++)
fprintf(f, " ");
fprintf(f, " %08" PRIx64 ": ", pos);
for (i = 0; i < len; i++)
fprintf(f, " %02x", buf[i]);
for (; i < 16; i++)
fprintf(f, " ");
fprintf(f, " ");
}
static void intel_pt_print_no_data(uint64_t pos, int indent)
{
int i;
for (i = 0; i < indent; i++)
fprintf(f, " ");
fprintf(f, " %08" PRIx64 ": ", pos);
for (i = 0; i < 16; i++)
fprintf(f, " ");
fprintf(f, " ");
}
static int intel_pt_log_open(void)
{
if (!enable_logging)
return -1;
if (f)
return 0;
if (!log_name[0])
return -1;
f = fopen(log_name, "w+");
if (!f) {
enable_logging = false;
return -1;
}
return 0;
}
void intel_pt_log_packet(const struct intel_pt_pkt *packet, int pkt_len,
uint64_t pos, const unsigned char *buf)
{
char desc[INTEL_PT_PKT_DESC_MAX];
if (intel_pt_log_open())
return;
intel_pt_print_data(buf, pkt_len, pos, 0);
intel_pt_pkt_desc(packet, desc, INTEL_PT_PKT_DESC_MAX);
fprintf(f, "%s\n", desc);
}
void intel_pt_log_insn(struct intel_pt_insn *intel_pt_insn, uint64_t ip)
{
char desc[INTEL_PT_INSN_DESC_MAX];
size_t len = intel_pt_insn->length;
if (intel_pt_log_open())
return;
if (len > INTEL_PT_INSN_DBG_BUF_SZ)
len = INTEL_PT_INSN_DBG_BUF_SZ;
intel_pt_print_data(intel_pt_insn->buf, len, ip, 8);
if (intel_pt_insn_desc(intel_pt_insn, desc, INTEL_PT_INSN_DESC_MAX) > 0)
fprintf(f, "%s\n", desc);
else
fprintf(f, "Bad instruction!\n");
}
void intel_pt_log_insn_no_data(struct intel_pt_insn *intel_pt_insn, uint64_t ip)
{
char desc[INTEL_PT_INSN_DESC_MAX];
if (intel_pt_log_open())
return;
intel_pt_print_no_data(ip, 8);
if (intel_pt_insn_desc(intel_pt_insn, desc, INTEL_PT_INSN_DESC_MAX) > 0)
fprintf(f, "%s\n", desc);
else
fprintf(f, "Bad instruction!\n");
}
void intel_pt_log(const char *fmt, ...)
{
va_list args;
if (intel_pt_log_open())
return;
va_start(args, fmt);
vfprintf(f, fmt, args);
va_end(args);
}
/*
* intel_pt_log.h: Intel Processor Trace support
* Copyright (c) 2013-2014, Intel Corporation.
*
* This program is free software; you can redistribute it and/or modify it
* under the terms and conditions of the GNU General Public License,
* version 2, as published by the Free Software Foundation.
*
* This program is distributed in the hope it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
* more details.
*
*/
#ifndef INCLUDE__INTEL_PT_LOG_H__
#define INCLUDE__INTEL_PT_LOG_H__
#include <stdint.h>
#include <inttypes.h>
struct intel_pt_pkt;
void intel_pt_log_enable(void);
void intel_pt_log_disable(void);
void intel_pt_log_set_name(const char *name);
void intel_pt_log_packet(const struct intel_pt_pkt *packet, int pkt_len,
uint64_t pos, const unsigned char *buf);
struct intel_pt_insn;
void intel_pt_log_insn(struct intel_pt_insn *intel_pt_insn, uint64_t ip);
void intel_pt_log_insn_no_data(struct intel_pt_insn *intel_pt_insn,
uint64_t ip);
__attribute__((format(printf, 1, 2)))
void intel_pt_log(const char *fmt, ...);
#define x64_fmt "0x%" PRIx64
static inline void intel_pt_log_at(const char *msg, uint64_t u)
{
intel_pt_log("%s at " x64_fmt "\n", msg, u);
}
static inline void intel_pt_log_to(const char *msg, uint64_t u)
{
intel_pt_log("%s to " x64_fmt "\n", msg, u);
}
#endif
/*
* intel_pt_pkt_decoder.c: Intel Processor Trace support
* Copyright (c) 2013-2014, Intel Corporation.
*
* This program is free software; you can redistribute it and/or modify it
* under the terms and conditions of the GNU General Public License,
* version 2, as published by the Free Software Foundation.
*
* This program is distributed in the hope it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
* more details.
*
*/
#include <stdio.h>
#include <string.h>
#include <endian.h>
#include <byteswap.h>
#include "intel-pt-pkt-decoder.h"
#define BIT(n) (1 << (n))
#define BIT63 ((uint64_t)1 << 63)
#if __BYTE_ORDER == __BIG_ENDIAN
#define le16_to_cpu bswap_16
#define le32_to_cpu bswap_32
#define le64_to_cpu bswap_64
#define memcpy_le64(d, s, n) do { \
memcpy((d), (s), (n)); \
*(d) = le64_to_cpu(*(d)); \
} while (0)
#else
#define le16_to_cpu
#define le32_to_cpu
#define le64_to_cpu
#define memcpy_le64 memcpy
#endif
static const char * const packet_name[] = {
[INTEL_PT_BAD] = "Bad Packet!",
[INTEL_PT_PAD] = "PAD",
[INTEL_PT_TNT] = "TNT",
[INTEL_PT_TIP_PGD] = "TIP.PGD",
[INTEL_PT_TIP_PGE] = "TIP.PGE",
[INTEL_PT_TSC] = "TSC",
[INTEL_PT_MODE_EXEC] = "MODE.Exec",
[INTEL_PT_MODE_TSX] = "MODE.TSX",
[INTEL_PT_TIP] = "TIP",
[INTEL_PT_FUP] = "FUP",
[INTEL_PT_PSB] = "PSB",
[INTEL_PT_PSBEND] = "PSBEND",
[INTEL_PT_CBR] = "CBR",
[INTEL_PT_PIP] = "PIP",
[INTEL_PT_OVF] = "OVF",
};
const char *intel_pt_pkt_name(enum intel_pt_pkt_type type)
{
return packet_name[type];
}
static int intel_pt_get_long_tnt(const unsigned char *buf, size_t len,
struct intel_pt_pkt *packet)
{
uint64_t payload;
int count;
if (len < 8)
return INTEL_PT_NEED_MORE_BYTES;
payload = le64_to_cpu(*(uint64_t *)buf);
for (count = 47; count; count--) {
if (payload & BIT63)
break;
payload <<= 1;
}
packet->type = INTEL_PT_TNT;
packet->count = count;
packet->payload = payload << 1;
return 8;
}
static int intel_pt_get_pip(const unsigned char *buf, size_t len,
struct intel_pt_pkt *packet)
{
uint64_t payload = 0;
if (len < 8)
return INTEL_PT_NEED_MORE_BYTES;
packet->type = INTEL_PT_PIP;
memcpy_le64(&payload, buf + 2, 6);
packet->payload = payload >> 1;
return 8;
}
static int intel_pt_get_cbr(const unsigned char *buf, size_t len,
struct intel_pt_pkt *packet)
{
if (len < 4)
return INTEL_PT_NEED_MORE_BYTES;
packet->type = INTEL_PT_CBR;
packet->payload = buf[2];
return 4;
}
static int intel_pt_get_ovf(struct intel_pt_pkt *packet)
{
packet->type = INTEL_PT_OVF;
return 2;
}
static int intel_pt_get_psb(const unsigned char *buf, size_t len,
struct intel_pt_pkt *packet)
{
int i;
if (len < 16)
return INTEL_PT_NEED_MORE_BYTES;
for (i = 2; i < 16; i += 2) {
if (buf[i] != 2 || buf[i + 1] != 0x82)
return INTEL_PT_BAD_PACKET;
}
packet->type = INTEL_PT_PSB;
return 16;
}
static int intel_pt_get_psbend(struct intel_pt_pkt *packet)
{
packet->type = INTEL_PT_PSBEND;
return 2;
}
static int intel_pt_get_pad(struct intel_pt_pkt *packet)
{
packet->type = INTEL_PT_PAD;
return 1;
}
static int intel_pt_get_ext(const unsigned char *buf, size_t len,
struct intel_pt_pkt *packet)
{
if (len < 2)
return INTEL_PT_NEED_MORE_BYTES;
switch (buf[1]) {
case 0xa3: /* Long TNT */
return intel_pt_get_long_tnt(buf, len, packet);
case 0x43: /* PIP */
return intel_pt_get_pip(buf, len, packet);
case 0x03: /* CBR */
return intel_pt_get_cbr(buf, len, packet);
case 0xf3: /* OVF */
return intel_pt_get_ovf(packet);
case 0x82: /* PSB */
return intel_pt_get_psb(buf, len, packet);
case 0x23: /* PSBEND */
return intel_pt_get_psbend(packet);
default:
return INTEL_PT_BAD_PACKET;
}
}
static int intel_pt_get_short_tnt(unsigned int byte,
struct intel_pt_pkt *packet)
{
int count;
for (count = 6; count; count--) {
if (byte & BIT(7))
break;
byte <<= 1;
}
packet->type = INTEL_PT_TNT;
packet->count = count;
packet->payload = (uint64_t)byte << 57;
return 1;
}
static int intel_pt_get_ip(enum intel_pt_pkt_type type, unsigned int byte,
const unsigned char *buf, size_t len,
struct intel_pt_pkt *packet)
{
switch (byte >> 5) {
case 0:
packet->count = 0;
break;
case 1:
if (len < 3)
return INTEL_PT_NEED_MORE_BYTES;
packet->count = 2;
packet->payload = le16_to_cpu(*(uint16_t *)(buf + 1));
break;
case 2:
if (len < 5)
return INTEL_PT_NEED_MORE_BYTES;
packet->count = 4;
packet->payload = le32_to_cpu(*(uint32_t *)(buf + 1));
break;
case 3:
case 6:
if (len < 7)
return INTEL_PT_NEED_MORE_BYTES;
packet->count = 6;
memcpy_le64(&packet->payload, buf + 1, 6);
break;
default:
return INTEL_PT_BAD_PACKET;
}
packet->type = type;
return packet->count + 1;
}
static int intel_pt_get_mode(const unsigned char *buf, size_t len,
struct intel_pt_pkt *packet)
{
if (len < 2)
return INTEL_PT_NEED_MORE_BYTES;
switch (buf[1] >> 5) {
case 0:
packet->type = INTEL_PT_MODE_EXEC;
switch (buf[1] & 3) {
case 0:
packet->payload = 16;
break;
case 1:
packet->payload = 64;
break;
case 2:
packet->payload = 32;
break;
default:
return INTEL_PT_BAD_PACKET;
}
break;
case 1:
packet->type = INTEL_PT_MODE_TSX;
if ((buf[1] & 3) == 3)
return INTEL_PT_BAD_PACKET;
packet->payload = buf[1] & 3;
break;
default:
return INTEL_PT_BAD_PACKET;
}
return 2;
}
static int intel_pt_get_tsc(const unsigned char *buf, size_t len,
struct intel_pt_pkt *packet)
{
if (len < 8)
return INTEL_PT_NEED_MORE_BYTES;
packet->type = INTEL_PT_TSC;
memcpy_le64(&packet->payload, buf + 1, 7);
return 8;
}
static int intel_pt_do_get_packet(const unsigned char *buf, size_t len,
struct intel_pt_pkt *packet)
{
unsigned int byte;
memset(packet, 0, sizeof(struct intel_pt_pkt));
if (!len)
return INTEL_PT_NEED_MORE_BYTES;
byte = buf[0];
if (!(byte & BIT(0))) {
if (byte == 0)
return intel_pt_get_pad(packet);
if (byte == 2)
return intel_pt_get_ext(buf, len, packet);
return intel_pt_get_short_tnt(byte, packet);
}
switch (byte & 0x1f) {
case 0x0D:
return intel_pt_get_ip(INTEL_PT_TIP, byte, buf, len, packet);
case 0x11:
return intel_pt_get_ip(INTEL_PT_TIP_PGE, byte, buf, len,
packet);
case 0x01:
return intel_pt_get_ip(INTEL_PT_TIP_PGD, byte, buf, len,
packet);
case 0x1D:
return intel_pt_get_ip(INTEL_PT_FUP, byte, buf, len, packet);
case 0x19:
switch (byte) {
case 0x99:
return intel_pt_get_mode(buf, len, packet);
case 0x19:
return intel_pt_get_tsc(buf, len, packet);
default:
return INTEL_PT_BAD_PACKET;
}
default:
return INTEL_PT_BAD_PACKET;
}
}
int intel_pt_get_packet(const unsigned char *buf, size_t len,
struct intel_pt_pkt *packet)
{
int ret;
ret = intel_pt_do_get_packet(buf, len, packet);
if (ret > 0) {
while (ret < 8 && len > (size_t)ret && !buf[ret])
ret += 1;
}
return ret;
}
int intel_pt_pkt_desc(const struct intel_pt_pkt *packet, char *buf,
size_t buf_len)
{
int ret, i;
unsigned long long payload = packet->payload;
const char *name = intel_pt_pkt_name(packet->type);
switch (packet->type) {
case INTEL_PT_BAD:
case INTEL_PT_PAD:
case INTEL_PT_PSB:
case INTEL_PT_PSBEND:
case INTEL_PT_OVF:
return snprintf(buf, buf_len, "%s", name);
case INTEL_PT_TNT: {
size_t blen = buf_len;
ret = snprintf(buf, blen, "%s ", name);
if (ret < 0)
return ret;
buf += ret;
blen -= ret;
for (i = 0; i < packet->count; i++) {
if (payload & BIT63)
ret = snprintf(buf, blen, "T");
else
ret = snprintf(buf, blen, "N");
if (ret < 0)
return ret;
buf += ret;
blen -= ret;
payload <<= 1;
}
ret = snprintf(buf, blen, " (%d)", packet->count);
if (ret < 0)
return ret;
blen -= ret;
return buf_len - blen;
}
case INTEL_PT_TIP_PGD:
case INTEL_PT_TIP_PGE:
case INTEL_PT_TIP:
case INTEL_PT_FUP:
if (!(packet->count))
return snprintf(buf, buf_len, "%s no ip", name);
case INTEL_PT_CBR:
return snprintf(buf, buf_len, "%s 0x%llx", name, payload);
case INTEL_PT_TSC:
if (packet->count)
return snprintf(buf, buf_len,
"%s 0x%llx CTC 0x%x FC 0x%x",
name, payload, packet->count & 0xffff,
(packet->count >> 16) & 0x1ff);
else
return snprintf(buf, buf_len, "%s 0x%llx",
name, payload);
case INTEL_PT_MODE_EXEC:
return snprintf(buf, buf_len, "%s %lld", name, payload);
case INTEL_PT_MODE_TSX:
return snprintf(buf, buf_len, "%s TXAbort:%u InTX:%u",
name, (unsigned)(payload >> 1) & 1,
(unsigned)payload & 1);
case INTEL_PT_PIP:
ret = snprintf(buf, buf_len, "%s 0x%llx",
name, payload);
return ret;
default:
break;
}
return snprintf(buf, buf_len, "%s 0x%llx (%d)",
name, payload, packet->count);
}
/*
* intel_pt_pkt_decoder.h: Intel Processor Trace support
* Copyright (c) 2013-2014, Intel Corporation.
*
* This program is free software; you can redistribute it and/or modify it
* under the terms and conditions of the GNU General Public License,
* version 2, as published by the Free Software Foundation.
*
* This program is distributed in the hope it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
* more details.
*
*/
#ifndef INCLUDE__INTEL_PT_PKT_DECODER_H__
#define INCLUDE__INTEL_PT_PKT_DECODER_H__
#include <stddef.h>
#include <stdint.h>
#define INTEL_PT_PKT_DESC_MAX 256
#define INTEL_PT_NEED_MORE_BYTES -1
#define INTEL_PT_BAD_PACKET -2
#define INTEL_PT_PSB_STR "\002\202\002\202\002\202\002\202" \
"\002\202\002\202\002\202\002\202"
#define INTEL_PT_PSB_LEN 16
#define INTEL_PT_PKT_MAX_SZ 16
enum intel_pt_pkt_type {
INTEL_PT_BAD,
INTEL_PT_PAD,
INTEL_PT_TNT,
INTEL_PT_TIP_PGD,
INTEL_PT_TIP_PGE,
INTEL_PT_TSC,
INTEL_PT_MODE_EXEC,
INTEL_PT_MODE_TSX,
INTEL_PT_TIP,
INTEL_PT_FUP,
INTEL_PT_PSB,
INTEL_PT_PSBEND,
INTEL_PT_CBR,
INTEL_PT_PIP,
INTEL_PT_OVF,
};
struct intel_pt_pkt {
enum intel_pt_pkt_type type;
int count;
uint64_t payload;
};
const char *intel_pt_pkt_name(enum intel_pt_pkt_type);
int intel_pt_get_packet(const unsigned char *buf, size_t len,
struct intel_pt_pkt *packet);
int intel_pt_pkt_desc(const struct intel_pt_pkt *packet, char *buf, size_t len);
#endif
# x86 Opcode Maps
#
# This is (mostly) based on following documentations.
# - Intel(R) 64 and IA-32 Architectures Software Developer's Manual Vol.2C
# (#326018-047US, June 2013)
#
#<Opcode maps>
# Table: table-name
# Referrer: escaped-name
# AVXcode: avx-code
# opcode: mnemonic|GrpXXX [operand1[,operand2...]] [(extra1)[,(extra2)...] [| 2nd-mnemonic ...]
# (or)
# opcode: escape # escaped-name
# EndTable
#
#<group maps>
# GrpTable: GrpXXX
# reg: mnemonic [operand1[,operand2...]] [(extra1)[,(extra2)...] [| 2nd-mnemonic ...]
# EndTable
#
# AVX Superscripts
# (v): this opcode requires VEX prefix.
# (v1): this opcode only supports 128bit VEX.
#
# Last Prefix Superscripts
# - (66): the last prefix is 0x66
# - (F3): the last prefix is 0xF3
# - (F2): the last prefix is 0xF2
# - (!F3) : the last prefix is not 0xF3 (including non-last prefix case)
# - (66&F2): Both 0x66 and 0xF2 prefixes are specified.
Table: one byte opcode
Referrer:
AVXcode:
# 0x00 - 0x0f
00: ADD Eb,Gb
01: ADD Ev,Gv
02: ADD Gb,Eb
03: ADD Gv,Ev
04: ADD AL,Ib
05: ADD rAX,Iz
06: PUSH ES (i64)
07: POP ES (i64)
08: OR Eb,Gb
09: OR Ev,Gv
0a: OR Gb,Eb
0b: OR Gv,Ev
0c: OR AL,Ib
0d: OR rAX,Iz
0e: PUSH CS (i64)
0f: escape # 2-byte escape
# 0x10 - 0x1f
10: ADC Eb,Gb
11: ADC Ev,Gv
12: ADC Gb,Eb
13: ADC Gv,Ev
14: ADC AL,Ib
15: ADC rAX,Iz
16: PUSH SS (i64)
17: POP SS (i64)
18: SBB Eb,Gb
19: SBB Ev,Gv
1a: SBB Gb,Eb
1b: SBB Gv,Ev
1c: SBB AL,Ib
1d: SBB rAX,Iz
1e: PUSH DS (i64)
1f: POP DS (i64)
# 0x20 - 0x2f
20: AND Eb,Gb
21: AND Ev,Gv
22: AND Gb,Eb
23: AND Gv,Ev
24: AND AL,Ib
25: AND rAx,Iz
26: SEG=ES (Prefix)
27: DAA (i64)
28: SUB Eb,Gb
29: SUB Ev,Gv
2a: SUB Gb,Eb
2b: SUB Gv,Ev
2c: SUB AL,Ib
2d: SUB rAX,Iz
2e: SEG=CS (Prefix)
2f: DAS (i64)
# 0x30 - 0x3f
30: XOR Eb,Gb
31: XOR Ev,Gv
32: XOR Gb,Eb
33: XOR Gv,Ev
34: XOR AL,Ib
35: XOR rAX,Iz
36: SEG=SS (Prefix)
37: AAA (i64)
38: CMP Eb,Gb
39: CMP Ev,Gv
3a: CMP Gb,Eb
3b: CMP Gv,Ev
3c: CMP AL,Ib
3d: CMP rAX,Iz
3e: SEG=DS (Prefix)
3f: AAS (i64)
# 0x40 - 0x4f
40: INC eAX (i64) | REX (o64)
41: INC eCX (i64) | REX.B (o64)
42: INC eDX (i64) | REX.X (o64)
43: INC eBX (i64) | REX.XB (o64)
44: INC eSP (i64) | REX.R (o64)
45: INC eBP (i64) | REX.RB (o64)
46: INC eSI (i64) | REX.RX (o64)
47: INC eDI (i64) | REX.RXB (o64)
48: DEC eAX (i64) | REX.W (o64)
49: DEC eCX (i64) | REX.WB (o64)
4a: DEC eDX (i64) | REX.WX (o64)
4b: DEC eBX (i64) | REX.WXB (o64)
4c: DEC eSP (i64) | REX.WR (o64)
4d: DEC eBP (i64) | REX.WRB (o64)
4e: DEC eSI (i64) | REX.WRX (o64)
4f: DEC eDI (i64) | REX.WRXB (o64)
# 0x50 - 0x5f
50: PUSH rAX/r8 (d64)
51: PUSH rCX/r9 (d64)
52: PUSH rDX/r10 (d64)
53: PUSH rBX/r11 (d64)
54: PUSH rSP/r12 (d64)
55: PUSH rBP/r13 (d64)
56: PUSH rSI/r14 (d64)
57: PUSH rDI/r15 (d64)
58: POP rAX/r8 (d64)
59: POP rCX/r9 (d64)
5a: POP rDX/r10 (d64)
5b: POP rBX/r11 (d64)
5c: POP rSP/r12 (d64)
5d: POP rBP/r13 (d64)
5e: POP rSI/r14 (d64)
5f: POP rDI/r15 (d64)
# 0x60 - 0x6f
60: PUSHA/PUSHAD (i64)
61: POPA/POPAD (i64)
62: BOUND Gv,Ma (i64)
63: ARPL Ew,Gw (i64) | MOVSXD Gv,Ev (o64)
64: SEG=FS (Prefix)
65: SEG=GS (Prefix)
66: Operand-Size (Prefix)
67: Address-Size (Prefix)
68: PUSH Iz (d64)
69: IMUL Gv,Ev,Iz
6a: PUSH Ib (d64)
6b: IMUL Gv,Ev,Ib
6c: INS/INSB Yb,DX
6d: INS/INSW/INSD Yz,DX
6e: OUTS/OUTSB DX,Xb
6f: OUTS/OUTSW/OUTSD DX,Xz
# 0x70 - 0x7f
70: JO Jb
71: JNO Jb
72: JB/JNAE/JC Jb
73: JNB/JAE/JNC Jb
74: JZ/JE Jb
75: JNZ/JNE Jb
76: JBE/JNA Jb
77: JNBE/JA Jb
78: JS Jb
79: JNS Jb
7a: JP/JPE Jb
7b: JNP/JPO Jb
7c: JL/JNGE Jb
7d: JNL/JGE Jb
7e: JLE/JNG Jb
7f: JNLE/JG Jb
# 0x80 - 0x8f
80: Grp1 Eb,Ib (1A)
81: Grp1 Ev,Iz (1A)
82: Grp1 Eb,Ib (1A),(i64)
83: Grp1 Ev,Ib (1A)
84: TEST Eb,Gb
85: TEST Ev,Gv
86: XCHG Eb,Gb
87: XCHG Ev,Gv
88: MOV Eb,Gb
89: MOV Ev,Gv
8a: MOV Gb,Eb
8b: MOV Gv,Ev
8c: MOV Ev,Sw
8d: LEA Gv,M
8e: MOV Sw,Ew
8f: Grp1A (1A) | POP Ev (d64)
# 0x90 - 0x9f
90: NOP | PAUSE (F3) | XCHG r8,rAX
91: XCHG rCX/r9,rAX
92: XCHG rDX/r10,rAX
93: XCHG rBX/r11,rAX
94: XCHG rSP/r12,rAX
95: XCHG rBP/r13,rAX
96: XCHG rSI/r14,rAX
97: XCHG rDI/r15,rAX
98: CBW/CWDE/CDQE
99: CWD/CDQ/CQO
9a: CALLF Ap (i64)
9b: FWAIT/WAIT
9c: PUSHF/D/Q Fv (d64)
9d: POPF/D/Q Fv (d64)
9e: SAHF
9f: LAHF
# 0xa0 - 0xaf
a0: MOV AL,Ob
a1: MOV rAX,Ov
a2: MOV Ob,AL
a3: MOV Ov,rAX
a4: MOVS/B Yb,Xb
a5: MOVS/W/D/Q Yv,Xv
a6: CMPS/B Xb,Yb
a7: CMPS/W/D Xv,Yv
a8: TEST AL,Ib
a9: TEST rAX,Iz
aa: STOS/B Yb,AL
ab: STOS/W/D/Q Yv,rAX
ac: LODS/B AL,Xb
ad: LODS/W/D/Q rAX,Xv
ae: SCAS/B AL,Yb
# Note: The May 2011 Intel manual shows Xv for the second parameter of the
# next instruction but Yv is correct
af: SCAS/W/D/Q rAX,Yv
# 0xb0 - 0xbf
b0: MOV AL/R8L,Ib
b1: MOV CL/R9L,Ib
b2: MOV DL/R10L,Ib
b3: MOV BL/R11L,Ib
b4: MOV AH/R12L,Ib
b5: MOV CH/R13L,Ib
b6: MOV DH/R14L,Ib
b7: MOV BH/R15L,Ib
b8: MOV rAX/r8,Iv
b9: MOV rCX/r9,Iv
ba: MOV rDX/r10,Iv
bb: MOV rBX/r11,Iv
bc: MOV rSP/r12,Iv
bd: MOV rBP/r13,Iv
be: MOV rSI/r14,Iv
bf: MOV rDI/r15,Iv
# 0xc0 - 0xcf
c0: Grp2 Eb,Ib (1A)
c1: Grp2 Ev,Ib (1A)
c2: RETN Iw (f64)
c3: RETN
c4: LES Gz,Mp (i64) | VEX+2byte (Prefix)
c5: LDS Gz,Mp (i64) | VEX+1byte (Prefix)
c6: Grp11A Eb,Ib (1A)
c7: Grp11B Ev,Iz (1A)
c8: ENTER Iw,Ib
c9: LEAVE (d64)
ca: RETF Iw
cb: RETF
cc: INT3
cd: INT Ib
ce: INTO (i64)
cf: IRET/D/Q
# 0xd0 - 0xdf
d0: Grp2 Eb,1 (1A)
d1: Grp2 Ev,1 (1A)
d2: Grp2 Eb,CL (1A)
d3: Grp2 Ev,CL (1A)
d4: AAM Ib (i64)
d5: AAD Ib (i64)
d6:
d7: XLAT/XLATB
d8: ESC
d9: ESC
da: ESC
db: ESC
dc: ESC
dd: ESC
de: ESC
df: ESC
# 0xe0 - 0xef
# Note: "forced64" is Intel CPU behavior: they ignore 0x66 prefix
# in 64-bit mode. AMD CPUs accept 0x66 prefix, it causes RIP truncation
# to 16 bits. In 32-bit mode, 0x66 is accepted by both Intel and AMD.
e0: LOOPNE/LOOPNZ Jb (f64)
e1: LOOPE/LOOPZ Jb (f64)
e2: LOOP Jb (f64)
e3: JrCXZ Jb (f64)
e4: IN AL,Ib
e5: IN eAX,Ib
e6: OUT Ib,AL
e7: OUT Ib,eAX
# With 0x66 prefix in 64-bit mode, for AMD CPUs immediate offset
# in "near" jumps and calls is 16-bit. For CALL,
# push of return address is 16-bit wide, RSP is decremented by 2
# but is not truncated to 16 bits, unlike RIP.
e8: CALL Jz (f64)
e9: JMP-near Jz (f64)
ea: JMP-far Ap (i64)
eb: JMP-short Jb (f64)
ec: IN AL,DX
ed: IN eAX,DX
ee: OUT DX,AL
ef: OUT DX,eAX
# 0xf0 - 0xff
f0: LOCK (Prefix)
f1:
f2: REPNE (Prefix) | XACQUIRE (Prefix)
f3: REP/REPE (Prefix) | XRELEASE (Prefix)
f4: HLT
f5: CMC
f6: Grp3_1 Eb (1A)
f7: Grp3_2 Ev (1A)
f8: CLC
f9: STC
fa: CLI
fb: STI
fc: CLD
fd: STD
fe: Grp4 (1A)
ff: Grp5 (1A)
EndTable
Table: 2-byte opcode (0x0f)
Referrer: 2-byte escape
AVXcode: 1
# 0x0f 0x00-0x0f
00: Grp6 (1A)
01: Grp7 (1A)
02: LAR Gv,Ew
03: LSL Gv,Ew
04:
05: SYSCALL (o64)
06: CLTS
07: SYSRET (o64)
08: INVD
09: WBINVD
0a:
0b: UD2 (1B)
0c:
# AMD's prefetch group. Intel supports prefetchw(/1) only.
0d: GrpP
0e: FEMMS
# 3DNow! uses the last imm byte as opcode extension.
0f: 3DNow! Pq,Qq,Ib
# 0x0f 0x10-0x1f
# NOTE: According to Intel SDM opcode map, vmovups and vmovupd has no operands
# but it actually has operands. And also, vmovss and vmovsd only accept 128bit.
# MOVSS/MOVSD has too many forms(3) on SDM. This map just shows a typical form.
# Many AVX instructions lack v1 superscript, according to Intel AVX-Prgramming
# Reference A.1
10: vmovups Vps,Wps | vmovupd Vpd,Wpd (66) | vmovss Vx,Hx,Wss (F3),(v1) | vmovsd Vx,Hx,Wsd (F2),(v1)
11: vmovups Wps,Vps | vmovupd Wpd,Vpd (66) | vmovss Wss,Hx,Vss (F3),(v1) | vmovsd Wsd,Hx,Vsd (F2),(v1)
12: vmovlps Vq,Hq,Mq (v1) | vmovhlps Vq,Hq,Uq (v1) | vmovlpd Vq,Hq,Mq (66),(v1) | vmovsldup Vx,Wx (F3) | vmovddup Vx,Wx (F2)
13: vmovlps Mq,Vq (v1) | vmovlpd Mq,Vq (66),(v1)
14: vunpcklps Vx,Hx,Wx | vunpcklpd Vx,Hx,Wx (66)
15: vunpckhps Vx,Hx,Wx | vunpckhpd Vx,Hx,Wx (66)
16: vmovhps Vdq,Hq,Mq (v1) | vmovlhps Vdq,Hq,Uq (v1) | vmovhpd Vdq,Hq,Mq (66),(v1) | vmovshdup Vx,Wx (F3)
17: vmovhps Mq,Vq (v1) | vmovhpd Mq,Vq (66),(v1)
18: Grp16 (1A)
19:
1a: BNDCL Ev,Gv | BNDCU Ev,Gv | BNDMOV Gv,Ev | BNDLDX Gv,Ev,Gv
1b: BNDCN Ev,Gv | BNDMOV Ev,Gv | BNDMK Gv,Ev | BNDSTX Ev,GV,Gv
1c:
1d:
1e:
1f: NOP Ev
# 0x0f 0x20-0x2f
20: MOV Rd,Cd
21: MOV Rd,Dd
22: MOV Cd,Rd
23: MOV Dd,Rd
24:
25:
26:
27:
28: vmovaps Vps,Wps | vmovapd Vpd,Wpd (66)
29: vmovaps Wps,Vps | vmovapd Wpd,Vpd (66)
2a: cvtpi2ps Vps,Qpi | cvtpi2pd Vpd,Qpi (66) | vcvtsi2ss Vss,Hss,Ey (F3),(v1) | vcvtsi2sd Vsd,Hsd,Ey (F2),(v1)
2b: vmovntps Mps,Vps | vmovntpd Mpd,Vpd (66)
2c: cvttps2pi Ppi,Wps | cvttpd2pi Ppi,Wpd (66) | vcvttss2si Gy,Wss (F3),(v1) | vcvttsd2si Gy,Wsd (F2),(v1)
2d: cvtps2pi Ppi,Wps | cvtpd2pi Qpi,Wpd (66) | vcvtss2si Gy,Wss (F3),(v1) | vcvtsd2si Gy,Wsd (F2),(v1)
2e: vucomiss Vss,Wss (v1) | vucomisd Vsd,Wsd (66),(v1)
2f: vcomiss Vss,Wss (v1) | vcomisd Vsd,Wsd (66),(v1)
# 0x0f 0x30-0x3f
30: WRMSR
31: RDTSC
32: RDMSR
33: RDPMC
34: SYSENTER
35: SYSEXIT
36:
37: GETSEC
38: escape # 3-byte escape 1
39:
3a: escape # 3-byte escape 2
3b:
3c:
3d:
3e:
3f:
# 0x0f 0x40-0x4f
40: CMOVO Gv,Ev
41: CMOVNO Gv,Ev
42: CMOVB/C/NAE Gv,Ev
43: CMOVAE/NB/NC Gv,Ev
44: CMOVE/Z Gv,Ev
45: CMOVNE/NZ Gv,Ev
46: CMOVBE/NA Gv,Ev
47: CMOVA/NBE Gv,Ev
48: CMOVS Gv,Ev
49: CMOVNS Gv,Ev
4a: CMOVP/PE Gv,Ev
4b: CMOVNP/PO Gv,Ev
4c: CMOVL/NGE Gv,Ev
4d: CMOVNL/GE Gv,Ev
4e: CMOVLE/NG Gv,Ev
4f: CMOVNLE/G Gv,Ev
# 0x0f 0x50-0x5f
50: vmovmskps Gy,Ups | vmovmskpd Gy,Upd (66)
51: vsqrtps Vps,Wps | vsqrtpd Vpd,Wpd (66) | vsqrtss Vss,Hss,Wss (F3),(v1) | vsqrtsd Vsd,Hsd,Wsd (F2),(v1)
52: vrsqrtps Vps,Wps | vrsqrtss Vss,Hss,Wss (F3),(v1)
53: vrcpps Vps,Wps | vrcpss Vss,Hss,Wss (F3),(v1)
54: vandps Vps,Hps,Wps | vandpd Vpd,Hpd,Wpd (66)
55: vandnps Vps,Hps,Wps | vandnpd Vpd,Hpd,Wpd (66)
56: vorps Vps,Hps,Wps | vorpd Vpd,Hpd,Wpd (66)
57: vxorps Vps,Hps,Wps | vxorpd Vpd,Hpd,Wpd (66)
58: vaddps Vps,Hps,Wps | vaddpd Vpd,Hpd,Wpd (66) | vaddss Vss,Hss,Wss (F3),(v1) | vaddsd Vsd,Hsd,Wsd (F2),(v1)
59: vmulps Vps,Hps,Wps | vmulpd Vpd,Hpd,Wpd (66) | vmulss Vss,Hss,Wss (F3),(v1) | vmulsd Vsd,Hsd,Wsd (F2),(v1)
5a: vcvtps2pd Vpd,Wps | vcvtpd2ps Vps,Wpd (66) | vcvtss2sd Vsd,Hx,Wss (F3),(v1) | vcvtsd2ss Vss,Hx,Wsd (F2),(v1)
5b: vcvtdq2ps Vps,Wdq | vcvtps2dq Vdq,Wps (66) | vcvttps2dq Vdq,Wps (F3)
5c: vsubps Vps,Hps,Wps | vsubpd Vpd,Hpd,Wpd (66) | vsubss Vss,Hss,Wss (F3),(v1) | vsubsd Vsd,Hsd,Wsd (F2),(v1)
5d: vminps Vps,Hps,Wps | vminpd Vpd,Hpd,Wpd (66) | vminss Vss,Hss,Wss (F3),(v1) | vminsd Vsd,Hsd,Wsd (F2),(v1)
5e: vdivps Vps,Hps,Wps | vdivpd Vpd,Hpd,Wpd (66) | vdivss Vss,Hss,Wss (F3),(v1) | vdivsd Vsd,Hsd,Wsd (F2),(v1)
5f: vmaxps Vps,Hps,Wps | vmaxpd Vpd,Hpd,Wpd (66) | vmaxss Vss,Hss,Wss (F3),(v1) | vmaxsd Vsd,Hsd,Wsd (F2),(v1)
# 0x0f 0x60-0x6f
60: punpcklbw Pq,Qd | vpunpcklbw Vx,Hx,Wx (66),(v1)
61: punpcklwd Pq,Qd | vpunpcklwd Vx,Hx,Wx (66),(v1)
62: punpckldq Pq,Qd | vpunpckldq Vx,Hx,Wx (66),(v1)
63: packsswb Pq,Qq | vpacksswb Vx,Hx,Wx (66),(v1)
64: pcmpgtb Pq,Qq | vpcmpgtb Vx,Hx,Wx (66),(v1)
65: pcmpgtw Pq,Qq | vpcmpgtw Vx,Hx,Wx (66),(v1)
66: pcmpgtd Pq,Qq | vpcmpgtd Vx,Hx,Wx (66),(v1)
67: packuswb Pq,Qq | vpackuswb Vx,Hx,Wx (66),(v1)
68: punpckhbw Pq,Qd | vpunpckhbw Vx,Hx,Wx (66),(v1)
69: punpckhwd Pq,Qd | vpunpckhwd Vx,Hx,Wx (66),(v1)
6a: punpckhdq Pq,Qd | vpunpckhdq Vx,Hx,Wx (66),(v1)
6b: packssdw Pq,Qd | vpackssdw Vx,Hx,Wx (66),(v1)
6c: vpunpcklqdq Vx,Hx,Wx (66),(v1)
6d: vpunpckhqdq Vx,Hx,Wx (66),(v1)
6e: movd/q Pd,Ey | vmovd/q Vy,Ey (66),(v1)
6f: movq Pq,Qq | vmovdqa Vx,Wx (66) | vmovdqu Vx,Wx (F3)
# 0x0f 0x70-0x7f
70: pshufw Pq,Qq,Ib | vpshufd Vx,Wx,Ib (66),(v1) | vpshufhw Vx,Wx,Ib (F3),(v1) | vpshuflw Vx,Wx,Ib (F2),(v1)
71: Grp12 (1A)
72: Grp13 (1A)
73: Grp14 (1A)
74: pcmpeqb Pq,Qq | vpcmpeqb Vx,Hx,Wx (66),(v1)
75: pcmpeqw Pq,Qq | vpcmpeqw Vx,Hx,Wx (66),(v1)
76: pcmpeqd Pq,Qq | vpcmpeqd Vx,Hx,Wx (66),(v1)
# Note: Remove (v), because vzeroall and vzeroupper becomes emms without VEX.
77: emms | vzeroupper | vzeroall
78: VMREAD Ey,Gy
79: VMWRITE Gy,Ey
7a:
7b:
7c: vhaddpd Vpd,Hpd,Wpd (66) | vhaddps Vps,Hps,Wps (F2)
7d: vhsubpd Vpd,Hpd,Wpd (66) | vhsubps Vps,Hps,Wps (F2)
7e: movd/q Ey,Pd | vmovd/q Ey,Vy (66),(v1) | vmovq Vq,Wq (F3),(v1)
7f: movq Qq,Pq | vmovdqa Wx,Vx (66) | vmovdqu Wx,Vx (F3)
# 0x0f 0x80-0x8f
# Note: "forced64" is Intel CPU behavior (see comment about CALL insn).
80: JO Jz (f64)
81: JNO Jz (f64)
82: JB/JC/JNAE Jz (f64)
83: JAE/JNB/JNC Jz (f64)
84: JE/JZ Jz (f64)
85: JNE/JNZ Jz (f64)
86: JBE/JNA Jz (f64)
87: JA/JNBE Jz (f64)
88: JS Jz (f64)
89: JNS Jz (f64)
8a: JP/JPE Jz (f64)
8b: JNP/JPO Jz (f64)
8c: JL/JNGE Jz (f64)
8d: JNL/JGE Jz (f64)
8e: JLE/JNG Jz (f64)
8f: JNLE/JG Jz (f64)
# 0x0f 0x90-0x9f
90: SETO Eb
91: SETNO Eb
92: SETB/C/NAE Eb
93: SETAE/NB/NC Eb
94: SETE/Z Eb
95: SETNE/NZ Eb
96: SETBE/NA Eb
97: SETA/NBE Eb
98: SETS Eb
99: SETNS Eb
9a: SETP/PE Eb
9b: SETNP/PO Eb
9c: SETL/NGE Eb
9d: SETNL/GE Eb
9e: SETLE/NG Eb
9f: SETNLE/G Eb
# 0x0f 0xa0-0xaf
a0: PUSH FS (d64)
a1: POP FS (d64)
a2: CPUID
a3: BT Ev,Gv
a4: SHLD Ev,Gv,Ib
a5: SHLD Ev,Gv,CL
a6: GrpPDLK
a7: GrpRNG
a8: PUSH GS (d64)
a9: POP GS (d64)
aa: RSM
ab: BTS Ev,Gv
ac: SHRD Ev,Gv,Ib
ad: SHRD Ev,Gv,CL
ae: Grp15 (1A),(1C)
af: IMUL Gv,Ev
# 0x0f 0xb0-0xbf
b0: CMPXCHG Eb,Gb
b1: CMPXCHG Ev,Gv
b2: LSS Gv,Mp
b3: BTR Ev,Gv
b4: LFS Gv,Mp
b5: LGS Gv,Mp
b6: MOVZX Gv,Eb
b7: MOVZX Gv,Ew
b8: JMPE (!F3) | POPCNT Gv,Ev (F3)
b9: Grp10 (1A)
ba: Grp8 Ev,Ib (1A)
bb: BTC Ev,Gv
bc: BSF Gv,Ev (!F3) | TZCNT Gv,Ev (F3)
bd: BSR Gv,Ev (!F3) | LZCNT Gv,Ev (F3)
be: MOVSX Gv,Eb
bf: MOVSX Gv,Ew
# 0x0f 0xc0-0xcf
c0: XADD Eb,Gb
c1: XADD Ev,Gv
c2: vcmpps Vps,Hps,Wps,Ib | vcmppd Vpd,Hpd,Wpd,Ib (66) | vcmpss Vss,Hss,Wss,Ib (F3),(v1) | vcmpsd Vsd,Hsd,Wsd,Ib (F2),(v1)
c3: movnti My,Gy
c4: pinsrw Pq,Ry/Mw,Ib | vpinsrw Vdq,Hdq,Ry/Mw,Ib (66),(v1)
c5: pextrw Gd,Nq,Ib | vpextrw Gd,Udq,Ib (66),(v1)
c6: vshufps Vps,Hps,Wps,Ib | vshufpd Vpd,Hpd,Wpd,Ib (66)
c7: Grp9 (1A)
c8: BSWAP RAX/EAX/R8/R8D
c9: BSWAP RCX/ECX/R9/R9D
ca: BSWAP RDX/EDX/R10/R10D
cb: BSWAP RBX/EBX/R11/R11D
cc: BSWAP RSP/ESP/R12/R12D
cd: BSWAP RBP/EBP/R13/R13D
ce: BSWAP RSI/ESI/R14/R14D
cf: BSWAP RDI/EDI/R15/R15D
# 0x0f 0xd0-0xdf
d0: vaddsubpd Vpd,Hpd,Wpd (66) | vaddsubps Vps,Hps,Wps (F2)
d1: psrlw Pq,Qq | vpsrlw Vx,Hx,Wx (66),(v1)
d2: psrld Pq,Qq | vpsrld Vx,Hx,Wx (66),(v1)
d3: psrlq Pq,Qq | vpsrlq Vx,Hx,Wx (66),(v1)
d4: paddq Pq,Qq | vpaddq Vx,Hx,Wx (66),(v1)
d5: pmullw Pq,Qq | vpmullw Vx,Hx,Wx (66),(v1)
d6: vmovq Wq,Vq (66),(v1) | movq2dq Vdq,Nq (F3) | movdq2q Pq,Uq (F2)
d7: pmovmskb Gd,Nq | vpmovmskb Gd,Ux (66),(v1)
d8: psubusb Pq,Qq | vpsubusb Vx,Hx,Wx (66),(v1)
d9: psubusw Pq,Qq | vpsubusw Vx,Hx,Wx (66),(v1)
da: pminub Pq,Qq | vpminub Vx,Hx,Wx (66),(v1)
db: pand Pq,Qq | vpand Vx,Hx,Wx (66),(v1)
dc: paddusb Pq,Qq | vpaddusb Vx,Hx,Wx (66),(v1)
dd: paddusw Pq,Qq | vpaddusw Vx,Hx,Wx (66),(v1)
de: pmaxub Pq,Qq | vpmaxub Vx,Hx,Wx (66),(v1)
df: pandn Pq,Qq | vpandn Vx,Hx,Wx (66),(v1)
# 0x0f 0xe0-0xef
e0: pavgb Pq,Qq | vpavgb Vx,Hx,Wx (66),(v1)
e1: psraw Pq,Qq | vpsraw Vx,Hx,Wx (66),(v1)
e2: psrad Pq,Qq | vpsrad Vx,Hx,Wx (66),(v1)
e3: pavgw Pq,Qq | vpavgw Vx,Hx,Wx (66),(v1)
e4: pmulhuw Pq,Qq | vpmulhuw Vx,Hx,Wx (66),(v1)
e5: pmulhw Pq,Qq | vpmulhw Vx,Hx,Wx (66),(v1)
e6: vcvttpd2dq Vx,Wpd (66) | vcvtdq2pd Vx,Wdq (F3) | vcvtpd2dq Vx,Wpd (F2)
e7: movntq Mq,Pq | vmovntdq Mx,Vx (66)
e8: psubsb Pq,Qq | vpsubsb Vx,Hx,Wx (66),(v1)
e9: psubsw Pq,Qq | vpsubsw Vx,Hx,Wx (66),(v1)
ea: pminsw Pq,Qq | vpminsw Vx,Hx,Wx (66),(v1)
eb: por Pq,Qq | vpor Vx,Hx,Wx (66),(v1)
ec: paddsb Pq,Qq | vpaddsb Vx,Hx,Wx (66),(v1)
ed: paddsw Pq,Qq | vpaddsw Vx,Hx,Wx (66),(v1)
ee: pmaxsw Pq,Qq | vpmaxsw Vx,Hx,Wx (66),(v1)
ef: pxor Pq,Qq | vpxor Vx,Hx,Wx (66),(v1)
# 0x0f 0xf0-0xff
f0: vlddqu Vx,Mx (F2)
f1: psllw Pq,Qq | vpsllw Vx,Hx,Wx (66),(v1)
f2: pslld Pq,Qq | vpslld Vx,Hx,Wx (66),(v1)
f3: psllq Pq,Qq | vpsllq Vx,Hx,Wx (66),(v1)
f4: pmuludq Pq,Qq | vpmuludq Vx,Hx,Wx (66),(v1)
f5: pmaddwd Pq,Qq | vpmaddwd Vx,Hx,Wx (66),(v1)
f6: psadbw Pq,Qq | vpsadbw Vx,Hx,Wx (66),(v1)
f7: maskmovq Pq,Nq | vmaskmovdqu Vx,Ux (66),(v1)
f8: psubb Pq,Qq | vpsubb Vx,Hx,Wx (66),(v1)
f9: psubw Pq,Qq | vpsubw Vx,Hx,Wx (66),(v1)
fa: psubd Pq,Qq | vpsubd Vx,Hx,Wx (66),(v1)
fb: psubq Pq,Qq | vpsubq Vx,Hx,Wx (66),(v1)
fc: paddb Pq,Qq | vpaddb Vx,Hx,Wx (66),(v1)
fd: paddw Pq,Qq | vpaddw Vx,Hx,Wx (66),(v1)
fe: paddd Pq,Qq | vpaddd Vx,Hx,Wx (66),(v1)
ff:
EndTable
Table: 3-byte opcode 1 (0x0f 0x38)
Referrer: 3-byte escape 1
AVXcode: 2
# 0x0f 0x38 0x00-0x0f
00: pshufb Pq,Qq | vpshufb Vx,Hx,Wx (66),(v1)
01: phaddw Pq,Qq | vphaddw Vx,Hx,Wx (66),(v1)
02: phaddd Pq,Qq | vphaddd Vx,Hx,Wx (66),(v1)
03: phaddsw Pq,Qq | vphaddsw Vx,Hx,Wx (66),(v1)
04: pmaddubsw Pq,Qq | vpmaddubsw Vx,Hx,Wx (66),(v1)
05: phsubw Pq,Qq | vphsubw Vx,Hx,Wx (66),(v1)
06: phsubd Pq,Qq | vphsubd Vx,Hx,Wx (66),(v1)
07: phsubsw Pq,Qq | vphsubsw Vx,Hx,Wx (66),(v1)
08: psignb Pq,Qq | vpsignb Vx,Hx,Wx (66),(v1)
09: psignw Pq,Qq | vpsignw Vx,Hx,Wx (66),(v1)
0a: psignd Pq,Qq | vpsignd Vx,Hx,Wx (66),(v1)
0b: pmulhrsw Pq,Qq | vpmulhrsw Vx,Hx,Wx (66),(v1)
0c: vpermilps Vx,Hx,Wx (66),(v)
0d: vpermilpd Vx,Hx,Wx (66),(v)
0e: vtestps Vx,Wx (66),(v)
0f: vtestpd Vx,Wx (66),(v)
# 0x0f 0x38 0x10-0x1f
10: pblendvb Vdq,Wdq (66)
11:
12:
13: vcvtph2ps Vx,Wx,Ib (66),(v)
14: blendvps Vdq,Wdq (66)
15: blendvpd Vdq,Wdq (66)
16: vpermps Vqq,Hqq,Wqq (66),(v)
17: vptest Vx,Wx (66)
18: vbroadcastss Vx,Wd (66),(v)
19: vbroadcastsd Vqq,Wq (66),(v)
1a: vbroadcastf128 Vqq,Mdq (66),(v)
1b:
1c: pabsb Pq,Qq | vpabsb Vx,Wx (66),(v1)
1d: pabsw Pq,Qq | vpabsw Vx,Wx (66),(v1)
1e: pabsd Pq,Qq | vpabsd Vx,Wx (66),(v1)
1f:
# 0x0f 0x38 0x20-0x2f
20: vpmovsxbw Vx,Ux/Mq (66),(v1)
21: vpmovsxbd Vx,Ux/Md (66),(v1)
22: vpmovsxbq Vx,Ux/Mw (66),(v1)
23: vpmovsxwd Vx,Ux/Mq (66),(v1)
24: vpmovsxwq Vx,Ux/Md (66),(v1)
25: vpmovsxdq Vx,Ux/Mq (66),(v1)
26:
27:
28: vpmuldq Vx,Hx,Wx (66),(v1)
29: vpcmpeqq Vx,Hx,Wx (66),(v1)
2a: vmovntdqa Vx,Mx (66),(v1)
2b: vpackusdw Vx,Hx,Wx (66),(v1)
2c: vmaskmovps Vx,Hx,Mx (66),(v)
2d: vmaskmovpd Vx,Hx,Mx (66),(v)
2e: vmaskmovps Mx,Hx,Vx (66),(v)
2f: vmaskmovpd Mx,Hx,Vx (66),(v)
# 0x0f 0x38 0x30-0x3f
30: vpmovzxbw Vx,Ux/Mq (66),(v1)
31: vpmovzxbd Vx,Ux/Md (66),(v1)
32: vpmovzxbq Vx,Ux/Mw (66),(v1)
33: vpmovzxwd Vx,Ux/Mq (66),(v1)
34: vpmovzxwq Vx,Ux/Md (66),(v1)
35: vpmovzxdq Vx,Ux/Mq (66),(v1)
36: vpermd Vqq,Hqq,Wqq (66),(v)
37: vpcmpgtq Vx,Hx,Wx (66),(v1)
38: vpminsb Vx,Hx,Wx (66),(v1)
39: vpminsd Vx,Hx,Wx (66),(v1)
3a: vpminuw Vx,Hx,Wx (66),(v1)
3b: vpminud Vx,Hx,Wx (66),(v1)
3c: vpmaxsb Vx,Hx,Wx (66),(v1)
3d: vpmaxsd Vx,Hx,Wx (66),(v1)
3e: vpmaxuw Vx,Hx,Wx (66),(v1)
3f: vpmaxud Vx,Hx,Wx (66),(v1)
# 0x0f 0x38 0x40-0x8f
40: vpmulld Vx,Hx,Wx (66),(v1)
41: vphminposuw Vdq,Wdq (66),(v1)
42:
43:
44:
45: vpsrlvd/q Vx,Hx,Wx (66),(v)
46: vpsravd Vx,Hx,Wx (66),(v)
47: vpsllvd/q Vx,Hx,Wx (66),(v)
# Skip 0x48-0x57
58: vpbroadcastd Vx,Wx (66),(v)
59: vpbroadcastq Vx,Wx (66),(v)
5a: vbroadcasti128 Vqq,Mdq (66),(v)
# Skip 0x5b-0x77
78: vpbroadcastb Vx,Wx (66),(v)
79: vpbroadcastw Vx,Wx (66),(v)
# Skip 0x7a-0x7f
80: INVEPT Gy,Mdq (66)
81: INVPID Gy,Mdq (66)
82: INVPCID Gy,Mdq (66)
8c: vpmaskmovd/q Vx,Hx,Mx (66),(v)
8e: vpmaskmovd/q Mx,Vx,Hx (66),(v)
# 0x0f 0x38 0x90-0xbf (FMA)
90: vgatherdd/q Vx,Hx,Wx (66),(v)
91: vgatherqd/q Vx,Hx,Wx (66),(v)
92: vgatherdps/d Vx,Hx,Wx (66),(v)
93: vgatherqps/d Vx,Hx,Wx (66),(v)
94:
95:
96: vfmaddsub132ps/d Vx,Hx,Wx (66),(v)
97: vfmsubadd132ps/d Vx,Hx,Wx (66),(v)
98: vfmadd132ps/d Vx,Hx,Wx (66),(v)
99: vfmadd132ss/d Vx,Hx,Wx (66),(v),(v1)
9a: vfmsub132ps/d Vx,Hx,Wx (66),(v)
9b: vfmsub132ss/d Vx,Hx,Wx (66),(v),(v1)
9c: vfnmadd132ps/d Vx,Hx,Wx (66),(v)
9d: vfnmadd132ss/d Vx,Hx,Wx (66),(v),(v1)
9e: vfnmsub132ps/d Vx,Hx,Wx (66),(v)
9f: vfnmsub132ss/d Vx,Hx,Wx (66),(v),(v1)
a6: vfmaddsub213ps/d Vx,Hx,Wx (66),(v)
a7: vfmsubadd213ps/d Vx,Hx,Wx (66),(v)
a8: vfmadd213ps/d Vx,Hx,Wx (66),(v)
a9: vfmadd213ss/d Vx,Hx,Wx (66),(v),(v1)
aa: vfmsub213ps/d Vx,Hx,Wx (66),(v)
ab: vfmsub213ss/d Vx,Hx,Wx (66),(v),(v1)
ac: vfnmadd213ps/d Vx,Hx,Wx (66),(v)
ad: vfnmadd213ss/d Vx,Hx,Wx (66),(v),(v1)
ae: vfnmsub213ps/d Vx,Hx,Wx (66),(v)
af: vfnmsub213ss/d Vx,Hx,Wx (66),(v),(v1)
b6: vfmaddsub231ps/d Vx,Hx,Wx (66),(v)
b7: vfmsubadd231ps/d Vx,Hx,Wx (66),(v)
b8: vfmadd231ps/d Vx,Hx,Wx (66),(v)
b9: vfmadd231ss/d Vx,Hx,Wx (66),(v),(v1)
ba: vfmsub231ps/d Vx,Hx,Wx (66),(v)
bb: vfmsub231ss/d Vx,Hx,Wx (66),(v),(v1)
bc: vfnmadd231ps/d Vx,Hx,Wx (66),(v)
bd: vfnmadd231ss/d Vx,Hx,Wx (66),(v),(v1)
be: vfnmsub231ps/d Vx,Hx,Wx (66),(v)
bf: vfnmsub231ss/d Vx,Hx,Wx (66),(v),(v1)
# 0x0f 0x38 0xc0-0xff
db: VAESIMC Vdq,Wdq (66),(v1)
dc: VAESENC Vdq,Hdq,Wdq (66),(v1)
dd: VAESENCLAST Vdq,Hdq,Wdq (66),(v1)
de: VAESDEC Vdq,Hdq,Wdq (66),(v1)
df: VAESDECLAST Vdq,Hdq,Wdq (66),(v1)
f0: MOVBE Gy,My | MOVBE Gw,Mw (66) | CRC32 Gd,Eb (F2) | CRC32 Gd,Eb (66&F2)
f1: MOVBE My,Gy | MOVBE Mw,Gw (66) | CRC32 Gd,Ey (F2) | CRC32 Gd,Ew (66&F2)
f2: ANDN Gy,By,Ey (v)
f3: Grp17 (1A)
f5: BZHI Gy,Ey,By (v) | PEXT Gy,By,Ey (F3),(v) | PDEP Gy,By,Ey (F2),(v)
f6: ADCX Gy,Ey (66) | ADOX Gy,Ey (F3) | MULX By,Gy,rDX,Ey (F2),(v)
f7: BEXTR Gy,Ey,By (v) | SHLX Gy,Ey,By (66),(v) | SARX Gy,Ey,By (F3),(v) | SHRX Gy,Ey,By (F2),(v)
EndTable
Table: 3-byte opcode 2 (0x0f 0x3a)
Referrer: 3-byte escape 2
AVXcode: 3
# 0x0f 0x3a 0x00-0xff
00: vpermq Vqq,Wqq,Ib (66),(v)
01: vpermpd Vqq,Wqq,Ib (66),(v)
02: vpblendd Vx,Hx,Wx,Ib (66),(v)
03:
04: vpermilps Vx,Wx,Ib (66),(v)
05: vpermilpd Vx,Wx,Ib (66),(v)
06: vperm2f128 Vqq,Hqq,Wqq,Ib (66),(v)
07:
08: vroundps Vx,Wx,Ib (66)
09: vroundpd Vx,Wx,Ib (66)
0a: vroundss Vss,Wss,Ib (66),(v1)
0b: vroundsd Vsd,Wsd,Ib (66),(v1)
0c: vblendps Vx,Hx,Wx,Ib (66)
0d: vblendpd Vx,Hx,Wx,Ib (66)
0e: vpblendw Vx,Hx,Wx,Ib (66),(v1)
0f: palignr Pq,Qq,Ib | vpalignr Vx,Hx,Wx,Ib (66),(v1)
14: vpextrb Rd/Mb,Vdq,Ib (66),(v1)
15: vpextrw Rd/Mw,Vdq,Ib (66),(v1)
16: vpextrd/q Ey,Vdq,Ib (66),(v1)
17: vextractps Ed,Vdq,Ib (66),(v1)
18: vinsertf128 Vqq,Hqq,Wqq,Ib (66),(v)
19: vextractf128 Wdq,Vqq,Ib (66),(v)
1d: vcvtps2ph Wx,Vx,Ib (66),(v)
20: vpinsrb Vdq,Hdq,Ry/Mb,Ib (66),(v1)
21: vinsertps Vdq,Hdq,Udq/Md,Ib (66),(v1)
22: vpinsrd/q Vdq,Hdq,Ey,Ib (66),(v1)
38: vinserti128 Vqq,Hqq,Wqq,Ib (66),(v)
39: vextracti128 Wdq,Vqq,Ib (66),(v)
40: vdpps Vx,Hx,Wx,Ib (66)
41: vdppd Vdq,Hdq,Wdq,Ib (66),(v1)
42: vmpsadbw Vx,Hx,Wx,Ib (66),(v1)
44: vpclmulqdq Vdq,Hdq,Wdq,Ib (66),(v1)
46: vperm2i128 Vqq,Hqq,Wqq,Ib (66),(v)
4a: vblendvps Vx,Hx,Wx,Lx (66),(v)
4b: vblendvpd Vx,Hx,Wx,Lx (66),(v)
4c: vpblendvb Vx,Hx,Wx,Lx (66),(v1)
60: vpcmpestrm Vdq,Wdq,Ib (66),(v1)
61: vpcmpestri Vdq,Wdq,Ib (66),(v1)
62: vpcmpistrm Vdq,Wdq,Ib (66),(v1)
63: vpcmpistri Vdq,Wdq,Ib (66),(v1)
df: VAESKEYGEN Vdq,Wdq,Ib (66),(v1)
f0: RORX Gy,Ey,Ib (F2),(v)
EndTable
GrpTable: Grp1
0: ADD
1: OR
2: ADC
3: SBB
4: AND
5: SUB
6: XOR
7: CMP
EndTable
GrpTable: Grp1A
0: POP
EndTable
GrpTable: Grp2
0: ROL
1: ROR
2: RCL
3: RCR
4: SHL/SAL
5: SHR
6:
7: SAR
EndTable
GrpTable: Grp3_1
0: TEST Eb,Ib
1:
2: NOT Eb
3: NEG Eb
4: MUL AL,Eb
5: IMUL AL,Eb
6: DIV AL,Eb
7: IDIV AL,Eb
EndTable
GrpTable: Grp3_2
0: TEST Ev,Iz
1:
2: NOT Ev
3: NEG Ev
4: MUL rAX,Ev
5: IMUL rAX,Ev
6: DIV rAX,Ev
7: IDIV rAX,Ev
EndTable
GrpTable: Grp4
0: INC Eb
1: DEC Eb
EndTable
GrpTable: Grp5
0: INC Ev
1: DEC Ev
# Note: "forced64" is Intel CPU behavior (see comment about CALL insn).
2: CALLN Ev (f64)
3: CALLF Ep
4: JMPN Ev (f64)
5: JMPF Mp
6: PUSH Ev (d64)
7:
EndTable
GrpTable: Grp6
0: SLDT Rv/Mw
1: STR Rv/Mw
2: LLDT Ew
3: LTR Ew
4: VERR Ew
5: VERW Ew
EndTable
GrpTable: Grp7
0: SGDT Ms | VMCALL (001),(11B) | VMLAUNCH (010),(11B) | VMRESUME (011),(11B) | VMXOFF (100),(11B)
1: SIDT Ms | MONITOR (000),(11B) | MWAIT (001),(11B) | CLAC (010),(11B) | STAC (011),(11B)
2: LGDT Ms | XGETBV (000),(11B) | XSETBV (001),(11B) | VMFUNC (100),(11B) | XEND (101)(11B) | XTEST (110)(11B)
3: LIDT Ms
4: SMSW Mw/Rv
5:
6: LMSW Ew
7: INVLPG Mb | SWAPGS (o64),(000),(11B) | RDTSCP (001),(11B)
EndTable
GrpTable: Grp8
4: BT
5: BTS
6: BTR
7: BTC
EndTable
GrpTable: Grp9
1: CMPXCHG8B/16B Mq/Mdq
6: VMPTRLD Mq | VMCLEAR Mq (66) | VMXON Mq (F3) | RDRAND Rv (11B)
7: VMPTRST Mq | VMPTRST Mq (F3) | RDSEED Rv (11B)
EndTable
GrpTable: Grp10
EndTable
# Grp11A and Grp11B are expressed as Grp11 in Intel SDM
GrpTable: Grp11A
0: MOV Eb,Ib
7: XABORT Ib (000),(11B)
EndTable
GrpTable: Grp11B
0: MOV Eb,Iz
7: XBEGIN Jz (000),(11B)
EndTable
GrpTable: Grp12
2: psrlw Nq,Ib (11B) | vpsrlw Hx,Ux,Ib (66),(11B),(v1)
4: psraw Nq,Ib (11B) | vpsraw Hx,Ux,Ib (66),(11B),(v1)
6: psllw Nq,Ib (11B) | vpsllw Hx,Ux,Ib (66),(11B),(v1)
EndTable
GrpTable: Grp13
2: psrld Nq,Ib (11B) | vpsrld Hx,Ux,Ib (66),(11B),(v1)
4: psrad Nq,Ib (11B) | vpsrad Hx,Ux,Ib (66),(11B),(v1)
6: pslld Nq,Ib (11B) | vpslld Hx,Ux,Ib (66),(11B),(v1)
EndTable
GrpTable: Grp14
2: psrlq Nq,Ib (11B) | vpsrlq Hx,Ux,Ib (66),(11B),(v1)
3: vpsrldq Hx,Ux,Ib (66),(11B),(v1)
6: psllq Nq,Ib (11B) | vpsllq Hx,Ux,Ib (66),(11B),(v1)
7: vpslldq Hx,Ux,Ib (66),(11B),(v1)
EndTable
GrpTable: Grp15
0: fxsave | RDFSBASE Ry (F3),(11B)
1: fxstor | RDGSBASE Ry (F3),(11B)
2: vldmxcsr Md (v1) | WRFSBASE Ry (F3),(11B)
3: vstmxcsr Md (v1) | WRGSBASE Ry (F3),(11B)
4: XSAVE
5: XRSTOR | lfence (11B)
6: XSAVEOPT | mfence (11B)
7: clflush | sfence (11B)
EndTable
GrpTable: Grp16
0: prefetch NTA
1: prefetch T0
2: prefetch T1
3: prefetch T2
EndTable
GrpTable: Grp17
1: BLSR By,Ey (v)
2: BLSMSK By,Ey (v)
3: BLSI By,Ey (v)
EndTable
# AMD's Prefetch Group
GrpTable: GrpP
0: PREFETCH
1: PREFETCHW
EndTable
GrpTable: GrpPDLK
0: MONTMUL
1: XSHA1
2: XSHA2
EndTable
GrpTable: GrpRNG
0: xstore-rng
1: xcrypt-ecb
2: xcrypt-cbc
4: xcrypt-cfb
5: xcrypt-ofb
EndTable
/*
* intel_pt.c: Intel Processor Trace support
* Copyright (c) 2013-2015, Intel Corporation.
*
* This program is free software; you can redistribute it and/or modify it
* under the terms and conditions of the GNU General Public License,
* version 2, as published by the Free Software Foundation.
*
* This program is distributed in the hope it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
* more details.
*
*/
#include <stdio.h>
#include <stdbool.h>
#include <errno.h>
#include <linux/kernel.h>
#include <linux/types.h>
#include "../perf.h"
#include "session.h"
#include "machine.h"
#include "tool.h"
#include "event.h"
#include "evlist.h"
#include "evsel.h"
#include "map.h"
#include "color.h"
#include "util.h"
#include "thread.h"
#include "thread-stack.h"
#include "symbol.h"
#include "callchain.h"
#include "dso.h"
#include "debug.h"
#include "auxtrace.h"
#include "tsc.h"
#include "intel-pt.h"
#include "intel-pt-decoder/intel-pt-log.h"
#include "intel-pt-decoder/intel-pt-decoder.h"
#include "intel-pt-decoder/intel-pt-insn-decoder.h"
#include "intel-pt-decoder/intel-pt-pkt-decoder.h"
#define MAX_TIMESTAMP (~0ULL)
struct intel_pt {
struct auxtrace auxtrace;
struct auxtrace_queues queues;
struct auxtrace_heap heap;
u32 auxtrace_type;
struct perf_session *session;
struct machine *machine;
struct perf_evsel *switch_evsel;
struct thread *unknown_thread;
bool timeless_decoding;
bool sampling_mode;
bool snapshot_mode;
bool per_cpu_mmaps;
bool have_tsc;
bool data_queued;
bool est_tsc;
bool sync_switch;
int have_sched_switch;
u32 pmu_type;
u64 kernel_start;
u64 switch_ip;
u64 ptss_ip;
struct perf_tsc_conversion tc;
bool cap_user_time_zero;
struct itrace_synth_opts synth_opts;
bool sample_instructions;
u64 instructions_sample_type;
u64 instructions_sample_period;
u64 instructions_id;
bool sample_branches;
u32 branches_filter;
u64 branches_sample_type;
u64 branches_id;
bool sample_transactions;
u64 transactions_sample_type;
u64 transactions_id;
bool synth_needs_swap;
u64 tsc_bit;
u64 noretcomp_bit;
unsigned max_non_turbo_ratio;
};
enum switch_state {
INTEL_PT_SS_NOT_TRACING,
INTEL_PT_SS_UNKNOWN,
INTEL_PT_SS_TRACING,
INTEL_PT_SS_EXPECTING_SWITCH_EVENT,
INTEL_PT_SS_EXPECTING_SWITCH_IP,
};
struct intel_pt_queue {
struct intel_pt *pt;
unsigned int queue_nr;
struct auxtrace_buffer *buffer;
void *decoder;
const struct intel_pt_state *state;
struct ip_callchain *chain;
union perf_event *event_buf;
bool on_heap;
bool stop;
bool step_through_buffers;
bool use_buffer_pid_tid;
pid_t pid, tid;
int cpu;
int switch_state;
pid_t next_tid;
struct thread *thread;
bool exclude_kernel;
bool have_sample;
u64 time;
u64 timestamp;
u32 flags;
u16 insn_len;
};
static void intel_pt_dump(struct intel_pt *pt __maybe_unused,
unsigned char *buf, size_t len)
{
struct intel_pt_pkt packet;
size_t pos = 0;
int ret, pkt_len, i;
char desc[INTEL_PT_PKT_DESC_MAX];
const char *color = PERF_COLOR_BLUE;
color_fprintf(stdout, color,
". ... Intel Processor Trace data: size %zu bytes\n",
len);
while (len) {
ret = intel_pt_get_packet(buf, len, &packet);
if (ret > 0)
pkt_len = ret;
else
pkt_len = 1;
printf(".");
color_fprintf(stdout, color, " %08x: ", pos);
for (i = 0; i < pkt_len; i++)
color_fprintf(stdout, color, " %02x", buf[i]);
for (; i < 16; i++)
color_fprintf(stdout, color, " ");
if (ret > 0) {
ret = intel_pt_pkt_desc(&packet, desc,
INTEL_PT_PKT_DESC_MAX);
if (ret > 0)
color_fprintf(stdout, color, " %s\n", desc);
} else {
color_fprintf(stdout, color, " Bad packet!\n");
}
pos += pkt_len;
buf += pkt_len;
len -= pkt_len;
}
}
static void intel_pt_dump_event(struct intel_pt *pt, unsigned char *buf,
size_t len)
{
printf(".\n");
intel_pt_dump(pt, buf, len);
}
static int intel_pt_do_fix_overlap(struct intel_pt *pt, struct auxtrace_buffer *a,
struct auxtrace_buffer *b)
{
void *start;
start = intel_pt_find_overlap(a->data, a->size, b->data, b->size,
pt->have_tsc);
if (!start)
return -EINVAL;
b->use_size = b->data + b->size - start;
b->use_data = start;
return 0;
}
static void intel_pt_use_buffer_pid_tid(struct intel_pt_queue *ptq,
struct auxtrace_queue *queue,
struct auxtrace_buffer *buffer)
{
if (queue->cpu == -1 && buffer->cpu != -1)
ptq->cpu = buffer->cpu;
ptq->pid = buffer->pid;
ptq->tid = buffer->tid;
intel_pt_log("queue %u cpu %d pid %d tid %d\n",
ptq->queue_nr, ptq->cpu, ptq->pid, ptq->tid);
thread__zput(ptq->thread);
if (ptq->tid != -1) {
if (ptq->pid != -1)
ptq->thread = machine__findnew_thread(ptq->pt->machine,
ptq->pid,
ptq->tid);
else
ptq->thread = machine__find_thread(ptq->pt->machine, -1,
ptq->tid);
}
}
/* This function assumes data is processed sequentially only */
static int intel_pt_get_trace(struct intel_pt_buffer *b, void *data)
{
struct intel_pt_queue *ptq = data;
struct auxtrace_buffer *buffer = ptq->buffer, *old_buffer = buffer;
struct auxtrace_queue *queue;
if (ptq->stop) {
b->len = 0;
return 0;
}
queue = &ptq->pt->queues.queue_array[ptq->queue_nr];
buffer = auxtrace_buffer__next(queue, buffer);
if (!buffer) {
if (old_buffer)
auxtrace_buffer__drop_data(old_buffer);
b->len = 0;
return 0;
}
ptq->buffer = buffer;
if (!buffer->data) {
int fd = perf_data_file__fd(ptq->pt->session->file);
buffer->data = auxtrace_buffer__get_data(buffer, fd);
if (!buffer->data)
return -ENOMEM;
}
if (ptq->pt->snapshot_mode && !buffer->consecutive && old_buffer &&
intel_pt_do_fix_overlap(ptq->pt, old_buffer, buffer))
return -ENOMEM;
if (old_buffer)
auxtrace_buffer__drop_data(old_buffer);
if (buffer->use_data) {
b->len = buffer->use_size;
b->buf = buffer->use_data;
} else {
b->len = buffer->size;
b->buf = buffer->data;
}
b->ref_timestamp = buffer->reference;
if (!old_buffer || ptq->pt->sampling_mode || (ptq->pt->snapshot_mode &&
!buffer->consecutive)) {
b->consecutive = false;
b->trace_nr = buffer->buffer_nr + 1;
} else {
b->consecutive = true;
}
if (ptq->use_buffer_pid_tid && (ptq->pid != buffer->pid ||
ptq->tid != buffer->tid))
intel_pt_use_buffer_pid_tid(ptq, queue, buffer);
if (ptq->step_through_buffers)
ptq->stop = true;
if (!b->len)
return intel_pt_get_trace(b, data);
return 0;
}
struct intel_pt_cache_entry {
struct auxtrace_cache_entry entry;
u64 insn_cnt;
u64 byte_cnt;
enum intel_pt_insn_op op;
enum intel_pt_insn_branch branch;
int length;
int32_t rel;
};
static int intel_pt_config_div(const char *var, const char *value, void *data)
{
int *d = data;
long val;
if (!strcmp(var, "intel-pt.cache-divisor")) {
val = strtol(value, NULL, 0);
if (val > 0 && val <= INT_MAX)
*d = val;
}
return 0;
}
static int intel_pt_cache_divisor(void)
{
static int d;
if (d)
return d;
perf_config(intel_pt_config_div, &d);
if (!d)
d = 64;
return d;
}
static unsigned int intel_pt_cache_size(struct dso *dso,
struct machine *machine)
{
off_t size;
size = dso__data_size(dso, machine);
size /= intel_pt_cache_divisor();
if (size < 1000)
return 10;
if (size > (1 << 21))
return 21;
return 32 - __builtin_clz(size);
}
static struct auxtrace_cache *intel_pt_cache(struct dso *dso,
struct machine *machine)
{
struct auxtrace_cache *c;
unsigned int bits;
if (dso->auxtrace_cache)
return dso->auxtrace_cache;
bits = intel_pt_cache_size(dso, machine);
/* Ignoring cache creation failure */
c = auxtrace_cache__new(bits, sizeof(struct intel_pt_cache_entry), 200);
dso->auxtrace_cache = c;
return c;
}
static int intel_pt_cache_add(struct dso *dso, struct machine *machine,
u64 offset, u64 insn_cnt, u64 byte_cnt,
struct intel_pt_insn *intel_pt_insn)
{
struct auxtrace_cache *c = intel_pt_cache(dso, machine);
struct intel_pt_cache_entry *e;
int err;
if (!c)
return -ENOMEM;
e = auxtrace_cache__alloc_entry(c);
if (!e)
return -ENOMEM;
e->insn_cnt = insn_cnt;
e->byte_cnt = byte_cnt;
e->op = intel_pt_insn->op;
e->branch = intel_pt_insn->branch;
e->length = intel_pt_insn->length;
e->rel = intel_pt_insn->rel;
err = auxtrace_cache__add(c, offset, &e->entry);
if (err)
auxtrace_cache__free_entry(c, e);
return err;
}
static struct intel_pt_cache_entry *
intel_pt_cache_lookup(struct dso *dso, struct machine *machine, u64 offset)
{
struct auxtrace_cache *c = intel_pt_cache(dso, machine);
if (!c)
return NULL;
return auxtrace_cache__lookup(dso->auxtrace_cache, offset);
}
static int intel_pt_walk_next_insn(struct intel_pt_insn *intel_pt_insn,
uint64_t *insn_cnt_ptr, uint64_t *ip,
uint64_t to_ip, uint64_t max_insn_cnt,
void *data)
{
struct intel_pt_queue *ptq = data;
struct machine *machine = ptq->pt->machine;
struct thread *thread;
struct addr_location al;
unsigned char buf[1024];
size_t bufsz;
ssize_t len;
int x86_64;
u8 cpumode;
u64 offset, start_offset, start_ip;
u64 insn_cnt = 0;
bool one_map = true;
if (to_ip && *ip == to_ip)
goto out_no_cache;
bufsz = intel_pt_insn_max_size();
if (*ip >= ptq->pt->kernel_start)
cpumode = PERF_RECORD_MISC_KERNEL;
else
cpumode = PERF_RECORD_MISC_USER;
thread = ptq->thread;
if (!thread) {
if (cpumode != PERF_RECORD_MISC_KERNEL)
return -EINVAL;
thread = ptq->pt->unknown_thread;
}
while (1) {
thread__find_addr_map(thread, cpumode, MAP__FUNCTION, *ip, &al);
if (!al.map || !al.map->dso)
return -EINVAL;
if (al.map->dso->data.status == DSO_DATA_STATUS_ERROR &&
dso__data_status_seen(al.map->dso,
DSO_DATA_STATUS_SEEN_ITRACE))
return -ENOENT;
offset = al.map->map_ip(al.map, *ip);
if (!to_ip && one_map) {
struct intel_pt_cache_entry *e;
e = intel_pt_cache_lookup(al.map->dso, machine, offset);
if (e &&
(!max_insn_cnt || e->insn_cnt <= max_insn_cnt)) {
*insn_cnt_ptr = e->insn_cnt;
*ip += e->byte_cnt;
intel_pt_insn->op = e->op;
intel_pt_insn->branch = e->branch;
intel_pt_insn->length = e->length;
intel_pt_insn->rel = e->rel;
intel_pt_log_insn_no_data(intel_pt_insn, *ip);
return 0;
}
}
start_offset = offset;
start_ip = *ip;
/* Load maps to ensure dso->is_64_bit has been updated */
map__load(al.map, machine->symbol_filter);
x86_64 = al.map->dso->is_64_bit;
while (1) {
len = dso__data_read_offset(al.map->dso, machine,
offset, buf, bufsz);
if (len <= 0)
return -EINVAL;
if (intel_pt_get_insn(buf, len, x86_64, intel_pt_insn))
return -EINVAL;
intel_pt_log_insn(intel_pt_insn, *ip);
insn_cnt += 1;
if (intel_pt_insn->branch != INTEL_PT_BR_NO_BRANCH)
goto out;
if (max_insn_cnt && insn_cnt >= max_insn_cnt)
goto out_no_cache;
*ip += intel_pt_insn->length;
if (to_ip && *ip == to_ip)
goto out_no_cache;
if (*ip >= al.map->end)
break;
offset += intel_pt_insn->length;
}
one_map = false;
}
out:
*insn_cnt_ptr = insn_cnt;
if (!one_map)
goto out_no_cache;
/*
* Didn't lookup in the 'to_ip' case, so do it now to prevent duplicate
* entries.
*/
if (to_ip) {
struct intel_pt_cache_entry *e;
e = intel_pt_cache_lookup(al.map->dso, machine, start_offset);
if (e)
return 0;
}
/* Ignore cache errors */
intel_pt_cache_add(al.map->dso, machine, start_offset, insn_cnt,
*ip - start_ip, intel_pt_insn);
return 0;
out_no_cache:
*insn_cnt_ptr = insn_cnt;
return 0;
}
static bool intel_pt_get_config(struct intel_pt *pt,
struct perf_event_attr *attr, u64 *config)
{
if (attr->type == pt->pmu_type) {
if (config)
*config = attr->config;
return true;
}
return false;
}
static bool intel_pt_exclude_kernel(struct intel_pt *pt)
{
struct perf_evsel *evsel;
evlist__for_each(pt->session->evlist, evsel) {
if (intel_pt_get_config(pt, &evsel->attr, NULL) &&
!evsel->attr.exclude_kernel)
return false;
}
return true;
}
static bool intel_pt_return_compression(struct intel_pt *pt)
{
struct perf_evsel *evsel;
u64 config;
if (!pt->noretcomp_bit)
return true;
evlist__for_each(pt->session->evlist, evsel) {
if (intel_pt_get_config(pt, &evsel->attr, &config) &&
(config & pt->noretcomp_bit))
return false;
}
return true;
}
static bool intel_pt_timeless_decoding(struct intel_pt *pt)
{
struct perf_evsel *evsel;
bool timeless_decoding = true;
u64 config;
if (!pt->tsc_bit || !pt->cap_user_time_zero)
return true;
evlist__for_each(pt->session->evlist, evsel) {
if (!(evsel->attr.sample_type & PERF_SAMPLE_TIME))
return true;
if (intel_pt_get_config(pt, &evsel->attr, &config)) {
if (config & pt->tsc_bit)
timeless_decoding = false;
else
return true;
}
}
return timeless_decoding;
}
static bool intel_pt_tracing_kernel(struct intel_pt *pt)
{
struct perf_evsel *evsel;
evlist__for_each(pt->session->evlist, evsel) {
if (intel_pt_get_config(pt, &evsel->attr, NULL) &&
!evsel->attr.exclude_kernel)
return true;
}
return false;
}
static bool intel_pt_have_tsc(struct intel_pt *pt)
{
struct perf_evsel *evsel;
bool have_tsc = false;
u64 config;
if (!pt->tsc_bit)
return false;
evlist__for_each(pt->session->evlist, evsel) {
if (intel_pt_get_config(pt, &evsel->attr, &config)) {
if (config & pt->tsc_bit)
have_tsc = true;
else
return false;
}
}
return have_tsc;
}
static u64 intel_pt_ns_to_ticks(const struct intel_pt *pt, u64 ns)
{
u64 quot, rem;
quot = ns / pt->tc.time_mult;
rem = ns % pt->tc.time_mult;
return (quot << pt->tc.time_shift) + (rem << pt->tc.time_shift) /
pt->tc.time_mult;
}
static struct intel_pt_queue *intel_pt_alloc_queue(struct intel_pt *pt,
unsigned int queue_nr)
{
struct intel_pt_params params = { .get_trace = 0, };
struct intel_pt_queue *ptq;
ptq = zalloc(sizeof(struct intel_pt_queue));
if (!ptq)
return NULL;
if (pt->synth_opts.callchain) {
size_t sz = sizeof(struct ip_callchain);
sz += pt->synth_opts.callchain_sz * sizeof(u64);
ptq->chain = zalloc(sz);
if (!ptq->chain)
goto out_free;
}
ptq->event_buf = malloc(PERF_SAMPLE_MAX_SIZE);
if (!ptq->event_buf)
goto out_free;
ptq->pt = pt;
ptq->queue_nr = queue_nr;
ptq->exclude_kernel = intel_pt_exclude_kernel(pt);
ptq->pid = -1;
ptq->tid = -1;
ptq->cpu = -1;
ptq->next_tid = -1;
params.get_trace = intel_pt_get_trace;
params.walk_insn = intel_pt_walk_next_insn;
params.data = ptq;
params.return_compression = intel_pt_return_compression(pt);
params.max_non_turbo_ratio = pt->max_non_turbo_ratio;
if (pt->synth_opts.instructions) {
if (pt->synth_opts.period) {
switch (pt->synth_opts.period_type) {
case PERF_ITRACE_PERIOD_INSTRUCTIONS:
params.period_type =
INTEL_PT_PERIOD_INSTRUCTIONS;
params.period = pt->synth_opts.period;
break;
case PERF_ITRACE_PERIOD_TICKS:
params.period_type = INTEL_PT_PERIOD_TICKS;
params.period = pt->synth_opts.period;
break;
case PERF_ITRACE_PERIOD_NANOSECS:
params.period_type = INTEL_PT_PERIOD_TICKS;
params.period = intel_pt_ns_to_ticks(pt,
pt->synth_opts.period);
break;
default:
break;
}
}
if (!params.period) {
params.period_type = INTEL_PT_PERIOD_INSTRUCTIONS;
params.period = 1000;
}
}
ptq->decoder = intel_pt_decoder_new(&params);
if (!ptq->decoder)
goto out_free;
return ptq;
out_free:
zfree(&ptq->event_buf);
zfree(&ptq->chain);
free(ptq);
return NULL;
}
static void intel_pt_free_queue(void *priv)
{
struct intel_pt_queue *ptq = priv;
if (!ptq)
return;
thread__zput(ptq->thread);
intel_pt_decoder_free(ptq->decoder);
zfree(&ptq->event_buf);
zfree(&ptq->chain);
free(ptq);
}
static void intel_pt_set_pid_tid_cpu(struct intel_pt *pt,
struct auxtrace_queue *queue)
{
struct intel_pt_queue *ptq = queue->priv;
if (queue->tid == -1 || pt->have_sched_switch) {
ptq->tid = machine__get_current_tid(pt->machine, ptq->cpu);
thread__zput(ptq->thread);
}
if (!ptq->thread && ptq->tid != -1)
ptq->thread = machine__find_thread(pt->machine, -1, ptq->tid);
if (ptq->thread) {
ptq->pid = ptq->thread->pid_;
if (queue->cpu == -1)
ptq->cpu = ptq->thread->cpu;
}
}
static void intel_pt_sample_flags(struct intel_pt_queue *ptq)
{
if (ptq->state->flags & INTEL_PT_ABORT_TX) {
ptq->flags = PERF_IP_FLAG_BRANCH | PERF_IP_FLAG_TX_ABORT;
} else if (ptq->state->flags & INTEL_PT_ASYNC) {
if (ptq->state->to_ip)
ptq->flags = PERF_IP_FLAG_BRANCH | PERF_IP_FLAG_CALL |
PERF_IP_FLAG_ASYNC |
PERF_IP_FLAG_INTERRUPT;
else
ptq->flags = PERF_IP_FLAG_BRANCH |
PERF_IP_FLAG_TRACE_END;
ptq->insn_len = 0;
} else {
if (ptq->state->from_ip)
ptq->flags = intel_pt_insn_type(ptq->state->insn_op);
else
ptq->flags = PERF_IP_FLAG_BRANCH |
PERF_IP_FLAG_TRACE_BEGIN;
if (ptq->state->flags & INTEL_PT_IN_TX)
ptq->flags |= PERF_IP_FLAG_IN_TX;
ptq->insn_len = ptq->state->insn_len;
}
}
static int intel_pt_setup_queue(struct intel_pt *pt,
struct auxtrace_queue *queue,
unsigned int queue_nr)
{
struct intel_pt_queue *ptq = queue->priv;
if (list_empty(&queue->head))
return 0;
if (!ptq) {
ptq = intel_pt_alloc_queue(pt, queue_nr);
if (!ptq)
return -ENOMEM;
queue->priv = ptq;
if (queue->cpu != -1)
ptq->cpu = queue->cpu;
ptq->tid = queue->tid;
if (pt->sampling_mode) {
if (pt->timeless_decoding)
ptq->step_through_buffers = true;
if (pt->timeless_decoding || !pt->have_sched_switch)
ptq->use_buffer_pid_tid = true;
}
}
if (!ptq->on_heap &&
(!pt->sync_switch ||
ptq->switch_state != INTEL_PT_SS_EXPECTING_SWITCH_EVENT)) {
const struct intel_pt_state *state;
int ret;
if (pt->timeless_decoding)
return 0;
intel_pt_log("queue %u getting timestamp\n", queue_nr);
intel_pt_log("queue %u decoding cpu %d pid %d tid %d\n",
queue_nr, ptq->cpu, ptq->pid, ptq->tid);
while (1) {
state = intel_pt_decode(ptq->decoder);
if (state->err) {
if (state->err == INTEL_PT_ERR_NODATA) {
intel_pt_log("queue %u has no timestamp\n",
queue_nr);
return 0;
}
continue;
}
if (state->timestamp)
break;
}
ptq->timestamp = state->timestamp;
intel_pt_log("queue %u timestamp 0x%" PRIx64 "\n",
queue_nr, ptq->timestamp);
ptq->state = state;
ptq->have_sample = true;
intel_pt_sample_flags(ptq);
ret = auxtrace_heap__add(&pt->heap, queue_nr, ptq->timestamp);
if (ret)
return ret;
ptq->on_heap = true;
}
return 0;
}
static int intel_pt_setup_queues(struct intel_pt *pt)
{
unsigned int i;
int ret;
for (i = 0; i < pt->queues.nr_queues; i++) {
ret = intel_pt_setup_queue(pt, &pt->queues.queue_array[i], i);
if (ret)
return ret;
}
return 0;
}
static int intel_pt_inject_event(union perf_event *event,
struct perf_sample *sample, u64 type,
bool swapped)
{
event->header.size = perf_event__sample_event_size(sample, type, 0);
return perf_event__synthesize_sample(event, type, 0, sample, swapped);
}
static int intel_pt_synth_branch_sample(struct intel_pt_queue *ptq)
{
int ret;
struct intel_pt *pt = ptq->pt;
union perf_event *event = ptq->event_buf;
struct perf_sample sample = { .ip = 0, };
event->sample.header.type = PERF_RECORD_SAMPLE;
event->sample.header.misc = PERF_RECORD_MISC_USER;
event->sample.header.size = sizeof(struct perf_event_header);
if (!pt->timeless_decoding)
sample.time = tsc_to_perf_time(ptq->timestamp, &pt->tc);
sample.ip = ptq->state->from_ip;
sample.pid = ptq->pid;
sample.tid = ptq->tid;
sample.addr = ptq->state->to_ip;
sample.id = ptq->pt->branches_id;
sample.stream_id = ptq->pt->branches_id;
sample.period = 1;
sample.cpu = ptq->cpu;
sample.flags = ptq->flags;
sample.insn_len = ptq->insn_len;
if (pt->branches_filter && !(pt->branches_filter & ptq->flags))
return 0;
if (pt->synth_opts.inject) {
ret = intel_pt_inject_event(event, &sample,
pt->branches_sample_type,
pt->synth_needs_swap);
if (ret)
return ret;
}
ret = perf_session__deliver_synth_event(pt->session, event, &sample);
if (ret)
pr_err("Intel Processor Trace: failed to deliver branch event, error %d\n",
ret);
return ret;
}
static int intel_pt_synth_instruction_sample(struct intel_pt_queue *ptq)
{
int ret;
struct intel_pt *pt = ptq->pt;
union perf_event *event = ptq->event_buf;
struct perf_sample sample = { .ip = 0, };
event->sample.header.type = PERF_RECORD_SAMPLE;
event->sample.header.misc = PERF_RECORD_MISC_USER;
event->sample.header.size = sizeof(struct perf_event_header);
if (!pt->timeless_decoding)
sample.time = tsc_to_perf_time(ptq->timestamp, &pt->tc);
sample.ip = ptq->state->from_ip;
sample.pid = ptq->pid;
sample.tid = ptq->tid;
sample.addr = ptq->state->to_ip;
sample.id = ptq->pt->instructions_id;
sample.stream_id = ptq->pt->instructions_id;
sample.period = ptq->pt->instructions_sample_period;
sample.cpu = ptq->cpu;
sample.flags = ptq->flags;
sample.insn_len = ptq->insn_len;
if (pt->synth_opts.callchain) {
thread_stack__sample(ptq->thread, ptq->chain,
pt->synth_opts.callchain_sz, sample.ip);
sample.callchain = ptq->chain;
}
if (pt->synth_opts.inject) {
ret = intel_pt_inject_event(event, &sample,
pt->instructions_sample_type,
pt->synth_needs_swap);
if (ret)
return ret;
}
ret = perf_session__deliver_synth_event(pt->session, event, &sample);
if (ret)
pr_err("Intel Processor Trace: failed to deliver instruction event, error %d\n",
ret);
return ret;
}
static int intel_pt_synth_transaction_sample(struct intel_pt_queue *ptq)
{
int ret;
struct intel_pt *pt = ptq->pt;
union perf_event *event = ptq->event_buf;
struct perf_sample sample = { .ip = 0, };
event->sample.header.type = PERF_RECORD_SAMPLE;
event->sample.header.misc = PERF_RECORD_MISC_USER;
event->sample.header.size = sizeof(struct perf_event_header);
if (!pt->timeless_decoding)
sample.time = tsc_to_perf_time(ptq->timestamp, &pt->tc);
sample.ip = ptq->state->from_ip;
sample.pid = ptq->pid;
sample.tid = ptq->tid;
sample.addr = ptq->state->to_ip;
sample.id = ptq->pt->transactions_id;
sample.stream_id = ptq->pt->transactions_id;
sample.period = 1;
sample.cpu = ptq->cpu;
sample.flags = ptq->flags;
sample.insn_len = ptq->insn_len;
if (pt->synth_opts.callchain) {
thread_stack__sample(ptq->thread, ptq->chain,
pt->synth_opts.callchain_sz, sample.ip);
sample.callchain = ptq->chain;
}
if (pt->synth_opts.inject) {
ret = intel_pt_inject_event(event, &sample,
pt->transactions_sample_type,
pt->synth_needs_swap);
if (ret)
return ret;
}
ret = perf_session__deliver_synth_event(pt->session, event, &sample);
if (ret)
pr_err("Intel Processor Trace: failed to deliver transaction event, error %d\n",
ret);
return ret;
}
static int intel_pt_synth_error(struct intel_pt *pt, int code, int cpu,
pid_t pid, pid_t tid, u64 ip)
{
union perf_event event;
char msg[MAX_AUXTRACE_ERROR_MSG];
int err;
intel_pt__strerror(code, msg, MAX_AUXTRACE_ERROR_MSG);
auxtrace_synth_error(&event.auxtrace_error, PERF_AUXTRACE_ERROR_ITRACE,
code, cpu, pid, tid, ip, msg);
err = perf_session__deliver_synth_event(pt->session, &event, NULL);
if (err)
pr_err("Intel Processor Trace: failed to deliver error event, error %d\n",
err);
return err;
}
static int intel_pt_next_tid(struct intel_pt *pt, struct intel_pt_queue *ptq)
{
struct auxtrace_queue *queue;
pid_t tid = ptq->next_tid;
int err;
if (tid == -1)
return 0;
intel_pt_log("switch: cpu %d tid %d\n", ptq->cpu, tid);
err = machine__set_current_tid(pt->machine, ptq->cpu, -1, tid);
queue = &pt->queues.queue_array[ptq->queue_nr];
intel_pt_set_pid_tid_cpu(pt, queue);
ptq->next_tid = -1;
return err;
}
static inline bool intel_pt_is_switch_ip(struct intel_pt_queue *ptq, u64 ip)
{
struct intel_pt *pt = ptq->pt;
return ip == pt->switch_ip &&
(ptq->flags & PERF_IP_FLAG_BRANCH) &&
!(ptq->flags & (PERF_IP_FLAG_CONDITIONAL | PERF_IP_FLAG_ASYNC |
PERF_IP_FLAG_INTERRUPT | PERF_IP_FLAG_TX_ABORT));
}
static int intel_pt_sample(struct intel_pt_queue *ptq)
{
const struct intel_pt_state *state = ptq->state;
struct intel_pt *pt = ptq->pt;
int err;
if (!ptq->have_sample)
return 0;
ptq->have_sample = false;
if (pt->sample_instructions &&
(state->type & INTEL_PT_INSTRUCTION)) {
err = intel_pt_synth_instruction_sample(ptq);
if (err)
return err;
}
if (pt->sample_transactions &&
(state->type & INTEL_PT_TRANSACTION)) {
err = intel_pt_synth_transaction_sample(ptq);
if (err)
return err;
}
if (!(state->type & INTEL_PT_BRANCH))
return 0;
if (pt->synth_opts.callchain)
thread_stack__event(ptq->thread, ptq->flags, state->from_ip,
state->to_ip, ptq->insn_len,
state->trace_nr);
else
thread_stack__set_trace_nr(ptq->thread, state->trace_nr);
if (pt->sample_branches) {
err = intel_pt_synth_branch_sample(ptq);
if (err)
return err;
}
if (!pt->sync_switch)
return 0;
if (intel_pt_is_switch_ip(ptq, state->to_ip)) {
switch (ptq->switch_state) {
case INTEL_PT_SS_UNKNOWN:
case INTEL_PT_SS_EXPECTING_SWITCH_IP:
err = intel_pt_next_tid(pt, ptq);
if (err)
return err;
ptq->switch_state = INTEL_PT_SS_TRACING;
break;
default:
ptq->switch_state = INTEL_PT_SS_EXPECTING_SWITCH_EVENT;
return 1;
}
} else if (!state->to_ip) {
ptq->switch_state = INTEL_PT_SS_NOT_TRACING;
} else if (ptq->switch_state == INTEL_PT_SS_NOT_TRACING) {
ptq->switch_state = INTEL_PT_SS_UNKNOWN;
} else if (ptq->switch_state == INTEL_PT_SS_UNKNOWN &&
state->to_ip == pt->ptss_ip &&
(ptq->flags & PERF_IP_FLAG_CALL)) {
ptq->switch_state = INTEL_PT_SS_TRACING;
}
return 0;
}
static u64 intel_pt_switch_ip(struct machine *machine, u64 *ptss_ip)
{
struct map *map;
struct symbol *sym, *start;
u64 ip, switch_ip = 0;
if (ptss_ip)
*ptss_ip = 0;
map = machine__kernel_map(machine, MAP__FUNCTION);
if (!map)
return 0;
if (map__load(map, machine->symbol_filter))
return 0;
start = dso__first_symbol(map->dso, MAP__FUNCTION);
for (sym = start; sym; sym = dso__next_symbol(sym)) {
if (sym->binding == STB_GLOBAL &&
!strcmp(sym->name, "__switch_to")) {
ip = map->unmap_ip(map, sym->start);
if (ip >= map->start && ip < map->end) {
switch_ip = ip;
break;
}
}
}
if (!switch_ip || !ptss_ip)
return 0;
for (sym = start; sym; sym = dso__next_symbol(sym)) {
if (!strcmp(sym->name, "perf_trace_sched_switch")) {
ip = map->unmap_ip(map, sym->start);
if (ip >= map->start && ip < map->end) {
*ptss_ip = ip;
break;
}
}
}
return switch_ip;
}
static int intel_pt_run_decoder(struct intel_pt_queue *ptq, u64 *timestamp)
{
const struct intel_pt_state *state = ptq->state;
struct intel_pt *pt = ptq->pt;
int err;
if (!pt->kernel_start) {
pt->kernel_start = machine__kernel_start(pt->machine);
if (pt->per_cpu_mmaps && pt->have_sched_switch &&
!pt->timeless_decoding && intel_pt_tracing_kernel(pt) &&
!pt->sampling_mode) {
pt->switch_ip = intel_pt_switch_ip(pt->machine,
&pt->ptss_ip);
if (pt->switch_ip) {
intel_pt_log("switch_ip: %"PRIx64" ptss_ip: %"PRIx64"\n",
pt->switch_ip, pt->ptss_ip);
pt->sync_switch = true;
}
}
}
intel_pt_log("queue %u decoding cpu %d pid %d tid %d\n",
ptq->queue_nr, ptq->cpu, ptq->pid, ptq->tid);
while (1) {
err = intel_pt_sample(ptq);
if (err)
return err;
state = intel_pt_decode(ptq->decoder);
if (state->err) {
if (state->err == INTEL_PT_ERR_NODATA)
return 1;
if (pt->sync_switch &&
state->from_ip >= pt->kernel_start) {
pt->sync_switch = false;
intel_pt_next_tid(pt, ptq);
}
if (pt->synth_opts.errors) {
err = intel_pt_synth_error(pt, state->err,
ptq->cpu, ptq->pid,
ptq->tid,
state->from_ip);
if (err)
return err;
}
continue;
}
ptq->state = state;
ptq->have_sample = true;
intel_pt_sample_flags(ptq);
/* Use estimated TSC upon return to user space */
if (pt->est_tsc &&
(state->from_ip >= pt->kernel_start || !state->from_ip) &&
state->to_ip && state->to_ip < pt->kernel_start) {
intel_pt_log("TSC %"PRIx64" est. TSC %"PRIx64"\n",
state->timestamp, state->est_timestamp);
ptq->timestamp = state->est_timestamp;
/* Use estimated TSC in unknown switch state */
} else if (pt->sync_switch &&
ptq->switch_state == INTEL_PT_SS_UNKNOWN &&
intel_pt_is_switch_ip(ptq, state->to_ip) &&
ptq->next_tid == -1) {
intel_pt_log("TSC %"PRIx64" est. TSC %"PRIx64"\n",
state->timestamp, state->est_timestamp);
ptq->timestamp = state->est_timestamp;
} else if (state->timestamp > ptq->timestamp) {
ptq->timestamp = state->timestamp;
}
if (!pt->timeless_decoding && ptq->timestamp >= *timestamp) {
*timestamp = ptq->timestamp;
return 0;
}
}
return 0;
}
static inline int intel_pt_update_queues(struct intel_pt *pt)
{
if (pt->queues.new_data) {
pt->queues.new_data = false;
return intel_pt_setup_queues(pt);
}
return 0;
}
static int intel_pt_process_queues(struct intel_pt *pt, u64 timestamp)
{
unsigned int queue_nr;
u64 ts;
int ret;
while (1) {
struct auxtrace_queue *queue;
struct intel_pt_queue *ptq;
if (!pt->heap.heap_cnt)
return 0;
if (pt->heap.heap_array[0].ordinal >= timestamp)
return 0;
queue_nr = pt->heap.heap_array[0].queue_nr;
queue = &pt->queues.queue_array[queue_nr];
ptq = queue->priv;
intel_pt_log("queue %u processing 0x%" PRIx64 " to 0x%" PRIx64 "\n",
queue_nr, pt->heap.heap_array[0].ordinal,
timestamp);
auxtrace_heap__pop(&pt->heap);
if (pt->heap.heap_cnt) {
ts = pt->heap.heap_array[0].ordinal + 1;
if (ts > timestamp)
ts = timestamp;
} else {
ts = timestamp;
}
intel_pt_set_pid_tid_cpu(pt, queue);
ret = intel_pt_run_decoder(ptq, &ts);
if (ret < 0) {
auxtrace_heap__add(&pt->heap, queue_nr, ts);
return ret;
}
if (!ret) {
ret = auxtrace_heap__add(&pt->heap, queue_nr, ts);
if (ret < 0)
return ret;
} else {
ptq->on_heap = false;
}
}
return 0;
}
static int intel_pt_process_timeless_queues(struct intel_pt *pt, pid_t tid,
u64 time_)
{
struct auxtrace_queues *queues = &pt->queues;
unsigned int i;
u64 ts = 0;
for (i = 0; i < queues->nr_queues; i++) {
struct auxtrace_queue *queue = &pt->queues.queue_array[i];
struct intel_pt_queue *ptq = queue->priv;
if (ptq && (tid == -1 || ptq->tid == tid)) {
ptq->time = time_;
intel_pt_set_pid_tid_cpu(pt, queue);
intel_pt_run_decoder(ptq, &ts);
}
}
return 0;
}
static int intel_pt_lost(struct intel_pt *pt, struct perf_sample *sample)
{
return intel_pt_synth_error(pt, INTEL_PT_ERR_LOST, sample->cpu,
sample->pid, sample->tid, 0);
}
static struct intel_pt_queue *intel_pt_cpu_to_ptq(struct intel_pt *pt, int cpu)
{
unsigned i, j;
if (cpu < 0 || !pt->queues.nr_queues)
return NULL;
if ((unsigned)cpu >= pt->queues.nr_queues)
i = pt->queues.nr_queues - 1;
else
i = cpu;
if (pt->queues.queue_array[i].cpu == cpu)
return pt->queues.queue_array[i].priv;
for (j = 0; i > 0; j++) {
if (pt->queues.queue_array[--i].cpu == cpu)
return pt->queues.queue_array[i].priv;
}
for (; j < pt->queues.nr_queues; j++) {
if (pt->queues.queue_array[j].cpu == cpu)
return pt->queues.queue_array[j].priv;
}
return NULL;
}
static int intel_pt_process_switch(struct intel_pt *pt,
struct perf_sample *sample)
{
struct intel_pt_queue *ptq;
struct perf_evsel *evsel;
pid_t tid;
int cpu, err;
evsel = perf_evlist__id2evsel(pt->session->evlist, sample->id);
if (evsel != pt->switch_evsel)
return 0;
tid = perf_evsel__intval(evsel, sample, "next_pid");
cpu = sample->cpu;
intel_pt_log("sched_switch: cpu %d tid %d time %"PRIu64" tsc %#"PRIx64"\n",
cpu, tid, sample->time, perf_time_to_tsc(sample->time,
&pt->tc));
if (!pt->sync_switch)
goto out;
ptq = intel_pt_cpu_to_ptq(pt, cpu);
if (!ptq)
goto out;
switch (ptq->switch_state) {
case INTEL_PT_SS_NOT_TRACING:
ptq->next_tid = -1;
break;
case INTEL_PT_SS_UNKNOWN:
case INTEL_PT_SS_TRACING:
ptq->next_tid = tid;
ptq->switch_state = INTEL_PT_SS_EXPECTING_SWITCH_IP;
return 0;
case INTEL_PT_SS_EXPECTING_SWITCH_EVENT:
if (!ptq->on_heap) {
ptq->timestamp = perf_time_to_tsc(sample->time,
&pt->tc);
err = auxtrace_heap__add(&pt->heap, ptq->queue_nr,
ptq->timestamp);
if (err)
return err;
ptq->on_heap = true;
}
ptq->switch_state = INTEL_PT_SS_TRACING;
break;
case INTEL_PT_SS_EXPECTING_SWITCH_IP:
ptq->next_tid = tid;
intel_pt_log("ERROR: cpu %d expecting switch ip\n", cpu);
break;
default:
break;
}
out:
return machine__set_current_tid(pt->machine, cpu, -1, tid);
}
static int intel_pt_process_itrace_start(struct intel_pt *pt,
union perf_event *event,
struct perf_sample *sample)
{
if (!pt->per_cpu_mmaps)
return 0;
intel_pt_log("itrace_start: cpu %d pid %d tid %d time %"PRIu64" tsc %#"PRIx64"\n",
sample->cpu, event->itrace_start.pid,
event->itrace_start.tid, sample->time,
perf_time_to_tsc(sample->time, &pt->tc));
return machine__set_current_tid(pt->machine, sample->cpu,
event->itrace_start.pid,
event->itrace_start.tid);
}
static int intel_pt_process_event(struct perf_session *session,
union perf_event *event,
struct perf_sample *sample,
struct perf_tool *tool)
{
struct intel_pt *pt = container_of(session->auxtrace, struct intel_pt,
auxtrace);
u64 timestamp;
int err = 0;
if (dump_trace)
return 0;
if (!tool->ordered_events) {
pr_err("Intel Processor Trace requires ordered events\n");
return -EINVAL;
}
if (sample->time)
timestamp = perf_time_to_tsc(sample->time, &pt->tc);
else
timestamp = 0;
if (timestamp || pt->timeless_decoding) {
err = intel_pt_update_queues(pt);
if (err)
return err;
}
if (pt->timeless_decoding) {
if (event->header.type == PERF_RECORD_EXIT) {
err = intel_pt_process_timeless_queues(pt,
event->comm.tid,
sample->time);
}
} else if (timestamp) {
err = intel_pt_process_queues(pt, timestamp);
}
if (err)
return err;
if (event->header.type == PERF_RECORD_AUX &&
(event->aux.flags & PERF_AUX_FLAG_TRUNCATED) &&
pt->synth_opts.errors) {
err = intel_pt_lost(pt, sample);
if (err)
return err;
}
if (pt->switch_evsel && event->header.type == PERF_RECORD_SAMPLE)
err = intel_pt_process_switch(pt, sample);
else if (event->header.type == PERF_RECORD_ITRACE_START)
err = intel_pt_process_itrace_start(pt, event, sample);
intel_pt_log("event %s (%u): cpu %d time %"PRIu64" tsc %#"PRIx64"\n",
perf_event__name(event->header.type), event->header.type,
sample->cpu, sample->time, timestamp);
return err;
}
static int intel_pt_flush(struct perf_session *session, struct perf_tool *tool)
{
struct intel_pt *pt = container_of(session->auxtrace, struct intel_pt,
auxtrace);
int ret;
if (dump_trace)
return 0;
if (!tool->ordered_events)
return -EINVAL;
ret = intel_pt_update_queues(pt);
if (ret < 0)
return ret;
if (pt->timeless_decoding)
return intel_pt_process_timeless_queues(pt, -1,
MAX_TIMESTAMP - 1);
return intel_pt_process_queues(pt, MAX_TIMESTAMP);
}
static void intel_pt_free_events(struct perf_session *session)
{
struct intel_pt *pt = container_of(session->auxtrace, struct intel_pt,
auxtrace);
struct auxtrace_queues *queues = &pt->queues;
unsigned int i;
for (i = 0; i < queues->nr_queues; i++) {
intel_pt_free_queue(queues->queue_array[i].priv);
queues->queue_array[i].priv = NULL;
}
intel_pt_log_disable();
auxtrace_queues__free(queues);
}
static void intel_pt_free(struct perf_session *session)
{
struct intel_pt *pt = container_of(session->auxtrace, struct intel_pt,
auxtrace);
auxtrace_heap__free(&pt->heap);
intel_pt_free_events(session);
session->auxtrace = NULL;
thread__delete(pt->unknown_thread);
free(pt);
}
static int intel_pt_process_auxtrace_event(struct perf_session *session,
union perf_event *event,
struct perf_tool *tool __maybe_unused)
{
struct intel_pt *pt = container_of(session->auxtrace, struct intel_pt,
auxtrace);
if (pt->sampling_mode)
return 0;
if (!pt->data_queued) {
struct auxtrace_buffer *buffer;
off_t data_offset;
int fd = perf_data_file__fd(session->file);
int err;
if (perf_data_file__is_pipe(session->file)) {
data_offset = 0;
} else {
data_offset = lseek(fd, 0, SEEK_CUR);
if (data_offset == -1)
return -errno;
}
err = auxtrace_queues__add_event(&pt->queues, session, event,
data_offset, &buffer);
if (err)
return err;
/* Dump here now we have copied a piped trace out of the pipe */
if (dump_trace) {
if (auxtrace_buffer__get_data(buffer, fd)) {
intel_pt_dump_event(pt, buffer->data,
buffer->size);
auxtrace_buffer__put_data(buffer);
}
}
}
return 0;
}
struct intel_pt_synth {
struct perf_tool dummy_tool;
struct perf_session *session;
};
static int intel_pt_event_synth(struct perf_tool *tool,
union perf_event *event,
struct perf_sample *sample __maybe_unused,
struct machine *machine __maybe_unused)
{
struct intel_pt_synth *intel_pt_synth =
container_of(tool, struct intel_pt_synth, dummy_tool);
return perf_session__deliver_synth_event(intel_pt_synth->session, event,
NULL);
}
static int intel_pt_synth_event(struct perf_session *session,
struct perf_event_attr *attr, u64 id)
{
struct intel_pt_synth intel_pt_synth;
memset(&intel_pt_synth, 0, sizeof(struct intel_pt_synth));
intel_pt_synth.session = session;
return perf_event__synthesize_attr(&intel_pt_synth.dummy_tool, attr, 1,
&id, intel_pt_event_synth);
}
static int intel_pt_synth_events(struct intel_pt *pt,
struct perf_session *session)
{
struct perf_evlist *evlist = session->evlist;
struct perf_evsel *evsel;
struct perf_event_attr attr;
bool found = false;
u64 id;
int err;
evlist__for_each(evlist, evsel) {
if (evsel->attr.type == pt->pmu_type && evsel->ids) {
found = true;
break;
}
}
if (!found) {
pr_debug("There are no selected events with Intel Processor Trace data\n");
return 0;
}
memset(&attr, 0, sizeof(struct perf_event_attr));
attr.size = sizeof(struct perf_event_attr);
attr.type = PERF_TYPE_HARDWARE;
attr.sample_type = evsel->attr.sample_type & PERF_SAMPLE_MASK;
attr.sample_type |= PERF_SAMPLE_IP | PERF_SAMPLE_TID |
PERF_SAMPLE_PERIOD;
if (pt->timeless_decoding)
attr.sample_type &= ~(u64)PERF_SAMPLE_TIME;
else
attr.sample_type |= PERF_SAMPLE_TIME;
if (!pt->per_cpu_mmaps)
attr.sample_type &= ~(u64)PERF_SAMPLE_CPU;
attr.exclude_user = evsel->attr.exclude_user;
attr.exclude_kernel = evsel->attr.exclude_kernel;
attr.exclude_hv = evsel->attr.exclude_hv;
attr.exclude_host = evsel->attr.exclude_host;
attr.exclude_guest = evsel->attr.exclude_guest;
attr.sample_id_all = evsel->attr.sample_id_all;
attr.read_format = evsel->attr.read_format;
id = evsel->id[0] + 1000000000;
if (!id)
id = 1;
if (pt->synth_opts.instructions) {
attr.config = PERF_COUNT_HW_INSTRUCTIONS;
if (pt->synth_opts.period_type == PERF_ITRACE_PERIOD_NANOSECS)
attr.sample_period =
intel_pt_ns_to_ticks(pt, pt->synth_opts.period);
else
attr.sample_period = pt->synth_opts.period;
pt->instructions_sample_period = attr.sample_period;
if (pt->synth_opts.callchain)
attr.sample_type |= PERF_SAMPLE_CALLCHAIN;
pr_debug("Synthesizing 'instructions' event with id %" PRIu64 " sample type %#" PRIx64 "\n",
id, (u64)attr.sample_type);
err = intel_pt_synth_event(session, &attr, id);
if (err) {
pr_err("%s: failed to synthesize 'instructions' event type\n",
__func__);
return err;
}
pt->sample_instructions = true;
pt->instructions_sample_type = attr.sample_type;
pt->instructions_id = id;
id += 1;
}
if (pt->synth_opts.transactions) {
attr.config = PERF_COUNT_HW_INSTRUCTIONS;
attr.sample_period = 1;
if (pt->synth_opts.callchain)
attr.sample_type |= PERF_SAMPLE_CALLCHAIN;
pr_debug("Synthesizing 'transactions' event with id %" PRIu64 " sample type %#" PRIx64 "\n",
id, (u64)attr.sample_type);
err = intel_pt_synth_event(session, &attr, id);
if (err) {
pr_err("%s: failed to synthesize 'transactions' event type\n",
__func__);
return err;
}
pt->sample_transactions = true;
pt->transactions_id = id;
id += 1;
evlist__for_each(evlist, evsel) {
if (evsel->id && evsel->id[0] == pt->transactions_id) {
if (evsel->name)
zfree(&evsel->name);
evsel->name = strdup("transactions");
break;
}
}
}
if (pt->synth_opts.branches) {
attr.config = PERF_COUNT_HW_BRANCH_INSTRUCTIONS;
attr.sample_period = 1;
attr.sample_type |= PERF_SAMPLE_ADDR;
attr.sample_type &= ~(u64)PERF_SAMPLE_CALLCHAIN;
pr_debug("Synthesizing 'branches' event with id %" PRIu64 " sample type %#" PRIx64 "\n",
id, (u64)attr.sample_type);
err = intel_pt_synth_event(session, &attr, id);
if (err) {
pr_err("%s: failed to synthesize 'branches' event type\n",
__func__);
return err;
}
pt->sample_branches = true;
pt->branches_sample_type = attr.sample_type;
pt->branches_id = id;
}
pt->synth_needs_swap = evsel->needs_swap;
return 0;
}
static struct perf_evsel *intel_pt_find_sched_switch(struct perf_evlist *evlist)
{
struct perf_evsel *evsel;
evlist__for_each_reverse(evlist, evsel) {
const char *name = perf_evsel__name(evsel);
if (!strcmp(name, "sched:sched_switch"))
return evsel;
}
return NULL;
}
static const char * const intel_pt_info_fmts[] = {
[INTEL_PT_PMU_TYPE] = " PMU Type %"PRId64"\n",
[INTEL_PT_TIME_SHIFT] = " Time Shift %"PRIu64"\n",
[INTEL_PT_TIME_MULT] = " Time Muliplier %"PRIu64"\n",
[INTEL_PT_TIME_ZERO] = " Time Zero %"PRIu64"\n",
[INTEL_PT_CAP_USER_TIME_ZERO] = " Cap Time Zero %"PRId64"\n",
[INTEL_PT_TSC_BIT] = " TSC bit %#"PRIx64"\n",
[INTEL_PT_NORETCOMP_BIT] = " NoRETComp bit %#"PRIx64"\n",
[INTEL_PT_HAVE_SCHED_SWITCH] = " Have sched_switch %"PRId64"\n",
[INTEL_PT_SNAPSHOT_MODE] = " Snapshot mode %"PRId64"\n",
[INTEL_PT_PER_CPU_MMAPS] = " Per-cpu maps %"PRId64"\n",
};
static void intel_pt_print_info(u64 *arr, int start, int finish)
{
int i;
if (!dump_trace)
return;
for (i = start; i <= finish; i++)
fprintf(stdout, intel_pt_info_fmts[i], arr[i]);
}
int intel_pt_process_auxtrace_info(union perf_event *event,
struct perf_session *session)
{
struct auxtrace_info_event *auxtrace_info = &event->auxtrace_info;
size_t min_sz = sizeof(u64) * INTEL_PT_PER_CPU_MMAPS;
struct intel_pt *pt;
int err;
if (auxtrace_info->header.size < sizeof(struct auxtrace_info_event) +
min_sz)
return -EINVAL;
pt = zalloc(sizeof(struct intel_pt));
if (!pt)
return -ENOMEM;
err = auxtrace_queues__init(&pt->queues);
if (err)
goto err_free;
intel_pt_log_set_name(INTEL_PT_PMU_NAME);
pt->session = session;
pt->machine = &session->machines.host; /* No kvm support */
pt->auxtrace_type = auxtrace_info->type;
pt->pmu_type = auxtrace_info->priv[INTEL_PT_PMU_TYPE];
pt->tc.time_shift = auxtrace_info->priv[INTEL_PT_TIME_SHIFT];
pt->tc.time_mult = auxtrace_info->priv[INTEL_PT_TIME_MULT];
pt->tc.time_zero = auxtrace_info->priv[INTEL_PT_TIME_ZERO];
pt->cap_user_time_zero = auxtrace_info->priv[INTEL_PT_CAP_USER_TIME_ZERO];
pt->tsc_bit = auxtrace_info->priv[INTEL_PT_TSC_BIT];
pt->noretcomp_bit = auxtrace_info->priv[INTEL_PT_NORETCOMP_BIT];
pt->have_sched_switch = auxtrace_info->priv[INTEL_PT_HAVE_SCHED_SWITCH];
pt->snapshot_mode = auxtrace_info->priv[INTEL_PT_SNAPSHOT_MODE];
pt->per_cpu_mmaps = auxtrace_info->priv[INTEL_PT_PER_CPU_MMAPS];
intel_pt_print_info(&auxtrace_info->priv[0], INTEL_PT_PMU_TYPE,
INTEL_PT_PER_CPU_MMAPS);
pt->timeless_decoding = intel_pt_timeless_decoding(pt);
pt->have_tsc = intel_pt_have_tsc(pt);
pt->sampling_mode = false;
pt->est_tsc = !pt->timeless_decoding;
pt->unknown_thread = thread__new(999999999, 999999999);
if (!pt->unknown_thread) {
err = -ENOMEM;
goto err_free_queues;
}
err = thread__set_comm(pt->unknown_thread, "unknown", 0);
if (err)
goto err_delete_thread;
if (thread__init_map_groups(pt->unknown_thread, pt->machine)) {
err = -ENOMEM;
goto err_delete_thread;
}
pt->auxtrace.process_event = intel_pt_process_event;
pt->auxtrace.process_auxtrace_event = intel_pt_process_auxtrace_event;
pt->auxtrace.flush_events = intel_pt_flush;
pt->auxtrace.free_events = intel_pt_free_events;
pt->auxtrace.free = intel_pt_free;
session->auxtrace = &pt->auxtrace;
if (dump_trace)
return 0;
if (pt->have_sched_switch == 1) {
pt->switch_evsel = intel_pt_find_sched_switch(session->evlist);
if (!pt->switch_evsel) {
pr_err("%s: missing sched_switch event\n", __func__);
goto err_delete_thread;
}
}
if (session->itrace_synth_opts && session->itrace_synth_opts->set) {
pt->synth_opts = *session->itrace_synth_opts;
} else {
itrace_synth_opts__set_default(&pt->synth_opts);
if (use_browser != -1) {
pt->synth_opts.branches = false;
pt->synth_opts.callchain = true;
}
}
if (pt->synth_opts.log)
intel_pt_log_enable();
/* Maximum non-turbo ratio is TSC freq / 100 MHz */
if (pt->tc.time_mult) {
u64 tsc_freq = intel_pt_ns_to_ticks(pt, 1000000000);
pt->max_non_turbo_ratio = (tsc_freq + 50000000) / 100000000;
intel_pt_log("TSC frequency %"PRIu64"\n", tsc_freq);
intel_pt_log("Maximum non-turbo ratio %u\n",
pt->max_non_turbo_ratio);
}
if (pt->synth_opts.calls)
pt->branches_filter |= PERF_IP_FLAG_CALL | PERF_IP_FLAG_ASYNC |
PERF_IP_FLAG_TRACE_END;
if (pt->synth_opts.returns)
pt->branches_filter |= PERF_IP_FLAG_RETURN |
PERF_IP_FLAG_TRACE_BEGIN;
if (pt->synth_opts.callchain && !symbol_conf.use_callchain) {
symbol_conf.use_callchain = true;
if (callchain_register_param(&callchain_param) < 0) {
symbol_conf.use_callchain = false;
pt->synth_opts.callchain = false;
}
}
err = intel_pt_synth_events(pt, session);
if (err)
goto err_delete_thread;
err = auxtrace_queues__process_index(&pt->queues, session);
if (err)
goto err_delete_thread;
if (pt->queues.populated)
pt->data_queued = true;
if (pt->timeless_decoding)
pr_debug2("Intel PT decoding without timestamps\n");
return 0;
err_delete_thread:
thread__delete(pt->unknown_thread);
err_free_queues:
intel_pt_log_disable();
auxtrace_queues__free(&pt->queues);
session->auxtrace = NULL;
err_free:
free(pt);
return err;
}
/*
* intel_pt.h: Intel Processor Trace support
* Copyright (c) 2013-2015, Intel Corporation.
*
* This program is free software; you can redistribute it and/or modify it
* under the terms and conditions of the GNU General Public License,
* version 2, as published by the Free Software Foundation.
*
* This program is distributed in the hope it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
* more details.
*
*/
#ifndef INCLUDE__PERF_INTEL_PT_H__
#define INCLUDE__PERF_INTEL_PT_H__
#define INTEL_PT_PMU_NAME "intel_pt"
enum {
INTEL_PT_PMU_TYPE,
INTEL_PT_TIME_SHIFT,
INTEL_PT_TIME_MULT,
INTEL_PT_TIME_ZERO,
INTEL_PT_CAP_USER_TIME_ZERO,
INTEL_PT_TSC_BIT,
INTEL_PT_NORETCOMP_BIT,
INTEL_PT_HAVE_SCHED_SWITCH,
INTEL_PT_SNAPSHOT_MODE,
INTEL_PT_PER_CPU_MMAPS,
INTEL_PT_AUXTRACE_PRIV_MAX,
};
#define INTEL_PT_AUXTRACE_PRIV_SIZE (INTEL_PT_AUXTRACE_PRIV_MAX * sizeof(u64))
struct auxtrace_record;
struct perf_tool;
union perf_event;
struct perf_session;
struct perf_event_attr;
struct perf_pmu;
struct auxtrace_record *intel_pt_recording_init(int *err);
int intel_pt_process_auxtrace_info(union perf_event *event,
struct perf_session *session);
struct perf_event_attr *intel_pt_pmu_default_config(struct perf_pmu *pmu);
#endif
...@@ -462,8 +462,8 @@ static struct perf_pmu *pmu_lookup(const char *name) ...@@ -462,8 +462,8 @@ static struct perf_pmu *pmu_lookup(const char *name)
LIST_HEAD(aliases); LIST_HEAD(aliases);
__u32 type; __u32 type;
/* No support for intel_bts or intel_pt so disallow them */ /* No support for intel_bts so disallow it */
if (!strcmp(name, "intel_bts") || !strcmp(name, "intel_pt")) if (!strcmp(name, "intel_bts"))
return NULL; return NULL;
/* /*
......
...@@ -105,6 +105,30 @@ bool perf_can_record_switch_events(void) ...@@ -105,6 +105,30 @@ bool perf_can_record_switch_events(void)
return perf_probe_api(perf_probe_context_switch); return perf_probe_api(perf_probe_context_switch);
} }
bool perf_can_record_cpu_wide(void)
{
struct perf_event_attr attr = {
.type = PERF_TYPE_SOFTWARE,
.config = PERF_COUNT_SW_CPU_CLOCK,
.exclude_kernel = 1,
};
struct cpu_map *cpus;
int cpu, fd;
cpus = cpu_map__new(NULL);
if (!cpus)
return false;
cpu = cpus->map[0];
cpu_map__put(cpus);
fd = sys_perf_event_open(&attr, -1, cpu, -1, 0);
if (fd < 0)
return false;
close(fd);
return true;
}
void perf_evlist__config(struct perf_evlist *evlist, struct record_opts *opts) void perf_evlist__config(struct perf_evlist *evlist, struct record_opts *opts)
{ {
struct perf_evsel *evsel; struct perf_evsel *evsel;
......
...@@ -875,6 +875,17 @@ int dso__load_sym(struct dso *dso, struct map *map, ...@@ -875,6 +875,17 @@ int dso__load_sym(struct dso *dso, struct map *map,
} }
} }
/*
* Handle any relocation of vdso necessary because older kernels
* attempted to prelink vdso to its virtual address.
*/
if (dso__is_vdso(dso)) {
GElf_Shdr tshdr;
if (elf_section_by_name(elf, &ehdr, &tshdr, ".text", NULL))
map->reloc = map->start - tshdr.sh_addr + tshdr.sh_offset;
}
dso->adjust_symbols = runtime_ss->adjust_symbols || ref_reloc(kmap); dso->adjust_symbols = runtime_ss->adjust_symbols || ref_reloc(kmap);
/* /*
* Initial kernel and module mappings do not map to the dso. For * Initial kernel and module mappings do not map to the dso. For
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment