Commit 94ac003b authored by Ingo Molnar's avatar Ingo Molnar

Merge tag 'perf-core-for-mingo' of...

Merge tag 'perf-core-for-mingo' of git://git.kernel.org/pub/scm/linux/kernel/git/acme/linux into perf/core

Pull perf/core improvements and fixes from Arnaldo Carvalho de Melo:

User visible changes:

  - Warn if given uprobe event accesses memory on older kernel (Masami Hiramatsu)

  - 'perf record' Documentation fixes (Namhyung Kim)

  - Report unsupported events properly in 'perf stat' (Suzuki K. Poulose)

Infrastructure changes:

  - Avoid FORK after COMM when synthesizing records for pre-existing threads (Arnaldo Carvalho de Melo)

  - Reference count struct thread (Arnaldo Carvalho de Melo)

  - Don't keep the session around in 'perf sched', thread refcounting removes that need (Arnaldo Carvalho de Melo)

  - Initialize cpu set in pthread_attr_setaffinity_np() feature test (Adrian Hunter)

  - Only include tsc file for x86 (David Ahern)

  - Compare JOBS to 0 after grep (David Ahern)

  - Improve feature detection messages (Ingo Molnar)

  - Revert "perf: Remove the extra validity check on nr_pages" (Kan Liang)

  - Remove bias offset to find probe point by address (Masami Hiramatsu)

  - Fix build error on ARCH=i386/x86_64/sparc64 (Namhyung Kim)
Signed-off-by: default avatarArnaldo Carvalho de Melo <acme@redhat.com>
Signed-off-by: default avatarIngo Molnar <mingo@kernel.org>
parents 788b94ba ae536acf
......@@ -223,27 +223,48 @@ static unsigned long
__recover_probed_insn(kprobe_opcode_t *buf, unsigned long addr)
{
struct kprobe *kp;
unsigned long faddr;
kp = get_kprobe((void *)addr);
/* There is no probe, return original address */
if (!kp)
faddr = ftrace_location(addr);
/*
* Addresses inside the ftrace location are refused by
* arch_check_ftrace_location(). Something went terribly wrong
* if such an address is checked here.
*/
if (WARN_ON(faddr && faddr != addr))
return 0UL;
/*
* Use the current code if it is not modified by Kprobe
* and it cannot be modified by ftrace.
*/
if (!kp && !faddr)
return addr;
/*
* Basically, kp->ainsn.insn has an original instruction.
* However, RIP-relative instruction can not do single-stepping
* at different place, __copy_instruction() tweaks the displacement of
* that instruction. In that case, we can't recover the instruction
* from the kp->ainsn.insn.
* Basically, kp->ainsn.insn has an original instruction.
* However, RIP-relative instruction can not do single-stepping
* at different place, __copy_instruction() tweaks the displacement of
* that instruction. In that case, we can't recover the instruction
* from the kp->ainsn.insn.
*
* On the other hand, kp->opcode has a copy of the first byte of
* the probed instruction, which is overwritten by int3. And
* the instruction at kp->addr is not modified by kprobes except
* for the first byte, we can recover the original instruction
* from it and kp->opcode.
* On the other hand, in case on normal Kprobe, kp->opcode has a copy
* of the first byte of the probed instruction, which is overwritten
* by int3. And the instruction at kp->addr is not modified by kprobes
* except for the first byte, we can recover the original instruction
* from it and kp->opcode.
*
* In case of Kprobes using ftrace, we do not have a copy of
* the original instruction. In fact, the ftrace location might
* be modified at anytime and even could be in an inconsistent state.
* Fortunately, we know that the original code is the ideal 5-byte
* long NOP.
*/
memcpy(buf, kp->addr, MAX_INSN_SIZE * sizeof(kprobe_opcode_t));
buf[0] = kp->opcode;
memcpy(buf, (void *)addr, MAX_INSN_SIZE * sizeof(kprobe_opcode_t));
if (faddr)
memcpy(buf, ideal_nops[NOP_ATOMIC5], 5);
else
buf[0] = kp->opcode;
return (unsigned long)buf;
}
......@@ -251,6 +272,7 @@ __recover_probed_insn(kprobe_opcode_t *buf, unsigned long addr)
* Recover the probed instruction at addr for further analysis.
* Caller must lock kprobes by kprobe_mutex, or disable preemption
* for preventing to release referencing kprobes.
* Returns zero if the instruction can not get recovered.
*/
unsigned long recover_probed_instruction(kprobe_opcode_t *buf, unsigned long addr)
{
......@@ -285,6 +307,8 @@ static int can_probe(unsigned long paddr)
* normally used, we just go through if there is no kprobe.
*/
__addr = recover_probed_instruction(buf, addr);
if (!__addr)
return 0;
kernel_insn_init(&insn, (void *)__addr, MAX_INSN_SIZE);
insn_get_length(&insn);
......@@ -333,6 +357,8 @@ int __copy_instruction(u8 *dest, u8 *src)
unsigned long recovered_insn =
recover_probed_instruction(buf, (unsigned long)src);
if (!recovered_insn)
return 0;
kernel_insn_init(&insn, (void *)recovered_insn, MAX_INSN_SIZE);
insn_get_length(&insn);
/* Another subsystem puts a breakpoint, failed to recover */
......
......@@ -259,6 +259,8 @@ static int can_optimize(unsigned long paddr)
*/
return 0;
recovered_insn = recover_probed_instruction(buf, addr);
if (!recovered_insn)
return 0;
kernel_insn_init(&insn, (void *)recovered_insn, MAX_INSN_SIZE);
insn_get_length(&insn);
/* Another subsystem puts a breakpoint */
......
......@@ -4446,7 +4446,7 @@ static int perf_mmap(struct file *file, struct vm_area_struct *vma)
* If we have rb pages ensure they're a power-of-two number, so we
* can do bitmasks instead of modulo.
*/
if (!is_power_of_2(nr_pages))
if (nr_pages != 0 && !is_power_of_2(nr_pages))
return -EINVAL;
if (vma_size != PAGE_SIZE * (1 + nr_pages))
......
PERF-CFLAGS
PERF-GUI-VARS
PERF-VERSION-FILE
PERF-FEATURES
perf
perf-read-vdso32
perf-read-vdsox32
......
......@@ -55,6 +55,11 @@ OPTIONS
If you want to profile write accesses in [0x1000~1008), just set
'mem:0x1000/8:w'.
- a group of events surrounded by a pair of brace ("{event1,event2,...}").
Each event is separated by commas and the group should be quoted to
prevent the shell interpretation. You also need to use --group on
"perf report" to view group events together.
--filter=<filter>::
Event filter.
......@@ -62,9 +67,6 @@ OPTIONS
--all-cpus::
System-wide collection from all CPUs.
-l::
Scale counter values.
-p::
--pid=::
Record events on existing process ID (comma separated list).
......@@ -107,6 +109,10 @@ OPTIONS
specification with appended unit character - B/K/M/G. The
size is rounded up to have nearest pages power of two value.
--group::
Put all events in a single event group. This precedes the --event
option and remains only for backward compatibility. See --event.
-g::
Enables call-graph (stack chain/backtrace) recording.
......
......@@ -25,7 +25,7 @@ unexport MAKEFLAGS
#
ifeq ($(JOBS),)
JOBS := $(shell grep -c ^processor /proc/cpuinfo 2>/dev/null)
ifeq ($(JOBS),)
ifeq ($(JOBS),0)
JOBS := 1
endif
endif
......
......@@ -289,7 +289,7 @@ static u64 do_memcpy_cycle(const struct routine *r, size_t len, bool prefault)
memcpy_t fn = r->fn.memcpy;
int i;
memcpy_alloc_mem(&src, &dst, len);
memcpy_alloc_mem(&dst, &src, len);
if (prefault)
fn(dst, src, len);
......@@ -312,7 +312,7 @@ static double do_memcpy_gettimeofday(const struct routine *r, size_t len,
void *src = NULL, *dst = NULL;
int i;
memcpy_alloc_mem(&src, &dst, len);
memcpy_alloc_mem(&dst, &src, len);
if (prefault)
fn(dst, src, len);
......
......@@ -831,7 +831,7 @@ static int thread_atoms_insert(struct perf_sched *sched, struct thread *thread)
return -1;
}
atoms->thread = thread;
atoms->thread = thread__get(thread);
INIT_LIST_HEAD(&atoms->work_list);
__thread_latency_insert(&sched->atom_root, atoms, &sched->cmp_pid);
return 0;
......@@ -1439,8 +1439,7 @@ static int perf_sched__process_tracepoint_sample(struct perf_tool *tool __maybe_
return err;
}
static int perf_sched__read_events(struct perf_sched *sched,
struct perf_session **psession)
static int perf_sched__read_events(struct perf_sched *sched)
{
const struct perf_evsel_str_handler handlers[] = {
{ "sched:sched_switch", process_sched_switch_event, },
......@@ -1454,6 +1453,7 @@ static int perf_sched__read_events(struct perf_sched *sched,
.path = input_name,
.mode = PERF_DATA_MODE_READ,
};
int rc = -1;
session = perf_session__new(&file, false, &sched->tool);
if (session == NULL) {
......@@ -1478,16 +1478,10 @@ static int perf_sched__read_events(struct perf_sched *sched,
sched->nr_lost_chunks = session->evlist->stats.nr_events[PERF_RECORD_LOST];
}
if (psession)
*psession = session;
else
perf_session__delete(session);
return 0;
rc = 0;
out_delete:
perf_session__delete(session);
return -1;
return rc;
}
static void print_bad_events(struct perf_sched *sched)
......@@ -1515,12 +1509,10 @@ static void print_bad_events(struct perf_sched *sched)
static int perf_sched__lat(struct perf_sched *sched)
{
struct rb_node *next;
struct perf_session *session;
setup_pager();
/* save session -- references to threads are held in work_list */
if (perf_sched__read_events(sched, &session))
if (perf_sched__read_events(sched))
return -1;
perf_sched__sort_lat(sched);
......@@ -1537,6 +1529,7 @@ static int perf_sched__lat(struct perf_sched *sched)
work_list = rb_entry(next, struct work_atoms, node);
output_lat_thread(sched, work_list);
next = rb_next(next);
thread__zput(work_list->thread);
}
printf(" -----------------------------------------------------------------------------------------------------------------\n");
......@@ -1548,7 +1541,6 @@ static int perf_sched__lat(struct perf_sched *sched)
print_bad_events(sched);
printf("\n");
perf_session__delete(session);
return 0;
}
......@@ -1557,7 +1549,7 @@ static int perf_sched__map(struct perf_sched *sched)
sched->max_cpu = sysconf(_SC_NPROCESSORS_CONF);
setup_pager();
if (perf_sched__read_events(sched, NULL))
if (perf_sched__read_events(sched))
return -1;
print_bad_events(sched);
return 0;
......@@ -1572,7 +1564,7 @@ static int perf_sched__replay(struct perf_sched *sched)
test_calibrations(sched);
if (perf_sched__read_events(sched, NULL))
if (perf_sched__read_events(sched))
return -1;
printf("nr_run_events: %ld\n", sched->nr_run_events);
......
......@@ -510,6 +510,9 @@ static int read_counter(struct perf_evsel *counter)
int ncpus = perf_evsel__nr_cpus(counter);
int cpu, thread;
if (!counter->supported)
return -ENOENT;
if (counter->system_wide)
nthreads = 1;
......@@ -1285,7 +1288,7 @@ static void print_counter_aggr(struct perf_evsel *counter, char *prefix)
if (prefix)
fprintf(output, "%s", prefix);
if (scaled == -1) {
if (scaled == -1 || !counter->supported) {
fprintf(output, "%*s%s",
csv_output ? 0 : 18,
counter->supported ? CNTR_NOT_COUNTED : CNTR_NOT_SUPPORTED,
......
......@@ -1741,7 +1741,10 @@ static int trace__sys_enter(struct trace *trace, struct perf_evsel *evsel,
} else
ttrace->entry_pending = true;
trace->current = thread;
if (trace->current != thread) {
thread__put(trace->current);
trace->current = thread__get(thread);
}
return 0;
}
......@@ -2274,6 +2277,8 @@ static int trace__run(struct trace *trace, int argc, const char **argv)
}
out_disable:
thread__zput(trace->current);
perf_evlist__disable(evlist);
if (!err) {
......
......@@ -531,7 +531,7 @@ else
ifneq ($(feature-libperl), 1)
CFLAGS += -DNO_LIBPERL
NO_LIBPERL := 1
msg := $(warning Missing perl devel files. Disabling perl scripting support, consider installing perl-ExtUtils-Embed);
msg := $(warning Missing perl devel files. Disabling perl scripting support, please install perl-ExtUtils-Embed/libperl-dev);
else
LDFLAGS += $(PERL_EMBED_LDFLAGS)
EXTLIBS += $(PERL_EMBED_LIBADD)
......@@ -548,22 +548,21 @@ endif
disable-python = $(eval $(disable-python_code))
define disable-python_code
CFLAGS += -DNO_LIBPYTHON
$(if $(1),$(warning No $(1) was found))
$(warning Python support will not be built)
$(warning $1)
NO_LIBPYTHON := 1
endef
ifdef NO_LIBPYTHON
$(call disable-python)
$(call disable-python,Python support disabled by user)
else
ifndef PYTHON
$(call disable-python,python interpreter)
$(call disable-python,No python interpreter was found: disables Python support - please install python-devel/python-dev)
else
PYTHON_WORD := $(call shell-wordify,$(PYTHON))
ifndef PYTHON_CONFIG
$(call disable-python,python-config tool)
$(call disable-python,No 'python-config' tool was found: disables Python support - please install python-devel/python-dev)
else
PYTHON_CONFIG_SQ := $(call shell-sq,$(PYTHON_CONFIG))
......@@ -575,7 +574,7 @@ else
FLAGS_PYTHON_EMBED := $(PYTHON_EMBED_CCOPTS) $(PYTHON_EMBED_LDOPTS)
ifneq ($(feature-libpython), 1)
$(call disable-python,Python.h (for Python 2.x))
$(call disable-python,No 'Python.h' (for Python 2.x support) was found: disables Python support - please install python-devel/python-dev)
else
ifneq ($(feature-libpython-version), 1)
......@@ -636,7 +635,7 @@ else
EXTLIBS += -liberty
CFLAGS += -DHAVE_CPLUS_DEMANGLE_SUPPORT
else
msg := $(warning No bfd.h/libbfd found, install binutils-dev[el]/zlib-static to gain symbol demangling)
msg := $(warning No bfd.h/libbfd found, please install binutils-dev[el]/zlib-static/libiberty-dev to gain symbol demangling)
CFLAGS += -DNO_DEMANGLE
endif
endif
......@@ -707,7 +706,7 @@ endif
ifndef NO_LIBBABELTRACE
ifeq ($(feature-libbabeltrace), 0)
msg := $(warning No libbabeltrace found, disables 'perf data' CTF format support, please install libbabeltrace-devel/libbabeltrace-ctf-dev);
msg := $(warning No libbabeltrace found, disables 'perf data' CTF format support, please install libbabeltrace-dev[el]/libbabeltrace-ctf-dev);
NO_LIBBABELTRACE := 1
else
CFLAGS += -DHAVE_LIBBABELTRACE_SUPPORT $(LIBBABELTRACE_CFLAGS)
......
ifndef ARCH
ARCH := $(shell uname -m 2>/dev/null || echo not)
endif
uname_M := $(shell uname -m 2>/dev/null || echo not)
RAW_ARCH := $(shell echo $(uname_M) | sed -e s/i.86/i386/ -e s/sun4u/sparc64/ \
ARCH := $(shell echo $(ARCH) | sed -e s/i.86/x86/ -e s/x86_64/x86/ \
-e s/sun4u/sparc/ -e s/sparc64/sparc/ \
-e s/arm.*/arm/ -e s/sa110/arm/ \
-e s/s390x/s390/ -e s/parisc64/parisc/ \
-e s/ppc.*/powerpc/ -e s/mips.*/mips/ \
-e s/sh[234].*/sh/ -e s/aarch64.*/arm64/ \
-e s/tile.*/tile/ )
# Additional ARCH settings for x86
ifeq ($(RAW_ARCH),i386)
ARCH ?= x86
endif
ifeq ($(RAW_ARCH),x86_64)
ARCH ?= x86
ifneq (, $(findstring m32,$(CFLAGS)))
RAW_ARCH := x86_32
endif
endif
ARCH ?= $(RAW_ARCH)
LP64 := $(shell echo __LP64__ | ${CC} ${CFLAGS} -E -x c - | tail -n 1)
ifeq ($(LP64), 1)
IS_64_BIT := 1
......
......@@ -39,24 +39,24 @@ PKG_CONFIG := $(CROSS_COMPILE)pkg-config
all: $(FILES)
BUILD = $(CC) $(CFLAGS) -o $(OUTPUT)$@ $(patsubst %.bin,%.c,$@) $(LDFLAGS)
BUILD = $(CC) $(CFLAGS) -Wall -Werror -o $(OUTPUT)$@ $(patsubst %.bin,%.c,$@) $(LDFLAGS)
###############################
test-all.bin:
$(BUILD) -Werror -fstack-protector-all -O2 -Werror -D_FORTIFY_SOURCE=2 -ldw -lelf -lnuma -lelf -laudit -I/usr/include/slang -lslang $(shell $(PKG_CONFIG) --libs --cflags gtk+-2.0 2>/dev/null) $(FLAGS_PERL_EMBED) $(FLAGS_PYTHON_EMBED) -DPACKAGE='"perf"' -lbfd -ldl -lz -lbabeltrace
$(BUILD) -fstack-protector-all -O2 -D_FORTIFY_SOURCE=2 -ldw -lelf -lnuma -lelf -laudit -I/usr/include/slang -lslang $(shell $(PKG_CONFIG) --libs --cflags gtk+-2.0 2>/dev/null) $(FLAGS_PERL_EMBED) $(FLAGS_PYTHON_EMBED) -DPACKAGE='"perf"' -lbfd -ldl -lz -lbabeltrace
test-hello.bin:
$(BUILD)
test-pthread-attr-setaffinity-np.bin:
$(BUILD) -D_GNU_SOURCE -Werror -lpthread
$(BUILD) -D_GNU_SOURCE -lpthread
test-stackprotector-all.bin:
$(BUILD) -Werror -fstack-protector-all
$(BUILD) -fstack-protector-all
test-fortify-source.bin:
$(BUILD) -O2 -Werror -D_FORTIFY_SOURCE=2
$(BUILD) -O2 -D_FORTIFY_SOURCE=2
test-bionic.bin:
$(BUILD)
......@@ -119,10 +119,10 @@ test-libbfd.bin:
$(BUILD) -DPACKAGE='"perf"' -lbfd -lz -liberty -ldl
test-liberty.bin:
$(CC) -o $(OUTPUT)$@ test-libbfd.c -DPACKAGE='"perf"' -lbfd -ldl -liberty
$(CC) -Wall -Werror -o $(OUTPUT)$@ test-libbfd.c -DPACKAGE='"perf"' -lbfd -ldl -liberty
test-liberty-z.bin:
$(CC) -o $(OUTPUT)$@ test-libbfd.c -DPACKAGE='"perf"' -lbfd -ldl -liberty -lz
$(CC) -Wall -Werror -o $(OUTPUT)$@ test-libbfd.c -DPACKAGE='"perf"' -lbfd -ldl -liberty -lz
test-cplus-demangle.bin:
$(BUILD) -liberty
......@@ -140,7 +140,7 @@ test-libbabeltrace.bin:
$(BUILD) # -lbabeltrace provided by $(FEATURE_CHECK_LDFLAGS-libbabeltrace)
test-sync-compare-and-swap.bin:
$(BUILD) -Werror
$(BUILD)
test-compile-32.bin:
$(CC) -m32 -o $(OUTPUT)$@ test-compile.c
......
#include <stdint.h>
#include <pthread.h>
#include <sched.h>
int main(void)
{
int ret = 0;
pthread_attr_t thread_attr;
cpu_set_t cs;
pthread_attr_init(&thread_attr);
/* don't care abt exact args, just the API itself in libpthread */
ret = pthread_attr_setaffinity_np(&thread_attr, 0, NULL);
CPU_ZERO(&cs);
ret = pthread_attr_setaffinity_np(&thread_attr, sizeof(cs), &cs);
return ret;
}
......@@ -175,6 +175,5 @@ _ge-abspath = $(if $(is-executable),$(1))
define get-executable-or-default
$(if $($(1)),$(call _ge_attempt,$($(1)),$(1)),$(call _ge_attempt,$(2)))
endef
_ge_attempt = $(if $(get-executable),$(get-executable),$(_gea_warn)$(call _gea_err,$(2)))
_gea_warn = $(warning The path '$(1)' is not executable.)
_ge_attempt = $(if $(get-executable),$(get-executable),$(call _gea_err,$(2)))
_gea_err = $(if $(1),$(error Please set '$(1)' appropriately))
......@@ -1467,7 +1467,7 @@ static int perf_evsel__hists_browse(struct perf_evsel *evsel, int nr_events,
perf_hpp__set_user_width(symbol_conf.col_width_list_str);
while (1) {
const struct thread *thread = NULL;
struct thread *thread = NULL;
const struct dso *dso = NULL;
int choice = 0,
annotate = -2, zoom_dso = -2, zoom_thread = -2,
......@@ -1754,13 +1754,13 @@ static int perf_evsel__hists_browse(struct perf_evsel *evsel, int nr_events,
pstack__remove(fstack, &browser->hists->thread_filter);
zoom_out_thread:
ui_helpline__pop();
browser->hists->thread_filter = NULL;
thread__zput(browser->hists->thread_filter);
perf_hpp__set_elide(HISTC_THREAD, false);
} else {
ui_helpline__fpush("To zoom out press <- or -> + \"Zoom out of %s(%d) thread\"",
thread->comm_set ? thread__comm_str(thread) : "",
thread->tid);
browser->hists->thread_filter = thread;
browser->hists->thread_filter = thread__get(thread);
perf_hpp__set_elide(HISTC_THREAD, false);
pstack__push(fstack, &browser->hists->thread_filter);
}
......
......@@ -71,7 +71,7 @@ libperf-y += stat.o
libperf-y += record.o
libperf-y += srcline.o
libperf-y += data.o
libperf-y += tsc.o
libperf-$(CONFIG_X86) += tsc.o
libperf-y += cloexec.o
libperf-y += thread-stack.o
......
......@@ -61,8 +61,9 @@ static int perf_event__exit_del_thread(struct perf_tool *tool __maybe_unused,
if (thread) {
rb_erase(&thread->rb_node, &machine->threads);
machine->last_match = NULL;
thread__delete(thread);
if (machine->last_match == thread)
thread__zput(machine->last_match);
thread__put(thread);
}
return 0;
......
......@@ -25,6 +25,10 @@ static int perf_flag_probe(void)
if (cpu < 0)
cpu = 0;
/*
* Using -1 for the pid is a workaround to avoid gratuitous jump label
* changes.
*/
while (1) {
/* check cloexec flag */
fd = sys_perf_event_open(&attr, pid, cpu, -1,
......@@ -47,16 +51,24 @@ static int perf_flag_probe(void)
err, strerror_r(err, sbuf, sizeof(sbuf)));
/* not supported, confirm error related to PERF_FLAG_FD_CLOEXEC */
fd = sys_perf_event_open(&attr, pid, cpu, -1, 0);
while (1) {
fd = sys_perf_event_open(&attr, pid, cpu, -1, 0);
if (fd < 0 && pid == -1 && errno == EACCES) {
pid = 0;
continue;
}
break;
}
err = errno;
if (fd >= 0)
close(fd);
if (WARN_ONCE(fd < 0 && err != EBUSY,
"perf_event_open(..., 0) failed unexpectedly with error %d (%s)\n",
err, strerror_r(err, sbuf, sizeof(sbuf))))
return -1;
close(fd);
return 0;
}
......
......@@ -95,9 +95,7 @@ static pid_t perf_event__get_comm_tgid(pid_t pid, char *comm, size_t len)
return tgid;
}
static pid_t perf_event__synthesize_comm(struct perf_tool *tool,
union perf_event *event, pid_t pid,
perf_event__handler_t process,
static pid_t perf_event__prepare_comm(union perf_event *event, pid_t pid,
struct machine *machine)
{
size_t size;
......@@ -124,6 +122,19 @@ static pid_t perf_event__synthesize_comm(struct perf_tool *tool,
(sizeof(event->comm.comm) - size) +
machine->id_hdr_size);
event->comm.tid = pid;
out:
return tgid;
}
static pid_t perf_event__synthesize_comm(struct perf_tool *tool,
union perf_event *event, pid_t pid,
perf_event__handler_t process,
struct machine *machine)
{
pid_t tgid = perf_event__prepare_comm(event, pid, machine);
if (tgid == -1)
goto out;
if (process(tool, event, &synth_sample, machine) != 0)
return -1;
......@@ -139,7 +150,6 @@ static int perf_event__synthesize_fork(struct perf_tool *tool,
{
memset(&event->fork, 0, sizeof(event->fork) + machine->id_hdr_size);
/* this is really a clone event but we use fork to synthesize it */
event->fork.ppid = tgid;
event->fork.ptid = tgid;
event->fork.pid = tgid;
......@@ -368,19 +378,23 @@ static int __event__synthesize_thread(union perf_event *comm_event,
if (*end)
continue;
tgid = perf_event__synthesize_comm(tool, comm_event, _pid,
process, machine);
tgid = perf_event__prepare_comm(comm_event, _pid, machine);
if (tgid == -1)
return -1;
if (perf_event__synthesize_fork(tool, fork_event, _pid, tgid,
process, machine) < 0)
return -1;
/*
* Send the prepared comm event
*/
if (process(tool, comm_event, &synth_sample, machine) != 0)
return -1;
if (_pid == pid) {
/* process the parent's maps too */
rc = perf_event__synthesize_mmap_events(tool, mmap_event, pid, tgid,
process, machine, mmap_data);
} else {
/* only fork the tid's map, to save time */
rc = perf_event__synthesize_fork(tool, fork_event, _pid, tgid,
process, machine);
}
if (rc)
......
......@@ -28,7 +28,7 @@ struct perf_mmap {
int mask;
int refcnt;
unsigned int prev;
char event_copy[PERF_SAMPLE_MAX_SIZE];
char event_copy[PERF_SAMPLE_MAX_SIZE] __attribute__((aligned(8)));
};
struct perf_evlist {
......
......@@ -355,6 +355,7 @@ static struct hist_entry *hist_entry__new(struct hist_entry *template,
callchain_init(he->callchain);
INIT_LIST_HEAD(&he->pairs.node);
thread__get(he->thread);
}
return he;
......@@ -941,6 +942,7 @@ hist_entry__collapse(struct hist_entry *left, struct hist_entry *right)
void hist_entry__delete(struct hist_entry *he)
{
thread__zput(he->thread);
zfree(&he->branch_info);
zfree(&he->mem_info);
zfree(&he->stat_acc);
......
......@@ -60,7 +60,7 @@ struct hists {
struct rb_root entries_collapsed;
u64 nr_entries;
u64 nr_non_filtered_entries;
const struct thread *thread_filter;
struct thread *thread_filter;
const struct dso *dso_filter;
const char *uid_filter_str;
const char *symbol_filter_str;
......
......@@ -14,6 +14,8 @@
#include "unwind.h"
#include "linux/hash.h"
static void machine__remove_thread(struct machine *machine, struct thread *th);
static void dsos__init(struct dsos *dsos)
{
INIT_LIST_HEAD(&dsos->head);
......@@ -89,16 +91,6 @@ static void dsos__delete(struct dsos *dsos)
}
}
void machine__delete_dead_threads(struct machine *machine)
{
struct thread *n, *t;
list_for_each_entry_safe(t, n, &machine->dead_threads, node) {
list_del(&t->node);
thread__delete(t);
}
}
void machine__delete_threads(struct machine *machine)
{
struct rb_node *nd = rb_first(&machine->threads);
......@@ -106,9 +98,8 @@ void machine__delete_threads(struct machine *machine)
while (nd) {
struct thread *t = rb_entry(nd, struct thread, rb_node);
rb_erase(&t->rb_node, &machine->threads);
nd = rb_next(nd);
thread__delete(t);
machine__remove_thread(machine, t);
}
}
......@@ -361,9 +352,13 @@ static struct thread *__machine__findnew_thread(struct machine *machine,
* the full rbtree:
*/
th = machine->last_match;
if (th && th->tid == tid) {
machine__update_thread_pid(machine, th, pid);
return th;
if (th != NULL) {
if (th->tid == tid) {
machine__update_thread_pid(machine, th, pid);
return th;
}
thread__zput(machine->last_match);
}
while (*p != NULL) {
......@@ -371,7 +366,7 @@ static struct thread *__machine__findnew_thread(struct machine *machine,
th = rb_entry(parent, struct thread, rb_node);
if (th->tid == tid) {
machine->last_match = th;
machine->last_match = thread__get(th);
machine__update_thread_pid(machine, th, pid);
return th;
}
......@@ -403,8 +398,11 @@ static struct thread *__machine__findnew_thread(struct machine *machine,
thread__delete(th);
return NULL;
}
machine->last_match = th;
/*
* It is now in the rbtree, get a ref
*/
thread__get(th);
machine->last_match = thread__get(th);
}
return th;
......@@ -1238,13 +1236,17 @@ int machine__process_mmap_event(struct machine *machine, union perf_event *event
static void machine__remove_thread(struct machine *machine, struct thread *th)
{
machine->last_match = NULL;
if (machine->last_match == th)
thread__zput(machine->last_match);
rb_erase(&th->rb_node, &machine->threads);
/*
* We may have references to this thread, for instance in some hist_entry
* instances, so just move them to a separate list.
* Move it first to the dead_threads list, then drop the reference,
* if this is the last reference, then the thread__delete destructor
* will be called and we will remove it from the dead_threads list.
*/
list_add_tail(&th->node, &machine->dead_threads);
thread__put(th);
}
int machine__process_fork_event(struct machine *machine, union perf_event *event,
......
......@@ -118,7 +118,6 @@ void machines__set_comm_exec(struct machines *machines, bool comm_exec);
struct machine *machine__new_host(void);
int machine__init(struct machine *machine, const char *root_dir, pid_t pid);
void machine__exit(struct machine *machine);
void machine__delete_dead_threads(struct machine *machine);
void machine__delete_threads(struct machine *machine);
void machine__delete(struct machine *machine);
......
......@@ -2199,6 +2199,27 @@ static int get_new_event_name(char *buf, size_t len, const char *base,
return ret;
}
/* Warn if the current kernel's uprobe implementation is old */
static void warn_uprobe_event_compat(struct probe_trace_event *tev)
{
int i;
char *buf = synthesize_probe_trace_command(tev);
/* Old uprobe event doesn't support memory dereference */
if (!tev->uprobes || tev->nargs == 0 || !buf)
goto out;
for (i = 0; i < tev->nargs; i++)
if (strglobmatch(tev->args[i].value, "[$@+-]*")) {
pr_warning("Please upgrade your kernel to at least "
"3.14 to have access to feature %s\n",
tev->args[i].value);
break;
}
out:
free(buf);
}
static int __add_probe_trace_events(struct perf_probe_event *pev,
struct probe_trace_event *tevs,
int ntevs, bool allow_suffix)
......@@ -2295,6 +2316,8 @@ static int __add_probe_trace_events(struct perf_probe_event *pev,
*/
allow_suffix = true;
}
if (ret == -EINVAL && pev->uprobes)
warn_uprobe_event_compat(tev);
/* Note that it is possible to skip all events because of blacklist */
if (ret >= 0 && tev->event) {
......
......@@ -1345,11 +1345,8 @@ int debuginfo__find_probe_point(struct debuginfo *dbg, unsigned long addr,
const char *fname = NULL, *func = NULL, *basefunc = NULL, *tmp;
int baseline = 0, lineno = 0, ret = 0;
/* Adjust address with bias */
addr += dbg->bias;
/* Find cu die */
if (!dwarf_addrdie(dbg->dbg, (Dwarf_Addr)addr - dbg->bias, &cudie)) {
if (!dwarf_addrdie(dbg->dbg, (Dwarf_Addr)addr, &cudie)) {
pr_warning("Failed to find debug information for address %lx\n",
addr);
ret = -EINVAL;
......
......@@ -138,11 +138,6 @@ struct perf_session *perf_session__new(struct perf_data_file *file,
return NULL;
}
static void perf_session__delete_dead_threads(struct perf_session *session)
{
machine__delete_dead_threads(&session->machines.host);
}
static void perf_session__delete_threads(struct perf_session *session)
{
machine__delete_threads(&session->machines.host);
......@@ -167,7 +162,6 @@ static void perf_session_env__delete(struct perf_session_env *env)
void perf_session__delete(struct perf_session *session)
{
perf_session__destroy_kernel_maps(session);
perf_session__delete_dead_threads(session);
perf_session__delete_threads(session);
perf_session_env__delete(&session->header.env);
machines__exit(&session->machines);
......
......@@ -11,6 +11,11 @@
#include <symbol/kallsyms.h>
#include "debug.h"
#ifndef EM_AARCH64
#define EM_AARCH64 183 /* ARM 64 bit */
#endif
#ifdef HAVE_CPLUS_DEMANGLE_SUPPORT
extern char *cplus_demangle(const char *, int);
......
......@@ -82,6 +82,20 @@ void thread__delete(struct thread *thread)
free(thread);
}
struct thread *thread__get(struct thread *thread)
{
++thread->refcnt;
return thread;
}
void thread__put(struct thread *thread)
{
if (thread && --thread->refcnt == 0) {
list_del_init(&thread->node);
thread__delete(thread);
}
}
struct comm *thread__comm(const struct thread *thread)
{
if (list_empty(&thread->comm_list))
......
......@@ -20,6 +20,7 @@ struct thread {
pid_t tid;
pid_t ppid;
int cpu;
int refcnt;
char shortname[3];
bool comm_set;
bool dead; /* if set thread has exited */
......@@ -37,6 +38,18 @@ struct comm;
struct thread *thread__new(pid_t pid, pid_t tid);
int thread__init_map_groups(struct thread *thread, struct machine *machine);
void thread__delete(struct thread *thread);
struct thread *thread__get(struct thread *thread);
void thread__put(struct thread *thread);
static inline void __thread__zput(struct thread **thread)
{
thread__put(*thread);
*thread = NULL;
}
#define thread__zput(thread) __thread__zput(&thread)
static inline void thread__exited(struct thread *thread)
{
thread->dead = true;
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment