Commit 4b50239a authored by Ingo Molnar's avatar Ingo Molnar

Merge tag 'perf-core-for-mingo-4.15-20171003' of...

Merge tag 'perf-core-for-mingo-4.15-20171003' of git://git.kernel.org/pub/scm/linux/kernel/git/acme/linux into perf/core

Pull perf/core improvements and fixes from Arnaldo Carvalho de Melo:

- Multithread the synthesizing of PERF_RECORD_ events for pre-existing
  threads in 'perf top', speeding up that phase, greatly improving the
  user experience in systems such as Intel's Knights Mill (Kan Liang)

- 'perf test' fixes for the perf_event_attr test case (Jiri Olsa, Thomas Richter)
Signed-off-by: default avatarArnaldo Carvalho de Melo <acme@redhat.com>
Signed-off-by: default avatarIngo Molnar <mingo@kernel.org>
parents a47ba4d7 f6a9820d
......@@ -88,6 +88,12 @@ struct kvm_s390_io_adapter_req {
/* kvm attributes for KVM_S390_VM_TOD */
#define KVM_S390_VM_TOD_LOW 0
#define KVM_S390_VM_TOD_HIGH 1
#define KVM_S390_VM_TOD_EXT 2
struct kvm_s390_vm_tod_clock {
__u8 epoch_idx;
__u64 tod;
};
/* kvm attributes for KVM_S390_VM_CPU_MODEL */
/* processor related attributes are r/w */
......
......@@ -196,6 +196,7 @@
#define X86_FEATURE_HW_PSTATE ( 7*32+ 8) /* AMD HW-PState */
#define X86_FEATURE_PROC_FEEDBACK ( 7*32+ 9) /* AMD ProcFeedbackInterface */
#define X86_FEATURE_SME ( 7*32+10) /* AMD Secure Memory Encryption */
#define X86_FEATURE_INTEL_PPIN ( 7*32+14) /* Intel Processor Inventory Number */
#define X86_FEATURE_INTEL_PT ( 7*32+15) /* Intel Processor Trace */
......@@ -287,6 +288,7 @@
#define X86_FEATURE_PFTHRESHOLD (15*32+12) /* pause filter threshold */
#define X86_FEATURE_AVIC (15*32+13) /* Virtual Interrupt Controller */
#define X86_FEATURE_V_VMSAVE_VMLOAD (15*32+15) /* Virtual VMSAVE VMLOAD */
#define X86_FEATURE_VGIF (15*32+16) /* Virtual GIF */
/* Intel-defined CPU features, CPUID level 0x00000007:0 (ecx), word 16 */
#define X86_FEATURE_AVX512VBMI (16*32+ 1) /* AVX512 Vector Bit Manipulation instructions*/
......
......@@ -21,11 +21,13 @@
# define DISABLE_K6_MTRR (1<<(X86_FEATURE_K6_MTRR & 31))
# define DISABLE_CYRIX_ARR (1<<(X86_FEATURE_CYRIX_ARR & 31))
# define DISABLE_CENTAUR_MCR (1<<(X86_FEATURE_CENTAUR_MCR & 31))
# define DISABLE_PCID 0
#else
# define DISABLE_VME 0
# define DISABLE_K6_MTRR 0
# define DISABLE_CYRIX_ARR 0
# define DISABLE_CENTAUR_MCR 0
# define DISABLE_PCID (1<<(X86_FEATURE_PCID & 31))
#endif /* CONFIG_X86_64 */
#ifdef CONFIG_X86_INTEL_MEMORY_PROTECTION_KEYS
......@@ -49,7 +51,7 @@
#define DISABLED_MASK1 0
#define DISABLED_MASK2 0
#define DISABLED_MASK3 (DISABLE_CYRIX_ARR|DISABLE_CENTAUR_MCR|DISABLE_K6_MTRR)
#define DISABLED_MASK4 0
#define DISABLED_MASK4 (DISABLE_PCID)
#define DISABLED_MASK5 0
#define DISABLED_MASK6 0
#define DISABLED_MASK7 0
......
#ifndef _ASM_GENERIC_HUGETLB_ENCODE_H_
#define _ASM_GENERIC_HUGETLB_ENCODE_H_
/*
* Several system calls take a flag to request "hugetlb" huge pages.
* Without further specification, these system calls will use the
* system's default huge page size. If a system supports multiple
* huge page sizes, the desired huge page size can be specified in
* bits [26:31] of the flag arguments. The value in these 6 bits
* will encode the log2 of the huge page size.
*
* The following definitions are associated with this huge page size
* encoding in flag arguments. System call specific header files
* that use this encoding should include this file. They can then
* provide definitions based on these with their own specific prefix.
* for example:
* #define MAP_HUGE_SHIFT HUGETLB_FLAG_ENCODE_SHIFT
*/
#define HUGETLB_FLAG_ENCODE_SHIFT 26
#define HUGETLB_FLAG_ENCODE_MASK 0x3f
#define HUGETLB_FLAG_ENCODE_64KB (16 << HUGETLB_FLAG_ENCODE_SHIFT)
#define HUGETLB_FLAG_ENCODE_512KB (19 << HUGETLB_FLAG_ENCODE_SHIFT)
#define HUGETLB_FLAG_ENCODE_1MB (20 << HUGETLB_FLAG_ENCODE_SHIFT)
#define HUGETLB_FLAG_ENCODE_2MB (21 << HUGETLB_FLAG_ENCODE_SHIFT)
#define HUGETLB_FLAG_ENCODE_8MB (23 << HUGETLB_FLAG_ENCODE_SHIFT)
#define HUGETLB_FLAG_ENCODE_16MB (24 << HUGETLB_FLAG_ENCODE_SHIFT)
#define HUGETLB_FLAG_ENCODE_256MB (28 << HUGETLB_FLAG_ENCODE_SHIFT)
#define HUGETLB_FLAG_ENCODE_1GB (30 << HUGETLB_FLAG_ENCODE_SHIFT)
#define HUGETLB_FLAG_ENCODE_2GB (31 << HUGETLB_FLAG_ENCODE_SHIFT)
#define HUGETLB_FLAG_ENCODE_16GB (34 << HUGETLB_FLAG_ENCODE_SHIFT)
#endif /* _ASM_GENERIC_HUGETLB_ENCODE_H_ */
......@@ -700,6 +700,7 @@ struct drm_prime_handle {
struct drm_syncobj_create {
__u32 handle;
#define DRM_SYNCOBJ_CREATE_SIGNALED (1 << 0)
__u32 flags;
};
......@@ -718,6 +719,24 @@ struct drm_syncobj_handle {
__u32 pad;
};
#define DRM_SYNCOBJ_WAIT_FLAGS_WAIT_ALL (1 << 0)
#define DRM_SYNCOBJ_WAIT_FLAGS_WAIT_FOR_SUBMIT (1 << 1)
struct drm_syncobj_wait {
__u64 handles;
/* absolute timeout */
__s64 timeout_nsec;
__u32 count_handles;
__u32 flags;
__u32 first_signaled; /* only valid when not waiting all */
__u32 pad;
};
struct drm_syncobj_array {
__u64 handles;
__u32 count_handles;
__u32 pad;
};
#if defined(__cplusplus)
}
#endif
......@@ -840,6 +859,9 @@ extern "C" {
#define DRM_IOCTL_SYNCOBJ_DESTROY DRM_IOWR(0xC0, struct drm_syncobj_destroy)
#define DRM_IOCTL_SYNCOBJ_HANDLE_TO_FD DRM_IOWR(0xC1, struct drm_syncobj_handle)
#define DRM_IOCTL_SYNCOBJ_FD_TO_HANDLE DRM_IOWR(0xC2, struct drm_syncobj_handle)
#define DRM_IOCTL_SYNCOBJ_WAIT DRM_IOWR(0xC3, struct drm_syncobj_wait)
#define DRM_IOCTL_SYNCOBJ_RESET DRM_IOWR(0xC4, struct drm_syncobj_array)
#define DRM_IOCTL_SYNCOBJ_SIGNAL DRM_IOWR(0xC5, struct drm_syncobj_array)
/**
* Device specific ioctls should only be in their respective headers
......
......@@ -260,6 +260,8 @@ typedef struct _drm_i915_sarea {
#define DRM_I915_GEM_CONTEXT_GETPARAM 0x34
#define DRM_I915_GEM_CONTEXT_SETPARAM 0x35
#define DRM_I915_PERF_OPEN 0x36
#define DRM_I915_PERF_ADD_CONFIG 0x37
#define DRM_I915_PERF_REMOVE_CONFIG 0x38
#define DRM_IOCTL_I915_INIT DRM_IOW( DRM_COMMAND_BASE + DRM_I915_INIT, drm_i915_init_t)
#define DRM_IOCTL_I915_FLUSH DRM_IO ( DRM_COMMAND_BASE + DRM_I915_FLUSH)
......@@ -315,6 +317,8 @@ typedef struct _drm_i915_sarea {
#define DRM_IOCTL_I915_GEM_CONTEXT_GETPARAM DRM_IOWR (DRM_COMMAND_BASE + DRM_I915_GEM_CONTEXT_GETPARAM, struct drm_i915_gem_context_param)
#define DRM_IOCTL_I915_GEM_CONTEXT_SETPARAM DRM_IOWR (DRM_COMMAND_BASE + DRM_I915_GEM_CONTEXT_SETPARAM, struct drm_i915_gem_context_param)
#define DRM_IOCTL_I915_PERF_OPEN DRM_IOW(DRM_COMMAND_BASE + DRM_I915_PERF_OPEN, struct drm_i915_perf_open_param)
#define DRM_IOCTL_I915_PERF_ADD_CONFIG DRM_IOW(DRM_COMMAND_BASE + DRM_I915_PERF_ADD_CONFIG, struct drm_i915_perf_oa_config)
#define DRM_IOCTL_I915_PERF_REMOVE_CONFIG DRM_IOW(DRM_COMMAND_BASE + DRM_I915_PERF_REMOVE_CONFIG, __u64)
/* Allow drivers to submit batchbuffers directly to hardware, relying
* on the security mechanisms provided by hardware.
......@@ -431,6 +435,11 @@ typedef struct drm_i915_irq_wait {
*/
#define I915_PARAM_HAS_EXEC_BATCH_FIRST 48
/* Query whether DRM_I915_GEM_EXECBUFFER2 supports supplying an array of
* drm_i915_gem_exec_fence structures. See I915_EXEC_FENCE_ARRAY.
*/
#define I915_PARAM_HAS_EXEC_FENCE_ARRAY 49
typedef struct drm_i915_getparam {
__s32 param;
/*
......@@ -812,6 +821,17 @@ struct drm_i915_gem_exec_object2 {
__u64 rsvd2;
};
struct drm_i915_gem_exec_fence {
/**
* User's handle for a drm_syncobj to wait on or signal.
*/
__u32 handle;
#define I915_EXEC_FENCE_WAIT (1<<0)
#define I915_EXEC_FENCE_SIGNAL (1<<1)
__u32 flags;
};
struct drm_i915_gem_execbuffer2 {
/**
* List of gem_exec_object2 structs
......@@ -826,7 +846,11 @@ struct drm_i915_gem_execbuffer2 {
__u32 DR1;
__u32 DR4;
__u32 num_cliprects;
/** This is a struct drm_clip_rect *cliprects */
/**
* This is a struct drm_clip_rect *cliprects if I915_EXEC_FENCE_ARRAY
* is not set. If I915_EXEC_FENCE_ARRAY is set, then this is a
* struct drm_i915_gem_exec_fence *fences.
*/
__u64 cliprects_ptr;
#define I915_EXEC_RING_MASK (7<<0)
#define I915_EXEC_DEFAULT (0<<0)
......@@ -927,7 +951,14 @@ struct drm_i915_gem_execbuffer2 {
* element).
*/
#define I915_EXEC_BATCH_FIRST (1<<18)
#define __I915_EXEC_UNKNOWN_FLAGS (-(I915_EXEC_BATCH_FIRST<<1))
/* Setting I915_FENCE_ARRAY implies that num_cliprects and cliprects_ptr
* define an array of i915_gem_exec_fence structures which specify a set of
* dma fences to wait upon or signal.
*/
#define I915_EXEC_FENCE_ARRAY (1<<19)
#define __I915_EXEC_UNKNOWN_FLAGS (-(I915_EXEC_FENCE_ARRAY<<1))
#define I915_EXEC_CONTEXT_ID_MASK (0xffffffff)
#define i915_execbuffer2_set_context_id(eb2, context) \
......@@ -1467,6 +1498,22 @@ enum drm_i915_perf_record_type {
DRM_I915_PERF_RECORD_MAX /* non-ABI */
};
/**
* Structure to upload perf dynamic configuration into the kernel.
*/
struct drm_i915_perf_oa_config {
/** String formatted like "%08x-%04x-%04x-%04x-%012x" */
char uuid[36];
__u32 n_mux_regs;
__u32 n_boolean_regs;
__u32 n_flex_regs;
__u64 __user mux_regs_ptr;
__u64 __user boolean_regs_ptr;
__u64 __user flex_regs_ptr;
};
#if defined(__cplusplus)
}
#endif
......
......@@ -143,12 +143,6 @@ enum bpf_attach_type {
#define MAX_BPF_ATTACH_TYPE __MAX_BPF_ATTACH_TYPE
enum bpf_sockmap_flags {
BPF_SOCKMAP_UNSPEC,
BPF_SOCKMAP_STRPARSER,
__MAX_BPF_SOCKMAP_FLAG
};
/* If BPF_F_ALLOW_OVERRIDE flag is used in BPF_PROG_ATTACH command
* to the given target_fd cgroup the descendent cgroup will be able to
* override effective bpf program that was inherited from this cgroup
......@@ -368,9 +362,20 @@ union bpf_attr {
* int bpf_redirect(ifindex, flags)
* redirect to another netdev
* @ifindex: ifindex of the net device
* @flags: bit 0 - if set, redirect to ingress instead of egress
* @flags:
* cls_bpf:
* bit 0 - if set, redirect to ingress instead of egress
* other bits - reserved
* Return: TC_ACT_REDIRECT
* xdp_bpf:
* all bits - reserved
* Return: cls_bpf: TC_ACT_REDIRECT on success or TC_ACT_SHOT on error
* xdp_bfp: XDP_REDIRECT on success or XDP_ABORT on error
* int bpf_redirect_map(map, key, flags)
* redirect to endpoint in map
* @map: pointer to dev map
* @key: index in map to lookup
* @flags: --
* Return: XDP_REDIRECT on success or XDP_ABORT on error
*
* u32 bpf_get_route_realm(skb)
* retrieve a dst's tclassid
......@@ -632,7 +637,7 @@ union bpf_attr {
FN(skb_adjust_room), \
FN(redirect_map), \
FN(sk_redirect_map), \
FN(sock_map_update),
FN(sock_map_update), \
/* integer value in 'imm' field of BPF_CALL instruction selects which helper
* function eBPF program intends to call
......@@ -753,20 +758,23 @@ struct bpf_sock {
__u32 family;
__u32 type;
__u32 protocol;
__u32 mark;
__u32 priority;
};
#define XDP_PACKET_HEADROOM 256
/* User return codes for XDP prog type.
* A valid XDP program must return one of these defined values. All other
* return codes are reserved for future use. Unknown return codes will result
* in packet drop.
* return codes are reserved for future use. Unknown return codes will
* result in packet drops and a warning via bpf_warn_invalid_xdp_action().
*/
enum xdp_action {
XDP_ABORTED = 0,
XDP_DROP,
XDP_PASS,
XDP_TX,
XDP_REDIRECT,
};
/* user accessible metadata for XDP packet hook
......
......@@ -711,7 +711,8 @@ struct kvm_ppc_one_seg_page_size {
struct kvm_ppc_smmu_info {
__u64 flags;
__u32 slb_size;
__u32 pad;
__u16 data_keys; /* # storage keys supported for data */
__u16 instr_keys; /* # storage keys supported for instructions */
struct kvm_ppc_one_seg_page_size sps[KVM_PPC_PAGE_SIZES_MAX_SZ];
};
......
#ifndef _UAPI_LINUX_MMAN_H
#define _UAPI_LINUX_MMAN_H
#include <uapi/asm/mman.h>
#include <asm/mman.h>
#include <asm-generic/hugetlb_encode.h>
#define MREMAP_MAYMOVE 1
#define MREMAP_FIXED 2
......@@ -10,4 +11,25 @@
#define OVERCOMMIT_ALWAYS 1
#define OVERCOMMIT_NEVER 2
/*
* Huge page size encoding when MAP_HUGETLB is specified, and a huge page
* size other than the default is desired. See hugetlb_encode.h.
* All known huge page size encodings are provided here. It is the
* responsibility of the application to know which sizes are supported on
* the running system. See mmap(2) man page for details.
*/
#define MAP_HUGE_SHIFT HUGETLB_FLAG_ENCODE_SHIFT
#define MAP_HUGE_MASK HUGETLB_FLAG_ENCODE_MASK
#define MAP_HUGE_64KB HUGETLB_FLAG_ENCODE_64KB
#define MAP_HUGE_512KB HUGETLB_FLAG_ENCODE_512KB
#define MAP_HUGE_1MB HUGETLB_FLAG_ENCODE_1MB
#define MAP_HUGE_2MB HUGETLB_FLAG_ENCODE_2MB
#define MAP_HUGE_8MB HUGETLB_FLAG_ENCODE_8MB
#define MAP_HUGE_16MB HUGETLB_FLAG_ENCODE_16MB
#define MAP_HUGE_256MB HUGETLB_FLAG_ENCODE_256MB
#define MAP_HUGE_1GB HUGETLB_FLAG_ENCODE_1GB
#define MAP_HUGE_2GB HUGETLB_FLAG_ENCODE_2GB
#define MAP_HUGE_16GB HUGETLB_FLAG_ENCODE_16GB
#endif /* _UAPI_LINUX_MMAN_H */
......@@ -240,6 +240,9 @@ Default is to monitor all CPUS.
--force::
Don't do ownership validation.
--num-thread-synthesize::
The number of threads to run when synthesizing events for existing processes.
By default, the number of threads equals to the number of online CPUs.
INTERACTIVE PROMPTING KEYS
--------------------------
......
libperf-y += header.o
libperf-y += sym-handling.o
libperf-y += kvm-stat.o
libperf-$(CONFIG_DWARF) += dwarf-regs.o
......
/*
* Architecture specific ELF symbol handling and relocation mapping.
*
* Copyright 2017 IBM Corp.
* Author(s): Thomas Richter <tmricht@linux.vnet.ibm.com>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License (version 2 only)
* as published by the Free Software Foundation.
*/
#include "symbol.h"
#ifdef HAVE_LIBELF_SUPPORT
bool elf__needs_adjust_symbols(GElf_Ehdr ehdr)
{
if (ehdr.e_type == ET_EXEC)
return false;
return ehdr.e_type == ET_REL || ehdr.e_type == ET_DYN;
}
void arch__adjust_sym_map_offset(GElf_Sym *sym,
GElf_Shdr *shdr __maybe_unused,
struct map *map)
{
if (map->type == MAP__FUNCTION)
sym->st_value += map->start;
}
#endif
......@@ -1441,7 +1441,8 @@ static int kvm_events_live(struct perf_kvm_stat *kvm,
perf_session__set_id_hdr_size(kvm->session);
ordered_events__set_copy_on_queue(&kvm->session->ordered_events, true);
machine__synthesize_threads(&kvm->session->machines.host, &kvm->opts.target,
kvm->evlist->threads, false, kvm->opts.proc_map_timeout);
kvm->evlist->threads, false,
kvm->opts.proc_map_timeout, 1);
err = kvm_live_open_events(kvm);
if (err)
goto out;
......
......@@ -863,7 +863,7 @@ static int record__synthesize(struct record *rec, bool tail)
err = __machine__synthesize_threads(machine, tool, &opts->target, rec->evlist->threads,
process_synthesized_event, opts->sample_address,
opts->proc_map_timeout);
opts->proc_map_timeout, 1);
out:
return err;
}
......
......@@ -958,8 +958,16 @@ static int __cmd_top(struct perf_top *top)
if (perf_session__register_idle_thread(top->session) < 0)
goto out_delete;
if (top->nr_threads_synthesize > 1)
perf_set_multithreaded();
machine__synthesize_threads(&top->session->machines.host, &opts->target,
top->evlist->threads, false, opts->proc_map_timeout);
top->evlist->threads, false,
opts->proc_map_timeout,
top->nr_threads_synthesize);
if (top->nr_threads_synthesize > 1)
perf_set_singlethreaded();
if (perf_hpp_list.socket) {
ret = perf_env__read_cpu_topology_map(&perf_env);
......@@ -1112,6 +1120,7 @@ int cmd_top(int argc, const char **argv)
},
.max_stack = sysctl_perf_event_max_stack,
.sym_pcnt_filter = 5,
.nr_threads_synthesize = UINT_MAX,
};
struct record_opts *opts = &top.record_opts;
struct target *target = &opts->target;
......@@ -1221,6 +1230,8 @@ int cmd_top(int argc, const char **argv)
OPT_BOOLEAN(0, "hierarchy", &symbol_conf.report_hierarchy,
"Show entries in a hierarchy"),
OPT_BOOLEAN(0, "force", &symbol_conf.force, "don't complain, do it"),
OPT_UINTEGER(0, "num-thread-synthesize", &top.nr_threads_synthesize,
"number of thread to run event synthesize"),
OPT_END()
};
const char * const top_usage[] = {
......
......@@ -1131,7 +1131,7 @@ static int trace__symbols_init(struct trace *trace, struct perf_evlist *evlist)
err = __machine__synthesize_threads(trace->host, &trace->tool, &trace->opts.target,
evlist->threads, trace__tool_process, false,
trace->opts.proc_map_timeout);
trace->opts.proc_map_timeout, 1);
if (err)
symbol__exit();
......
......@@ -166,7 +166,7 @@ static int run_dir(const char *d, const char *perf)
snprintf(cmd, 3*PATH_MAX, PYTHON " %s/attr.py -d %s/attr/ -p %s %.*s",
d, d, perf, vcnt, v);
return system(cmd);
return system(cmd) ? TEST_FAIL : TEST_OK;
}
int test__attr(struct test *test __maybe_unused, int subtest __maybe_unused)
......
......@@ -237,6 +237,7 @@ class Test(object):
# events in result. Fail if there's not any.
for exp_name, exp_event in expect.items():
exp_list = []
res_event = {}
log.debug(" matching [%s]" % exp_name)
for res_name, res_event in result.items():
log.debug(" to [%s]" % res_name)
......@@ -252,6 +253,9 @@ class Test(object):
if not exp_list:
if exp_event.optional():
log.debug(" %s does not match, but is optional" % exp_name)
else:
if not res_event:
log.debug(" res_event is empty");
else:
exp_event.diff(res_event)
raise Fail(self, 'match failure');
......
......@@ -23,7 +23,7 @@ comm=1
freq=1
inherit_stat=0
enable_on_exec=1
task=0
task=1
watermark=0
precise_ip=0|1|2|3
mmap_data=0
......
......@@ -17,5 +17,6 @@ sample_type=327
read_format=4
mmap=0
comm=0
task=0
enable_on_exec=0
disabled=0
......@@ -23,7 +23,7 @@ sample_type=343
# PERF_FORMAT_ID | PERF_FORMAT_GROUP
read_format=12
task=0
mmap=0
comm=0
enable_on_exec=0
......
......@@ -18,5 +18,6 @@ sample_type=327
read_format=4
mmap=0
comm=0
task=0
enable_on_exec=0
disabled=0
......@@ -6,6 +6,7 @@ ret = 1
[event-1:base-stat]
fd=1
group_fd=-1
read_format=3|15
[event-2:base-stat]
fd=2
......@@ -13,3 +14,4 @@ group_fd=1
config=1
disabled=0
enable_on_exec=0
read_format=3|15
......@@ -6,6 +6,7 @@ ret = 1
[event-1:base-stat]
fd=1
group_fd=-1
read_format=3|15
[event-2:base-stat]
fd=2
......@@ -13,3 +14,4 @@ group_fd=1
config=1
disabled=0
enable_on_exec=0
read_format=3|15
......@@ -131,7 +131,7 @@ static int synth_all(struct machine *machine)
{
return perf_event__synthesize_threads(NULL,
perf_event__process,
machine, 0, 500);
machine, 0, 500, 1);
}
static int synth_process(struct machine *machine)
......
......@@ -65,8 +65,6 @@ static int parse_callchain_mode(const char *value)
callchain_param.mode = CHAIN_FOLDED;
return 0;
}
pr_err("Invalid callchain mode: %s\n", value);
return -1;
}
......@@ -82,8 +80,6 @@ static int parse_callchain_order(const char *value)
callchain_param.order_set = true;
return 0;
}
pr_err("Invalid callchain order: %s\n", value);
return -1;
}
......@@ -105,8 +101,6 @@ static int parse_callchain_sort_key(const char *value)
callchain_param.branch_callstack = 1;
return 0;
}
pr_err("Invalid callchain sort key: %s\n", value);
return -1;
}
......@@ -124,8 +118,6 @@ static int parse_callchain_value(const char *value)
callchain_param.value = CCVAL_COUNT;
return 0;
}
pr_err("Invalid callchain config key: %s\n", value);
return -1;
}
......@@ -319,12 +311,27 @@ int perf_callchain_config(const char *var, const char *value)
return ret;
}
if (!strcmp(var, "print-type"))
return parse_callchain_mode(value);
if (!strcmp(var, "order"))
return parse_callchain_order(value);
if (!strcmp(var, "sort-key"))
return parse_callchain_sort_key(value);
if (!strcmp(var, "print-type")){
int ret;
ret = parse_callchain_mode(value);
if (ret == -1)
pr_err("Invalid callchain mode: %s\n", value);
return ret;
}
if (!strcmp(var, "order")){
int ret;
ret = parse_callchain_order(value);
if (ret == -1)
pr_err("Invalid callchain order: %s\n", value);
return ret;
}
if (!strcmp(var, "sort-key")){
int ret;
ret = parse_callchain_sort_key(value);
if (ret == -1)
pr_err("Invalid callchain sort key: %s\n", value);
return ret;
}
if (!strcmp(var, "threshold")) {
callchain_param.min_percent = strtod(value, &endptr);
if (value == endptr) {
......
......@@ -5,6 +5,7 @@
#include <stdio.h>
#include <string.h>
#include <linux/refcount.h>
#include "rwsem.h"
struct comm_str {
char *str;
......@@ -14,6 +15,7 @@ struct comm_str {
/* Should perhaps be moved to struct machine */
static struct rb_root comm_str_root;
static struct rw_semaphore comm_str_lock = {.lock = PTHREAD_RWLOCK_INITIALIZER,};
static struct comm_str *comm_str__get(struct comm_str *cs)
{
......@@ -25,7 +27,9 @@ static struct comm_str *comm_str__get(struct comm_str *cs)
static void comm_str__put(struct comm_str *cs)
{
if (cs && refcount_dec_and_test(&cs->refcnt)) {
down_write(&comm_str_lock);
rb_erase(&cs->rb_node, &comm_str_root);
up_write(&comm_str_lock);
zfree(&cs->str);
free(cs);
}
......@@ -50,7 +54,8 @@ static struct comm_str *comm_str__alloc(const char *str)
return cs;
}
static struct comm_str *comm_str__findnew(const char *str, struct rb_root *root)
static
struct comm_str *__comm_str__findnew(const char *str, struct rb_root *root)
{
struct rb_node **p = &root->rb_node;
struct rb_node *parent = NULL;
......@@ -81,6 +86,17 @@ static struct comm_str *comm_str__findnew(const char *str, struct rb_root *root)
return new;
}
static struct comm_str *comm_str__findnew(const char *str, struct rb_root *root)
{
struct comm_str *cs;
down_write(&comm_str_lock);
cs = __comm_str__findnew(str, root);
up_write(&comm_str_lock);
return cs;
}
struct comm *comm__new(const char *str, u64 timestamp, bool exec)
{
struct comm *comm = zalloc(sizeof(*comm));
......
......@@ -678,23 +678,21 @@ int perf_event__synthesize_thread_map(struct perf_tool *tool,
return err;
}
int perf_event__synthesize_threads(struct perf_tool *tool,
static int __perf_event__synthesize_threads(struct perf_tool *tool,
perf_event__handler_t process,
struct machine *machine,
bool mmap_data,
unsigned int proc_map_timeout)
unsigned int proc_map_timeout,
struct dirent **dirent,
int start,
int num)
{
union perf_event *comm_event, *mmap_event, *fork_event;
union perf_event *namespaces_event;
char proc_path[PATH_MAX];
struct dirent **dirent;
int err = -1;
char *end;
pid_t pid;
int n, i;
if (machine__is_default_guest(machine))
return 0;
int i;
comm_event = malloc(sizeof(comm_event->comm) + machine->id_hdr_size);
if (comm_event == NULL)
......@@ -714,19 +712,14 @@ int perf_event__synthesize_threads(struct perf_tool *tool,
if (namespaces_event == NULL)
goto out_free_fork;
snprintf(proc_path, sizeof(proc_path), "%s/proc", machine->root_dir);
n = scandir(proc_path, &dirent, 0, alphasort);
if (n < 0)
goto out_free_namespaces;
for (i = 0; i < n; i++) {
for (i = start; i < start + num; i++) {
if (!isdigit(dirent[i]->d_name[0]))
continue;
pid = (pid_t)strtol(dirent[i]->d_name, &end, 10);
/* only interested in proper numerical dirents */
if (!*end) {
if (*end)
continue;
/*
* We may race with exiting thread, so don't stop just because
* one thread couldn't be synthesized.
......@@ -736,12 +729,8 @@ int perf_event__synthesize_threads(struct perf_tool *tool,
tool, machine, mmap_data,
proc_map_timeout);
}
free(dirent[i]);
}
free(dirent);
err = 0;
out_free_namespaces:
free(namespaces_event);
out_free_fork:
free(fork_event);
......@@ -753,6 +742,118 @@ int perf_event__synthesize_threads(struct perf_tool *tool,
return err;
}
struct synthesize_threads_arg {
struct perf_tool *tool;
perf_event__handler_t process;
struct machine *machine;
bool mmap_data;
unsigned int proc_map_timeout;
struct dirent **dirent;
int num;
int start;
};
static void *synthesize_threads_worker(void *arg)
{
struct synthesize_threads_arg *args = arg;
__perf_event__synthesize_threads(args->tool, args->process,
args->machine, args->mmap_data,
args->proc_map_timeout, args->dirent,
args->start, args->num);
return NULL;
}
int perf_event__synthesize_threads(struct perf_tool *tool,
perf_event__handler_t process,
struct machine *machine,
bool mmap_data,
unsigned int proc_map_timeout,
unsigned int nr_threads_synthesize)
{
struct synthesize_threads_arg *args = NULL;
pthread_t *synthesize_threads = NULL;
char proc_path[PATH_MAX];
struct dirent **dirent;
int num_per_thread;
int m, n, i, j;
int thread_nr;
int base = 0;
int err = -1;
if (machine__is_default_guest(machine))
return 0;
snprintf(proc_path, sizeof(proc_path), "%s/proc", machine->root_dir);
n = scandir(proc_path, &dirent, 0, alphasort);
if (n < 0)
return err;
if (nr_threads_synthesize == UINT_MAX)
thread_nr = sysconf(_SC_NPROCESSORS_ONLN);
else
thread_nr = nr_threads_synthesize;
if (thread_nr <= 1) {
err = __perf_event__synthesize_threads(tool, process,
machine, mmap_data,
proc_map_timeout,
dirent, base, n);
goto free_dirent;
}
if (thread_nr > n)
thread_nr = n;
synthesize_threads = calloc(sizeof(pthread_t), thread_nr);
if (synthesize_threads == NULL)
goto free_dirent;
args = calloc(sizeof(*args), thread_nr);
if (args == NULL)
goto free_threads;
num_per_thread = n / thread_nr;
m = n % thread_nr;
for (i = 0; i < thread_nr; i++) {
args[i].tool = tool;
args[i].process = process;
args[i].machine = machine;
args[i].mmap_data = mmap_data;
args[i].proc_map_timeout = proc_map_timeout;
args[i].dirent = dirent;
}
for (i = 0; i < m; i++) {
args[i].num = num_per_thread + 1;
args[i].start = i * args[i].num;
}
if (i != 0)
base = args[i-1].start + args[i-1].num;
for (j = i; j < thread_nr; j++) {
args[j].num = num_per_thread;
args[j].start = base + (j - i) * args[i].num;
}
for (i = 0; i < thread_nr; i++) {
if (pthread_create(&synthesize_threads[i], NULL,
synthesize_threads_worker, &args[i]))
goto out_join;
}
err = 0;
out_join:
for (i = 0; i < thread_nr; i++)
pthread_join(synthesize_threads[i], NULL);
free(args);
free_threads:
free(synthesize_threads);
free_dirent:
for (i = 0; i < n; i++)
free(dirent[i]);
free(dirent);
return err;
}
struct process_symbol_args {
const char *name;
u64 start;
......
......@@ -680,7 +680,8 @@ int perf_event__synthesize_cpu_map(struct perf_tool *tool,
int perf_event__synthesize_threads(struct perf_tool *tool,
perf_event__handler_t process,
struct machine *machine, bool mmap_data,
unsigned int proc_map_timeout);
unsigned int proc_map_timeout,
unsigned int nr_threads_synthesize);
int perf_event__synthesize_kernel_mmap(struct perf_tool *tool,
perf_event__handler_t process,
struct machine *machine);
......
......@@ -271,12 +271,17 @@ struct perf_evsel *perf_evsel__new_idx(struct perf_event_attr *attr, int idx)
return evsel;
}
static bool perf_event_can_profile_kernel(void)
{
return geteuid() == 0 || perf_event_paranoid() == -1;
}
struct perf_evsel *perf_evsel__new_cycles(bool precise)
{
struct perf_event_attr attr = {
.type = PERF_TYPE_HARDWARE,
.config = PERF_COUNT_HW_CPU_CYCLES,
.exclude_kernel = geteuid() != 0,
.exclude_kernel = !perf_event_can_profile_kernel(),
};
struct perf_evsel *evsel;
......
......@@ -2218,12 +2218,16 @@ int machines__for_each_thread(struct machines *machines,
int __machine__synthesize_threads(struct machine *machine, struct perf_tool *tool,
struct target *target, struct thread_map *threads,
perf_event__handler_t process, bool data_mmap,
unsigned int proc_map_timeout)
unsigned int proc_map_timeout,
unsigned int nr_threads_synthesize)
{
if (target__has_task(target))
return perf_event__synthesize_thread_map(tool, threads, process, machine, data_mmap, proc_map_timeout);
else if (target__has_cpu(target))
return perf_event__synthesize_threads(tool, process, machine, data_mmap, proc_map_timeout);
return perf_event__synthesize_threads(tool, process,
machine, data_mmap,
proc_map_timeout,
nr_threads_synthesize);
/* command specified */
return 0;
}
......
......@@ -257,15 +257,18 @@ int machines__for_each_thread(struct machines *machines,
int __machine__synthesize_threads(struct machine *machine, struct perf_tool *tool,
struct target *target, struct thread_map *threads,
perf_event__handler_t process, bool data_mmap,
unsigned int proc_map_timeout);
unsigned int proc_map_timeout,
unsigned int nr_threads_synthesize);
static inline
int machine__synthesize_threads(struct machine *machine, struct target *target,
struct thread_map *threads, bool data_mmap,
unsigned int proc_map_timeout)
unsigned int proc_map_timeout,
unsigned int nr_threads_synthesize)
{
return __machine__synthesize_threads(machine, NULL, target, threads,
perf_event__process, data_mmap,
proc_map_timeout);
proc_map_timeout,
nr_threads_synthesize);
}
pid_t machine__get_current_tid(struct machine *machine, int cpu);
......
......@@ -810,12 +810,6 @@ static u64 ref_reloc(struct kmap *kmap)
void __weak arch__sym_update(struct symbol *s __maybe_unused,
GElf_Sym *sym __maybe_unused) { }
void __weak arch__adjust_sym_map_offset(GElf_Sym *sym, GElf_Shdr *shdr,
struct map *map __maybe_unused)
{
sym->st_value -= shdr->sh_addr - shdr->sh_offset;
}
int dso__load_sym(struct dso *dso, struct map *map, struct symsrc *syms_ss,
struct symsrc *runtime_ss, int kmodule)
{
......@@ -996,7 +990,7 @@ int dso__load_sym(struct dso *dso, struct map *map, struct symsrc *syms_ss,
/* Adjust symbol to map to file offset */
if (adjust_kernel_syms)
arch__adjust_sym_map_offset(&sym, &shdr, map);
sym.st_value -= shdr.sh_addr - shdr.sh_offset;
if (strcmp(section_name,
(curr_dso->short_name +
......
......@@ -344,9 +344,6 @@ int setup_intlist(struct intlist **list, const char *list_str,
#ifdef HAVE_LIBELF_SUPPORT
bool elf__needs_adjust_symbols(GElf_Ehdr ehdr);
void arch__sym_update(struct symbol *s, GElf_Sym *sym);
void arch__adjust_sym_map_offset(GElf_Sym *sym,
GElf_Shdr *shdr __maybe_unused,
struct map *map __maybe_unused);
#endif
#define SYMBOL_A 0
......
......@@ -15,9 +15,9 @@
#include "syscalltbl.h"
#include <stdlib.h>
#include <linux/compiler.h>
#ifdef HAVE_SYSCALL_TABLE
#include <linux/compiler.h>
#include <string.h>
#include "string2.h"
#include "util.h"
......
......@@ -45,6 +45,8 @@ struct thread *thread__new(pid_t pid, pid_t tid)
thread->cpu = -1;
INIT_LIST_HEAD(&thread->namespaces_list);
INIT_LIST_HEAD(&thread->comm_list);
init_rwsem(&thread->namespaces_lock);
init_rwsem(&thread->comm_lock);
comm_str = malloc(32);
if (!comm_str)
......@@ -83,18 +85,26 @@ void thread__delete(struct thread *thread)
map_groups__put(thread->mg);
thread->mg = NULL;
}
down_write(&thread->namespaces_lock);
list_for_each_entry_safe(namespaces, tmp_namespaces,
&thread->namespaces_list, list) {
list_del(&namespaces->list);
namespaces__free(namespaces);
}
up_write(&thread->namespaces_lock);
down_write(&thread->comm_lock);
list_for_each_entry_safe(comm, tmp_comm, &thread->comm_list, list) {
list_del(&comm->list);
comm__free(comm);
}
up_write(&thread->comm_lock);
unwind__finish_access(thread);
nsinfo__zput(thread->nsinfo);
exit_rwsem(&thread->namespaces_lock);
exit_rwsem(&thread->comm_lock);
free(thread);
}
......@@ -125,7 +135,7 @@ struct namespaces *thread__namespaces(const struct thread *thread)
return list_first_entry(&thread->namespaces_list, struct namespaces, list);
}
int thread__set_namespaces(struct thread *thread, u64 timestamp,
static int __thread__set_namespaces(struct thread *thread, u64 timestamp,
struct namespaces_event *event)
{
struct namespaces *new, *curr = thread__namespaces(thread);
......@@ -149,6 +159,17 @@ int thread__set_namespaces(struct thread *thread, u64 timestamp,
return 0;
}
int thread__set_namespaces(struct thread *thread, u64 timestamp,
struct namespaces_event *event)
{
int ret;
down_write(&thread->namespaces_lock);
ret = __thread__set_namespaces(thread, timestamp, event);
up_write(&thread->namespaces_lock);
return ret;
}
struct comm *thread__comm(const struct thread *thread)
{
if (list_empty(&thread->comm_list))
......@@ -170,8 +191,8 @@ struct comm *thread__exec_comm(const struct thread *thread)
return last;
}
int __thread__set_comm(struct thread *thread, const char *str, u64 timestamp,
bool exec)
static int ____thread__set_comm(struct thread *thread, const char *str,
u64 timestamp, bool exec)
{
struct comm *new, *curr = thread__comm(thread);
......@@ -195,6 +216,17 @@ int __thread__set_comm(struct thread *thread, const char *str, u64 timestamp,
return 0;
}
int __thread__set_comm(struct thread *thread, const char *str, u64 timestamp,
bool exec)
{
int ret;
down_write(&thread->comm_lock);
ret = ____thread__set_comm(thread, str, timestamp, exec);
up_write(&thread->comm_lock);
return ret;
}
int thread__set_comm_from_proc(struct thread *thread)
{
char path[64];
......@@ -212,7 +244,7 @@ int thread__set_comm_from_proc(struct thread *thread)
return err;
}
const char *thread__comm_str(const struct thread *thread)
static const char *__thread__comm_str(const struct thread *thread)
{
const struct comm *comm = thread__comm(thread);
......@@ -222,6 +254,17 @@ const char *thread__comm_str(const struct thread *thread)
return comm__str(comm);
}
const char *thread__comm_str(const struct thread *thread)
{
const char *str;
down_read((struct rw_semaphore *)&thread->comm_lock);
str = __thread__comm_str(thread);
up_read((struct rw_semaphore *)&thread->comm_lock);
return str;
}
/* CHECKME: it should probably better return the max comm len from its comm list */
int thread__comm_len(struct thread *thread)
{
......
......@@ -9,6 +9,7 @@
#include "symbol.h"
#include <strlist.h>
#include <intlist.h>
#include "rwsem.h"
struct thread_stack;
struct unwind_libunwind_ops;
......@@ -29,7 +30,9 @@ struct thread {
int comm_len;
bool dead; /* if set thread has exited */
struct list_head namespaces_list;
struct rw_semaphore namespaces_lock;
struct list_head comm_list;
struct rw_semaphore comm_lock;
u64 db_id;
void *priv;
......
......@@ -37,6 +37,7 @@ struct perf_top {
int sym_pcnt_filter;
const char *sym_filter;
float min_percent;
unsigned int nr_threads_synthesize;
};
#define CONSOLE_CLEAR ""
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment