Commit d8fce2db authored by Linus Torvalds's avatar Linus Torvalds

Merge branch 'perf-urgent-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip

Pull perf fixes from Ingo Molnar:
 "Mostly tooling fixes, but also an uncore PMU driver fix and an uncore
  PMU driver hardware-enablement addition"

* 'perf-urgent-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip:
  perf probe: Fix segfault if passed with ''.
  perf report: Fix -T/--threads option to work again
  perf bench numa: Fix immediate meeting of convergence condition
  perf bench numa: Fixes of --quiet argument
  perf bench futex: Fix hung wakeup tasks after requeueing
  perf probe: Fix bug with global variables handling
  perf top: Fix a segfault when kernel map is restricted.
  tools lib traceevent: Fix build failure on 32-bit arch
  perf kmem: Fix compiles on RHEL6/OL6
  tools lib api: Undefine _FORTIFY_SOURCE before setting it
  perf kmem: Consistently use PRIu64 for printing u64 values
  perf trace: Disable events and drain events when forked workload ends
  perf trace: Enable events when doing system wide tracing and starting a workload
  perf/x86/intel/uncore: Move PCI IDs for IMC to uncore driver
  perf/x86/intel/uncore: Add support for Intel Haswell ULT (lower power Mobile Processor) IMC uncore PMUs
  perf/x86/intel: Add cpu_(prepare|starting|dying) for core_pmu
parents 02f0f572 74f40c1f
......@@ -2533,34 +2533,6 @@ ssize_t intel_event_sysfs_show(char *page, u64 config)
return x86_event_sysfs_show(page, config, event);
}
static __initconst const struct x86_pmu core_pmu = {
.name = "core",
.handle_irq = x86_pmu_handle_irq,
.disable_all = x86_pmu_disable_all,
.enable_all = core_pmu_enable_all,
.enable = core_pmu_enable_event,
.disable = x86_pmu_disable_event,
.hw_config = x86_pmu_hw_config,
.schedule_events = x86_schedule_events,
.eventsel = MSR_ARCH_PERFMON_EVENTSEL0,
.perfctr = MSR_ARCH_PERFMON_PERFCTR0,
.event_map = intel_pmu_event_map,
.max_events = ARRAY_SIZE(intel_perfmon_event_map),
.apic = 1,
/*
* Intel PMCs cannot be accessed sanely above 32 bit width,
* so we install an artificial 1<<31 period regardless of
* the generic event period:
*/
.max_period = (1ULL << 31) - 1,
.get_event_constraints = intel_get_event_constraints,
.put_event_constraints = intel_put_event_constraints,
.event_constraints = intel_core_event_constraints,
.guest_get_msrs = core_guest_get_msrs,
.format_attrs = intel_arch_formats_attr,
.events_sysfs_show = intel_event_sysfs_show,
};
struct intel_shared_regs *allocate_shared_regs(int cpu)
{
struct intel_shared_regs *regs;
......@@ -2743,6 +2715,44 @@ static struct attribute *intel_arch3_formats_attr[] = {
NULL,
};
static __initconst const struct x86_pmu core_pmu = {
.name = "core",
.handle_irq = x86_pmu_handle_irq,
.disable_all = x86_pmu_disable_all,
.enable_all = core_pmu_enable_all,
.enable = core_pmu_enable_event,
.disable = x86_pmu_disable_event,
.hw_config = x86_pmu_hw_config,
.schedule_events = x86_schedule_events,
.eventsel = MSR_ARCH_PERFMON_EVENTSEL0,
.perfctr = MSR_ARCH_PERFMON_PERFCTR0,
.event_map = intel_pmu_event_map,
.max_events = ARRAY_SIZE(intel_perfmon_event_map),
.apic = 1,
/*
* Intel PMCs cannot be accessed sanely above 32-bit width,
* so we install an artificial 1<<31 period regardless of
* the generic event period:
*/
.max_period = (1ULL<<31) - 1,
.get_event_constraints = intel_get_event_constraints,
.put_event_constraints = intel_put_event_constraints,
.event_constraints = intel_core_event_constraints,
.guest_get_msrs = core_guest_get_msrs,
.format_attrs = intel_arch_formats_attr,
.events_sysfs_show = intel_event_sysfs_show,
/*
* Virtual (or funny metal) CPU can define x86_pmu.extra_regs
* together with PMU version 1 and thus be using core_pmu with
* shared_regs. We need following callbacks here to allocate
* it properly.
*/
.cpu_prepare = intel_pmu_cpu_prepare,
.cpu_starting = intel_pmu_cpu_starting,
.cpu_dying = intel_pmu_cpu_dying,
};
static __initconst const struct x86_pmu intel_pmu = {
.name = "Intel",
.handle_irq = intel_pmu_handle_irq,
......
/* Nehalem/SandBridge/Haswell uncore support */
#include "perf_event_intel_uncore.h"
/* Uncore IMC PCI IDs */
#define PCI_DEVICE_ID_INTEL_SNB_IMC 0x0100
#define PCI_DEVICE_ID_INTEL_IVB_IMC 0x0154
#define PCI_DEVICE_ID_INTEL_IVB_E3_IMC 0x0150
#define PCI_DEVICE_ID_INTEL_HSW_IMC 0x0c00
#define PCI_DEVICE_ID_INTEL_HSW_U_IMC 0x0a04
/* SNB event control */
#define SNB_UNC_CTL_EV_SEL_MASK 0x000000ff
#define SNB_UNC_CTL_UMASK_MASK 0x0000ff00
......@@ -472,6 +479,10 @@ static const struct pci_device_id hsw_uncore_pci_ids[] = {
PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_HSW_IMC),
.driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0),
},
{ /* IMC */
PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_HSW_U_IMC),
.driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0),
},
{ /* end: all zeroes */ },
};
......@@ -502,6 +513,7 @@ static const struct imc_uncore_pci_dev desktop_imc_pci_ids[] = {
IMC_DEV(IVB_IMC, &ivb_uncore_pci_driver), /* 3rd Gen Core processor */
IMC_DEV(IVB_E3_IMC, &ivb_uncore_pci_driver), /* Xeon E3-1200 v2/3rd Gen Core processor */
IMC_DEV(HSW_IMC, &hsw_uncore_pci_driver), /* 4th Gen Core Processor */
IMC_DEV(HSW_U_IMC, &hsw_uncore_pci_driver), /* 4th Gen Core ULT Mobile Processor */
{ /* end marker */ }
};
......
......@@ -2541,10 +2541,6 @@
#define PCI_VENDOR_ID_INTEL 0x8086
#define PCI_DEVICE_ID_INTEL_EESSC 0x0008
#define PCI_DEVICE_ID_INTEL_SNB_IMC 0x0100
#define PCI_DEVICE_ID_INTEL_IVB_IMC 0x0154
#define PCI_DEVICE_ID_INTEL_IVB_E3_IMC 0x0150
#define PCI_DEVICE_ID_INTEL_HSW_IMC 0x0c00
#define PCI_DEVICE_ID_INTEL_PXHD_0 0x0320
#define PCI_DEVICE_ID_INTEL_PXHD_1 0x0321
#define PCI_DEVICE_ID_INTEL_PXH_0 0x0329
......
......@@ -16,7 +16,7 @@ MAKEFLAGS += --no-print-directory
LIBFILE = $(OUTPUT)libapi.a
CFLAGS := $(EXTRA_WARNINGS) $(EXTRA_CFLAGS)
CFLAGS += -ggdb3 -Wall -Wextra -std=gnu99 -Werror -O6 -D_FORTIFY_SOURCE=2 -fPIC
CFLAGS += -ggdb3 -Wall -Wextra -std=gnu99 -Werror -O6 -U_FORTIFY_SOURCE -D_FORTIFY_SOURCE=2 -fPIC
CFLAGS += -D_LARGEFILE64_SOURCE -D_FILE_OFFSET_BITS=64
RM = rm -f
......
......@@ -3865,7 +3865,7 @@ static void print_str_arg(struct trace_seq *s, void *data, int size,
} else if (el_size == 4) {
trace_seq_printf(s, "%u", *(uint32_t *)num);
} else if (el_size == 8) {
trace_seq_printf(s, "%lu", *(uint64_t *)num);
trace_seq_printf(s, "%"PRIu64, *(uint64_t *)num);
} else {
trace_seq_printf(s, "BAD SIZE:%d 0x%x",
el_size, *(uint8_t *)num);
......
......@@ -132,6 +132,9 @@ int bench_futex_requeue(int argc, const char **argv,
if (!fshared)
futex_flag = FUTEX_PRIVATE_FLAG;
if (nrequeue > nthreads)
nrequeue = nthreads;
printf("Run summary [PID %d]: Requeuing %d threads (from [%s] %p to %p), "
"%d at a time.\n\n", getpid(), nthreads,
fshared ? "shared":"private", &futex1, &futex2, nrequeue);
......@@ -161,20 +164,18 @@ int bench_futex_requeue(int argc, const char **argv,
/* Ok, all threads are patiently blocked, start requeueing */
gettimeofday(&start, NULL);
for (nrequeued = 0; nrequeued < nthreads; nrequeued += nrequeue) {
while (nrequeued < nthreads) {
/*
* Do not wakeup any tasks blocked on futex1, allowing
* us to really measure futex_wait functionality.
*/
futex_cmp_requeue(&futex1, 0, &futex2, 0,
nrequeue, futex_flag);
nrequeued += futex_cmp_requeue(&futex1, 0, &futex2, 0,
nrequeue, futex_flag);
}
gettimeofday(&end, NULL);
timersub(&end, &start, &runtime);
if (nrequeued > nthreads)
nrequeued = nthreads;
update_stats(&requeued_stats, nrequeued);
update_stats(&requeuetime_stats, runtime.tv_usec);
......@@ -184,7 +185,7 @@ int bench_futex_requeue(int argc, const char **argv,
}
/* everybody should be blocked on futex2, wake'em up */
nrequeued = futex_wake(&futex2, nthreads, futex_flag);
nrequeued = futex_wake(&futex2, nrequeued, futex_flag);
if (nthreads != nrequeued)
warnx("couldn't wakeup all tasks (%d/%d)", nrequeued, nthreads);
......
......@@ -180,7 +180,7 @@ static const struct option options[] = {
OPT_INTEGER('H', "thp" , &p0.thp, "MADV_NOHUGEPAGE < 0 < MADV_HUGEPAGE"),
OPT_BOOLEAN('c', "show_convergence", &p0.show_convergence, "show convergence details"),
OPT_BOOLEAN('m', "measure_convergence", &p0.measure_convergence, "measure convergence latency"),
OPT_BOOLEAN('q', "quiet" , &p0.show_quiet, "bzero the initial allocations"),
OPT_BOOLEAN('q', "quiet" , &p0.show_quiet, "quiet mode"),
OPT_BOOLEAN('S', "serialize-startup", &p0.serialize_startup,"serialize thread startup"),
/* Special option string parsing callbacks: */
......@@ -828,6 +828,9 @@ static int count_process_nodes(int process_nr)
td = g->threads + task_nr;
node = numa_node_of_cpu(td->curr_cpu);
if (node < 0) /* curr_cpu was likely still -1 */
return 0;
node_present[node] = 1;
}
......@@ -882,6 +885,11 @@ static void calc_convergence_compression(int *strong)
for (p = 0; p < g->p.nr_proc; p++) {
unsigned int nodes = count_process_nodes(p);
if (!nodes) {
*strong = 0;
return;
}
nodes_min = min(nodes, nodes_min);
nodes_max = max(nodes, nodes_max);
}
......@@ -1395,7 +1403,7 @@ static void print_res(const char *name, double val,
if (!name)
name = "main,";
if (g->p.show_quiet)
if (!g->p.show_quiet)
printf(" %-30s %15.3f, %-15s %s\n", name, val, txt_unit, txt_short);
else
printf(" %14.3f %s\n", val, txt_long);
......
......@@ -319,7 +319,7 @@ static int page_stat_cmp(struct page_stat *a, struct page_stat *b)
return 0;
}
static struct page_stat *search_page_alloc_stat(struct page_stat *stat, bool create)
static struct page_stat *search_page_alloc_stat(struct page_stat *pstat, bool create)
{
struct rb_node **node = &page_alloc_tree.rb_node;
struct rb_node *parent = NULL;
......@@ -331,7 +331,7 @@ static struct page_stat *search_page_alloc_stat(struct page_stat *stat, bool cre
parent = *node;
data = rb_entry(*node, struct page_stat, node);
cmp = page_stat_cmp(data, stat);
cmp = page_stat_cmp(data, pstat);
if (cmp < 0)
node = &parent->rb_left;
else if (cmp > 0)
......@@ -345,10 +345,10 @@ static struct page_stat *search_page_alloc_stat(struct page_stat *stat, bool cre
data = zalloc(sizeof(*data));
if (data != NULL) {
data->page = stat->page;
data->order = stat->order;
data->gfp_flags = stat->gfp_flags;
data->migrate_type = stat->migrate_type;
data->page = pstat->page;
data->order = pstat->order;
data->gfp_flags = pstat->gfp_flags;
data->migrate_type = pstat->migrate_type;
rb_link_node(&data->node, parent, node);
rb_insert_color(&data->node, &page_alloc_tree);
......@@ -375,7 +375,7 @@ static int perf_evsel__process_page_alloc_event(struct perf_evsel *evsel,
unsigned int migrate_type = perf_evsel__intval(evsel, sample,
"migratetype");
u64 bytes = kmem_page_size << order;
struct page_stat *stat;
struct page_stat *pstat;
struct page_stat this = {
.order = order,
.gfp_flags = gfp_flags,
......@@ -401,21 +401,21 @@ static int perf_evsel__process_page_alloc_event(struct perf_evsel *evsel,
* This is to find the current page (with correct gfp flags and
* migrate type) at free event.
*/
stat = search_page(page, true);
if (stat == NULL)
pstat = search_page(page, true);
if (pstat == NULL)
return -ENOMEM;
stat->order = order;
stat->gfp_flags = gfp_flags;
stat->migrate_type = migrate_type;
pstat->order = order;
pstat->gfp_flags = gfp_flags;
pstat->migrate_type = migrate_type;
this.page = page;
stat = search_page_alloc_stat(&this, true);
if (stat == NULL)
pstat = search_page_alloc_stat(&this, true);
if (pstat == NULL)
return -ENOMEM;
stat->nr_alloc++;
stat->alloc_bytes += bytes;
pstat->nr_alloc++;
pstat->alloc_bytes += bytes;
order_stats[order][migrate_type]++;
......@@ -428,7 +428,7 @@ static int perf_evsel__process_page_free_event(struct perf_evsel *evsel,
u64 page;
unsigned int order = perf_evsel__intval(evsel, sample, "order");
u64 bytes = kmem_page_size << order;
struct page_stat *stat;
struct page_stat *pstat;
struct page_stat this = {
.order = order,
};
......@@ -441,8 +441,8 @@ static int perf_evsel__process_page_free_event(struct perf_evsel *evsel,
nr_page_frees++;
total_page_free_bytes += bytes;
stat = search_page(page, false);
if (stat == NULL) {
pstat = search_page(page, false);
if (pstat == NULL) {
pr_debug2("missing free at page %"PRIx64" (order: %d)\n",
page, order);
......@@ -453,18 +453,18 @@ static int perf_evsel__process_page_free_event(struct perf_evsel *evsel,
}
this.page = page;
this.gfp_flags = stat->gfp_flags;
this.migrate_type = stat->migrate_type;
this.gfp_flags = pstat->gfp_flags;
this.migrate_type = pstat->migrate_type;
rb_erase(&stat->node, &page_tree);
free(stat);
rb_erase(&pstat->node, &page_tree);
free(pstat);
stat = search_page_alloc_stat(&this, false);
if (stat == NULL)
pstat = search_page_alloc_stat(&this, false);
if (pstat == NULL)
return -ENOENT;
stat->nr_free++;
stat->free_bytes += bytes;
pstat->nr_free++;
pstat->free_bytes += bytes;
return 0;
}
......@@ -640,9 +640,9 @@ static void print_page_summary(void)
nr_page_frees, total_page_free_bytes / 1024);
printf("\n");
printf("%-30s: %'16lu [ %'16"PRIu64" KB ]\n", "Total alloc+freed requests",
printf("%-30s: %'16"PRIu64" [ %'16"PRIu64" KB ]\n", "Total alloc+freed requests",
nr_alloc_freed, (total_alloc_freed_bytes) / 1024);
printf("%-30s: %'16lu [ %'16"PRIu64" KB ]\n", "Total alloc-only requests",
printf("%-30s: %'16"PRIu64" [ %'16"PRIu64" KB ]\n", "Total alloc-only requests",
nr_page_allocs - nr_alloc_freed,
(total_page_alloc_bytes - total_alloc_freed_bytes) / 1024);
printf("%-30s: %'16lu [ %'16"PRIu64" KB ]\n", "Total free-only requests",
......
......@@ -329,7 +329,7 @@ static int perf_evlist__tty_browse_hists(struct perf_evlist *evlist,
fprintf(stdout, "\n\n");
}
if (sort_order == default_sort_order &&
if (sort_order == NULL &&
parent_pattern == default_parent_pattern) {
fprintf(stdout, "#\n# (%s)\n#\n", help);
......
......@@ -733,7 +733,7 @@ static void perf_event__process_sample(struct perf_tool *tool,
"Kernel address maps (/proc/{kallsyms,modules}) are restricted.\n\n"
"Check /proc/sys/kernel/kptr_restrict.\n\n"
"Kernel%s samples will not be resolved.\n",
!RB_EMPTY_ROOT(&al.map->dso->symbols[MAP__FUNCTION]) ?
al.map && !RB_EMPTY_ROOT(&al.map->dso->symbols[MAP__FUNCTION]) ?
" modules" : "");
if (use_browser <= 0)
sleep(5);
......
......@@ -2241,10 +2241,11 @@ static int trace__run(struct trace *trace, int argc, const char **argv)
if (err < 0)
goto out_error_mmap;
if (!target__none(&trace->opts.target))
perf_evlist__enable(evlist);
if (forks)
perf_evlist__start_workload(evlist);
else
perf_evlist__enable(evlist);
trace->multiple_threads = evlist->threads->map[0] == -1 ||
evlist->threads->nr > 1 ||
......@@ -2272,6 +2273,11 @@ static int trace__run(struct trace *trace, int argc, const char **argv)
if (interrupted)
goto out_disable;
if (done && !draining) {
perf_evlist__disable(evlist);
draining = true;
}
}
}
......
......@@ -1084,6 +1084,8 @@ static int parse_perf_probe_point(char *arg, struct perf_probe_event *pev)
*
* TODO:Group name support
*/
if (!arg)
return -EINVAL;
ptr = strpbrk(arg, ";=@+%");
if (ptr && *ptr == '=') { /* Event name */
......
......@@ -578,10 +578,12 @@ static int find_variable(Dwarf_Die *sc_die, struct probe_finder *pf)
/* Search child die for local variables and parameters. */
if (!die_find_variable_at(sc_die, pf->pvar->var, pf->addr, &vr_die)) {
/* Search again in global variables */
if (!die_find_variable_at(&pf->cu_die, pf->pvar->var, 0, &vr_die))
if (!die_find_variable_at(&pf->cu_die, pf->pvar->var,
0, &vr_die)) {
pr_warning("Failed to find '%s' in this function.\n",
pf->pvar->var);
ret = -ENOENT;
}
}
if (ret >= 0)
ret = convert_variable(&vr_die, pf);
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment