Commit 19504828 authored by Linus Torvalds's avatar Linus Torvalds

Merge branch 'perf-urgent-for-linus' of...

Merge branch 'perf-urgent-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/linux-2.6-tip

* 'perf-urgent-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/linux-2.6-tip:
  perf tools: Fix sample size bit operations
  perf tools: Fix omitted mmap data update on remap
  watchdog: Change the default timeout and configure nmi watchdog period based on watchdog_thresh
  watchdog: Disable watchdog when thresh is zero
  watchdog: Only disable/enable watchdog if necessary
  watchdog: Fix rounding bug in get_sample_period()
  perf tools: Propagate event parse error handling
  perf tools: Robustify dynamic sample content fetch
  perf tools: Pre-check sample size before parsing
  perf tools: Move evlist sample helpers to evlist area
  perf tools: Remove junk code in mmap size handling
  perf tools: Check we are able to read the event size on mmap
parents 57d19e80 3cb6d154
......@@ -19,9 +19,9 @@
#include <linux/delay.h>
#ifdef CONFIG_HARDLOCKUP_DETECTOR
u64 hw_nmi_get_sample_period(void)
u64 hw_nmi_get_sample_period(int watchdog_thresh)
{
return (u64)(cpu_khz) * 1000 * 60;
return (u64)(cpu_khz) * 1000 * watchdog_thresh;
}
#endif
......
......@@ -45,11 +45,12 @@ static inline bool trigger_all_cpu_backtrace(void)
#ifdef CONFIG_LOCKUP_DETECTOR
int hw_nmi_is_cpu_stuck(struct pt_regs *);
u64 hw_nmi_get_sample_period(void);
u64 hw_nmi_get_sample_period(int watchdog_thresh);
extern int watchdog_enabled;
extern int watchdog_thresh;
struct ctl_table;
extern int proc_dowatchdog_enabled(struct ctl_table *, int ,
void __user *, size_t *, loff_t *);
extern int proc_dowatchdog(struct ctl_table *, int ,
void __user *, size_t *, loff_t *);
#endif
#endif
......@@ -315,7 +315,6 @@ extern int proc_dowatchdog_thresh(struct ctl_table *table, int write,
void __user *buffer,
size_t *lenp, loff_t *ppos);
extern unsigned int softlockup_panic;
extern int softlockup_thresh;
void lockup_detector_init(void);
#else
static inline void touch_softlockup_watchdog(void)
......
......@@ -730,14 +730,16 @@ static struct ctl_table kern_table[] = {
.data = &watchdog_enabled,
.maxlen = sizeof (int),
.mode = 0644,
.proc_handler = proc_dowatchdog_enabled,
.proc_handler = proc_dowatchdog,
.extra1 = &zero,
.extra2 = &one,
},
{
.procname = "watchdog_thresh",
.data = &softlockup_thresh,
.data = &watchdog_thresh,
.maxlen = sizeof(int),
.mode = 0644,
.proc_handler = proc_dowatchdog_thresh,
.proc_handler = proc_dowatchdog,
.extra1 = &neg_one,
.extra2 = &sixty,
},
......@@ -755,7 +757,9 @@ static struct ctl_table kern_table[] = {
.data = &watchdog_enabled,
.maxlen = sizeof (int),
.mode = 0644,
.proc_handler = proc_dowatchdog_enabled,
.proc_handler = proc_dowatchdog,
.extra1 = &zero,
.extra2 = &one,
},
#endif
#if defined(CONFIG_X86_LOCAL_APIC) && defined(CONFIG_X86)
......
......@@ -28,7 +28,7 @@
#include <linux/perf_event.h>
int watchdog_enabled = 1;
int __read_mostly softlockup_thresh = 60;
int __read_mostly watchdog_thresh = 10;
static DEFINE_PER_CPU(unsigned long, watchdog_touch_ts);
static DEFINE_PER_CPU(struct task_struct *, softlockup_watchdog);
......@@ -91,6 +91,17 @@ static int __init nosoftlockup_setup(char *str)
__setup("nosoftlockup", nosoftlockup_setup);
/* */
/*
 * Hard-lockup warnings should be triggered after just a few seconds. Soft-
 * lockups can have false positives under extreme conditions. So we generally
 * want a higher threshold for soft lockups than for hard lockups. So we couple
 * the thresholds with a factor: we make the soft threshold twice the amount of
 * time the hard threshold is.
 *
 * Returns watchdog_thresh * 2, i.e. the soft-lockup threshold expressed in
 * the same unit as watchdog_thresh.
 */
static int get_softlockup_thresh(void)
{
	return watchdog_thresh * 2;
}
/*
* Returns seconds, approximately. We don't need nanosecond
......@@ -105,12 +116,12 @@ static unsigned long get_timestamp(int this_cpu)
static unsigned long get_sample_period(void)
{
/*
* convert softlockup_thresh from seconds to ns
* convert watchdog_thresh from seconds to ns
* the divide by 5 is to give hrtimer 5 chances to
* increment before the hardlockup detector generates
* a warning
*/
return softlockup_thresh / 5 * NSEC_PER_SEC;
return get_softlockup_thresh() * (NSEC_PER_SEC / 5);
}
/* Commands for resetting the watchdog */
......@@ -182,7 +193,7 @@ static int is_softlockup(unsigned long touch_ts)
unsigned long now = get_timestamp(smp_processor_id());
/* Warn about unreasonable delays: */
if (time_after(now, touch_ts + softlockup_thresh))
if (time_after(now, touch_ts + get_softlockup_thresh()))
return now - touch_ts;
return 0;
......@@ -359,7 +370,7 @@ static int watchdog_nmi_enable(int cpu)
/* Try to register using hardware perf events */
wd_attr = &wd_hw_attr;
wd_attr->sample_period = hw_nmi_get_sample_period();
wd_attr->sample_period = hw_nmi_get_sample_period(watchdog_thresh);
event = perf_event_create_kernel_counter(wd_attr, cpu, NULL, watchdog_overflow_callback);
if (!IS_ERR(event)) {
printk(KERN_INFO "NMI watchdog enabled, takes one hw-pmu counter.\n");
......@@ -501,28 +512,25 @@ static void watchdog_disable_all_cpus(void)
/* sysctl functions */
#ifdef CONFIG_SYSCTL
/*
* proc handler for /proc/sys/kernel/nmi_watchdog
* proc handler for /proc/sys/kernel/nmi_watchdog,watchdog_thresh
*/
int proc_dowatchdog_enabled(struct ctl_table *table, int write,
void __user *buffer, size_t *length, loff_t *ppos)
int proc_dowatchdog(struct ctl_table *table, int write,
void __user *buffer, size_t *lenp, loff_t *ppos)
{
proc_dointvec(table, write, buffer, length, ppos);
int ret;
if (write) {
if (watchdog_enabled)
watchdog_enable_all_cpus();
else
watchdog_disable_all_cpus();
}
return 0;
}
ret = proc_dointvec_minmax(table, write, buffer, lenp, ppos);
if (ret || !write)
goto out;
int proc_dowatchdog_thresh(struct ctl_table *table, int write,
void __user *buffer,
size_t *lenp, loff_t *ppos)
{
return proc_dointvec_minmax(table, write, buffer, lenp, ppos);
if (watchdog_enabled && watchdog_thresh)
watchdog_enable_all_cpus();
else
watchdog_disable_all_cpus();
out:
return ret;
}
#endif /* CONFIG_SYSCTL */
......
......@@ -474,6 +474,7 @@ static int test__basic_mmap(void)
unsigned int nr_events[nsyscalls],
expected_nr_events[nsyscalls], i, j;
struct perf_evsel *evsels[nsyscalls], *evsel;
int sample_size = perf_sample_size(attr.sample_type);
for (i = 0; i < nsyscalls; ++i) {
char name[64];
......@@ -558,7 +559,13 @@ static int test__basic_mmap(void)
goto out_munmap;
}
perf_event__parse_sample(event, attr.sample_type, false, &sample);
err = perf_event__parse_sample(event, attr.sample_type, sample_size,
false, &sample);
if (err) {
pr_err("Can't parse sample, err = %d\n", err);
goto out_munmap;
}
evsel = perf_evlist__id2evsel(evlist, sample.id);
if (evsel == NULL) {
pr_debug("event with id %" PRIu64
......
......@@ -805,9 +805,14 @@ static void perf_session__mmap_read_idx(struct perf_session *self, int idx)
{
struct perf_sample sample;
union perf_event *event;
int ret;
while ((event = perf_evlist__mmap_read(top.evlist, idx)) != NULL) {
perf_session__parse_sample(self, event, &sample);
ret = perf_session__parse_sample(self, event, &sample);
if (ret) {
pr_err("Can't parse sample, err = %d\n", ret);
continue;
}
if (event->header.type == PERF_RECORD_SAMPLE)
perf_event__process_sample(event, &sample, self);
......
......@@ -9,21 +9,21 @@
#include "thread_map.h"
static const char *perf_event__names[] = {
[0] = "TOTAL",
[PERF_RECORD_MMAP] = "MMAP",
[PERF_RECORD_LOST] = "LOST",
[PERF_RECORD_COMM] = "COMM",
[PERF_RECORD_EXIT] = "EXIT",
[PERF_RECORD_THROTTLE] = "THROTTLE",
[PERF_RECORD_UNTHROTTLE] = "UNTHROTTLE",
[PERF_RECORD_FORK] = "FORK",
[PERF_RECORD_READ] = "READ",
[PERF_RECORD_SAMPLE] = "SAMPLE",
[PERF_RECORD_HEADER_ATTR] = "ATTR",
[PERF_RECORD_HEADER_EVENT_TYPE] = "EVENT_TYPE",
[PERF_RECORD_HEADER_TRACING_DATA] = "TRACING_DATA",
[PERF_RECORD_HEADER_BUILD_ID] = "BUILD_ID",
[PERF_RECORD_FINISHED_ROUND] = "FINISHED_ROUND",
[0] = "TOTAL",
[PERF_RECORD_MMAP] = "MMAP",
[PERF_RECORD_LOST] = "LOST",
[PERF_RECORD_COMM] = "COMM",
[PERF_RECORD_EXIT] = "EXIT",
[PERF_RECORD_THROTTLE] = "THROTTLE",
[PERF_RECORD_UNTHROTTLE] = "UNTHROTTLE",
[PERF_RECORD_FORK] = "FORK",
[PERF_RECORD_READ] = "READ",
[PERF_RECORD_SAMPLE] = "SAMPLE",
[PERF_RECORD_HEADER_ATTR] = "ATTR",
[PERF_RECORD_HEADER_EVENT_TYPE] = "EVENT_TYPE",
[PERF_RECORD_HEADER_TRACING_DATA] = "TRACING_DATA",
[PERF_RECORD_HEADER_BUILD_ID] = "BUILD_ID",
[PERF_RECORD_FINISHED_ROUND] = "FINISHED_ROUND",
};
const char *perf_event__name(unsigned int id)
......@@ -35,6 +35,22 @@ const char *perf_event__name(unsigned int id)
return perf_event__names[id];
}
/*
 * perf_sample_size - byte size of the u64 slots selected by a sample type.
 * @sample_type: PERF_SAMPLE_* bit mask.
 *
 * Counts the bits of @sample_type that are also set in PERF_SAMPLE_MASK and
 * returns that count multiplied by sizeof(u64).
 */
int perf_sample_size(u64 sample_type)
{
	u64 mask = sample_type & PERF_SAMPLE_MASK;
	int size = 0;
	int i;

	for (i = 0; i < 64; i++) {
		/*
		 * The probe bit has to be 64 bits wide: shifting a 32-bit
		 * unsigned long by 32 or more is undefined and can make
		 * low bits be counted a second time.
		 */
		if (mask & (1ULL << i))
			size++;
	}

	size *= sizeof(u64);

	return size;
}
static struct perf_sample synth_sample = {
.pid = -1,
.tid = -1,
......
......@@ -56,6 +56,13 @@ struct read_event {
u64 id;
};
/*
 * Sample-type bits that perf_sample_size() takes into account: it ANDs the
 * sample type with this mask and counts one u64 for every bit left set.
 */
#define PERF_SAMPLE_MASK \
(PERF_SAMPLE_IP | PERF_SAMPLE_TID | \
PERF_SAMPLE_TIME | PERF_SAMPLE_ADDR | \
PERF_SAMPLE_ID | PERF_SAMPLE_STREAM_ID | \
PERF_SAMPLE_CPU | PERF_SAMPLE_PERIOD)
struct sample_event {
struct perf_event_header header;
u64 array[];
......@@ -75,6 +82,8 @@ struct perf_sample {
struct ip_callchain *callchain;
};
int perf_sample_size(u64 sample_type);
#define BUILD_ID_SIZE 20
struct build_id_event {
......@@ -178,6 +187,7 @@ int perf_event__preprocess_sample(const union perf_event *self,
const char *perf_event__name(unsigned int id);
int perf_event__parse_sample(const union perf_event *event, u64 type,
bool sample_id_all, struct perf_sample *sample);
int sample_size, bool sample_id_all,
struct perf_sample *sample);
#endif /* __PERF_RECORD_H */
......@@ -459,3 +459,34 @@ int perf_evlist__set_filters(struct perf_evlist *evlist)
return 0;
}
/*
 * Return the sample_type shared by the entries of @evlist.
 *
 * The first non-zero attr.sample_type encountered becomes the reference
 * value; a later entry whose sample_type differs from it ends up in
 * die("non matching sample_type"). A zero value is treated the same as
 * "no reference yet", so entries seen while the reference is still zero
 * are adopted rather than compared.
 *
 * Returns 0 when the list is empty or every entry has a zero sample_type.
 */
u64 perf_evlist__sample_type(struct perf_evlist *evlist)
{
	struct perf_evsel *evsel;
	u64 sample_type = 0;

	list_for_each_entry(evsel, &evlist->entries, node) {
		if (sample_type == 0) {
			sample_type = evsel->attr.sample_type;
			continue;
		}

		if (evsel->attr.sample_type != sample_type)
			die("non matching sample_type");
	}

	return sample_type;
}
/*
 * Return the attr.sample_id_all setting shared by the entries of @evlist.
 *
 * The first entry provides the reference value; any following entry with a
 * different setting ends up in die("non matching sample_id_all").
 *
 * Returns false for an empty list.
 */
bool perf_evlist__sample_id_all(const struct perf_evlist *evlist)
{
	struct perf_evsel *evsel;
	bool sample_id_all = false;
	bool have_reference = false;

	list_for_each_entry(evsel, &evlist->entries, node) {
		if (!have_reference) {
			sample_id_all = evsel->attr.sample_id_all;
			have_reference = true;
			continue;
		}

		if (sample_id_all != evsel->attr.sample_id_all)
			die("non matching sample_id_all");
	}

	return sample_id_all;
}
......@@ -66,4 +66,7 @@ int perf_evlist__create_maps(struct perf_evlist *evlist, pid_t target_pid,
void perf_evlist__delete_maps(struct perf_evlist *evlist);
int perf_evlist__set_filters(struct perf_evlist *evlist);
u64 perf_evlist__sample_type(struct perf_evlist *evlist);
bool perf_evlist__sample_id_all(const struct perf_evlist *evlist);
#endif /* __PERF_EVLIST_H */
......@@ -303,8 +303,20 @@ static int perf_event__parse_id_sample(const union perf_event *event, u64 type,
return 0;
}
/*
 * sample_overlap - check whether a field would run past the end of an event.
 * @event:  event record; event->header.size is taken as its total byte size
 * @offset: start of the field, expected to point inside @event
 * @size:   number of bytes the field occupies
 *
 * Returns true when the @size bytes starting at @offset do not fit inside
 * the header.size bytes starting at @event, false when they do.
 *
 * The check compares byte counts instead of evaluating "offset + size":
 * @size is read from the event data by the callers, and a huge value could
 * otherwise wrap the pointer arithmetic and slip past the test.
 */
static bool sample_overlap(const union perf_event *event,
			   const void *offset, u64 size)
{
	const char *base = (const char *)event;
	const char *field = offset;
	u64 total = event->header.size;
	u64 used;

	/* A field that starts before the event can never fit in it. */
	if (field < base)
		return true;

	/* Bytes of the event that lie in front of the field. */
	used = field - base;
	if (used > total)
		return true;

	return size > total - used;
}
int perf_event__parse_sample(const union perf_event *event, u64 type,
bool sample_id_all, struct perf_sample *data)
int sample_size, bool sample_id_all,
struct perf_sample *data)
{
const u64 *array;
......@@ -319,6 +331,9 @@ int perf_event__parse_sample(const union perf_event *event, u64 type,
array = event->sample.array;
if (sample_size + sizeof(event->header) > event->header.size)
return -EFAULT;
if (type & PERF_SAMPLE_IP) {
data->ip = event->ip.ip;
array++;
......@@ -369,14 +384,29 @@ int perf_event__parse_sample(const union perf_event *event, u64 type,
}
if (type & PERF_SAMPLE_CALLCHAIN) {
if (sample_overlap(event, array, sizeof(data->callchain->nr)))
return -EFAULT;
data->callchain = (struct ip_callchain *)array;
if (sample_overlap(event, array, data->callchain->nr))
return -EFAULT;
array += 1 + data->callchain->nr;
}
if (type & PERF_SAMPLE_RAW) {
u32 *p = (u32 *)array;
if (sample_overlap(event, array, sizeof(u32)))
return -EFAULT;
data->raw_size = *p;
p++;
if (sample_overlap(event, p, data->raw_size))
return -EFAULT;
data->raw_data = p;
}
......
......@@ -934,37 +934,6 @@ int perf_session__read_header(struct perf_session *session, int fd)
return -ENOMEM;
}
/*
 * Return the sample_type shared by the entries of @evlist.
 *
 * The first non-zero attr.sample_type encountered becomes the reference
 * value; a later entry whose sample_type differs from it ends up in
 * die("non matching sample_type"). A zero value is treated the same as
 * "no reference yet", so entries seen while the reference is still zero
 * are adopted rather than compared.
 *
 * Returns 0 when the list is empty or every entry has a zero sample_type.
 */
u64 perf_evlist__sample_type(struct perf_evlist *evlist)
{
	struct perf_evsel *evsel;
	u64 sample_type = 0;

	list_for_each_entry(evsel, &evlist->entries, node) {
		if (sample_type == 0) {
			sample_type = evsel->attr.sample_type;
			continue;
		}

		if (evsel->attr.sample_type != sample_type)
			die("non matching sample_type");
	}

	return sample_type;
}
/*
 * Return the attr.sample_id_all setting shared by the entries of @evlist.
 *
 * The first entry provides the reference value; any following entry with a
 * different setting ends up in die("non matching sample_id_all").
 *
 * Returns false for an empty list.
 */
bool perf_evlist__sample_id_all(const struct perf_evlist *evlist)
{
	struct perf_evsel *evsel;
	bool sample_id_all = false;
	bool have_reference = false;

	list_for_each_entry(evsel, &evlist->entries, node) {
		if (!have_reference) {
			sample_id_all = evsel->attr.sample_id_all;
			have_reference = true;
			continue;
		}

		if (sample_id_all != evsel->attr.sample_id_all)
			die("non matching sample_id_all");
	}

	return sample_id_all;
}
int perf_event__synthesize_attr(struct perf_event_attr *attr, u16 ids, u64 *id,
perf_event__handler_t process,
struct perf_session *session)
......
......@@ -64,8 +64,6 @@ int perf_header__write_pipe(int fd);
int perf_header__push_event(u64 id, const char *name);
char *perf_header__find_event(u64 id);
u64 perf_evlist__sample_type(struct perf_evlist *evlist);
bool perf_evlist__sample_id_all(const struct perf_evlist *evlist);
void perf_header__set_feat(struct perf_header *header, int feat);
void perf_header__clear_feat(struct perf_header *header, int feat);
bool perf_header__has_feat(const struct perf_header *header, int feat);
......
#include <linux/kernel.h>
#include <linux/prefetch.h>
#include "../../../../include/linux/list.h"
#ifndef PERF_LIST_H
......
......@@ -675,6 +675,7 @@ static PyObject *pyrf_evlist__read_on_cpu(struct pyrf_evlist *pevlist,
union perf_event *event;
int sample_id_all = 1, cpu;
static char *kwlist[] = {"sample_id_all", NULL, NULL};
int err;
if (!PyArg_ParseTupleAndKeywords(args, kwargs, "i|i", kwlist,
&cpu, &sample_id_all))
......@@ -690,11 +691,17 @@ static PyObject *pyrf_evlist__read_on_cpu(struct pyrf_evlist *pevlist,
return PyErr_NoMemory();
first = list_entry(evlist->entries.next, struct perf_evsel, node);
perf_event__parse_sample(event, first->attr.sample_type, sample_id_all,
&pevent->sample);
err = perf_event__parse_sample(event, first->attr.sample_type,
perf_sample_size(first->attr.sample_type),
sample_id_all, &pevent->sample);
if (err) {
pr_err("Can't parse sample, err = %d\n", err);
goto end;
}
return pyevent;
}
end:
Py_INCREF(Py_None);
return Py_None;
}
......
......@@ -97,6 +97,7 @@ static void perf_session__id_header_size(struct perf_session *session)
void perf_session__update_sample_type(struct perf_session *self)
{
self->sample_type = perf_evlist__sample_type(self->evlist);
self->sample_size = perf_sample_size(self->sample_type);
self->sample_id_all = perf_evlist__sample_id_all(self->evlist);
perf_session__id_header_size(self);
}
......@@ -479,6 +480,7 @@ static void flush_sample_queue(struct perf_session *s,
struct perf_sample sample;
u64 limit = os->next_flush;
u64 last_ts = os->last_sample ? os->last_sample->timestamp : 0ULL;
int ret;
if (!ops->ordered_samples || !limit)
return;
......@@ -487,9 +489,12 @@ static void flush_sample_queue(struct perf_session *s,
if (iter->timestamp > limit)
break;
perf_session__parse_sample(s, iter->event, &sample);
perf_session_deliver_event(s, iter->event, &sample, ops,
iter->file_offset);
ret = perf_session__parse_sample(s, iter->event, &sample);
if (ret)
pr_err("Can't parse sample, err = %d\n", ret);
else
perf_session_deliver_event(s, iter->event, &sample, ops,
iter->file_offset);
os->last_flush = iter->timestamp;
list_del(&iter->list);
......@@ -805,7 +810,9 @@ static int perf_session__process_event(struct perf_session *session,
/*
* For all kernel events we get the sample data
*/
perf_session__parse_sample(session, event, &sample);
ret = perf_session__parse_sample(session, event, &sample);
if (ret)
return ret;
/* Preprocess sample records - precheck callchains */
if (perf_session__preprocess_sample(session, event, &sample))
......@@ -953,6 +960,30 @@ static int __perf_session__process_pipe_events(struct perf_session *self,
return err;
}
/*
 * fetch_mmaped_event - get the event located @head bytes into @buf.
 * @session:   session; header.needs_swap selects byte-swapping of the header
 * @head:      byte offset of the event inside @buf
 * @mmap_size: number of valid bytes in @buf
 * @buf:       start of the mapped window
 *
 * Returns a pointer to the event when both its header and the header.size
 * bytes it announces lie within @mmap_size, NULL otherwise.
 *
 * The header is swapped in place before its size can be checked. When the
 * event turns out not to fit, the swap is undone so that a later fetch of
 * the same bytes does not swap an already swapped header (this relies on
 * perf_event_header__bswap() being its own inverse).
 */
static union perf_event *
fetch_mmaped_event(struct perf_session *session,
		   u64 head, size_t mmap_size, char *buf)
{
	union perf_event *event;

	/*
	 * Ensure we have enough space remaining to read
	 * the size of the event in the headers.
	 */
	if (head + sizeof(event->header) > mmap_size)
		return NULL;

	event = (union perf_event *)(buf + head);

	if (session->header.needs_swap)
		perf_event_header__bswap(&event->header);

	if (head + event->header.size > mmap_size) {
		/* We're not fetching the event so swap back again */
		if (session->header.needs_swap)
			perf_event_header__bswap(&event->header);
		return NULL;
	}

	return event;
}
int __perf_session__process_events(struct perf_session *session,
u64 data_offset, u64 data_size,
u64 file_size, struct perf_event_ops *ops)
......@@ -1007,15 +1038,8 @@ int __perf_session__process_events(struct perf_session *session,
file_pos = file_offset + head;
more:
event = (union perf_event *)(buf + head);
if (session->header.needs_swap)
perf_event_header__bswap(&event->header);
size = event->header.size;
if (size == 0)
size = 8;
if (head + event->header.size > mmap_size) {
event = fetch_mmaped_event(session, head, mmap_size, buf);
if (!event) {
if (mmaps[map_idx]) {
munmap(mmaps[map_idx], mmap_size);
mmaps[map_idx] = NULL;
......
......@@ -43,6 +43,7 @@ struct perf_session {
*/
struct hists hists;
u64 sample_type;
int sample_size;
int fd;
bool fd_pipe;
bool repipe;
......@@ -159,6 +160,7 @@ static inline int perf_session__parse_sample(struct perf_session *session,
struct perf_sample *sample)
{
return perf_event__parse_sample(event, session->sample_type,
session->sample_size,
session->sample_id_all, sample);
}
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment