Commit 818e3dd3 authored by Steven Rostedt, committed by Ingo Molnar

tracing, ring-buffer: add paranoid checks for loops

While writing a new tracer, I had a bug where I caused the ring-buffer
to recurse in a bad way. The bug was with the tracer I was writing
and not the ring-buffer itself. But it took a long time to find the
problem.

This patch adds paranoid checks into the ring-buffer infrastructure
that will catch bugs of this nature.

Note: I put the bug back in the tracer and this patch showed the error
      nicely and prevented the lockup.
Signed-off-by: Steven Rostedt <srostedt@redhat.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
parent b3aa5577
...@@ -1022,8 +1022,23 @@ rb_reserve_next_event(struct ring_buffer_per_cpu *cpu_buffer, ...@@ -1022,8 +1022,23 @@ rb_reserve_next_event(struct ring_buffer_per_cpu *cpu_buffer,
struct ring_buffer_event *event; struct ring_buffer_event *event;
u64 ts, delta; u64 ts, delta;
int commit = 0; int commit = 0;
int nr_loops = 0;
again: again:
/*
* We allow for interrupts to reenter here and do a trace.
* If one does, it will cause this original code to loop
* back here. Even with heavy interrupts happening, this
* should only happen a few times in a row. If this happens
* 1000 times in a row, there must be either an interrupt
* storm or we have something buggy.
* Bail!
*/
if (unlikely(++nr_loops > 1000)) {
RB_WARN_ON(cpu_buffer, 1);
return NULL;
}
ts = ring_buffer_time_stamp(cpu_buffer->cpu); ts = ring_buffer_time_stamp(cpu_buffer->cpu);
/* /*
...@@ -1532,10 +1547,23 @@ rb_get_reader_page(struct ring_buffer_per_cpu *cpu_buffer) ...@@ -1532,10 +1547,23 @@ rb_get_reader_page(struct ring_buffer_per_cpu *cpu_buffer)
{ {
struct buffer_page *reader = NULL; struct buffer_page *reader = NULL;
unsigned long flags; unsigned long flags;
int nr_loops = 0;
spin_lock_irqsave(&cpu_buffer->lock, flags); spin_lock_irqsave(&cpu_buffer->lock, flags);
again: again:
/*
* This should normally only loop twice. But because the
* start of the reader inserts an empty page, it causes
* a case where we will loop three times. There should be no
* reason to loop four times (that I know of).
*/
if (unlikely(++nr_loops > 3)) {
RB_WARN_ON(cpu_buffer, 1);
reader = NULL;
goto out;
}
reader = cpu_buffer->reader_page; reader = cpu_buffer->reader_page;
/* If there's more to read, return this page */ /* If there's more to read, return this page */
...@@ -1665,6 +1693,7 @@ ring_buffer_peek(struct ring_buffer *buffer, int cpu, u64 *ts) ...@@ -1665,6 +1693,7 @@ ring_buffer_peek(struct ring_buffer *buffer, int cpu, u64 *ts)
struct ring_buffer_per_cpu *cpu_buffer; struct ring_buffer_per_cpu *cpu_buffer;
struct ring_buffer_event *event; struct ring_buffer_event *event;
struct buffer_page *reader; struct buffer_page *reader;
int nr_loops = 0;
if (!cpu_isset(cpu, buffer->cpumask)) if (!cpu_isset(cpu, buffer->cpumask))
return NULL; return NULL;
...@@ -1672,6 +1701,19 @@ ring_buffer_peek(struct ring_buffer *buffer, int cpu, u64 *ts) ...@@ -1672,6 +1701,19 @@ ring_buffer_peek(struct ring_buffer *buffer, int cpu, u64 *ts)
cpu_buffer = buffer->buffers[cpu]; cpu_buffer = buffer->buffers[cpu];
again: again:
/*
* We repeat when a timestamp is encountered. It is possible
* to get multiple timestamps from an interrupt entering just
* as one timestamp is about to be written. The max times
* that this can happen is the number of nested interrupts we
* can have. Nesting 10 deep of interrupts is clearly
* an anomaly.
*/
if (unlikely(++nr_loops > 10)) {
RB_WARN_ON(cpu_buffer, 1);
return NULL;
}
reader = rb_get_reader_page(cpu_buffer); reader = rb_get_reader_page(cpu_buffer);
if (!reader) if (!reader)
return NULL; return NULL;
...@@ -1722,6 +1764,7 @@ ring_buffer_iter_peek(struct ring_buffer_iter *iter, u64 *ts) ...@@ -1722,6 +1764,7 @@ ring_buffer_iter_peek(struct ring_buffer_iter *iter, u64 *ts)
struct ring_buffer *buffer; struct ring_buffer *buffer;
struct ring_buffer_per_cpu *cpu_buffer; struct ring_buffer_per_cpu *cpu_buffer;
struct ring_buffer_event *event; struct ring_buffer_event *event;
int nr_loops = 0;
if (ring_buffer_iter_empty(iter)) if (ring_buffer_iter_empty(iter))
return NULL; return NULL;
...@@ -1730,6 +1773,19 @@ ring_buffer_iter_peek(struct ring_buffer_iter *iter, u64 *ts) ...@@ -1730,6 +1773,19 @@ ring_buffer_iter_peek(struct ring_buffer_iter *iter, u64 *ts)
buffer = cpu_buffer->buffer; buffer = cpu_buffer->buffer;
again: again:
/*
* We repeat when a timestamp is encountered. It is possible
* to get multiple timestamps from an interrupt entering just
* as one timestamp is about to be written. The max times
* that this can happen is the number of nested interrupts we
* can have. Nesting 10 deep of interrupts is clearly
* an anomaly.
*/
if (unlikely(++nr_loops > 10)) {
RB_WARN_ON(cpu_buffer, 1);
return NULL;
}
if (rb_per_cpu_empty(cpu_buffer)) if (rb_per_cpu_empty(cpu_buffer))
return NULL; return NULL;
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment