Commit 63bd30f2 authored by Linus Torvalds

Merge tag 'trace-ring-buffer-v6.8-rc7-2' of...

Merge tag 'trace-ring-buffer-v6.8-rc7-2' of git://git.kernel.org/pub/scm/linux/kernel/git/trace/linux-trace

Pull tracing updates from Steven Rostedt:

 - Do not update shortest_full in rb_watermark_hit() if the watermark is
   hit. The shortest_full field was being updated regardless of whether
   the task was going to wait or not. If the watermark is hit, then the
   task is not going to wait, so do not update the shortest_full field
   (used by the waker).

 - Update shortest_full field before setting the full_waiters_pending
   flag

   In the poll logic, the full_waiters_pending flag was being set before
   the shortest_full field was set. The shortest_full field holds the
   smallest percentage of the ring buffer that any waiter needs to be
   filled before it is woken up. If the full_waiters_pending flag is
   set, the writer checks shortest_full, and if the percentage of the
   ring buffer that is filled is greater than shortest_full, it calls
   the irq_work to wake up the waiters.

   The problem was that the poll logic set the full_waiters_pending flag
   before updating shortest_full, which, when zero, will always trigger
   the writer to call the irq_work to wake up the waiters. The irq_work
   will reset the shortest_full field back to zero, as the woken waiters
   are supposed to reset it.

 - There's some optimized logic in the rb_watermark_hit() that is used
   in ring_buffer_wait(). Use that helper function in the poll logic as
   well.

 - Restructure ring_buffer_wait() to use wait_event_interruptible()

   The logic to wake up pending readers when the file descriptor is
   closed is racy. Restructure ring_buffer_wait() to allow callers to
   pass in conditions besides the ring buffer having enough data in it
   by using wait_event_interruptible().

 - Update tracing_wait_on_pipe() to call ring_buffer_wait() with its
   own conditions to exit the wait loop.

* tag 'trace-ring-buffer-v6.8-rc7-2' of git://git.kernel.org/pub/scm/linux/kernel/git/trace/linux-trace:
  tracing/ring-buffer: Fix wait_on_pipe() race
  ring-buffer: Use wait_event_interruptible() in ring_buffer_wait()
  ring-buffer: Reuse rb_watermark_hit() for the poll logic
  ring-buffer: Fix full_waiters_pending in poll
  ring-buffer: Do not set shortest_full when full target is hit
parents 01732755 2aa043a5
...@@ -98,7 +98,9 @@ __ring_buffer_alloc(unsigned long size, unsigned flags, struct lock_class_key *k ...@@ -98,7 +98,9 @@ __ring_buffer_alloc(unsigned long size, unsigned flags, struct lock_class_key *k
__ring_buffer_alloc((size), (flags), &__key); \ __ring_buffer_alloc((size), (flags), &__key); \
}) })
int ring_buffer_wait(struct trace_buffer *buffer, int cpu, int full); typedef bool (*ring_buffer_cond_fn)(void *data);
int ring_buffer_wait(struct trace_buffer *buffer, int cpu, int full,
ring_buffer_cond_fn cond, void *data);
__poll_t ring_buffer_poll_wait(struct trace_buffer *buffer, int cpu, __poll_t ring_buffer_poll_wait(struct trace_buffer *buffer, int cpu,
struct file *filp, poll_table *poll_table, int full); struct file *filp, poll_table *poll_table, int full);
void ring_buffer_wake_waiters(struct trace_buffer *buffer, int cpu); void ring_buffer_wake_waiters(struct trace_buffer *buffer, int cpu);
......
...@@ -103,13 +103,16 @@ struct trace_iterator { ...@@ -103,13 +103,16 @@ struct trace_iterator {
unsigned int temp_size; unsigned int temp_size;
char *fmt; /* modified format holder */ char *fmt; /* modified format holder */
unsigned int fmt_size; unsigned int fmt_size;
long wait_index; atomic_t wait_index;
/* trace_seq for __print_flags() and __print_symbolic() etc. */ /* trace_seq for __print_flags() and __print_symbolic() etc. */
struct trace_seq tmp_seq; struct trace_seq tmp_seq;
cpumask_var_t started; cpumask_var_t started;
/* Set when the file is closed to prevent new waiters */
bool closed;
/* it's true when current open file is snapshot */ /* it's true when current open file is snapshot */
bool snapshot; bool snapshot;
......
...@@ -834,51 +834,24 @@ static bool rb_watermark_hit(struct trace_buffer *buffer, int cpu, int full) ...@@ -834,51 +834,24 @@ static bool rb_watermark_hit(struct trace_buffer *buffer, int cpu, int full)
pagebusy = cpu_buffer->reader_page == cpu_buffer->commit_page; pagebusy = cpu_buffer->reader_page == cpu_buffer->commit_page;
ret = !pagebusy && full_hit(buffer, cpu, full); ret = !pagebusy && full_hit(buffer, cpu, full);
if (!cpu_buffer->shortest_full || if (!ret && (!cpu_buffer->shortest_full ||
cpu_buffer->shortest_full > full) cpu_buffer->shortest_full > full)) {
cpu_buffer->shortest_full = full; cpu_buffer->shortest_full = full;
}
raw_spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags); raw_spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags);
} }
return ret; return ret;
} }
/** static inline bool
* ring_buffer_wait - wait for input to the ring buffer rb_wait_cond(struct rb_irq_work *rbwork, struct trace_buffer *buffer,
* @buffer: buffer to wait on int cpu, int full, ring_buffer_cond_fn cond, void *data)
* @cpu: the cpu buffer to wait on
* @full: wait until the percentage of pages are available, if @cpu != RING_BUFFER_ALL_CPUS
*
* If @cpu == RING_BUFFER_ALL_CPUS then the task will wake up as soon
* as data is added to any of the @buffer's cpu buffers. Otherwise
* it will wait for data to be added to a specific cpu buffer.
*/
int ring_buffer_wait(struct trace_buffer *buffer, int cpu, int full)
{ {
struct ring_buffer_per_cpu *cpu_buffer; if (rb_watermark_hit(buffer, cpu, full))
DEFINE_WAIT(wait); return true;
struct rb_irq_work *work;
int ret = 0;
/*
* Depending on what the caller is waiting for, either any
* data in any cpu buffer, or a specific buffer, put the
* caller on the appropriate wait queue.
*/
if (cpu == RING_BUFFER_ALL_CPUS) {
work = &buffer->irq_work;
/* Full only makes sense on per cpu reads */
full = 0;
} else {
if (!cpumask_test_cpu(cpu, buffer->cpumask))
return -ENODEV;
cpu_buffer = buffer->buffers[cpu];
work = &cpu_buffer->irq_work;
}
if (full) if (cond(data))
prepare_to_wait(&work->full_waiters, &wait, TASK_INTERRUPTIBLE); return true;
else
prepare_to_wait(&work->waiters, &wait, TASK_INTERRUPTIBLE);
/* /*
* The events can happen in critical sections where * The events can happen in critical sections where
...@@ -901,27 +874,78 @@ int ring_buffer_wait(struct trace_buffer *buffer, int cpu, int full) ...@@ -901,27 +874,78 @@ int ring_buffer_wait(struct trace_buffer *buffer, int cpu, int full)
* a task has been queued. It's OK for spurious wake ups. * a task has been queued. It's OK for spurious wake ups.
*/ */
if (full) if (full)
work->full_waiters_pending = true; rbwork->full_waiters_pending = true;
else else
work->waiters_pending = true; rbwork->waiters_pending = true;
if (rb_watermark_hit(buffer, cpu, full)) return false;
goto out; }
if (signal_pending(current)) { /*
ret = -EINTR; * The default wait condition for ring_buffer_wait() is to just to exit the
goto out; * wait loop the first time it is woken up.
*/
static bool rb_wait_once(void *data)
{
long *once = data;
/* wait_event() actually calls this twice before scheduling*/
if (*once > 1)
return true;
(*once)++;
return false;
}
/**
* ring_buffer_wait - wait for input to the ring buffer
* @buffer: buffer to wait on
* @cpu: the cpu buffer to wait on
* @full: wait until the percentage of pages are available, if @cpu != RING_BUFFER_ALL_CPUS
* @cond: condition function to break out of wait (NULL to run once)
* @data: the data to pass to @cond.
*
* If @cpu == RING_BUFFER_ALL_CPUS then the task will wake up as soon
* as data is added to any of the @buffer's cpu buffers. Otherwise
* it will wait for data to be added to a specific cpu buffer.
*/
int ring_buffer_wait(struct trace_buffer *buffer, int cpu, int full,
ring_buffer_cond_fn cond, void *data)
{
struct ring_buffer_per_cpu *cpu_buffer;
struct wait_queue_head *waitq;
struct rb_irq_work *rbwork;
long once = 0;
int ret = 0;
if (!cond) {
cond = rb_wait_once;
data = &once;
}
/*
* Depending on what the caller is waiting for, either any
* data in any cpu buffer, or a specific buffer, put the
* caller on the appropriate wait queue.
*/
if (cpu == RING_BUFFER_ALL_CPUS) {
rbwork = &buffer->irq_work;
/* Full only makes sense on per cpu reads */
full = 0;
} else {
if (!cpumask_test_cpu(cpu, buffer->cpumask))
return -ENODEV;
cpu_buffer = buffer->buffers[cpu];
rbwork = &cpu_buffer->irq_work;
} }
schedule();
out:
if (full) if (full)
finish_wait(&work->full_waiters, &wait); waitq = &rbwork->full_waiters;
else else
finish_wait(&work->waiters, &wait); waitq = &rbwork->waiters;
if (!ret && !rb_watermark_hit(buffer, cpu, full) && signal_pending(current)) ret = wait_event_interruptible((*waitq),
ret = -EINTR; rb_wait_cond(rbwork, buffer, cpu, full, cond, data));
return ret; return ret;
} }
...@@ -959,21 +983,30 @@ __poll_t ring_buffer_poll_wait(struct trace_buffer *buffer, int cpu, ...@@ -959,21 +983,30 @@ __poll_t ring_buffer_poll_wait(struct trace_buffer *buffer, int cpu,
} }
if (full) { if (full) {
unsigned long flags;
poll_wait(filp, &rbwork->full_waiters, poll_table); poll_wait(filp, &rbwork->full_waiters, poll_table);
raw_spin_lock_irqsave(&cpu_buffer->reader_lock, flags); if (rb_watermark_hit(buffer, cpu, full))
return EPOLLIN | EPOLLRDNORM;
/*
* Only allow full_waiters_pending update to be seen after
* the shortest_full is set (in rb_watermark_hit). If the
* writer sees the full_waiters_pending flag set, it will
* compare the amount in the ring buffer to shortest_full.
* If the amount in the ring buffer is greater than the
* shortest_full percent, it will call the irq_work handler
* to wake up this list. The irq_handler will reset shortest_full
* back to zero. That's done under the reader_lock, but
* the below smp_mb() makes sure that the update to
* full_waiters_pending doesn't leak up into the above.
*/
smp_mb();
rbwork->full_waiters_pending = true; rbwork->full_waiters_pending = true;
if (!cpu_buffer->shortest_full || return 0;
cpu_buffer->shortest_full > full)
cpu_buffer->shortest_full = full;
raw_spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags);
} else {
poll_wait(filp, &rbwork->waiters, poll_table);
rbwork->waiters_pending = true;
} }
poll_wait(filp, &rbwork->waiters, poll_table);
rbwork->waiters_pending = true;
/* /*
* There's a tight race between setting the waiters_pending and * There's a tight race between setting the waiters_pending and
* checking if the ring buffer is empty. Once the waiters_pending bit * checking if the ring buffer is empty. Once the waiters_pending bit
...@@ -989,9 +1022,6 @@ __poll_t ring_buffer_poll_wait(struct trace_buffer *buffer, int cpu, ...@@ -989,9 +1022,6 @@ __poll_t ring_buffer_poll_wait(struct trace_buffer *buffer, int cpu,
*/ */
smp_mb(); smp_mb();
if (full)
return full_hit(buffer, cpu, full) ? EPOLLIN | EPOLLRDNORM : 0;
if ((cpu == RING_BUFFER_ALL_CPUS && !ring_buffer_empty(buffer)) || if ((cpu == RING_BUFFER_ALL_CPUS && !ring_buffer_empty(buffer)) ||
(cpu != RING_BUFFER_ALL_CPUS && !ring_buffer_empty_cpu(buffer, cpu))) (cpu != RING_BUFFER_ALL_CPUS && !ring_buffer_empty_cpu(buffer, cpu)))
return EPOLLIN | EPOLLRDNORM; return EPOLLIN | EPOLLRDNORM;
......
...@@ -1955,15 +1955,36 @@ update_max_tr_single(struct trace_array *tr, struct task_struct *tsk, int cpu) ...@@ -1955,15 +1955,36 @@ update_max_tr_single(struct trace_array *tr, struct task_struct *tsk, int cpu)
#endif /* CONFIG_TRACER_MAX_TRACE */ #endif /* CONFIG_TRACER_MAX_TRACE */
struct pipe_wait {
struct trace_iterator *iter;
int wait_index;
};
static bool wait_pipe_cond(void *data)
{
struct pipe_wait *pwait = data;
struct trace_iterator *iter = pwait->iter;
if (atomic_read_acquire(&iter->wait_index) != pwait->wait_index)
return true;
return iter->closed;
}
static int wait_on_pipe(struct trace_iterator *iter, int full) static int wait_on_pipe(struct trace_iterator *iter, int full)
{ {
struct pipe_wait pwait;
int ret; int ret;
/* Iterators are static, they should be filled or empty */ /* Iterators are static, they should be filled or empty */
if (trace_buffer_iter(iter, iter->cpu_file)) if (trace_buffer_iter(iter, iter->cpu_file))
return 0; return 0;
ret = ring_buffer_wait(iter->array_buffer->buffer, iter->cpu_file, full); pwait.wait_index = atomic_read_acquire(&iter->wait_index);
pwait.iter = iter;
ret = ring_buffer_wait(iter->array_buffer->buffer, iter->cpu_file, full,
wait_pipe_cond, &pwait);
#ifdef CONFIG_TRACER_MAX_TRACE #ifdef CONFIG_TRACER_MAX_TRACE
/* /*
...@@ -8397,9 +8418,9 @@ static int tracing_buffers_flush(struct file *file, fl_owner_t id) ...@@ -8397,9 +8418,9 @@ static int tracing_buffers_flush(struct file *file, fl_owner_t id)
struct ftrace_buffer_info *info = file->private_data; struct ftrace_buffer_info *info = file->private_data;
struct trace_iterator *iter = &info->iter; struct trace_iterator *iter = &info->iter;
iter->wait_index++; iter->closed = true;
/* Make sure the waiters see the new wait_index */ /* Make sure the waiters see the new wait_index */
smp_wmb(); (void)atomic_fetch_inc_release(&iter->wait_index);
ring_buffer_wake_waiters(iter->array_buffer->buffer, iter->cpu_file); ring_buffer_wake_waiters(iter->array_buffer->buffer, iter->cpu_file);
...@@ -8499,6 +8520,7 @@ tracing_buffers_splice_read(struct file *file, loff_t *ppos, ...@@ -8499,6 +8520,7 @@ tracing_buffers_splice_read(struct file *file, loff_t *ppos,
.spd_release = buffer_spd_release, .spd_release = buffer_spd_release,
}; };
struct buffer_ref *ref; struct buffer_ref *ref;
bool woken = false;
int page_size; int page_size;
int entries, i; int entries, i;
ssize_t ret = 0; ssize_t ret = 0;
...@@ -8572,17 +8594,17 @@ tracing_buffers_splice_read(struct file *file, loff_t *ppos, ...@@ -8572,17 +8594,17 @@ tracing_buffers_splice_read(struct file *file, loff_t *ppos,
/* did we read anything? */ /* did we read anything? */
if (!spd.nr_pages) { if (!spd.nr_pages) {
long wait_index;
if (ret) if (ret)
goto out; goto out;
if (woken)
goto out;
ret = -EAGAIN; ret = -EAGAIN;
if ((file->f_flags & O_NONBLOCK) || (flags & SPLICE_F_NONBLOCK)) if ((file->f_flags & O_NONBLOCK) || (flags & SPLICE_F_NONBLOCK))
goto out; goto out;
wait_index = READ_ONCE(iter->wait_index);
ret = wait_on_pipe(iter, iter->snapshot ? 0 : iter->tr->buffer_percent); ret = wait_on_pipe(iter, iter->snapshot ? 0 : iter->tr->buffer_percent);
if (ret) if (ret)
goto out; goto out;
...@@ -8591,10 +8613,8 @@ tracing_buffers_splice_read(struct file *file, loff_t *ppos, ...@@ -8591,10 +8613,8 @@ tracing_buffers_splice_read(struct file *file, loff_t *ppos,
if (!tracer_tracing_is_on(iter->tr)) if (!tracer_tracing_is_on(iter->tr))
goto out; goto out;
/* Make sure we see the new wait_index */ /* Iterate one more time to collect any new data then exit */
smp_rmb(); woken = true;
if (wait_index != iter->wait_index)
goto out;
goto again; goto again;
} }
...@@ -8617,9 +8637,8 @@ static long tracing_buffers_ioctl(struct file *file, unsigned int cmd, unsigned ...@@ -8617,9 +8637,8 @@ static long tracing_buffers_ioctl(struct file *file, unsigned int cmd, unsigned
mutex_lock(&trace_types_lock); mutex_lock(&trace_types_lock);
iter->wait_index++;
/* Make sure the waiters see the new wait_index */ /* Make sure the waiters see the new wait_index */
smp_wmb(); (void)atomic_fetch_inc_release(&iter->wait_index);
ring_buffer_wake_waiters(iter->array_buffer->buffer, iter->cpu_file); ring_buffer_wake_waiters(iter->array_buffer->buffer, iter->cpu_file);
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment