Commit 95ff4ca2 authored by Alexander Shishkin, committed by Ingo Molnar

perf/core: Free AUX pages in unmap path

Now that we can ensure that no new transactions will start while the ring
buffer's AUX area is on its way to getting unmapped, we only need to stop
all events that could potentially be writing AUX data to our ring buffer.

Having done that, we can safely free the AUX pages and the corresponding
PMU data, as this is now guaranteed to be the last AUX reference holder.

This partially reverts:

  57ffc5ca ("perf: Fix AUX buffer refcounting")

... which was needed to defer the deallocation, since it could otherwise
happen from NMI context. That is no longer the case: the last call to
rb_free_aux() that drops the last AUX reference now has to happen in
perf_mmap_close() on that AUX area.
Signed-off-by: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: Arnaldo Carvalho de Melo <acme@infradead.org>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Stephane Eranian <eranian@google.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Vince Weaver <vincent.weaver@maine.edu>
Cc: vince@deater.net
Link: http://lkml.kernel.org/r/87d1qtz23d.fsf@ashishki-desk.ger.corp.intel.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
parent dcb10a96
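
A note on the ordering the diff below relies on: event_sched_in() publishes event::oncpu with an smp_wmb() before making PERF_EVENT_STATE_ACTIVE visible, and __perf_event_stop() pairs that with an smp_rmb(), so a CPU that observes ACTIVE is guaranteed to also observe the oncpu value written before it. The following standalone sketch (not part of the commit; every name in it is invented for the illustration) restates that pairing with C11 atomics so it can be compiled and run in userspace:

/* ordering_sketch.c -- illustration only, not kernel code */
#include <stdatomic.h>
#include <stdio.h>

#define STATE_INACTIVE	0
#define STATE_ACTIVE	1

static _Atomic int oncpu = -1;			/* mirrors event->oncpu */
static _Atomic int state = STATE_INACTIVE;	/* mirrors event->state */

/* scheduling side, cf. event_sched_in(): publish oncpu before ACTIVE */
static void sched_in(int cpu)
{
	atomic_store_explicit(&oncpu, cpu, memory_order_relaxed);
	atomic_thread_fence(memory_order_release);	/* plays the role of smp_wmb() */
	atomic_store_explicit(&state, STATE_ACTIVE, memory_order_relaxed);
}

/* remote-stop side, cf. __perf_event_stop(): runs on the target CPU */
static int try_stop(int this_cpu)
{
	if (atomic_load_explicit(&state, memory_order_relaxed) != STATE_ACTIVE)
		return 0;				/* already inactive, nothing to do */

	atomic_thread_fence(memory_order_acquire);	/* plays the role of smp_rmb() */

	if (atomic_load_explicit(&oncpu, this_cpu ? memory_order_relaxed : memory_order_relaxed) != this_cpu)
		return -1;				/* event moved, caller must retry */

	printf("stopping event on CPU %d\n", this_cpu);
	return 0;
}

int main(void)
{
	sched_in(2);
	return try_stop(2);
}

In the patch itself, the "event moved" case is reported as -EAGAIN, and perf_pmu_output_stop() simply restarts its walk against the event's new CPU.
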
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -1925,8 +1925,13 @@ event_sched_in(struct perf_event *event,
 	if (event->state <= PERF_EVENT_STATE_OFF)
 		return 0;
 
-	event->state = PERF_EVENT_STATE_ACTIVE;
-	event->oncpu = smp_processor_id();
+	WRITE_ONCE(event->oncpu, smp_processor_id());
+	/*
+	 * Order event::oncpu write to happen before the ACTIVE state
+	 * is visible.
+	 */
+	smp_wmb();
+	WRITE_ONCE(event->state, PERF_EVENT_STATE_ACTIVE);
 
 	/*
 	 * Unthrottle events, since we scheduled we might have missed several
@@ -2358,6 +2363,29 @@ void perf_event_enable(struct perf_event *event)
 }
 EXPORT_SYMBOL_GPL(perf_event_enable);
 
+static int __perf_event_stop(void *info)
+{
+	struct perf_event *event = info;
+
+	/* for AUX events, our job is done if the event is already inactive */
+	if (READ_ONCE(event->state) != PERF_EVENT_STATE_ACTIVE)
+		return 0;
+
+	/* matches smp_wmb() in event_sched_in() */
+	smp_rmb();
+
+	/*
+	 * There is a window with interrupts enabled before we get here,
+	 * so we need to check again lest we try to stop another CPU's event.
+	 */
+	if (READ_ONCE(event->oncpu) != smp_processor_id())
+		return -EAGAIN;
+
+	event->pmu->stop(event, PERF_EF_UPDATE);
+
+	return 0;
+}
+
 static int _perf_event_refresh(struct perf_event *event, int refresh)
 {
 	/*
@@ -4667,6 +4695,8 @@ static void perf_mmap_open(struct vm_area_struct *vma)
 		event->pmu->event_mapped(event);
 }
 
+static void perf_pmu_output_stop(struct perf_event *event);
+
 /*
  * A buffer can be mmap()ed multiple times; either directly through the same
  * event, or through other events by use of perf_event_set_output().
@@ -4694,10 +4724,22 @@ static void perf_mmap_close(struct vm_area_struct *vma)
 	 */
 	if (rb_has_aux(rb) && vma->vm_pgoff == rb->aux_pgoff &&
 	    atomic_dec_and_mutex_lock(&rb->aux_mmap_count, &event->mmap_mutex)) {
+		/*
+		 * Stop all AUX events that are writing to this buffer,
+		 * so that we can free its AUX pages and corresponding PMU
+		 * data. Note that after rb::aux_mmap_count dropped to zero,
+		 * they won't start any more (see perf_aux_output_begin()).
+		 */
+		perf_pmu_output_stop(event);
+
+		/* now it's safe to free the pages */
 		atomic_long_sub(rb->aux_nr_pages, &mmap_user->locked_vm);
 		vma->vm_mm->pinned_vm -= rb->aux_mmap_locked;
+
+		/* this has to be the last one */
 		rb_free_aux(rb);
+		WARN_ON_ONCE(atomic_read(&rb->aux_refcount));
 
 		mutex_unlock(&event->mmap_mutex);
 	}
 
@@ -5768,6 +5810,80 @@ perf_event_aux(perf_event_aux_output_cb output, void *data,
 	rcu_read_unlock();
 }
 
+struct remote_output {
+	struct ring_buffer	*rb;
+	int			err;
+};
+
+static void __perf_event_output_stop(struct perf_event *event, void *data)
+{
+	struct perf_event *parent = event->parent;
+	struct remote_output *ro = data;
+	struct ring_buffer *rb = ro->rb;
+
+	if (!has_aux(event))
+		return;
+
+	if (!parent)
+		parent = event;
+
+	/*
+	 * In case of inheritance, it will be the parent that links to the
+	 * ring-buffer, but it will be the child that's actually using it:
+	 */
+	if (rcu_dereference(parent->rb) == rb)
+		ro->err = __perf_event_stop(event);
+}
+
+static int __perf_pmu_output_stop(void *info)
+{
+	struct perf_event *event = info;
+	struct pmu *pmu = event->pmu;
+	struct perf_cpu_context *cpuctx = get_cpu_ptr(pmu->pmu_cpu_context);
+	struct remote_output ro = {
+		.rb	= event->rb,
+	};
+
+	rcu_read_lock();
+	perf_event_aux_ctx(&cpuctx->ctx, __perf_event_output_stop, &ro);
+	if (cpuctx->task_ctx)
+		perf_event_aux_ctx(cpuctx->task_ctx, __perf_event_output_stop,
+				   &ro);
+	rcu_read_unlock();
+
+	return ro.err;
+}
+
+static void perf_pmu_output_stop(struct perf_event *event)
+{
+	struct perf_event *iter;
+	int err, cpu;
+
+restart:
+	rcu_read_lock();
+	list_for_each_entry_rcu(iter, &event->rb->event_list, rb_entry) {
+		/*
+		 * For per-CPU events, we need to make sure that neither they
+		 * nor their children are running; for cpu==-1 events it's
+		 * sufficient to stop the event itself if it's active, since
+		 * it can't have children.
+		 */
+		cpu = iter->cpu;
+		if (cpu == -1)
+			cpu = READ_ONCE(iter->oncpu);
+
+		if (cpu == -1)
+			continue;
+
+		err = cpu_function_call(cpu, __perf_pmu_output_stop, event);
+		if (err == -EAGAIN) {
+			rcu_read_unlock();
+			goto restart;
+		}
+	}
+	rcu_read_unlock();
+}
+
 /*
  * task tracking -- fork/exit
  *
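
One detail worth spelling out from the stop loop above: for per-CPU events the stop IPI goes to event->cpu, because any inherited children can only run on that same CPU, while task-bound (cpu == -1) events only need stopping if they are currently running somewhere, i.e. oncpu != -1. A tiny standalone sketch of that CPU selection (again not part of the commit; all names are invented for the example):

/* stop_target_sketch.c -- illustration only, not kernel code */
#include <stdio.h>

struct ev {
	int cpu;	/* -1 for task-bound events */
	int oncpu;	/* CPU the event is running on right now, -1 if idle */
};

/* returns the CPU to send the stop request to, or -1 if there is nothing to do */
static int stop_target_cpu(const struct ev *e)
{
	int cpu = e->cpu;

	if (cpu == -1)		/* task event: only matters if it is running */
		cpu = e->oncpu;

	return cpu;
}

int main(void)
{
	struct ev percpu	= { .cpu = 3,  .oncpu = 3 };
	struct ev task_idle	= { .cpu = -1, .oncpu = -1 };
	struct ev task_running	= { .cpu = -1, .oncpu = 5 };

	printf("%d %d %d\n", stop_target_cpu(&percpu),
	       stop_target_cpu(&task_idle), stop_target_cpu(&task_running));
	return 0;
}
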
--- a/kernel/events/internal.h
+++ b/kernel/events/internal.h
@@ -11,7 +11,6 @@
 struct ring_buffer {
 	atomic_t			refcount;
 	struct rcu_head			rcu_head;
-	struct irq_work			irq_work;
 #ifdef CONFIG_PERF_USE_VMALLOC
 	struct work_struct		work;
 	int				page_order;	/* allocation order  */
--- a/kernel/events/ring_buffer.c
+++ b/kernel/events/ring_buffer.c
@@ -221,8 +221,6 @@ void perf_output_end(struct perf_output_handle *handle)
 	rcu_read_unlock();
 }
 
-static void rb_irq_work(struct irq_work *work);
-
 static void
 ring_buffer_init(struct ring_buffer *rb, long watermark, int flags)
 {
@@ -243,16 +241,6 @@ ring_buffer_init(struct ring_buffer *rb, long watermark, int flags)
 
 	INIT_LIST_HEAD(&rb->event_list);
 	spin_lock_init(&rb->event_lock);
-	init_irq_work(&rb->irq_work, rb_irq_work);
-}
-
-static void ring_buffer_put_async(struct ring_buffer *rb)
-{
-	if (!atomic_dec_and_test(&rb->refcount))
-		return;
-
-	rb->rcu_head.next = (void *)rb;
-	irq_work_queue(&rb->irq_work);
 }
 
 /*
@@ -292,7 +280,7 @@ void *perf_aux_output_begin(struct perf_output_handle *handle,
 	 * the aux buffer is in perf_mmap_close(), about to get freed.
 	 */
 	if (!atomic_read(&rb->aux_mmap_count))
-		goto err;
+		goto err_put;
 
 	/*
 	 * Nesting is not supported for AUX area, make sure nested
@@ -338,7 +326,7 @@ void *perf_aux_output_begin(struct perf_output_handle *handle,
 	rb_free_aux(rb);
 
 err:
-	ring_buffer_put_async(rb);
+	ring_buffer_put(rb);
 	handle->event = NULL;
 
 	return NULL;
@@ -389,7 +377,7 @@ void perf_aux_output_end(struct perf_output_handle *handle, unsigned long size,
 
 	local_set(&rb->aux_nest, 0);
 	rb_free_aux(rb);
-	ring_buffer_put_async(rb);
+	ring_buffer_put(rb);
 }
 
 /*
@@ -470,6 +458,14 @@ static void __rb_free_aux(struct ring_buffer *rb)
 {
 	int pg;
 
+	/*
+	 * Should never happen, the last reference should be dropped from
+	 * perf_mmap_close() path, which first stops aux transactions (which
+	 * in turn are the atomic holders of aux_refcount) and then does the
+	 * last rb_free_aux().
+	 */
+	WARN_ON_ONCE(in_atomic());
+
 	if (rb->aux_priv) {
 		rb->free_aux(rb->aux_priv);
 		rb->free_aux = NULL;
@@ -581,18 +577,7 @@ int rb_alloc_aux(struct ring_buffer *rb, struct perf_event *event,
 void rb_free_aux(struct ring_buffer *rb)
 {
 	if (atomic_dec_and_test(&rb->aux_refcount))
-		irq_work_queue(&rb->irq_work);
-}
-
-static void rb_irq_work(struct irq_work *work)
-{
-	struct ring_buffer *rb = container_of(work, struct ring_buffer, irq_work);
-
-	if (!atomic_read(&rb->aux_refcount))
 		__rb_free_aux(rb);
-
-	if (rb->rcu_head.next == (void *)rb)
-		call_rcu(&rb->rcu_head, rb_free_rcu);
 }
 
 #ifndef CONFIG_PERF_USE_VMALLOC
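
Putting the ring_buffer.c side together: every AUX transaction takes an aux_refcount reference in perf_aux_output_begin() and drops it in perf_aux_output_end(), and after this patch the final reference is always dropped by perf_mmap_close() in sleepable process context, so rb_free_aux() can call __rb_free_aux() directly instead of bouncing through irq_work. A deliberately single-threaded sketch of that reference flow (not part of the commit; it leaves out the cross-CPU stopping that perf_pmu_output_stop() performs first, and all names are invented):

/* refcount_sketch.c -- illustration only, not kernel code */
#include <stdatomic.h>
#include <stdio.h>

static atomic_int aux_refcount = 1;	/* the mmap itself holds one reference */

static void free_aux_pages(void)	/* stands in for __rb_free_aux() */
{
	puts("AUX pages freed");
}

static void aux_put(void)
{
	/* the last reference frees immediately; no irq_work deferral needed */
	if (atomic_fetch_sub(&aux_refcount, 1) == 1)
		free_aux_pages();
}

/* cf. perf_aux_output_begin()/perf_aux_output_end(): one PMU transaction */
static void aux_transaction(void)
{
	atomic_fetch_add(&aux_refcount, 1);
	/* ... PMU writes AUX data here ... */
	aux_put();
}

/* cf. perf_mmap_close(): all writers have already been stopped by now */
static void unmap_path(void)
{
	aux_put();	/* guaranteed to be the last holder */
}

int main(void)
{
	aux_transaction();
	unmap_path();
	return 0;
}

The WARN_ON_ONCE(in_atomic()) added to __rb_free_aux() documents the same expectation from the other end: the final put must never come from atomic (e.g. NMI) context any more.
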