Commit d1b26c70 authored by Wang Nan, committed by Ingo Molnar

perf/ring_buffer: Prepare writing into the ring-buffer from the end

Convert perf_output_begin() to __perf_output_begin() and make the latter
able to write records from the end of the ring-buffer.

Following commits will utilize the 'backward' flag.

This is the core patch for writing to the ring-buffer backwards, which
upcoming patches will use to support reading from overwritable
ring-buffers.
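
For orientation, the effect of the 'backward' flag boils down to a
direction-dependent space check and head update. Below is a minimal
userspace sketch of that logic (the CIRC_CNT()/CIRC_SPACE() macros are
local copies of the ones in <linux/circ_buf.h>; the demo main() and its
values are illustrative only, not the kernel code itself):

  #include <stdbool.h>
  #include <stdio.h>

  /* Local copies of the CIRC_CNT()/CIRC_SPACE() macros from
   * <linux/circ_buf.h>; 'size' must be a power of two. */
  #define CIRC_CNT(head, tail, size)   (((head) - (tail)) & ((size) - 1))
  #define CIRC_SPACE(head, tail, size) CIRC_CNT((tail), ((head) + 1), (size))

  /* A forward writer needs free space ahead of 'head'; a backward writer
   * moves 'head' the other way, so the two arguments are simply swapped. */
  static bool ring_buffer_has_space(unsigned long head, unsigned long tail,
                                    unsigned long data_size,
                                    unsigned int size, bool backward)
  {
          if (!backward)
                  return CIRC_SPACE(head, tail, data_size) >= size;
          else
                  return CIRC_SPACE(tail, head, data_size) >= size;
  }

  int main(void)
  {
          unsigned long data_size = 4096, head = 128, tail = 0;
          unsigned int size = 64;

          /* After the check, a forward writer does 'head += size';
           * a backward writer does 'head -= size'. */
          printf("forward  has space: %d\n",
                 ring_buffer_has_space(head, tail, data_size, size, false));
          printf("backward has space: %d\n",
                 ring_buffer_has_space(head, tail, data_size, size, true));
          return 0;
  }

The real __perf_output_begin() wraps this in a local_cmpxchg() retry loop
and, in the backward case, negates the resulting head, as the diff below
shows.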

In theory, this patch should not introduce any extra performance
overhead since we use always_inline, but it does not hurt to double
check that assumption:

When CONFIG_OPTIMIZE_INLINING is disabled, the output object is nearly
identical to the original one. See:

   http://lkml.kernel.org/g/56F52E83.70409@huawei.com

When CONFIG_OPTIMIZE_INLINING is enabled, the resulting object file becomes
smaller:

 $ size kernel/events/ring_buffer.o*
   text       data        bss        dec        hex    filename
   4641          4          8       4653       122d kernel/events/ring_buffer.o.old
   4545          4          8       4557       11cd kernel/events/ring_buffer.o.new

Performance testing results:

Call 'close(-1)' 3,000,000 times and measure the duration with
gettimeofday().  Use 'perf record -o /dev/null -e raw_syscalls:*' to
capture the system calls.  Results are in ns.
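
A harness along these lines (a minimal sketch of the procedure described
above; the loop count, close(-1) and gettimeofday() come from the text,
everything else is assumed) could look like:

  #include <stdio.h>
  #include <sys/time.h>
  #include <unistd.h>

  int main(void)
  {
          const long loops = 3000000;
          struct timeval start, end;
          long long ns;

          gettimeofday(&start, NULL);
          for (long i = 0; i < loops; i++)
                  close(-1);      /* always fails, but still enters the kernel */
          gettimeofday(&end, NULL);

          ns = (end.tv_sec - start.tv_sec) * 1000000000LL +
               (end.tv_usec - start.tv_usec) * 1000LL;
          printf("%lld ns\n", ns);
          return 0;
  }

Run it once without tracing ('BASE') and once under the 'perf record'
command above ('PRE' and 'POST').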

Testing environment:

 CPU    : Intel(R) Core(TM) i7-4790 CPU @ 3.60GHz
 Kernel : v4.5.0

                     MEAN         STDVAR
  BASE            800214.950    2853.083
  PRE            2253846.700    9997.014
  POST           2257495.540    8516.293

Here 'BASE' is the result without capturing, 'PRE' is the result on a
pure v4.5.0 kernel, and 'POST' is the result with this patch applied.

Considering the stdvar, this patch doesn't hurt performance: the
difference between PRE and POST is within the noise margin.

For testing details, see:

  http://lkml.kernel.org/g/56F89DCD.1040202@huawei.com

Signed-off-by: Wang Nan <wangnan0@huawei.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: <pi3orama@163.com>
Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Cc: Alexei Starovoitov <ast@kernel.org>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Brendan Gregg <brendan.d.gregg@gmail.com>
Cc: He Kuang <hekuang@huawei.com>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Masami Hiramatsu <masami.hiramatsu.pt@hitachi.com>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Stephane Eranian <eranian@google.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Vince Weaver <vincent.weaver@maine.edu>
Cc: Zefan Li <lizefan@huawei.com>
Link: http://lkml.kernel.org/r/1459147292-239310-4-git-send-email-wangnan0@huawei.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
parent 1879445d
kernel/events/ring_buffer.c:

@@ -102,8 +102,21 @@ static void perf_output_put_handle(struct perf_output_handle *handle)
 	preempt_enable();
 }
 
-int perf_output_begin(struct perf_output_handle *handle,
-		      struct perf_event *event, unsigned int size)
+static bool __always_inline
+ring_buffer_has_space(unsigned long head, unsigned long tail,
+		      unsigned long data_size, unsigned int size,
+		      bool backward)
+{
+	if (!backward)
+		return CIRC_SPACE(head, tail, data_size) >= size;
+	else
+		return CIRC_SPACE(tail, head, data_size) >= size;
+}
+
+static int __always_inline
+__perf_output_begin(struct perf_output_handle *handle,
+		    struct perf_event *event, unsigned int size,
+		    bool backward)
 {
 	struct ring_buffer *rb;
 	unsigned long tail, offset, head;
@@ -146,9 +159,12 @@ int perf_output_begin(struct perf_output_handle *handle,
 	do {
 		tail = READ_ONCE(rb->user_page->data_tail);
 		offset = head = local_read(&rb->head);
-		if (!rb->overwrite &&
-		    unlikely(CIRC_SPACE(head, tail, perf_data_size(rb)) < size))
-			goto fail;
+		if (!rb->overwrite) {
+			if (unlikely(!ring_buffer_has_space(head, tail,
+							    perf_data_size(rb),
+							    size, backward)))
+				goto fail;
+		}
 
 		/*
 		 * The above forms a control dependency barrier separating the
@@ -162,9 +178,17 @@ int perf_output_begin(struct perf_output_handle *handle,
 		 * See perf_output_put_handle().
 		 */
 
-		head += size;
+		if (!backward)
+			head += size;
+		else
+			head -= size;
 	} while (local_cmpxchg(&rb->head, offset, head) != offset);
 
+	if (backward) {
+		offset = head;
+		head = (u64)(-head);
+	}
+
 	/*
 	 * We rely on the implied barrier() by local_cmpxchg() to ensure
 	 * none of the data stores below can be lifted up by the compiler.
@@ -206,6 +230,12 @@ int perf_output_begin(struct perf_output_handle *handle,
 	return -ENOSPC;
 }
 
+int perf_output_begin(struct perf_output_handle *handle,
+		      struct perf_event *event, unsigned int size)
+{
+	return __perf_output_begin(handle, event, size, false);
+}
+
 unsigned int perf_output_copy(struct perf_output_handle *handle,
 			      const void *buf, unsigned int len)
 {