1. 21 Mar, 2018 5 commits
    • Arnaldo Carvalho de Melo's avatar
      perf annotate: Introduce --ignore-vmlinux command line option · be316409
      Arnaldo Carvalho de Melo authored
      This is already present in 'perf top', albeit undocumented (will fix),
      and is useful to use /proc/kcore instead of vmlinux and then get what is
      really in place, not what the kernel starts with, before alternatives,
      ftrace .text patching, etc, see the differences:
      
        # perf annotate --stdio2 _raw_spin_lock_irqsave
        _raw_spin_lock_irqsave() /lib/modules/4.16.0-rc4/build/vmlinux
        Event: anon group { cycles, instructions }
      
          0.00   3.17      → callq  __fentry__
          0.00   7.94        push   %rbx
          7.69  36.51      → callq  __page_file_index
                             mov    %rax,%rbx
          7.69   3.17      → callq  *ffffffff82225cd0
                             xor    %eax,%eax
                             mov    $0x1,%edx
         80.77  49.21        lock   cmpxchg %edx,(%rdi)
                             test   %eax,%eax
                           ↓ jne    2b
          3.85   0.00        mov    %rbx,%rax
                             pop    %rbx
                           ← retq
                       2b:   mov    %eax,%esi
                           → callq  queued_spin_lock_slowpath
                             mov    %rbx,%rax
                             pop    %rbx
                           ← retq
        [root@jouet ~]# perf annotate --ignore-vmlinux --stdio2 _raw_spin_lock_irqsave
        _raw_spin_lock_irqsave() /proc/kcore
        Event: anon group { cycles, instructions }
      
          0.00   3.17        nop
          0.00   7.94        push   %rbx
          0.00  23.81        pushfq
          7.69  12.70        pop    %rax
                             nop
                             mov    %rax,%rbx
          7.69   3.17        cli
                             nop
                             xor    %eax,%eax
                             mov    $0x1,%edx
         80.77  49.21        lock   cmpxchg %edx,(%rdi)
                             test   %eax,%eax
                           ↓ jne    2b
          3.85   0.00        mov    %rbx,%rax
                             pop    %rbx
                           ← retq
                       2b:   mov    %eax,%esi
                           → callq  *ffffffff820e96b0
                             mov    %rbx,%rax
                             pop    %rbx
                           ← retq
        #
      
      Diff of the output of those commands:
      
        # perf annotate --stdio2 _raw_spin_lock_irqsave > /tmp/vmlinux
        # perf annotate --ignore-vmlinux --stdio2 _raw_spin_lock_irqsave > /tmp/kcore
        # diff -y /tmp/vmlinux /tmp/kcore
        _raw_spin_lock_irqsave() vmlinux             | _raw_spin_lock_irqsave() /proc/kcore
        Event: anon group { cycles, instructions }     Event: anon group { cycles, instructions }
      
         0.00  3.17  → callq __fentry__              |  0.00  3.17     nop
         0.00  7.94    push  %rbx                       0.00  7.94     push  %rbx
         7.69 36.51  → callq __page_file_index       |  0.00 23.81     pushfq
                                                     >  7.69 12.70     pop   %rax
                                                     >                 nop
                       mov   %rax,%rbx                                 mov   %rax,%rbx
         7.69  3.17  → callq *ffffffff82225cd0       |  7.69  3.17     cli
                                                     >                 nop
                       xor   %eax,%eax                                 xor   %eax,%eax
                       mov   $0x1,%edx                                 mov   $0x1,%edx
        80.77 49.21    lock  cmpxchg %edx,(%rdi)       80.77 49.21     lock  cmpxchg %edx,(%rdi)
                       test  %eax,%eax                                 test  %eax,%eax
                     ↓ jne   2b                                      ↓ jne   2b
         3.85  0.00    mov   %rbx,%rax                  3.85  0.00     mov   %rbx,%rax
                       pop   %rbx                                      pop   %rbx
                     ← retq                                          ← retq
                  2b:  mov   %eax,%esi                            2b:  mov   %eax,%esi
                     → callq queued_spin_lock_slowpath|              → callq *ffffffff820e96b0
                       mov   %rbx,%rax                                 mov   %rbx,%rax
                       pop   %rbx                                      pop   %rbx
                     ← retq                                          ← retq
        #
      
      This should be further streamlined by doing both annotations and
      allowing the TUI to toggle initial/current, and show the patched
      instructions in a slightly different color.
      
      Cc: Adrian Hunter <adrian.hunter@intel.com>
      Cc: Andi Kleen <ak@linux.intel.com>
      Cc: David Ahern <dsahern@gmail.com>
      Cc: Jin Yao <yao.jin@linux.intel.com>
      Cc: Jiri Olsa <jolsa@kernel.org>
      Cc: Namhyung Kim <namhyung@kernel.org>
      Cc: Wang Nan <wangnan0@huawei.com>
      Link: https://lkml.kernel.org/n/tip-wz8d269hxkcwaczr0r4rhyjg@git.kernel.orgSigned-off-by: default avatarArnaldo Carvalho de Melo <acme@redhat.com>
      be316409
    • Arnaldo Carvalho de Melo's avatar
      perf annotate: Add function header to --stdio2 · 864298f2
      Arnaldo Carvalho de Melo authored
        # perf annotate --stdio2 _raw_spin_lock_irqsave
        _raw_spin_lock_irqsave() /lib/modules/4.16.0-rc4/build/vmlinux
        Event: anon group { cycles, instructions }
      
          0.00   3.17      → callq  __fentry__
          0.00   7.94        push   %rbx
          7.69  36.51      → callq  __page_file_index
                             mov    %rax,%rbx
          7.69   3.17      → callq  *ffffffff82225cd0
                             xor    %eax,%eax
                             mov    $0x1,%edx
         80.77  49.21        lock   cmpxchg %edx,(%rdi)
                             test   %eax,%eax
                           ↓ jne    2b
          3.85   0.00        mov    %rbx,%rax
                             pop    %rbx
                           ← retq
                       2b:   mov    %eax,%esi
                           → callq  queued_spin_lock_slowpath
                             mov    %rbx,%rax
                             pop    %rbx
                           ← retq
        #
      
      Cc: Adrian Hunter <adrian.hunter@intel.com>
      Cc: Andi Kleen <ak@linux.intel.com>
      Cc: David Ahern <dsahern@gmail.com>
      Cc: Jin Yao <yao.jin@linux.intel.com>
      Cc: Jiri Olsa <jolsa@kernel.org>
      Cc: Namhyung Kim <namhyung@kernel.org>
      Cc: Wang Nan <wangnan0@huawei.com>
      Link: https://lkml.kernel.org/n/tip-i86yfyzl8m194ioxgj1jo32f@git.kernel.orgSigned-off-by: default avatarArnaldo Carvalho de Melo <acme@redhat.com>
      864298f2
    • Arnaldo Carvalho de Melo's avatar
      perf annotate: Use the default annotation options for --stdio2 · 35632892
      Arnaldo Carvalho de Melo authored
      With an empty '[annotate]' section in ~/.perfconfig:
      
        # perf record -a --all-kernel -e '{cycles,instructions}:P' sleep 5
        [ perf record: Woken up 1 times to write data ]
        [ perf record: Captured and wrote 2.243 MB perf.data (5513 samples) ]
        # perf annotate --stdio2 _raw_spin_lock | head -20
      
                           Disassembly of section .text:
      
                           ffffffff81868790 <_raw_spin_lock>:
                           _raw_spin_lock():
                           EXPORT_SYMBOL(_raw_spin_trylock_bh);
                           #endif
      
                           #ifndef CONFIG_INLINE_SPIN_LOCK
                           void __lockfunc _raw_spin_lock(raw_spinlock_t *lock)
                           {
                           → callq  __fentry__
                           atomic_cmpxchg():
                                   return xadd(&v->counter, -i);
                           }
      
                           static __always_inline int atomic_cmpxchg(atomic_t *v, int old, int new)
                           {
        # perf annotate --stdio2 _raw_spin_lock | head -20
                           → callq  __fentry__
                             xor    %eax,%eax
                             mov    $0x1,%edx
         87.50 100.00        lock   cmpxchg %edx,(%rdi)
          6.25   0.00        test   %eax,%eax
                           ↓ jne    16
          6.25   0.00        repz   retq
                       16:   mov    %eax,%esi
                           ↑ jmpq   ffffffff810e96b0 <queued_spin_lock_slowpath>
        #
        # cat ~/.perfconfig
        [annotate]
      
          hide_src_code = false
          show_linenr = true
        # perf annotate --stdio2 _raw_spin_lock | head -20
      
                       3   Disassembly of section .text:
      
                       5   ffffffff81868790 <_raw_spin_lock>:
                       6   _raw_spin_lock():
                       143 EXPORT_SYMBOL(_raw_spin_trylock_bh);
                       144 #endif
      
                       146 #ifndef CONFIG_INLINE_SPIN_LOCK
                       147 void __lockfunc _raw_spin_lock(raw_spinlock_t *lock)
                       148 {
                           → callq  __fentry__
                       150 atomic_cmpxchg():
                       187         return xadd(&v->counter, -i);
                       188 }
      
                       190 static __always_inline int atomic_cmpxchg(atomic_t *v, int old, int new)
                       191 {
        #
        # cat ~/.perfconfig
        [annotate]
      
          hide_src_code = true
          show_total_period = true
        # perf annotate --stdio2 _raw_spin_lock | head -20
                                     → callq  __fentry__
                                       xor    %eax,%eax
                                       mov    $0x1,%edx
            1411316      152339        lock   cmpxchg %edx,(%rdi)
             344694           0        test   %eax,%eax
                                     ↓ jne    16
              80806           0        repz   retq
                                 16:   mov    %eax,%esi
                                     ↑ jmpq   ffffffff810e96b0 <queued_spin_lock_slowpath>
        #
      
      Cc: Adrian Hunter <adrian.hunter@intel.com>
      Cc: Andi Kleen <ak@linux.intel.com>
      Cc: David Ahern <dsahern@gmail.com>
      Cc: Jin Yao <yao.jin@linux.intel.com>
      Cc: Jiri Olsa <jolsa@kernel.org>
      Cc: Namhyung Kim <namhyung@kernel.org>
      Cc: Wang Nan <wangnan0@huawei.com>
      Link: https://lkml.kernel.org/n/tip-nu4rxg5zkdtgs1b2gc40p7v7@git.kernel.orgSigned-off-by: default avatarArnaldo Carvalho de Melo <acme@redhat.com>
      35632892
    • Arnaldo Carvalho de Melo's avatar
      perf annotate: Move the default annotate options to the library · 7f0b6fde
      Arnaldo Carvalho de Melo authored
      One more thing that goes from the TUI code to be used more widely,
      for instance it'll affect the default options used by:
      
        perf annotate --stdio2
      
      Cc: Adrian Hunter <adrian.hunter@intel.com>
      Cc: Andi Kleen <ak@linux.intel.com>
      Cc: David Ahern <dsahern@gmail.com>
      Cc: Jin Yao <yao.jin@linux.intel.com>
      Cc: Jiri Olsa <jolsa@kernel.org>
      Cc: Namhyung Kim <namhyung@kernel.org>
      Cc: Wang Nan <wangnan0@huawei.com>
      Link: https://lkml.kernel.org/n/tip-0nsz0dm0akdbo30vgja2a10e@git.kernel.orgSigned-off-by: default avatarArnaldo Carvalho de Melo <acme@redhat.com>
      7f0b6fde
    • Arnaldo Carvalho de Melo's avatar
      perf annotate: Introduce the --stdio2 output mode · befd2a38
      Arnaldo Carvalho de Melo authored
      This uses the TUI augmented formatting routines, modulo interactivity.
      
        # perf annotate --ignore-vmlinux --stdio2 _raw_spin_lock_irqsave
        _raw_spin_lock_irqsave() /proc/kcore
        Event: cycles:ppp
      
        Percent
      
                    Disassembly of section load0:
      
                    ffffffff9a8734b0 <load0>:
                      nop
                      push   %rbx
         50.00        pushfq
                      pop    %rax
                      nop
                      mov    %rax,%rbx
                      cli
                      nop
                      xor    %eax,%eax
                      mov    $0x1,%edx
         50.00        lock   cmpxchg %edx,(%rdi)
                      test   %eax,%eax
                    ↓ jne    2b
                      mov    %rbx,%rax
                      pop    %rbx
                    ← retq
                2b:   mov    %eax,%esi
                    → callq  queued_spin_lock_slowpath
                      mov    %rbx,%rax
                      pop    %rbx
                    ← retq
      Tested-by: default avatarJin Yao <yao.jin@linux.intel.com>
      Cc: Adrian Hunter <adrian.hunter@intel.com>
      Cc: Andi Kleen <ak@linux.intel.com>
      Cc: David Ahern <dsahern@gmail.com>
      Cc: Jiri Olsa <jolsa@kernel.org>
      Cc: Namhyung Kim <namhyung@kernel.org>
      Cc: Wang Nan <wangnan0@huawei.com>
      Link: https://lkml.kernel.org/n/tip-6cte5o8z84mbivbvqlg14uh1@git.kernel.orgSigned-off-by: default avatarArnaldo Carvalho de Melo <acme@redhat.com>
      befd2a38
  2. 20 Mar, 2018 29 commits
  3. 19 Mar, 2018 6 commits
    • Ingo Molnar's avatar
      Merge tag 'perf-core-for-mingo-4.17-20180319' of... · ecd380b8
      Ingo Molnar authored
      Merge tag 'perf-core-for-mingo-4.17-20180319' of git://git.kernel.org/pub/scm/linux/kernel/git/acme/linux into perf/core
      
      Pull perf/core improvements and fixes from Arnaldo Carvalho de Melo:
      
      - Fixes for problems experienced with new GCC 8 warnings, that treated
        as errors, broke the build, related to snprintf and casting issues.
        (Arnaldo Carvalho de Melo, Jiri Olsa, Josh Poinboeuf)
      
      - Fix build of new breakpoint 'perf test' entry with clang < 6, noticed
        on fedora 25, 26 and 27 (Arnaldo Carvalho de Melo)
      
      - Workaround problem with symbol resolution in 'perf annotate', using
        the symbol name already present in the objdump output (Arnaldo Carvalho de Melo)
      
      - Document 'perf top --ignore-vmlinux' (Arnaldo Carvalho de Melo)
      
      - Fix out of bounds access on array fd when cnt is 100 in one of the
        'perf test' entries, detected using 'cpptest' (Colin Ian King)
      
      - Add support for the forced leader feature, i.e. 'perf report --group'
        for a group of events not really grouped when scheduled (without using
        {} to enclose the list of events in the command line) in pipe mode,
        e.g.:
      
          $ perf record -e cycles,instructions -o - kill | perf report --group -i -
      
      - Use right type to access array elements in 'perf probe' (Masami Hiramatsu)
      
      - Update POWER9 vendor events (those described in JSON format) (Sukadev Bhattiprolu)
      
      - Discard head in overwrite_rb_find_range() (Yisheng Xie)
      
      - Avoid setting 'quiet' to 'true' unnecessarily (Yisheng Xie)
      Signed-off-by: default avatarArnaldo Carvalho de Melo <acme@redhat.com>
      Signed-off-by: default avatarIngo Molnar <mingo@kernel.org>
      ecd380b8
    • Ingo Molnar's avatar
      134933e5
    • Arnaldo Carvalho de Melo's avatar
      perf tests bp_account: Fix build with clang-6 · 1cd61883
      Arnaldo Carvalho de Melo authored
      To shut up this compiler warning:
      
          CC       /tmp/build/perf/tests/bp_account.o
          CC       /tmp/build/perf/tests/task-exit.o
          CC       /tmp/build/perf/tests/sw-clock.o
        tests/bp_account.c:106:20: error: pointer type mismatch ('int (*)(void)' and 'void *') [-Werror,-Wpointer-type-mismatch]
                void *addr = is_x ? test_function : (void *) &the_var;
                                  ^ ~~~~~~~~~~~~~   ~~~~~~~~~~~~~~~~~
        1 error generated.
      
      Noticed with clang 6 on fedora rawhide.
      
        [perfbuilder@44490f0e7241 perf]$ clang -v
        clang version 6.0.0 (tags/RELEASE_600/final)
        Target: x86_64-unknown-linux-gnu
        Thread model: posix
        InstalledDir: /usr/bin
        Found candidate GCC installation: /usr/bin/../lib/gcc/x86_64-redhat-linux/8
        Found candidate GCC installation: /usr/lib/gcc/x86_64-redhat-linux/8
        Selected GCC installation: /usr/bin/../lib/gcc/x86_64-redhat-linux/8
        Candidate multilib: .;@m64
        Candidate multilib: 32;@m32
        Selected multilib: .;@m64
        [perfbuilder@44490f0e7241 perf]$
      
      Cc: Adrian Hunter <adrian.hunter@intel.com>
      Cc: David Ahern <dsahern@gmail.com>
      Cc: Jiri Olsa <jolsa@kernel.org>
      Cc: Namhyung Kim <namhyung@kernel.org>
      Cc: Wang Nan <wangnan0@huawei.com>
      Fixes: 032db28e ("perf tests: Add breakpoint accounting/modify test")
      Link: https://lkml.kernel.org/n/tip-a3jnkzh4xam0l954de5tn66d@git.kernel.orgSigned-off-by: default avatarArnaldo Carvalho de Melo <acme@redhat.com>
      1cd61883
    • Josh Poimboeuf's avatar
      objtool, perf: Fix GCC 8 -Wrestrict error · 854e55ad
      Josh Poimboeuf authored
      Starting with recent GCC 8 builds, objtool and perf fail to build with
      the following error:
      
        ../str_error_r.c: In function ‘str_error_r’:
        ../str_error_r.c:25:3: error: passing argument 1 to restrict-qualified parameter aliases with argument 5 [-Werror=restrict]
           snprintf(buf, buflen, "INTERNAL ERROR: strerror_r(%d, %p, %zd)=%d", errnum, buf, buflen, err);
      
      The code seems harmless, but there's probably no benefit in printing the
      'buf' pointer in this situation anyway, so just remove it to make GCC
      happy.
      Reported-by: default avatarLaura Abbott <labbott@redhat.com>
      Signed-off-by: default avatarJosh Poimboeuf <jpoimboe@redhat.com>
      Tested-by: default avatarLaura Abbott <labbott@redhat.com>
      Cc: Adrian Hunter <adrian.hunter@intel.com>
      Cc: Jiri Olsa <jolsa@kernel.org>
      Cc: Namhyung Kim <namhyung@kernel.org>
      Cc: Wang Nan <wangnan0@huawei.com>
      Link: http://lkml.kernel.org/r/20180316031154.juk2uncs7baffctp@trebleSigned-off-by: default avatarArnaldo Carvalho de Melo <acme@redhat.com>
      854e55ad
    • Masami Hiramatsu's avatar
      perf probe: Use right type to access array elements · d0461794
      Masami Hiramatsu authored
      Current 'perf probe' converts the type of array-elements incorrectly. It
      always converts the types as a pointer of array. This passes the "array"
      type DIE to the type converter so that it can get correct "element of
      array" type DIE from it.
      
      E.g.
        ====
        $ cat hello.c
        #include <stdio.h>
      
        void foo(int a[])
        {
      	  printf("%d\n", a[1]);
        }
      
        void main()
        {
      	  int a[3] = {4, 5, 6};
      	  printf("%d\n", a[0]);
      	  foo(a);
        }
      
        $ gcc -g hello.c -o hello
        $ perf probe -x ./hello -D "foo a[1]"
        ====
      
      Without this fix, above outputs
        ====
        p:probe_hello/foo /tmp/hello:0x4d3 a=+4(-8(%bp)):u64
        ====
      The "u64" means "int *", but a[1] is "int".
      
      With this,
        ====
        p:probe_hello/foo /tmp/hello:0x4d3 a=+4(-8(%bp)):s32
        ====
      So, "int" correctly converted to "s32"
      Signed-off-by: default avatarMasami Hiramatsu <mhiramat@kernel.org>
      Tested-by: default avatarArnaldo Carvalho de Melo <acme@redhat.com>
      Cc: Namhyung Kim <namhyung@kernel.org>
      Cc: Ravi Bangoria <ravi.bangoria@linux.vnet.ibm.com>
      Cc: Shuah Khan <shuah@kernel.org>
      Cc: Steven Rostedt <rostedt@goodmis.org>
      Cc: Tom Zanussi <tom.zanussi@linux.intel.com>
      Cc: linux-kselftest@vger.kernel.org
      Cc: linux-trace-users@vger.kernel.org
      Fixes: b2a3c12b ("perf probe: Support tracing an entry of array")
      Link: http://lkml.kernel.org/r/152129114502.31874.2474068470011496356.stgit@devboxSigned-off-by: default avatarArnaldo Carvalho de Melo <acme@redhat.com>
      d0461794
    • Arnaldo Carvalho de Melo's avatar
      perf annotate: Use ops->target.name when available for unresolved call targets · 4c9cb2c2
      Arnaldo Carvalho de Melo authored
      There is a bug where when using 'perf annotate timerqueue_add' the
      target for its only routine called with the 'callq' instruction,
      'rb_insert_color', doesn't get resolved from its address when parsing
      that 'callq' instruction.
      
      That symbol resolution works when using 'perf report --tui' and then
      doing annotation for 'timerqueue_add' from there, the vmlinux
      dso->symbols rb_tree somehow gets in a state that we can't find that
      address, that is a bug that has to be further investigated.
      
      But since the objdump output has the function name, i.e. the raw objdump
      disassembled line looks like:
      
      So, before:
      
        # perf annotate timerqueue_add
      
                    │      mov    %rbx,%rdi
                    │      mov    %rbx,(%rdx)
                    │    → callq  *ffffffff8184dc80
                    │      mov    0x8(%rbp),%rdx
                    │      test   %rdx,%rdx
                    │    ↓ je     67
      
        # perf report
      
                    │      mov    %rbx,%rdi
                    │      mov    %rbx,(%rdx)
                    │    → callq  rb_insert_color
                    │      mov    0x8(%rbp),%rdx
                    │      test   %rdx,%rdx
                    │    ↓ je     67
      
      And after both look the same:
      
        # perf annotate timerqueue_add
      
                    │      mov    %rbx,%rdi
                    │      mov    %rbx,(%rdx)
                    │    → callq  rb_insert_color
                    │      mov    0x8(%rbp),%rdx
                    │      test   %rdx,%rdx
                    │    ↓ je     67
      
      From 'perf report' one can annotate and navigate to that 'rb_insert_color'
      function, but not directly from 'perf annotate timerqueue_add', that
      remains to be investigated and fixed.
      
      Cc: Adrian Hunter <adrian.hunter@intel.com>
      Cc: Andi Kleen <ak@linux.intel.com>
      Cc: David Ahern <dsahern@gmail.com>
      Cc: Jin Yao <yao.jin@linux.intel.com>
      Cc: Jiri Olsa <jolsa@kernel.org>
      Cc: Namhyung Kim <namhyung@kernel.org>
      Cc: Wang Nan <wangnan0@huawei.com>
      Link: https://lkml.kernel.org/n/tip-nkktz6355rhqtq7o8atr8f8r@git.kernel.orgSigned-off-by: default avatarArnaldo Carvalho de Melo <acme@redhat.com>
      4c9cb2c2