Commit bbd4180c authored by Sandipan Das, committed by yonghong-song

Fix uprobes on powerpc64 (#2032)

* Use correct entry point for uprobes on powerpc64

For powerpc64 (big endian), the address of a function is the
address of the corresponding function descriptor. While the
actual functions reside in the ".text" section, the function
descriptors are present in the ".opd" section. According to
the ABI, each descriptor is a tri-doubleword data structure
where the first doubleword is the actual entry point address.

The symbol table entries do not list actual entry points but
instead provide the location of the function descriptor. So,
when attaching a probe, the location should be changed to the
actual entry point by peeking into the function descriptor.

This has been verified as shown below.

  $ readelf -S /usr/lib64/power8/libc-2.26.so | grep -A1 ".opd"
    [30] .opd              PROGBITS         0000000000213648  00203648
         000000000000bcb8  0000000000000000  WA       0     0     8

The first column shows the index of the ".opd" section.

  $ readelf -s /usr/lib64/power8/libc-2.26.so | grep "inet_pton$"
    3405: 000000000021d168    96 FUNC    LOCAL  DEFAULT   30 __inet_pton
    3990: 000000000021d168    96 FUNC    LOCAL  DEFAULT   30 __GI___inet_pton
    5167: 000000000021d168    96 FUNC    LOCAL  DEFAULT   30 __GI_inet_pton
    6514: 000000000021d168    96 FUNC    WEAK   DEFAULT   30 inet_pton

The seventh column shows the index of the section to which the
symbols belong. This implies that all of these symbols are from
the ".opd" section.

  $ objdump -d --section=.opd /usr/lib64/power8/libc-2.26.so | grep -A5 "inet_pton>:"
  000000000021d168 <inet_pton>:
    21d168:       00 00 00 00     .long 0x0
    21d16c:       00 17 2b 40     .long 0x172b40
    21d170:       00 00 00 00     .long 0x0
    21d174:       00 22 73 00     .long 0x227300

  $ objdump -d /usr/lib64/power8/libc-2.26.so | grep -A5 "inet_pton>:"
  0000000000172b40 <.__inet_pton>:
    172b40:       7c 08 02 a6     mflr    r0
    172b44:       fb c1 ff f0     std     r30,-16(r1)
    172b48:       fb e1 ff f8     std     r31,-8(r1)
    172b4c:       7c 7f 1b 78     mr      r31,r3
    172b50:       7c 83 23 78     mr      r3,r4

The first doubleword in the descriptor of "inet_pton" gives the
actual entry point address i.e. 0x172b40. So, the probe must be
attached here and not 0x21d168.

  $ sudo trace "c:inet_pton" -U
  PID     TID     COMM            FUNC
  40769   40769   ping            inet_pton
          __GI___inet_pton+0x0 [libc-2.26.so]
          gaih_inet.constprop.7+0xf4c [libc-2.26.so]
          __GI_getaddrinfo+0x15c [libc-2.26.so]
          [unknown] [ping]
          generic_start_main.isra.0+0x150 [libc-2.26.so]
          __libc_start_main+0xbc [libc-2.26.so]

  $ ping -6 ::1
  PING ::1(::1) 56 data bytes
  64 bytes from ::1: icmp_seq=1 ttl=64 time=0.271 ms
  64 bytes from ::1: icmp_seq=2 ttl=64 time=0.039 ms
  ^C
  --- ::1 ping statistics ---
  2 packets transmitted, 2 received, 0% packet loss, time 1058ms
  rtt min/avg/max/mdev = 0.039/0.155/0.271/0.116 ms

Previously, the event was not triggered upon running ping.
Signed-off-by: Sandipan Das <sandipan@linux.ibm.com>

* Use correct entry point for uprobes on powerpc64le

For powerpc64le, functions have a Global Entry Point (GEP)
and a Local Entry Point (LEP). When using the GEP, there
are some additional instructions at the beginning of the
function that set up the TOC pointer. However, for all local
calls, the TOC pointer is not required and a function can
be called into directly via the LEP.

While placing a uprobe, we should always prefer the LEP as
the probe location since this will be encountered for any
call through either the GEP or the LEP. Currently, the GEP
is used as the probe location and hence the corresponding
event is never triggered when the function is called via
its LEP.

Information about the LEP can be obtained from the st_other
field of an Elf symbol. While this field typically provides
visibility information, the three most significant bits can
provide additional information about the offset of the Local
Entry Point (LEP) from the Global Entry Point (GEP) for any
symbol in case of powerpc64le.

This has been verified as shown below.

  $ readelf -s /usr/lib64/libc-2.27.so | grep "inet_pton "
  3522: 0000000000164610   104 FUNC    LOCAL  DEFAULT   11 __inet_pton  [<localentry>: 8]
  4188: 0000000000164610   104 FUNC    LOCAL  DEFAULT   11 __GI___inet_pton     [<localentry>: 8]
  5528: 0000000000164610   104 FUNC    LOCAL  DEFAULT   11 __GI_inet_pton       [<localentry>: 8]
  6925: 0000000000164610   104 FUNC    WEAK   DEFAULT   11 inet_pton    [<localentry>: 8]

  $ sudo trace "c:inet_pton" -U
  PID     TID     COMM            FUNC
  25383   25383   ping            inet_pton
          __GI___inet_pton+0x8 [libc-2.27.so]
          gaih_inet.constprop.7+0x1040 [libc-2.27.so]
          getaddrinfo+0x164 [libc-2.27.so]
          [unknown] [ping]
          generic_start_main.isra.0+0x138 [libc-2.27.so]
          __libc_start_main+0xc4 [libc-2.27.so]

  $ ping -6 ::1
  PING ::1(::1) 56 data bytes
  64 bytes from ::1: icmp_seq=1 ttl=64 time=0.140 ms
  64 bytes from ::1: icmp_seq=2 ttl=64 time=0.029 ms
  ^C
  --- ::1 ping statistics ---
  2 packets transmitted, 2 received, 0% packet loss, time 1022ms
  rtt min/avg/max/mdev = 0.029/0.084/0.140/0.056 ms

Previously, the event was not triggered upon running ping.
Signed-off-by: Sandipan Das <sandipan@linux.ibm.com>
parent 64a709da
...@@ -184,11 +184,50 @@ int bcc_elf_foreach_usdt(const char *path, bcc_elf_probecb callback, ...@@ -184,11 +184,50 @@ int bcc_elf_foreach_usdt(const char *path, bcc_elf_probecb callback,
return res; return res;
} }
/*
 * Find the first section of `e` whose name equals `section_name`.
 *
 * Sections are visited in elf_nextscn() order. A running position is kept
 * that is 1 for the first section visited and grows by one for every
 * section visited afterwards, including sections whose header cannot be
 * read.
 *
 * On a match:
 *   - if `section_hdr` is non-NULL, the matching header is copied into it;
 *   - if `section_idx` is non-NULL, the running position is stored in it;
 *   - the matching section descriptor is returned.
 *
 * Returns NULL when the section-name string table index cannot be obtained
 * or when no section has the requested name. The output parameters are left
 * untouched in that case.
 */
static Elf_Scn * get_section(Elf *e, const char *section_name,
                             GElf_Shdr *section_hdr, size_t *section_idx) {
  size_t shstrndx;

  if (elf_getshdrstrndx(e, &shstrndx) != 0)
    return NULL;

  Elf_Scn *scn = NULL;
  size_t pos = 0;

  while ((scn = elf_nextscn(e, scn)) != NULL) {
    GElf_Shdr shdr;
    const char *scn_name;

    /* Count every visited section, even ones skipped below. */
    pos++;

    if (gelf_getshdr(scn, &shdr) == NULL)
      continue;

    scn_name = elf_strptr(e, shstrndx, shdr.sh_name);
    if (scn_name == NULL || strcmp(scn_name, section_name) != 0)
      continue;

    if (section_hdr != NULL)
      *section_hdr = shdr;
    if (section_idx != NULL)
      *section_idx = pos;
    return scn;
  }

  return NULL;
}
static int list_in_scn(Elf *e, Elf_Scn *section, size_t stridx, size_t symsize, static int list_in_scn(Elf *e, Elf_Scn *section, size_t stridx, size_t symsize,
struct bcc_symbol_option *option, struct bcc_symbol_option *option,
bcc_elf_symcb callback, void *payload) { bcc_elf_symcb callback, void *payload) {
Elf_Data *data = NULL; Elf_Data *data = NULL;
#if defined(__powerpc64__) && __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
size_t opdidx = 0;
Elf_Scn *opdsec = NULL;
GElf_Shdr opdshdr = {};
Elf_Data *opddata = NULL;
opdsec = get_section(e, ".opd", &opdshdr, &opdidx);
if (opdsec && opdshdr.sh_type == SHT_PROGBITS)
opddata = elf_getdata(opdsec, NULL);
#endif
while ((data = elf_getdata(section, data)) != 0) { while ((data = elf_getdata(section, data)) != 0) {
size_t i, symcount = data->d_size / symsize; size_t i, symcount = data->d_size / symsize;
...@@ -214,6 +253,40 @@ static int list_in_scn(Elf *e, Elf_Scn *section, size_t stridx, size_t symsize, ...@@ -214,6 +253,40 @@ static int list_in_scn(Elf *e, Elf_Scn *section, size_t stridx, size_t symsize,
if (!(option->use_symbol_type & (1 << st_type))) if (!(option->use_symbol_type & (1 << st_type)))
continue; continue;
#ifdef __powerpc64__
#if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
if (opddata && sym.st_shndx == opdidx) {
size_t offset = sym.st_value - opdshdr.sh_addr;
/* Find the function descriptor */
uint64_t *descr = opddata->d_buf + offset;
/* Read the actual entry point address from the descriptor */
sym.st_value = *descr;
}
#elif __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
if (option->use_symbol_type & (1 << STT_PPC64LE_SYM_LEP)) {
/*
* The PowerPC 64-bit ELF v2 ABI says that the 3 most significant bits
* in the st_other field of the symbol table specifies the number of
* instructions between a function's Global Entry Point (GEP) and Local
* Entry Point (LEP).
*/
switch (sym.st_other >> 5) {
/* GEP and LEP are the same for 0 or 1, usage is reserved for 7 */
/* If 2, LEP is 1 instruction past the GEP */
case 2: sym.st_value += 4; break;
/* If 3, LEP is 2 instructions past the GEP */
case 3: sym.st_value += 8; break;
/* If 4, LEP is 4 instructions past the GEP */
case 4: sym.st_value += 16; break;
/* If 5, LEP is 8 instructions past the GEP */
case 5: sym.st_value += 32; break;
/* If 6, LEP is 16 instructions past the GEP */
case 6: sym.st_value += 64; break;
}
}
#endif
#endif
if (callback(name, sym.st_value, sym.st_size, payload) < 0) if (callback(name, sym.st_value, sym.st_size, payload) < 0)
return 1; // signal termination to caller return 1; // signal termination to caller
} }
...@@ -248,24 +321,9 @@ static int listsymbols(Elf *e, bcc_elf_symcb callback, void *payload, ...@@ -248,24 +321,9 @@ static int listsymbols(Elf *e, bcc_elf_symcb callback, void *payload,
} }
static Elf_Data * get_section_elf_data(Elf *e, const char *section_name) { static Elf_Data * get_section_elf_data(Elf *e, const char *section_name) {
Elf_Scn *section = NULL; Elf_Scn *section = get_section(e, section_name, NULL, NULL);
GElf_Shdr header; if (section)
char *name; return elf_getdata(section, NULL);
size_t stridx;
if (elf_getshdrstrndx(e, &stridx) != 0)
return NULL;
while ((section = elf_nextscn(e, section)) != 0) {
if (!gelf_getshdr(section, &header))
continue;
name = elf_strptr(e, stridx, header.sh_name);
if (name && !strcmp(name, section_name)) {
return elf_getdata(section, NULL);
}
}
return NULL; return NULL;
} }
......
...@@ -500,7 +500,11 @@ int bcc_resolve_symname(const char *module, const char *symname, ...@@ -500,7 +500,11 @@ int bcc_resolve_symname(const char *module, const char *symname,
static struct bcc_symbol_option default_option = { static struct bcc_symbol_option default_option = {
.use_debug_file = 1, .use_debug_file = 1,
.check_debug_file_crc = 1, .check_debug_file_crc = 1,
#if defined(__powerpc64__) && __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
.use_symbol_type = BCC_SYM_ALL_TYPES | (1 << STT_PPC64LE_SYM_LEP),
#else
.use_symbol_type = BCC_SYM_ALL_TYPES, .use_symbol_type = BCC_SYM_ALL_TYPES,
#endif
}; };
if (module == NULL) if (module == NULL)
......
...@@ -34,6 +34,13 @@ typedef int (*SYM_CB)(const char *symname, uint64_t addr); ...@@ -34,6 +34,13 @@ typedef int (*SYM_CB)(const char *symname, uint64_t addr);
#ifndef STT_GNU_IFUNC #ifndef STT_GNU_IFUNC
#define STT_GNU_IFUNC 10 #define STT_GNU_IFUNC 10
#endif #endif
#if defined(__powerpc64__) && __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
// Indicate if the Local Entry Point (LEP) should be used as a symbol's
// start address
#define STT_PPC64LE_SYM_LEP 31
#endif
static const uint32_t BCC_SYM_ALL_TYPES = 65535; static const uint32_t BCC_SYM_ALL_TYPES = 65535;
struct bcc_symbol_option { struct bcc_symbol_option {
int use_debug_file; int use_debug_file;
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment