Commit 98535685 authored by Vicent Marti

python: Use the native Symbol resolution APIs

The native resolvers are used for the Kernel Symbol resolution in the
`BPF` class, and for the ProcessSymbols class. A lot of redundant code
has been removed.
parent 26cc64bd
...@@ -25,7 +25,7 @@ import struct ...@@ -25,7 +25,7 @@ import struct
import sys import sys
basestring = (unicode if sys.version_info[0] < 3 else str) basestring = (unicode if sys.version_info[0] < 3 else str)
from .libbcc import lib, _CB_TYPE from .libbcc import lib, _CB_TYPE, bcc_symbol
from .procstat import ProcStat, ProcUtils from .procstat import ProcStat, ProcUtils
from .table import Table from .table import Table
from .tracepoint import Perf, Tracepoint from .tracepoint import Perf, Tracepoint
...@@ -35,10 +35,6 @@ open_kprobes = {} ...@@ -35,10 +35,6 @@ open_kprobes = {}
open_uprobes = {} open_uprobes = {}
tracefile = None tracefile = None
TRACEFS = "/sys/kernel/debug/tracing" TRACEFS = "/sys/kernel/debug/tracing"
KALLSYMS = "/proc/kallsyms"
ksyms = []
ksym_names = {}
ksym_loaded = 0
_kprobe_limit = 1000 _kprobe_limit = 1000
DEBUG_LLVM_IR = 0x1 DEBUG_LLVM_IR = 0x1
...@@ -67,6 +63,22 @@ def _check_probe_quota(num_new_probes): ...@@ -67,6 +63,22 @@ def _check_probe_quota(num_new_probes):
if len(open_kprobes) + len(open_uprobes) + num_new_probes > _kprobe_limit: if len(open_kprobes) + len(open_uprobes) + num_new_probes > _kprobe_limit:
raise Exception("Number of open probes would exceed quota") raise Exception("Number of open probes would exceed quota")
class KernelSymbolCache(object):
    """Thin wrapper around a native bcc symbol-cache handle.

    The handle is created with ``lib.bcc_symcache_new(-1)``.
    NOTE(review): the ``-1`` presumably selects kernel symbols rather than
    a process -- confirm against libbcc.
    """

    def __init__(self):
        # Opaque handle handed back to every native call below.
        self.cache = lib.bcc_symcache_new(-1)

    def resolve(self, addr):
        """Translate ``addr`` into a ``(name, offset)`` pair.

        Returns ``("[unknown]", 0)`` when the native lookup reports a
        negative status.
        """
        symbol = bcc_symbol()
        status = lib.bcc_symcache_resolve(self.cache, addr, ct.pointer(symbol))
        if status >= 0:
            return symbol.name, symbol.offset
        return "[unknown]", 0

    def resolve_name(self, name):
        """Return the address found for ``name``, or ``-1`` on failure."""
        address = ct.c_ulonglong()
        status = lib.bcc_symcache_resolve_name(
            self.cache, name, ct.pointer(address))
        return address.value if status >= 0 else -1
class BPF(object): class BPF(object):
SOCKET_FILTER = 1 SOCKET_FILTER = 1
...@@ -75,9 +87,7 @@ class BPF(object): ...@@ -75,9 +87,7 @@ class BPF(object):
SCHED_ACT = 4 SCHED_ACT = 4
_probe_repl = re.compile("[^a-zA-Z0-9_]") _probe_repl = re.compile("[^a-zA-Z0-9_]")
_libsearch_cache = {} _ksym_cache = KernelSymbolCache()
_lib_load_address_cache = {}
_lib_symbol_cache = {}
_auto_includes = { _auto_includes = {
"linux/time.h" : ["time"], "linux/time.h" : ["time"],
...@@ -413,84 +423,14 @@ class BPF(object): ...@@ -413,84 +423,14 @@ class BPF(object):
del open_kprobes[ev_name] del open_kprobes[ev_name]
@classmethod @classmethod
def find_library(cls, name): def _check_path_symbol(cls, module, symname, addr):
if name in cls._libsearch_cache: sym = bcc_symbol()
return cls._libsearch_cache[name] psym = ct.pointer(sym)
if lib.bcc_resolve_symname(module, symname, addr or 0x0, psym) < 0:
if struct.calcsize("l") == 4: if not sym.module:
machine = os.uname()[4] + "-32" raise Exception("could not find library %s" % module)
else: raise Exception("could not determine address of symbol %s" % symname)
machine = os.uname()[4] + "-64" return sym.module, sym.offset
mach_map = {
"x86_64-64": "libc6,x86-64",
"ppc64-64": "libc6,64bit",
"sparc64-64": "libc6,64bit",
"s390x-64": "libc6,64bit",
"ia64-64": "libc6,IA-64",
}
abi_type = mach_map.get(machine, "libc6")
expr = r"\s+lib%s\.[^\s]+\s+\(%s[^)]*[^/]+([^\s]+)" % (name, abi_type)
with os.popen("/sbin/ldconfig -p 2>/dev/null") as f:
data = f.read()
res = re.search(expr, data)
if not res:
return None
path = res.group(1)
cls._libsearch_cache[name] = path
return path
@classmethod
def find_load_address(cls, path):
    """Return the load address of the binary at ``path``.

    Results are memoised in ``cls._lib_load_address_cache``. On a fresh
    lookup the ``cls._lib_symbol_cache`` entry for ``path`` is also reset
    to an empty dict. Returns ``None`` when the objdump/awk pipeline
    prints nothing.
    """
    try:
        return cls._lib_load_address_cache[path]
    except KeyError:
        pass

    # The pipeline prints the fifth column (the vaddr value) of objdump's
    # LOAD header whose offset column consists only of '0'/'x', e.g.
    # "LOAD off 0x0000000000000000 vaddr 0x0000000000400000 paddr 0x..."
    command = (
        "/usr/bin/objdump -x %s | "
        "awk '$1 == \"LOAD\" && $3 ~ /^[0x]*$/ "
        "{ print $5 }'"
    ) % path
    with os.popen(command) as pipe:
        output = pipe.read().rstrip()
    if not output:
        return None

    load_addr = int(output, 16)
    cls._lib_load_address_cache[path] = load_addr
    cls._lib_symbol_cache[path] = {}
    return load_addr
@classmethod
def find_symbol(cls, path, sym):
    """Return the address objdump reports for ``sym`` in the file ``path``.

    Found addresses are memoised per path in ``cls._lib_symbol_cache``.
    Returns ``None`` when the objdump/awk pipeline prints nothing.
    """
    # find_load_address() normally seeds this entry; fall back to a fresh
    # dict so a direct call (or a failed load-address lookup) cannot raise
    # KeyError here.
    symbols = cls._lib_symbol_cache.setdefault(path, {})
    if sym in symbols:
        return symbols[sym]

    # Prints the first column of the first objdump line whose last field is
    # the symbol name and whose fourth field is one of the listed sections.
    command = (
        "/usr/bin/objdump -tT %s | "
        "awk -v sym=%s '$NF == sym && ($4 == \".text\" "
        "|| $4 == \"text.hot\" || $4 == \"text.unlikely\") "
        "{ print $1; exit }'"
    ) % (path, sym)
    with os.popen(command) as f:
        data = f.read().rstrip()
    if not data:
        return None

    addr = int(data, 16)
    symbols[sym] = addr
    return addr
@classmethod
def _check_path_symbol(cls, name, sym, addr):
    """Resolve a library/symbol pair to ``(real_path, offset)``.

    ``name`` is used as-is when it starts with "/", otherwise it is looked
    up through ``BPF.find_library``. When ``addr`` is falsy and ``sym`` is
    given, the address comes from ``BPF.find_symbol``. The returned offset
    is that address minus the value from ``BPF.find_load_address``.

    Raises ``Exception`` when the library or the address cannot be found.
    """
    located = name if name.startswith("/") else BPF.find_library(name)
    if not located:
        raise Exception("could not find library %s" % name)

    path = os.path.realpath(located)
    load_addr = BPF.find_load_address(path)

    if sym and not addr:
        addr = BPF.find_symbol(path, sym)
    if not addr:
        raise Exception("could not determine address of symbol %s" % sym)

    return (path, addr - load_addr)
def attach_uprobe(self, name="", sym="", addr=None, def attach_uprobe(self, name="", sym="", addr=None,
fn_name="", pid=-1, cpu=0, group_fd=-1): fn_name="", pid=-1, cpu=0, group_fd=-1):
...@@ -678,52 +618,15 @@ class BPF(object): ...@@ -678,52 +618,15 @@ class BPF(object):
except KeyboardInterrupt: except KeyboardInterrupt:
exit() exit()
@staticmethod
def _load_kallsyms():
    """Populate the module-level ``ksyms`` / ``ksym_names`` tables once.

    Each remaining line of ``KALLSYMS`` is split on whitespace; column 0 is
    parsed as a hexadecimal address and column 2 is taken as the name.
    Subsequent calls return immediately once ``ksym_loaded`` is set.

    Raises ``Exception`` when ``KALLSYMS`` cannot be opened.
    """
    global ksym_loaded, ksyms, ksym_names
    if ksym_loaded:
        return
    try:
        syms = open(KALLSYMS, "r")
    except (IOError, OSError):
        # Only I/O failures are translated; a bare except would also trap
        # KeyboardInterrupt and SystemExit.
        raise Exception("Could not read %s" % KALLSYMS)
    # The with-block closes the file even if a malformed line raises.
    with syms:
        # The first line is read and discarded, as before.
        # NOTE(review): confirm that skipping it is intentional.
        syms.readline()
        for line in syms:
            cols = line.split()
            name = cols[2]
            addr = int(cols[0], 16)
            # keep a mapping of names to ksyms index
            ksym_names[name] = len(ksyms)
            ksyms.append((name, addr))
    ksym_loaded = 1
@staticmethod
def _ksym_addr2index(addr):
    """Binary-search ``ksyms`` for the last entry whose address <= ``addr``.

    Returns that entry's index, or ``-1`` when ``addr`` is below the first
    entry or the table is empty. Relies on ``ksyms`` being ordered by
    address.
    """
    lo, hi = -1, len(ksyms)
    # Invariant: the answer lies in [lo, hi); stop once they are adjacent.
    while hi - lo != 1:
        mid = (lo + hi) // 2
        if ksyms[mid][1] > addr:
            hi = mid
        else:
            lo = mid
    return lo
@staticmethod @staticmethod
def ksym(addr): def ksym(addr):
"""ksym(addr) """ksym(addr)
Translate a kernel memory address into a kernel function name, which is Translate a kernel memory address into a kernel function name, which is
returned. This is a simple translator that uses /proc/kallsyms. returned.
""" """
global ksyms name, _ = BPF._ksym_cache.resolve(addr)
BPF._load_kallsyms() return name
idx = BPF._ksym_addr2index(addr)
if idx == -1:
return "[unknown]"
return ksyms[idx][0]
@staticmethod @staticmethod
def ksymaddr(addr): def ksymaddr(addr):
...@@ -731,15 +634,10 @@ class BPF(object): ...@@ -731,15 +634,10 @@ class BPF(object):
Translate a kernel memory address into a kernel function name plus the Translate a kernel memory address into a kernel function name plus the
instruction offset as a hexadecimal number, which is returned as a instruction offset as a hexadecimal number, which is returned as a
string. This is a simple translator that uses /proc/kallsyms. string.
""" """
global ksyms name, offset = BPF._ksym_cache.resolve(addr)
BPF._load_kallsyms() return "%s+0x%x" % (name, offset)
idx = BPF._ksym_addr2index(addr)
if idx == -1:
return "[unknown]"
offset = int(addr - ksyms[idx][1])
return "%s+0x%x" % (ksyms[idx][0], offset)
@staticmethod @staticmethod
def ksymname(name): def ksymname(name):
...@@ -747,35 +645,7 @@ class BPF(object): ...@@ -747,35 +645,7 @@ class BPF(object):
Translate a kernel name into an address. This is the reverse of Translate a kernel name into an address. This is the reverse of
ksymaddr. Returns -1 when the function name is unknown.""" ksymaddr. Returns -1 when the function name is unknown."""
return BPF._ksym_cache.resolve_name(name)
global ksyms, ksym_names
BPF._load_kallsyms()
idx = ksym_names.get(name, -1)
if idx == -1:
return 0
return ksyms[idx][1]
@classmethod
def usymaddr(cls, pid, addr, refresh_symbols=False):
    """usymaddr(pid, addr, refresh_symbols=False)

    Decode an address inside process ``pid`` by delegating to a
    ProcessSymbols instance, which is created on first use and then kept
    in ``cls._process_symbols`` keyed by pid. See the ProcessSymbols class
    for the format of the result.

    Pass refresh_symbols=True to call ``refresh_code_ranges()`` on an
    already-cached instance before decoding, e.g. when the set of loaded
    modules or their load addresses may have changed since the previous
    call for this pid.
    """
    try:
        resolver = cls._process_symbols[pid]
    except KeyError:
        # First request for this pid: build and remember a resolver.
        resolver = ProcessSymbols(pid)
        cls._process_symbols[pid] = resolver
    else:
        if refresh_symbols:
            resolver.refresh_code_ranges()
    return resolver.decode_addr(addr)
@staticmethod @staticmethod
def num_open_kprobes(): def num_open_kprobes():
......
...@@ -11,8 +11,8 @@ ...@@ -11,8 +11,8 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and # See the License for the specific language governing permissions and
# limitations under the License. # limitations under the License.
import ctypes as ct
from subprocess import Popen, PIPE, STDOUT from .libbcc import lib, bcc_symbol
class ProcessSymbols(object): class ProcessSymbols(object):
def __init__(self, pid): def __init__(self, pid):
...@@ -21,79 +21,10 @@ class ProcessSymbols(object): ...@@ -21,79 +21,10 @@ class ProcessSymbols(object):
Call refresh_code_ranges() periodically if you anticipate changes Call refresh_code_ranges() periodically if you anticipate changes
in the set of loaded libraries or their addresses. in the set of loaded libraries or their addresses.
""" """
self.pid = pid self.cache = lib.bcc_symcache_new(pid)
self.refresh_code_ranges()
def refresh_code_ranges(self): def refresh_code_ranges(self):
self.code_ranges = self._get_code_ranges() lib.bcc_symcache_refresh(self.cache)
self.ranges_cache = {}
self.procstat = ProcStat(self.pid)
@staticmethod
def _is_binary_segment(parts):
return len(parts) == 6 and parts[5][0] != '[' and 'x' in parts[1]
def _get_code_ranges(self):
    """Map each executable binary in /proc/<pid>/maps to its address range.

    Returns a dict of ``binary path -> (start, end)`` with both bounds
    parsed as hexadecimal integers. If a binary appears on several
    qualifying lines, the last one wins.
    """
    ranges = {}
    # Read inside a with-block so the maps file is closed deterministically.
    with open("/proc/%d/maps" % self.pid) as maps:
        raw_ranges = maps.readlines()
    # A typical line from /proc/PID/maps looks like this:
    # 7f21b6635000-7f21b67eb000 r-xp ... /usr/lib64/libc-2.21.so
    # We are looking for executable segments that have a .so file
    # or the main executable. The first column is the address range of
    # that memory segment, which we index by binary name.
    for raw_range in raw_ranges:
        parts = raw_range.split()
        if not ProcessSymbols._is_binary_segment(parts):
            continue
        binary = parts[5]
        range_parts = parts[0].split('-')
        addr_range = (int(range_parts[0], 16), int(range_parts[1], 16))
        ranges[binary] = addr_range
    return ranges
@staticmethod
def _is_function_symbol(parts):
return len(parts) == 6 and parts[3] == ".text" and parts[2] == "F"
@staticmethod
def _run_command_get_output(command):
p = Popen(command.split(), stdout=PIPE, stderr=STDOUT)
return iter(p.stdout.readline, b'')
def _get_sym_ranges(self, binary):
    """Return ``{symbol name: (start, length)}`` for ``binary``.

    The table is built from ``objdump -t`` output and memoised in
    ``self.ranges_cache``.
    """
    try:
        return self.ranges_cache[binary]
    except KeyError:
        pass

    table = {}
    listing = ProcessSymbols._run_command_get_output("objdump -t %s" % binary)
    for entry in listing:
        # A typical objdump -t line:
        # 00000000004007f5 g F .text 000000000000010e main
        # Only function symbols in .text are kept; field 0 is the start
        # address, field 4 the length and field 5 the name.
        fields = entry.split()
        if ProcessSymbols._is_function_symbol(fields):
            table[fields[5]] = (int(fields[0], 16), int(fields[4], 16))

    self.ranges_cache[binary] = table
    return table
def _decode_sym(self, binary, offset):
    """Format ``offset`` as "name+0xN" using the symbols of ``binary``.

    The first symbol whose inclusive range [start, start + length] holds
    ``offset`` is used; if none does, the bare hexadecimal offset is
    returned.
    """
    matches = (
        "%s+0x%x" % (sym, offset - base)
        for sym, (base, size) in self._get_sym_ranges(binary).items()
        if base <= offset <= base + size
    )
    return next(matches, "%x" % offset)
def _check_pid_wrap(self):
    """Rebuild the code ranges when ``self.procstat`` reports staleness."""
    stale = self.procstat.is_stale()
    if stale:
        self.refresh_code_ranges()
def decode_addr(self, addr): def decode_addr(self, addr):
""" """
...@@ -103,16 +34,10 @@ class ProcessSymbols(object): ...@@ -103,16 +34,10 @@ class ProcessSymbols(object):
the hex string and the module. If we do have a symbol for it, the hex string and the module. If we do have a symbol for it,
return the symbol and the module, e.g. "readline+0x10 [bash]". return the symbol and the module, e.g. "readline+0x10 [bash]".
""" """
self._check_pid_wrap() sym = bcc_symbol()
# Find the binary that contains the specified address. psym = ct.pointer(sym)
# For .so files, look at the relative address; for the main if lib.bcc_symcache_resolve(self.cache, addr, psym) < 0:
# executable, look at the absolute address. if sym.module and sym.offset:
for binary, (start, end) in self.code_ranges.items(): return "0x%x [%s]" % (sym.offset, sym.module)
if addr >= start and addr <= end: return "%x" % addr
offset = addr - start \ return "%s+0x%x [%s]" % (sym.name, sym.offset, sym.module)
if binary.endswith(".so") else addr
return "%s [%s]" % (self._decode_sym(binary, offset),
binary)
return "%x" % addr
from . import ProcStat
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment