Commit 5f37b160 authored by Marius Wachtler's avatar Marius Wachtler

perf: add a script which allows 'perf report' to disassemble JITed functions

parent a5488ae7
...@@ -819,7 +819,7 @@ endef ...@@ -819,7 +819,7 @@ endef
.PHONY: perf_report .PHONY: perf_report
perf_report: perf_report:
perf report -n perf report -n --objdump=tools/perf_jit.py
.PHONY: run run_% dbg_% debug_% perf_% .PHONY: run run_% dbg_% debug_% perf_%
run: run_dbg run: run_dbg
......
...@@ -11,6 +11,8 @@ corresponding flags to dump the necessary output, and the collect and process it ...@@ -11,6 +11,8 @@ corresponding flags to dump the necessary output, and the collect and process it
There's a tool called annotate.py in the tools/ directory that can combine the results of perf and data dumped from the There's a tool called annotate.py in the tools/ directory that can combine the results of perf and data dumped from the
run, to get instruction-level profiles; this is supported directly in perf for non-JIT'd functions, but I couldn't run, to get instruction-level profiles; this is supported directly in perf for non-JIT'd functions, but I couldn't
figure out another way to get it working for JIT'd ones. figure out another way to get it working for JIT'd ones.
We also have a script that lets 'perf report' display bjit and LLVM JIT'd functions directly; it works by pretending to be objdump,
so the usage is: perf report --objdump=tools/perf_jit.py
Note: this tool will show the *final* assembly code that was output, ie with all the patchpoints filled in with whatever Note: this tool will show the *final* assembly code that was output, ie with all the patchpoints filled in with whatever
code they had at the exit of the program. code they had at the exit of the program.
......
...@@ -303,7 +303,7 @@ void ASTInterpreter::startJITing(CFGBlock* block, int exit_offset) { ...@@ -303,7 +303,7 @@ void ASTInterpreter::startJITing(CFGBlock* block, int exit_offset) {
code_block = code_blocks[code_blocks.size() - 1].get(); code_block = code_blocks[code_blocks.size() - 1].get();
if (!code_block || code_block->shouldCreateNewBlock()) { if (!code_block || code_block->shouldCreateNewBlock()) {
code_blocks.push_back(std::unique_ptr<JitCodeBlock>(new JitCodeBlock(source_info->getName()->s()))); code_blocks.push_back(llvm::make_unique<JitCodeBlock>(source_info->getName()->s()));
code_block = code_blocks[code_blocks.size() - 1].get(); code_block = code_blocks[code_blocks.size() - 1].get();
exit_offset = 0; exit_offset = 0;
} }
......
...@@ -72,6 +72,8 @@ JitCodeBlock::MemoryManager::MemoryManager() { ...@@ -72,6 +72,8 @@ JitCodeBlock::MemoryManager::MemoryManager() {
JitCodeBlock::MemoryManager::~MemoryManager() { JitCodeBlock::MemoryManager::~MemoryManager() {
munmap(addr, JitCodeBlock::memory_size); munmap(addr, JitCodeBlock::memory_size);
addr = NULL; addr = NULL;
RELEASE_ASSERT(0, "we have to unregister this block from g.func_addr_registry");
} }
JitCodeBlock::JitCodeBlock(llvm::StringRef name) JitCodeBlock::JitCodeBlock(llvm::StringRef name)
...@@ -111,7 +113,9 @@ JitCodeBlock::JitCodeBlock(llvm::StringRef name) ...@@ -111,7 +113,9 @@ JitCodeBlock::JitCodeBlock(llvm::StringRef name)
registerDynamicEhFrame((uint64_t)code, code_size, (uint64_t)eh_frame_addr, size - 4); registerDynamicEhFrame((uint64_t)code, code_size, (uint64_t)eh_frame_addr, size - 4);
registerEHFrames((uint8_t*)eh_frame_addr, (uint64_t)eh_frame_addr, size); registerEHFrames((uint8_t*)eh_frame_addr, (uint64_t)eh_frame_addr, size);
g.func_addr_registry.registerFunction(("bjit_" + name).str(), code, code_size, NULL); static int num_block = 0;
auto unique_name = ("bjit_" + name + "_" + llvm::Twine(num_block++)).str();
g.func_addr_registry.registerFunction(unique_name, code, code_size, NULL);
} }
std::unique_ptr<JitFragmentWriter> JitCodeBlock::newFragment(CFGBlock* block, int patch_jump_offset) { std::unique_ptr<JitFragmentWriter> JitCodeBlock::newFragment(CFGBlock* block, int patch_jump_offset) {
......
...@@ -9,15 +9,33 @@ def get_objdump(func): ...@@ -9,15 +9,33 @@ def get_objdump(func):
for l in open(args.perf_map_dir + "/index.txt"): for l in open(args.perf_map_dir + "/index.txt"):
addr, this_func = l.split() addr, this_func = l.split()
if this_func == func: if this_func == func:
# print ' '.join(["objdump", "-b", "binary", "-m", "i386", "-D", "perf_map/" + func, "--adjust-vma=0x%s" % addr]) obj_args = ["objdump", "-b", "binary", "-m", "i386:x86-64", "-D", args.perf_map_dir + "/" + func, "--adjust-vma=0x%s" % addr]
p = subprocess.Popen(["objdump", "-b", "binary", "-m", "i386:x86-64", "-D", args.perf_map_dir + "/" + func, "--adjust-vma=0x%s" % addr], stdout=subprocess.PIPE) if not args.print_raw_bytes:
obj_args += ["--no-show-raw"]
p = subprocess.Popen(obj_args, stdout=subprocess.PIPE)
r = p.communicate()[0] r = p.communicate()[0]
assert p.wait() == 0 assert p.wait() == 0
return r return r
raise Exception("Couldn't find function %r to objdump" % func) raise Exception("Couldn't find function %r to objdump" % func)
def getNameForAddr(addr):
    """Return the name of the function that index.txt records at address `addr`.

    Each line of <perf_map_dir>/index.txt is split into a hexadecimal address
    and a function name; the name from the first line whose address equals
    `addr` (an int) is returned.

    Raises Exception if no line has a matching address.
    """
    # Use a context manager so the index file is closed as soon as the lookup
    # finishes (on return or raise) instead of leaking the handle.
    with open(args.perf_map_dir + "/index.txt") as index_file:
        for l in index_file:
            this_addr, this_func = l.split()
            if int(this_addr, 16) == addr:
                return this_func
    raise Exception("Couldn't find function with addr %x" % addr)
_symbols = None _symbols = None
def demangle(sym):
    """Return the demangled form of the symbol name `sym`.

    If the helper binary tools/demangle exists it is used, and `sym` is
    returned unchanged when the helper reports that it cannot demangle the
    name. Otherwise the output of c++filt is returned as-is.
    """
    if not os.path.exists("tools/demangle"):
        # No project helper available: fall back to c++filt.
        return commands.getoutput("c++filt %s" % sym)

    result = commands.getoutput("tools/demangle %s" % sym)
    if result == "Error: unable to demangle":
        # The helper signals failure with this exact message; keep the raw symbol.
        return sym
    return result
def lookupAsSymbol(n): def lookupAsSymbol(n):
global _symbols global _symbols
if _symbols is None: if _symbols is None:
...@@ -31,12 +49,11 @@ def lookupAsSymbol(n): ...@@ -31,12 +49,11 @@ def lookupAsSymbol(n):
if not sym: if not sym:
return sym return sym
demangled = None if sym.startswith('_'):
if sym.startswith('_') and os.path.exists("tools/demangle"): demangled = demangle(sym)
demangled = commands.getoutput("tools/demangle %s" % sym) # perf report does not like '<'
if demangled != "Error: unable to demangle": return demangled.replace("<", "_")
return demangled return sym
return sym + "()"
_heap_proc = None _heap_proc = None
heapmap_args = None heapmap_args = None
...@@ -81,21 +98,41 @@ def lookupAsHeapAddr(n): ...@@ -81,21 +98,41 @@ def lookupAsHeapAddr(n):
def lookupConstant(n): def lookupConstant(n):
sym = lookupAsSymbol(n) sym = lookupAsSymbol(n)
if sym: if sym:
return "# " + sym return "; " + sym
heap = lookupAsHeapAddr(n) heap = lookupAsHeapAddr(n)
if heap: if heap:
return "# " + heap return "; " + heap
return "" return ""
def getCommentForInst(inst):
    """Return the lookupConstant() annotation for a constant in `inst`, or None.

    `inst` is one line of disassembly text. The patterns below are tried in
    order; for the first one that matches with a non-zero hexadecimal value,
    the result of lookupConstant() on that value is returned. A match whose
    value is zero is skipped and the remaining patterns are still tried.
    """
    operand_regexes = (
        "movabs \\$0x([0-9a-f]+),",
        "mov \\$0x([0-9a-f]+),",
        "cmpq \\$0x([0-9a-f]+),",
        "callq 0x([0-9a-f]+)",
    )
    for regex in operand_regexes:
        found = re.search(regex, inst)
        if not found:
            continue
        value = int(found.group(1), 16)
        if value == 0:
            # A zero constant is never looked up.
            continue
        return lookupConstant(value)
    return None
def printLine(count, inst, extra = ""):
if args.print_perf_counts:
print str(count).rjust(8),
print inst.ljust(70), extra
if __name__ == "__main__": if __name__ == "__main__":
# TODO: if it's not passed, maybe default to annotating the # TODO: if it's not passed, maybe default to annotating the
# first function in the profile (the one in which the plurality of # first function in the profile (the one in which the plurality of
# the time is spent)? # the time is spent)?
parser = argparse.ArgumentParser(formatter_class=argparse.RawTextHelpFormatter) parser = argparse.ArgumentParser(formatter_class=argparse.RawTextHelpFormatter)
parser.add_argument("func_name", metavar="FUNC_NAME") parser.add_argument("func_name", metavar="FUNC_NAME_OR_ADDR", help="name or address of function to inspect")
parser.add_argument("--collapse-nops", dest="collapse_nops", action="store", default=5, type=int) parser.add_argument("--collapse-nops", dest="collapse_nops", action="store", default=5, type=int)
parser.add_argument("--no-collapse-nops", dest="collapse_nops", action="store_false") parser.add_argument("--no-collapse-nops", dest="collapse_nops", action="store_false")
parser.add_argument("--heap-map-args", nargs='+', help=""" parser.add_argument("--heap-map-args", nargs='+', help="""
...@@ -109,9 +146,18 @@ equivalent to '--heap-map-args ./pyston_release -i BENCHMARK'. ...@@ -109,9 +146,18 @@ equivalent to '--heap-map-args ./pyston_release -i BENCHMARK'.
""".strip()) """.strip())
parser.add_argument("--perf-data", default="perf.data") parser.add_argument("--perf-data", default="perf.data")
parser.add_argument("--perf-map-dir", default="perf_map") parser.add_argument("--perf-map-dir", default="perf_map")
parser.add_argument("--print-raw-bytes", default=True, action='store_true')
parser.add_argument("--no-print-raw-bytes", dest="print_raw_bytes", action='store_false')
parser.add_argument("--print-perf-counts", default=True, action='store_true')
parser.add_argument("--no-print-perf-counts", dest="print_perf_counts", action='store_false')
args = parser.parse_args() args = parser.parse_args()
func = args.func_name if args.func_name.lower().startswith("0x"):
addr = int(args.func_name, 16)
func = getNameForAddr(addr)
else:
func = args.func_name
if args.heap_map_args: if args.heap_map_args:
heapmap_args = args.heap_map_args heapmap_args = args.heap_map_args
elif args.heap_map_target: elif args.heap_map_target:
...@@ -134,39 +180,26 @@ equivalent to '--heap-map-args ./pyston_release -i BENCHMARK'. ...@@ -134,39 +180,26 @@ equivalent to '--heap-map-args ./pyston_release -i BENCHMARK'.
counts[addr] = int(count) counts[addr] = int(count)
nops = None # (count, num, start, end) nop_lines = [] # list of tuple(int(count), str(line))
for l in objdump.split('\n')[7:]: for l in objdump.split('\n')[7:]:
addr = l.split(':')[0] addr = l.split(':')[0]
count = counts.pop(addr.strip(), 0) count = counts.pop(addr.strip(), 0)
extra = getCommentForInst(l) or ""
extra = ""
m = re.search("movabs \\$0x([0-9a-f]{4,}),", l)
if m:
n = int(m.group(1), 16)
extra = lookupConstant(n)
m = re.search("mov \\$0x([0-9a-f]{4,}),", l)
if m:
n = int(m.group(1), 16)
extra = lookupConstant(n)
if args.collapse_nops and l.endswith("\tnop"): if args.collapse_nops and l.endswith("\tnop"):
addr = l.split()[0][:-1] nop_lines.append((count, l))
if not nops:
nops = (count, 1, addr, addr)
else:
nops = (nops[0] + count, nops[1] + 1, nops[2], addr)
else: else:
if nops: if len(nop_lines):
if int(nops[3], 16) - int(nops[2], 16) + 1 <= args.collapse_nops: if len(nop_lines) <= args.collapse_nops:
nop_count = nops[0] for nop in nop_lines:
for addr in xrange(int(nops[2], 16), int(nops[3], 16) + 1): printLine(nop[0], nop[1])
print str(nop_count).rjust(8), (" %s nop" % ("%x: 90" % addr).ljust(29)).ljust(70)
nop_count = 0
else: else:
print str(nops[0]).rjust(8), (" %s nop*%d" % (("%s-%s" % (nops[2], nops[3])).ljust(29), nops[1])).ljust(70) sum_count = sum([nop[0] for nop in nop_lines])
nops = None addr_start = int(nop_lines[0][1].split(':')[0], 16)
print str(count).rjust(8), l.ljust(70), extra addr_end = int(nop_lines[-1][1].split(':')[0], 16)
addr_range = ("%x-%x" % (addr_start, addr_end)).ljust(29)
printLine(sum_count, " %s nop*%d" % (addr_range, len(nop_lines)))
nop_lines = []
printLine(count, l, extra)
assert not counts, counts assert not counts, counts
#!/usr/bin/python2
import sys, subprocess
# This script is passed to perf as its objdump replacement
# (perf report --objdump=tools/perf_jit.py). The last argument is the file to
# disassemble.
if "/tmp/perf-" not in sys.argv[-1]:
    # Not a "/tmp/perf-" file: forward the arguments unchanged to the real objdump.
    subprocess.check_call(["objdump"] + sys.argv[1:])
else:
    # A "/tmp/perf-" file: let tools/annotate.py produce the disassembly for
    # the function located at the address given by --start-address.
    prefix = "--start-address="
    start_addr = None
    for arg in sys.argv[1:]:
        # Match only at the start of the argument, since the value is sliced
        # off using the prefix length.
        if arg.startswith(prefix):
            start_addr = int(arg[len(prefix):], 16)
    if start_addr is None:
        # Previously this fell through to a NameError on the unbound variable.
        sys.exit("perf_jit.py: expected a --start-address=<hex> argument")
    subprocess.check_call(["python", "tools/annotate.py", "--no-print-raw-bytes", "--no-print-perf-counts", "0x%x" % start_addr])
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment