Commit 5f37b160 authored by Marius Wachtler's avatar Marius Wachtler

perf: add a script which allows 'perf report' to disassemble JITed functions

parent a5488ae7
......@@ -819,7 +819,7 @@ endef
.PHONY: perf_report
perf_report:
perf report -n
perf report -n --objdump=tools/perf_jit.py
.PHONY: run run_% dbg_% debug_% perf_%
run: run_dbg
......
......@@ -11,6 +11,8 @@ corresponding flags to dump the necessary output, and the collect and process it
There's a tool called annotate.py in the tools/ directory that can combine the results of perf and data dumped from the
run, to get instruction-level profiles; this is supported directly in perf for non-JIT'd functions, but I couldn't
figure out another way to get it working for JIT'd ones.
We also have a script that lets 'perf report' display bjit and LLVM JITed functions directly. It works by pretending to be objdump,
so the usage is: perf report --objdump=tools/perf_jit.py
Note: this tool will show the *final* assembly code that was output, i.e. with all the patchpoints filled in with whatever
code they contained when the program exited.
......
......@@ -303,7 +303,7 @@ void ASTInterpreter::startJITing(CFGBlock* block, int exit_offset) {
code_block = code_blocks[code_blocks.size() - 1].get();
if (!code_block || code_block->shouldCreateNewBlock()) {
code_blocks.push_back(std::unique_ptr<JitCodeBlock>(new JitCodeBlock(source_info->getName()->s())));
code_blocks.push_back(llvm::make_unique<JitCodeBlock>(source_info->getName()->s()));
code_block = code_blocks[code_blocks.size() - 1].get();
exit_offset = 0;
}
......
......@@ -72,6 +72,8 @@ JitCodeBlock::MemoryManager::MemoryManager() {
// Releases the memory region owned by this manager.
//
// NOTE: destruction is not actually supported yet. The JitCodeBlock constructor
// registers its code range in g.func_addr_registry, and nothing here removes that
// entry, so the destructor ends in an unconditional RELEASE_ASSERT(0, ...).
JitCodeBlock::MemoryManager::~MemoryManager() {
// Unmap the JitCodeBlock::memory_size bytes starting at addr.
munmap(addr, JitCodeBlock::memory_size);
// Clear the pointer so it no longer refers to the unmapped region.
addr = NULL;
// Always fails: a reminder that the block must first be unregistered from
// g.func_addr_registry before tearing it down can be allowed.
RELEASE_ASSERT(0, "we have to unregister this block from g.func_addr_registry");
}
JitCodeBlock::JitCodeBlock(llvm::StringRef name)
......@@ -111,7 +113,9 @@ JitCodeBlock::JitCodeBlock(llvm::StringRef name)
registerDynamicEhFrame((uint64_t)code, code_size, (uint64_t)eh_frame_addr, size - 4);
registerEHFrames((uint8_t*)eh_frame_addr, (uint64_t)eh_frame_addr, size);
g.func_addr_registry.registerFunction(("bjit_" + name).str(), code, code_size, NULL);
static int num_block = 0;
auto unique_name = ("bjit_" + name + "_" + llvm::Twine(num_block++)).str();
g.func_addr_registry.registerFunction(unique_name, code, code_size, NULL);
}
std::unique_ptr<JitFragmentWriter> JitCodeBlock::newFragment(CFGBlock* block, int patch_jump_offset) {
......
......@@ -9,15 +9,33 @@ def get_objdump(func):
for l in open(args.perf_map_dir + "/index.txt"):
addr, this_func = l.split()
if this_func == func:
# print ' '.join(["objdump", "-b", "binary", "-m", "i386", "-D", "perf_map/" + func, "--adjust-vma=0x%s" % addr])
p = subprocess.Popen(["objdump", "-b", "binary", "-m", "i386:x86-64", "-D", args.perf_map_dir + "/" + func, "--adjust-vma=0x%s" % addr], stdout=subprocess.PIPE)
obj_args = ["objdump", "-b", "binary", "-m", "i386:x86-64", "-D", args.perf_map_dir + "/" + func, "--adjust-vma=0x%s" % addr]
if not args.print_raw_bytes:
obj_args += ["--no-show-raw"]
p = subprocess.Popen(obj_args, stdout=subprocess.PIPE)
r = p.communicate()[0]
assert p.wait() == 0
return r
raise Exception("Couldn't find function %r to objdump" % func)
def getNameForAddr(addr):
    """Return the name of the function recorded at address `addr`.

    Scans `<args.perf_map_dir>/index.txt`, whose lines each hold a hexadecimal
    address followed by a function name, and returns the name from the first
    line whose address equals `addr` (an int).

    Raises:
        Exception: if no line of the index has that address.
    """
    # Use 'with' so the index file is closed on every exit path (early return
    # or the exception below) instead of being left to the garbage collector.
    with open(args.perf_map_dir + "/index.txt") as index_file:
        for l in index_file:
            this_addr, this_func = l.split()
            if int(this_addr, 16) == addr:
                return this_func
    raise Exception("Couldn't find function with addr %x" % addr)
_symbols = None
def demangle(sym):
    """Return the demangled form of the symbol name `sym`.

    If "tools/demangle" exists it is run on the symbol; when it prints
    "Error: unable to demangle" the symbol is returned unchanged. If the
    tool does not exist, the output of "c++filt" is returned instead.
    """
    if not os.path.exists("tools/demangle"):
        return commands.getoutput("c++filt %s" % sym)

    output = commands.getoutput("tools/demangle %s" % sym)
    # The helper reports failure through this exact message; keep the raw symbol then.
    return sym if output == "Error: unable to demangle" else output
def lookupAsSymbol(n):
global _symbols
if _symbols is None:
......@@ -31,12 +49,11 @@ def lookupAsSymbol(n):
if not sym:
return sym
demangled = None
if sym.startswith('_') and os.path.exists("tools/demangle"):
demangled = commands.getoutput("tools/demangle %s" % sym)
if demangled != "Error: unable to demangle":
return demangled
return sym + "()"
if sym.startswith('_'):
demangled = demangle(sym)
# perf report does not like '<'
return demangled.replace("<", "_")
return sym
_heap_proc = None
heapmap_args = None
......@@ -81,21 +98,41 @@ def lookupAsHeapAddr(n):
def lookupConstant(n):
sym = lookupAsSymbol(n)
if sym:
return "# " + sym
return "; " + sym
heap = lookupAsHeapAddr(n)
if heap:
return "# " + heap
return "; " + heap
return ""
def getCommentForInst(inst):
    """Return an annotation for the hex constant found in the instruction text `inst`.

    The patterns below are tried in order. For the first one that matches with
    a non-zero operand, the operand is passed to lookupConstant() and that
    result is returned. A zero operand is skipped and the remaining patterns
    are still tried. Returns None when nothing qualifies.
    """
    for pattern in ("movabs \\$0x([0-9a-f]+),",
                    "mov \\$0x([0-9a-f]+),",
                    "cmpq \\$0x([0-9a-f]+),",
                    "callq 0x([0-9a-f]+)"):
        match = re.search(pattern, inst)
        if not match:
            continue
        value = int(match.group(1), 16)
        if value:
            return lookupConstant(value)
    return None
def printLine(count, inst, extra = ""):
if args.print_perf_counts:
print str(count).rjust(8),
print inst.ljust(70), extra
if __name__ == "__main__":
# TODO: if it's not passed, maybe default to annotating the
# first function in the profile (the one in which the plurality of
# the time is spent)?
parser = argparse.ArgumentParser(formatter_class=argparse.RawTextHelpFormatter)
parser.add_argument("func_name", metavar="FUNC_NAME")
parser.add_argument("func_name", metavar="FUNC_NAME_OR_ADDR", help="name or address of function to inspect")
parser.add_argument("--collapse-nops", dest="collapse_nops", action="store", default=5, type=int)
parser.add_argument("--no-collapse-nops", dest="collapse_nops", action="store_false")
parser.add_argument("--heap-map-args", nargs='+', help="""
......@@ -109,9 +146,18 @@ equivalent to '--heap-map-args ./pyston_release -i BENCHMARK'.
""".strip())
parser.add_argument("--perf-data", default="perf.data")
parser.add_argument("--perf-map-dir", default="perf_map")
parser.add_argument("--print-raw-bytes", default=True, action='store_true')
parser.add_argument("--no-print-raw-bytes", dest="print_raw_bytes", action='store_false')
parser.add_argument("--print-perf-counts", default=True, action='store_true')
parser.add_argument("--no-print-perf-counts", dest="print_perf_counts", action='store_false')
args = parser.parse_args()
func = args.func_name
if args.func_name.lower().startswith("0x"):
addr = int(args.func_name, 16)
func = getNameForAddr(addr)
else:
func = args.func_name
if args.heap_map_args:
heapmap_args = args.heap_map_args
elif args.heap_map_target:
......@@ -134,39 +180,26 @@ equivalent to '--heap-map-args ./pyston_release -i BENCHMARK'.
counts[addr] = int(count)
nops = None # (count, num, start, end)
nop_lines = [] # list of tuple(int(count), str(line))
for l in objdump.split('\n')[7:]:
addr = l.split(':')[0]
count = counts.pop(addr.strip(), 0)
extra = ""
m = re.search("movabs \\$0x([0-9a-f]{4,}),", l)
if m:
n = int(m.group(1), 16)
extra = lookupConstant(n)
m = re.search("mov \\$0x([0-9a-f]{4,}),", l)
if m:
n = int(m.group(1), 16)
extra = lookupConstant(n)
extra = getCommentForInst(l) or ""
if args.collapse_nops and l.endswith("\tnop"):
addr = l.split()[0][:-1]
if not nops:
nops = (count, 1, addr, addr)
else:
nops = (nops[0] + count, nops[1] + 1, nops[2], addr)
nop_lines.append((count, l))
else:
if nops:
if int(nops[3], 16) - int(nops[2], 16) + 1 <= args.collapse_nops:
nop_count = nops[0]
for addr in xrange(int(nops[2], 16), int(nops[3], 16) + 1):
print str(nop_count).rjust(8), (" %s nop" % ("%x: 90" % addr).ljust(29)).ljust(70)
nop_count = 0
if len(nop_lines):
if len(nop_lines) <= args.collapse_nops:
for nop in nop_lines:
printLine(nop[0], nop[1])
else:
print str(nops[0]).rjust(8), (" %s nop*%d" % (("%s-%s" % (nops[2], nops[3])).ljust(29), nops[1])).ljust(70)
nops = None
print str(count).rjust(8), l.ljust(70), extra
sum_count = sum([nop[0] for nop in nop_lines])
addr_start = int(nop_lines[0][1].split(':')[0], 16)
addr_end = int(nop_lines[-1][1].split(':')[0], 16)
addr_range = ("%x-%x" % (addr_start, addr_end)).ljust(29)
printLine(sum_count, " %s nop*%d" % (addr_range, len(nop_lines)))
nop_lines = []
printLine(count, l, extra)
assert not counts, counts
#!/usr/bin/python2
"""objdump stand-in for: perf report --objdump=tools/perf_jit.py

When the last argument does not contain "/tmp/perf-", all arguments are
forwarded unchanged to the real objdump. Otherwise (presumably a perf map of
JITed code) the --start-address=ADDR argument is extracted and
tools/annotate.py is asked to print the disassembly for that address,
without raw bytes and without perf counts.
"""
import sys, subprocess

START_ADDRESS_FLAG = "--start-address="

if "/tmp/perf-" not in sys.argv[-1]:
    subprocess.check_call(["objdump"] + sys.argv[1:])
else:
    start_addr = None
    for arg in sys.argv[1:]:
        # Match the flag as a prefix: the value is sliced off from the start of
        # the argument, so a substring match elsewhere would parse garbage.
        if arg.startswith(START_ADDRESS_FLAG):
            start_addr = int(arg[len(START_ADDRESS_FLAG):], 16)
    if start_addr is None:
        # Previously this fell through to a NameError on the unbound variable.
        sys.exit("perf_jit.py: expected a %sADDR argument, got: %s" % (START_ADDRESS_FLAG, " ".join(sys.argv[1:])))
    # Run annotate.py with the interpreter running this script (python2 per the
    # shebang) rather than whatever "python" resolves to on PATH.
    subprocess.check_call([sys.executable or "python", "tools/annotate.py", "--no-print-raw-bytes", "--no-print-perf-counts", "0x%x" % start_addr])
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment