Commit 9e83c11f authored by Austin Clements's avatar Austin Clements

runtime/pprof: expand inlined frames in symbolized proto profiles

Currently proto symbolization uses runtime.FuncForPC and assumes each
PC maps to a single frame. This isn't true in the presence of inlining
(even with leaf-only inlining this can get incorrect results).

Change PC symbolization to use runtime.CallersFrames to expand each PC
to all of the frames at that PC.

Change-Id: I8d20dff7495a5de495ae07f569122c225d433ced
Reviewed-on: https://go-review.googlesource.com/41256
Run-TryBot: Austin Clements <austin@google.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: default avatarMichael Matloob <matloob@golang.org>
parent 1dc0f969
...@@ -400,9 +400,7 @@ func printCountProfile(w io.Writer, debug int, name string, p countProfile) erro ...@@ -400,9 +400,7 @@ func printCountProfile(w io.Writer, debug int, name string, p countProfile) erro
locs = locs[:0] locs = locs[:0]
for _, addr := range p.Stack(index[k]) { for _, addr := range p.Stack(index[k]) {
// For count profiles, all stack addresses are // For count profiles, all stack addresses are
// return PCs. Adjust them to be call PCs for // return PCs, which is what locForPC expects.
// locForPC.
addr--
locs = append(locs, b.locForPC(addr)) locs = append(locs, b.locForPC(addr))
} }
b.pbSample(values, locs, nil) b.pbSample(values, locs, nil)
......
...@@ -87,6 +87,33 @@ func TestCPUProfileMultithreaded(t *testing.T) { ...@@ -87,6 +87,33 @@ func TestCPUProfileMultithreaded(t *testing.T) {
}) })
} }
func TestCPUProfileInlining(t *testing.T) {
testCPUProfile(t, []string{"runtime/pprof.inlinedCallee", "runtime/pprof.inlinedCaller"}, func(dur time.Duration) {
cpuHogger(inlinedCaller, dur)
})
}
func inlinedCaller() {
inlinedCallee()
}
func inlinedCallee() {
// We could just use cpuHog1, but for loops prevent inlining
// right now. :(
foo := salt1
i := 0
loop:
if foo > 0 {
foo *= foo
} else {
foo *= foo + 1
}
if i++; i < 1e5 {
goto loop
}
salt1 = foo
}
func parseProfile(t *testing.T, valBytes []byte, f func(uintptr, []*profile.Location, map[string][]string)) { func parseProfile(t *testing.T, valBytes []byte, f func(uintptr, []*profile.Location, map[string][]string)) {
p, err := profile.Parse(bytes.NewReader(valBytes)) p, err := profile.Parse(bytes.NewReader(valBytes))
if err != nil { if err != nil {
......
...@@ -43,7 +43,7 @@ type profileBuilder struct { ...@@ -43,7 +43,7 @@ type profileBuilder struct {
strings []string strings []string
stringMap map[string]int stringMap map[string]int
locs map[uintptr]int locs map[uintptr]int
funcs map[*runtime.Func]int funcs map[string]int // Package path-qualified function name to Function.ID
mem []memMap mem []memMap
} }
...@@ -183,24 +183,60 @@ func (b *profileBuilder) pbMapping(tag int, id, base, limit, offset uint64, file ...@@ -183,24 +183,60 @@ func (b *profileBuilder) pbMapping(tag int, id, base, limit, offset uint64, file
} }
// locForPC returns the location ID for addr. // locForPC returns the location ID for addr.
// addr must be a call address (not a return address). // addr must be a return PC. This returns the location of the call.
// It may emit to b.pb, so there must be no message encoding in progress. // It may emit to b.pb, so there must be no message encoding in progress.
func (b *profileBuilder) locForPC(addr uintptr) uint64 { func (b *profileBuilder) locForPC(addr uintptr) uint64 {
id := uint64(b.locs[addr]) id := uint64(b.locs[addr])
if id != 0 { if id != 0 {
return id return id
} }
f := runtime.FuncForPC(addr)
if f != nil && f.Name() == "runtime.goexit" { // Expand this one address using CallersFrames so we can cache
// each expansion. In general, CallersFrames takes a whole
// stack, but in this case we know there will be no skips in
// the stack and we have return PCs anyway.
frames := runtime.CallersFrames([]uintptr{addr})
frame, more := frames.Next()
if frame.Function == "runtime.goexit" {
// Short-circuit if we see runtime.goexit so the loop
// below doesn't allocate a useless empty location.
return 0 return 0
} }
funcID, lineno := b.funcForPC(addr)
if frame.PC == 0 {
// If we failed to resolve the frame, at least make up
// a reasonable call PC. This mostly happens in tests.
frame.PC = addr - 1
}
// We can't write out functions while in the middle of the
// Location message, so record new functions we encounter and
// write them out after the Location.
type newFunc struct {
id uint64
name, file string
}
newFuncs := make([]newFunc, 0, 8)
id = uint64(len(b.locs)) + 1 id = uint64(len(b.locs)) + 1
b.locs[addr] = int(id) b.locs[addr] = int(id)
start := b.pb.startMessage() start := b.pb.startMessage()
b.pb.uint64Opt(tagLocation_ID, id) b.pb.uint64Opt(tagLocation_ID, id)
b.pb.uint64Opt(tagLocation_Address, uint64(addr)) b.pb.uint64Opt(tagLocation_Address, uint64(frame.PC))
b.pbLine(tagLocation_Line, funcID, int64(lineno)) for frame.Function != "runtime.goexit" {
// Write out each line in frame expansion.
funcID := uint64(b.funcs[frame.Function])
if funcID == 0 {
funcID = uint64(len(b.funcs)) + 1
b.funcs[frame.Function] = int(funcID)
newFuncs = append(newFuncs, newFunc{funcID, frame.Function, frame.File})
}
b.pbLine(tagLocation_Line, funcID, int64(frame.Line))
if !more {
break
}
frame, more = frames.Next()
}
if len(b.mem) > 0 { if len(b.mem) > 0 {
i := sort.Search(len(b.mem), func(i int) bool { i := sort.Search(len(b.mem), func(i int) bool {
return b.mem[i].end > addr return b.mem[i].end > addr
...@@ -210,34 +246,19 @@ func (b *profileBuilder) locForPC(addr uintptr) uint64 { ...@@ -210,34 +246,19 @@ func (b *profileBuilder) locForPC(addr uintptr) uint64 {
} }
} }
b.pb.endMessage(tagProfile_Location, start) b.pb.endMessage(tagProfile_Location, start)
b.flush()
return id
}
// funcForPC returns the func ID and line number for addr. // Write out functions we found during frame expansion.
// It may emit to b.pb, so there must be no message encoding in progress. for _, fn := range newFuncs {
func (b *profileBuilder) funcForPC(addr uintptr) (funcID uint64, lineno int) { start := b.pb.startMessage()
f := runtime.FuncForPC(addr) b.pb.uint64Opt(tagFunction_ID, fn.id)
if f == nil { b.pb.int64Opt(tagFunction_Name, b.stringIndex(fn.name))
return 0, 0 b.pb.int64Opt(tagFunction_SystemName, b.stringIndex(fn.name))
} b.pb.int64Opt(tagFunction_Filename, b.stringIndex(fn.file))
file, lineno := f.FileLine(addr) b.pb.endMessage(tagProfile_Function, start)
funcID = uint64(b.funcs[f])
if funcID != 0 {
return funcID, lineno
} }
funcID = uint64(len(b.funcs)) + 1
b.funcs[f] = int(funcID)
name := f.Name()
start := b.pb.startMessage()
b.pb.uint64Opt(tagFunction_ID, funcID)
b.pb.int64Opt(tagFunction_Name, b.stringIndex(name))
b.pb.int64Opt(tagFunction_SystemName, b.stringIndex(name))
b.pb.int64Opt(tagFunction_Filename, b.stringIndex(file))
b.pb.endMessage(tagProfile_Function, start)
b.flush() b.flush()
return funcID, lineno return id
} }
// newProfileBuilder returns a new profileBuilder. // newProfileBuilder returns a new profileBuilder.
...@@ -253,7 +274,7 @@ func newProfileBuilder(w io.Writer) *profileBuilder { ...@@ -253,7 +274,7 @@ func newProfileBuilder(w io.Writer) *profileBuilder {
strings: []string{""}, strings: []string{""},
stringMap: map[string]int{"": 0}, stringMap: map[string]int{"": 0},
locs: map[uintptr]int{}, locs: map[uintptr]int{},
funcs: map[*runtime.Func]int{}, funcs: map[string]int{},
} }
b.readMapping() b.readMapping()
return b return b
...@@ -351,11 +372,14 @@ func (b *profileBuilder) build() error { ...@@ -351,11 +372,14 @@ func (b *profileBuilder) build() error {
locs = locs[:0] locs = locs[:0]
for i, addr := range e.stk { for i, addr := range e.stk {
// Addresses from stack traces point to the next instruction after // Addresses from stack traces point to the
// each call. Adjust by -1 to land somewhere on the actual call // next instruction after each call, except
// (except for the leaf, which is not a call). // for the leaf, which points to where the
if i > 0 { // signal occurred. locForPC expects return
addr-- // PCs, so increment the leaf address to look
// like a return PC.
if i == 0 {
addr++
} }
l := b.locForPC(addr) l := b.locForPC(addr)
if l == 0 { // runtime.goexit if l == 0 { // runtime.goexit
......
...@@ -29,9 +29,8 @@ func writeHeapProto(w io.Writer, p []runtime.MemProfileRecord, rate int64) error ...@@ -29,9 +29,8 @@ func writeHeapProto(w io.Writer, p []runtime.MemProfileRecord, rate int64) error
for tries := 0; tries < 2; tries++ { for tries := 0; tries < 2; tries++ {
for _, addr := range r.Stack() { for _, addr := range r.Stack() {
// For heap profiles, all stack // For heap profiles, all stack
// addresses are return PCs. Adjust // addresses are return PCs, which is
// them to be call PCs for locForPC. // what locForPC expects.
addr--
if hideRuntime { if hideRuntime {
if f := runtime.FuncForPC(addr); f != nil && strings.HasPrefix(f.Name(), "runtime.") { if f := runtime.FuncForPC(addr); f != nil && strings.HasPrefix(f.Name(), "runtime.") {
continue continue
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment