Commit e25de44e authored by Hana (Hyang-Ah) Kim's avatar Hana (Hyang-Ah) Kim Committed by Hyang-Ah Hana Kim

runtime/pprof: correct inlined function location encoding for non-CPU profiles

Change-Id: Id270a3477bf1a581755c4311eb12f990aa2260b5
Reviewed-on: https://go-review.googlesource.com/c/go/+/205097
Run-TryBot: Hyang-Ah Hana Kim <hyangah@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: default avatarKeith Randall <khr@golang.org>
parent f5e89c22
......@@ -2,6 +2,8 @@
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// +build !js
package pprof
import (
......@@ -10,6 +12,7 @@ import (
"reflect"
"regexp"
"runtime"
"runtime/pprof/internal/profile"
"testing"
"unsafe"
)
......@@ -27,6 +30,10 @@ func allocateTransient2M() {
memSink = make([]byte, 2<<20)
}
func allocateTransient2MInline() {
memSink = make([]byte, 2<<20)
}
type Obj32 struct {
link *Obj32
pad [32 - unsafe.Sizeof(uintptr(0))]byte
......@@ -71,42 +78,93 @@ func TestMemoryProfiler(t *testing.T) {
// Do the interesting allocations.
allocateTransient1M()
allocateTransient2M()
allocateTransient2MInline()
allocatePersistent1K()
allocateReflect()
memSink = nil
runtime.GC() // materialize stats
var buf bytes.Buffer
if err := Lookup("heap").WriteTo(&buf, 1); err != nil {
t.Fatalf("failed to write heap profile: %v", err)
}
memoryProfilerRun++
tests := []string{
fmt.Sprintf(`%v: %v \[%v: %v\] @ 0x[0-9,a-f]+ 0x[0-9,a-f]+ 0x[0-9,a-f]+ 0x[0-9,a-f]+
# 0x[0-9,a-f]+ runtime/pprof\.allocatePersistent1K\+0x[0-9,a-f]+ .*/runtime/pprof/mprof_test\.go:40
# 0x[0-9,a-f]+ runtime/pprof\.TestMemoryProfiler\+0x[0-9,a-f]+ .*/runtime/pprof/mprof_test\.go:74
tests := []struct {
stk []string
legacy string
}{{
stk: []string{"runtime/pprof.allocatePersistent1K", "runtime/pprof.TestMemoryProfiler"},
legacy: fmt.Sprintf(`%v: %v \[%v: %v\] @ 0x[0-9,a-f]+ 0x[0-9,a-f]+ 0x[0-9,a-f]+ 0x[0-9,a-f]+
# 0x[0-9,a-f]+ runtime/pprof\.allocatePersistent1K\+0x[0-9,a-f]+ .*/runtime/pprof/mprof_test\.go:47
# 0x[0-9,a-f]+ runtime/pprof\.TestMemoryProfiler\+0x[0-9,a-f]+ .*/runtime/pprof/mprof_test\.go:82
`, 32*memoryProfilerRun, 1024*memoryProfilerRun, 32*memoryProfilerRun, 1024*memoryProfilerRun),
fmt.Sprintf(`0: 0 \[%v: %v\] @ 0x[0-9,a-f]+ 0x[0-9,a-f]+ 0x[0-9,a-f]+ 0x[0-9,a-f]+
# 0x[0-9,a-f]+ runtime/pprof\.allocateTransient1M\+0x[0-9,a-f]+ .*/runtime/pprof/mprof_test.go:21
# 0x[0-9,a-f]+ runtime/pprof\.TestMemoryProfiler\+0x[0-9,a-f]+ .*/runtime/pprof/mprof_test.go:72
}, {
stk: []string{"runtime/pprof.allocateTransient1M", "runtime/pprof.TestMemoryProfiler"},
legacy: fmt.Sprintf(`0: 0 \[%v: %v\] @ 0x[0-9,a-f]+ 0x[0-9,a-f]+ 0x[0-9,a-f]+ 0x[0-9,a-f]+
# 0x[0-9,a-f]+ runtime/pprof\.allocateTransient1M\+0x[0-9,a-f]+ .*/runtime/pprof/mprof_test.go:24
# 0x[0-9,a-f]+ runtime/pprof\.TestMemoryProfiler\+0x[0-9,a-f]+ .*/runtime/pprof/mprof_test.go:79
`, (1<<10)*memoryProfilerRun, (1<<20)*memoryProfilerRun),
fmt.Sprintf(`0: 0 \[%v: %v\] @ 0x[0-9,a-f]+ 0x[0-9,a-f]+ 0x[0-9,a-f]+ 0x[0-9,a-f]+
# 0x[0-9,a-f]+ runtime/pprof\.allocateTransient2M\+0x[0-9,a-f]+ .*/runtime/pprof/mprof_test.go:27
# 0x[0-9,a-f]+ runtime/pprof\.TestMemoryProfiler\+0x[0-9,a-f]+ .*/runtime/pprof/mprof_test.go:73
}, {
stk: []string{"runtime/pprof.allocateTransient2M", "runtime/pprof.TestMemoryProfiler"},
legacy: fmt.Sprintf(`0: 0 \[%v: %v\] @ 0x[0-9,a-f]+ 0x[0-9,a-f]+ 0x[0-9,a-f]+ 0x[0-9,a-f]+
# 0x[0-9,a-f]+ runtime/pprof\.allocateTransient2M\+0x[0-9,a-f]+ .*/runtime/pprof/mprof_test.go:30
# 0x[0-9,a-f]+ runtime/pprof\.TestMemoryProfiler\+0x[0-9,a-f]+ .*/runtime/pprof/mprof_test.go:80
`, memoryProfilerRun, (2<<20)*memoryProfilerRun),
fmt.Sprintf(`0: 0 \[%v: %v\] @( 0x[0-9,a-f]+)+
# 0x[0-9,a-f]+ runtime/pprof\.allocateReflectTransient\+0x[0-9,a-f]+ .*/runtime/pprof/mprof_test.go:48
}, {
stk: []string{"runtime/pprof.allocateTransient2MInline", "runtime/pprof.TestMemoryProfiler"},
legacy: fmt.Sprintf(`0: 0 \[%v: %v\] @ 0x[0-9,a-f]+ 0x[0-9,a-f]+ 0x[0-9,a-f]+ 0x[0-9,a-f]+
# 0x[0-9,a-f]+ runtime/pprof\.allocateTransient2MInline\+0x[0-9,a-f]+ .*/runtime/pprof/mprof_test.go:34
# 0x[0-9,a-f]+ runtime/pprof\.TestMemoryProfiler\+0x[0-9,a-f]+ .*/runtime/pprof/mprof_test.go:81
`, memoryProfilerRun, (2<<20)*memoryProfilerRun),
}
}, {
stk: []string{"runtime/pprof.allocateReflectTransient"},
legacy: fmt.Sprintf(`0: 0 \[%v: %v\] @( 0x[0-9,a-f]+)+
# 0x[0-9,a-f]+ runtime/pprof\.allocateReflectTransient\+0x[0-9,a-f]+ .*/runtime/pprof/mprof_test.go:55
`, memoryProfilerRun, (2<<20)*memoryProfilerRun),
}}
for _, test := range tests {
if !regexp.MustCompile(test).Match(buf.Bytes()) {
t.Fatalf("The entry did not match:\n%v\n\nProfile:\n%v\n", test, buf.String())
t.Run("debug=1", func(t *testing.T) {
var buf bytes.Buffer
if err := Lookup("heap").WriteTo(&buf, 1); err != nil {
t.Fatalf("failed to write heap profile: %v", err)
}
}
for _, test := range tests {
if !regexp.MustCompile(test.legacy).Match(buf.Bytes()) {
t.Fatalf("The entry did not match:\n%v\n\nProfile:\n%v\n", test.legacy, buf.String())
}
}
})
t.Run("proto", func(t *testing.T) {
var buf bytes.Buffer
if err := Lookup("heap").WriteTo(&buf, 0); err != nil {
t.Fatalf("failed to write heap profile: %v", err)
}
p, err := profile.Parse(&buf)
if err != nil {
t.Fatalf("failed to parse heap profile: %v", err)
}
t.Logf("Profile = %v", p)
stks := stacks(p)
for _, test := range tests {
if !containsStack(stks, test.stk) {
t.Fatalf("No matching stack entry for %q\n\nProfile:\n%v\n", test.stk, p)
}
}
// Check the inlined function location is encoded correctly.
for _, loc := range p.Location {
inlinedCaller, inlinedCallee := false, false
for _, line := range loc.Line {
if line.Function.Name == "runtime/pprof.allocateTransient2MInline" {
inlinedCallee = true
}
if inlinedCallee && line.Function.Name == "runtime/pprof.TestMemoryProfiler" {
inlinedCaller = true
}
}
if inlinedCallee != inlinedCaller {
t.Errorf("want allocateTransient2MInline after TestMemoryProfiler in one location, got separate location entries:\n%v", loc)
}
}
})
}
......@@ -386,16 +386,9 @@ func printCountCycleProfile(w io.Writer, countName, cycleName string, scaler fun
count, nanosec := scaler(r.Count, float64(r.Cycles)/cpuGHz)
values[0] = count
values[1] = int64(nanosec)
locs = locs[:0]
for _, addr := range r.Stack() {
// For count profiles, all stack addresses are
// return PCs, which is what locForPC expects.
l := b.locForPC(addr)
if l == 0 { // runtime.goexit
continue
}
locs = append(locs, l)
}
// For count profiles, all stack addresses are
// return PCs, which is what appendLocsForStack expects.
locs = b.appendLocsForStack(locs[:0], r.Stack())
b.pbSample(values, locs, nil)
}
b.build()
......@@ -451,16 +444,9 @@ func printCountProfile(w io.Writer, debug int, name string, p countProfile) erro
var locs []uint64
for _, k := range keys {
values[0] = int64(count[k])
locs = locs[:0]
for _, addr := range p.Stack(index[k]) {
// For count profiles, all stack addresses are
// return PCs, which is what locForPC expects.
l := b.locForPC(addr)
if l == 0 { // runtime.goexit
continue
}
locs = append(locs, l)
}
// For count profiles, all stack addresses are
// return PCs, which is what appendLocsForStack expects.
locs = b.appendLocsForStack(locs[:0], p.Stack(index[k]))
b.pbSample(values, locs, nil)
}
b.build()
......
......@@ -44,6 +44,7 @@ type profileBuilder struct {
locs map[uintptr]locInfo // list of locInfo starting with the given PC.
funcs map[string]int // Package path-qualified function name to Function.ID
mem []memMap
deck pcDeck
}
type memMap struct {
......@@ -428,8 +429,6 @@ func (b *profileBuilder) build() {
}
values := []int64{0, 0}
var deck = &pcDeck{}
var locs []uint64
for e := b.m.all; e != nil; e = e.nextAll {
......@@ -445,61 +444,13 @@ func (b *profileBuilder) build() {
}
}
deck.reset()
locs = locs[:0]
// Addresses from stack traces point to the next instruction after each call,
// except for the leaf, which points to where the signal occurred.
// deck.add+emitLocation expects return PCs so increment the leaf address to
// appendLocsForStack expects return PCs so increment the leaf address to
// look like a return PC.
e.stk[0] += 1
for stk := e.stk; len(stk) > 0; {
addr := stk[0]
if l, ok := b.locs[addr]; ok {
// first record the location if there is any pending accumulated info.
if id := b.emitLocation(deck); id > 0 {
locs = append(locs, id)
}
// then, record the cached location.
locs = append(locs, l.id)
stk = stk[len(l.pcs):] // skip the matching pcs.
continue
}
frames, symbolizeResult := allFrames(addr)
if len(frames) == 0 { // runtime.goexit.
if id := b.emitLocation(deck); id > 0 {
locs = append(locs, id)
}
stk = stk[1:]
continue
}
if added := deck.tryAdd(addr, frames, symbolizeResult); added {
stk = stk[1:]
continue
}
// add failed because this addr is not inlined with
// the existing PCs in the deck. Flush the deck and retry to
// handle this pc.
if id := b.emitLocation(deck); id > 0 {
locs = append(locs, id)
}
// check cache again - previous emitLocation added a new entry
if l, ok := b.locs[addr]; ok {
locs = append(locs, l.id)
stk = stk[len(l.pcs):] // skip the matching pcs.
} else {
deck.tryAdd(addr, frames, symbolizeResult) // must succeed.
stk = stk[1:]
}
}
if id := b.emitLocation(deck); id > 0 { // emit remaining location.
locs = append(locs, id)
}
e.stk[0] -= 1 // undo the adjustment on the leaf done before the loop.
locs = b.appendLocsForStack(locs[:0], e.stk)
e.stk[0] -= 1 // undo the adjustment on the leaf.
b.pbSample(values, locs, labels)
}
......@@ -517,6 +468,62 @@ func (b *profileBuilder) build() {
b.zw.Close()
}
// appendLocsForStack appends the location IDs for the given stack trace to the given
// location ID slice, locs. The addresses in the stack are return PCs or 1 + the PC of
// an inline marker as the runtime traceback function returns.
//
// It may emit to b.pb, so there must be no message encoding in progress.
func (b *profileBuilder) appendLocsForStack(locs []uint64, stk []uintptr) (newLocs []uint64) {
b.deck.reset()
for len(stk) > 0 {
addr := stk[0]
if l, ok := b.locs[addr]; ok {
// first record the location if there is any pending accumulated info.
if id := b.emitLocation(); id > 0 {
locs = append(locs, id)
}
// then, record the cached location.
locs = append(locs, l.id)
stk = stk[len(l.pcs):] // skip the matching pcs.
continue
}
frames, symbolizeResult := allFrames(addr)
if len(frames) == 0 { // runtime.goexit.
if id := b.emitLocation(); id > 0 {
locs = append(locs, id)
}
stk = stk[1:]
continue
}
if added := b.deck.tryAdd(addr, frames, symbolizeResult); added {
stk = stk[1:]
continue
}
// add failed because this addr is not inlined with
// the existing PCs in the deck. Flush the deck and retry to
// handle this pc.
if id := b.emitLocation(); id > 0 {
locs = append(locs, id)
}
// check cache again - previous emitLocation added a new entry
if l, ok := b.locs[addr]; ok {
locs = append(locs, l.id)
stk = stk[len(l.pcs):] // skip the matching pcs.
} else {
b.deck.tryAdd(addr, frames, symbolizeResult) // must succeed.
stk = stk[1:]
}
}
if id := b.emitLocation(); id > 0 { // emit remaining location.
locs = append(locs, id)
}
return locs
}
// pcDeck is a helper to detect a sequence of inlined functions from
// a stack trace returned by the runtime.
//
......@@ -583,15 +590,14 @@ func (d *pcDeck) tryAdd(pc uintptr, frames []runtime.Frame, symbolizeResult symb
// and returns the location ID encoded in the profile protobuf.
// It emits to b.pb, so there must be no message encoding in progress.
// It resets the deck.
func (b *profileBuilder) emitLocation(deck *pcDeck) uint64 {
defer deck.reset()
if len(deck.pcs) == 0 {
func (b *profileBuilder) emitLocation() uint64 {
if len(b.deck.pcs) == 0 {
return 0
}
defer b.deck.reset()
addr := deck.pcs[0]
firstFrame := deck.frames[0]
addr := b.deck.pcs[0]
firstFrame := b.deck.frames[0]
// We can't write out functions while in the middle of the
// Location message, so record new functions we encounter and
......@@ -603,12 +609,12 @@ func (b *profileBuilder) emitLocation(deck *pcDeck) uint64 {
newFuncs := make([]newFunc, 0, 8)
id := uint64(len(b.locs)) + 1
b.locs[addr] = locInfo{id: id, pcs: append([]uintptr{}, deck.pcs...)}
b.locs[addr] = locInfo{id: id, pcs: append([]uintptr{}, b.deck.pcs...)}
start := b.pb.startMessage()
b.pb.uint64Opt(tagLocation_ID, id)
b.pb.uint64Opt(tagLocation_Address, uint64(firstFrame.PC))
for _, frame := range deck.frames {
for _, frame := range b.deck.frames {
// Write out each line in frame expansion.
funcID := uint64(b.funcs[frame.Function])
if funcID == 0 {
......@@ -623,7 +629,7 @@ func (b *profileBuilder) emitLocation(deck *pcDeck) uint64 {
b.pb.uint64Opt(tagLocation_MappingID, uint64(i+1))
m := b.mem[i]
m.funcs |= deck.symbolizeResult
m.funcs |= b.deck.symbolizeResult
b.mem[i] = m
break
}
......
......@@ -27,30 +27,27 @@ func writeHeapProto(w io.Writer, p []runtime.MemProfileRecord, rate int64, defau
values := []int64{0, 0, 0, 0}
var locs []uint64
for _, r := range p {
locs = locs[:0]
hideRuntime := true
for tries := 0; tries < 2; tries++ {
for _, addr := range r.Stack() {
// For heap profiles, all stack
// addresses are return PCs, which is
// what locForPC expects.
if hideRuntime {
stk := r.Stack()
// For heap profiles, all stack
// addresses are return PCs, which is
// what appendLocsForStack expects.
if hideRuntime {
for i, addr := range stk {
if f := runtime.FuncForPC(addr); f != nil && strings.HasPrefix(f.Name(), "runtime.") {
continue
}
// Found non-runtime. Show any runtime uses above it.
hideRuntime = false
stk = stk[i:]
break
}
l := b.locForPC(addr)
if l == 0 { // runtime.goexit
continue
}
locs = append(locs, l)
}
locs = b.appendLocsForStack(locs[:0], stk)
if len(locs) > 0 {
break
}
hideRuntime = false // try again, and show all frames
hideRuntime = false // try again, and show all frames next time.
}
values[0], values[1] = scaleHeapSample(r.AllocObjects, r.AllocBytes, rate)
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment