Commit e038c7e4 authored by Hana (Hyang-Ah) Kim's avatar Hana (Hyang-Ah) Kim Committed by Hyang-Ah Hana Kim

runtime/pprof: correctly encode inlined functions in CPU profile

The pprof profile proto message expects inlined functions of a PC
to be encoded in one Location entry using multiple Line entries.
https://github.com/google/pprof/blob/5e96527/proto/profile.proto#L177-L184

runtime/pprof has encoded the symbolization information by creating
a Location for each PC found in the stack trace and including info
from all the frames expanded from the PC using runtime.CallersFrames.
This assumes inlined functions are represented as a single PC in the
stack trace. (https://go-review.googlesource.com/41256)

In the recent years, behavior around inlining and the traceback
changed significantly (e.g. https://golang.org/cl/152537,
https://golang.org/issue/29582, and many changes). Now the PCs
in the stack trace represent user frames even including inline
marks. As a result, the profile proto started to allocate a Location
entry for each user frame, lose the inline information (so pprof
presented incorrect results when inlined functions are involved),
and confuse the pprof tool with those PCs made up for inline marks.

This CL attempts to detect inlined call frames from the stack traces
of CPU profiles, and organize the Location information as intended.
Currently, runtime does not provide a reliable and convenient way to
detect inlined call frames and expand user frames from a given externally
recognizable PCs. So we use heuristics to recover the groups
  - inlined call frames have nil Func field
  - inlined call frames will have the same Entry point
  - but must be careful with recursive functions that have the
    same Entry point by definition, and non-Go functions that
    may lack most of the fields of Frame.

The followup CL will address the issue with other profile types.

Change-Id: I0c9667ab016a3e898d648f31c3f82d84c15398db
Reviewed-on: https://go-review.googlesource.com/c/go/+/204636Reviewed-by: default avatarKeith Randall <khr@golang.org>
parent 33dfd352
...@@ -49,8 +49,12 @@ var ( ...@@ -49,8 +49,12 @@ var (
// Must not call other functions nor access heap/globals in the loop, // Must not call other functions nor access heap/globals in the loop,
// otherwise under race detector the samples will be in the race runtime. // otherwise under race detector the samples will be in the race runtime.
func cpuHog1(x int) int { func cpuHog1(x int) int {
return cpuHog0(x, 1e5)
}
func cpuHog0(x, n int) int {
foo := x foo := x
for i := 0; i < 1e5; i++ { for i := 0; i < n; i++ {
if foo > 0 { if foo > 0 {
foo *= foo foo *= foo
} else { } else {
...@@ -101,34 +105,69 @@ func TestCPUProfileMultithreaded(t *testing.T) { ...@@ -101,34 +105,69 @@ func TestCPUProfileMultithreaded(t *testing.T) {
} }
func TestCPUProfileInlining(t *testing.T) { func TestCPUProfileInlining(t *testing.T) {
testCPUProfile(t, stackContains, []string{"runtime/pprof.inlinedCallee", "runtime/pprof.inlinedCaller"}, avoidFunctions(), func(dur time.Duration) { p := testCPUProfile(t, stackContains, []string{"runtime/pprof.inlinedCallee", "runtime/pprof.inlinedCaller"}, avoidFunctions(), func(dur time.Duration) {
cpuHogger(inlinedCaller, &salt1, dur) cpuHogger(inlinedCaller, &salt1, dur)
}) })
// Check if inlined function locations are encoded correctly. The inlinedCalee and inlinedCaller should be in one location.
for _, loc := range p.Location {
hasInlinedCallerAfterInlinedCallee, hasInlinedCallee := false, false
for _, line := range loc.Line {
if line.Function.Name == "runtime/pprof.inlinedCallee" {
hasInlinedCallee = true
}
if hasInlinedCallee && line.Function.Name == "runtime/pprof.inlinedCaller" {
hasInlinedCallerAfterInlinedCallee = true
}
}
if hasInlinedCallee != hasInlinedCallerAfterInlinedCallee {
t.Fatalf("want inlinedCallee followed by inlinedCaller, got separate Location entries:\n%v", p)
}
}
} }
func inlinedCaller(x int) int { func inlinedCaller(x int) int {
x = inlinedCallee(x) x = inlinedCallee(x, 1e5)
return x return x
} }
func inlinedCallee(x int) int { func inlinedCallee(x, n int) int {
// We could just use cpuHog1, but for loops prevent inlining return cpuHog0(x, n)
// right now. :( }
foo := x
i := 0 func TestCPUProfileRecursion(t *testing.T) {
loop: p := testCPUProfile(t, stackContains, []string{"runtime/pprof.inlinedCallee", "runtime/pprof.recursionCallee", "runtime/pprof.recursionCaller"}, avoidFunctions(), func(dur time.Duration) {
if foo > 0 { cpuHogger(recursionCaller, &salt1, dur)
foo *= foo })
} else {
foo *= foo + 1 // check the Location encoding was not confused by recursive calls.
for i, loc := range p.Location {
recursionFunc := 0
for _, line := range loc.Line {
if name := line.Function.Name; name == "runtime/pprof.recursionCaller" || name == "runtime/pprof.recursionCallee" {
recursionFunc++
}
}
if recursionFunc > 1 {
t.Fatalf("want at most one recursionCaller or recursionCallee in one Location, got a violating Location (index: %d):\n%v", i, p)
} }
if i++; i < 1e5 {
goto loop
} }
return foo
} }
func parseProfile(t *testing.T, valBytes []byte, f func(uintptr, []*profile.Location, map[string][]string)) { func recursionCaller(x int) int {
y := recursionCallee(3, x)
return y
}
func recursionCallee(n, x int) int {
if n == 0 {
return 1
}
y := inlinedCallee(x, 1e4)
return y * recursionCallee(n-1, x)
}
func parseProfile(t *testing.T, valBytes []byte, f func(uintptr, []*profile.Location, map[string][]string)) *profile.Profile {
p, err := profile.Parse(bytes.NewReader(valBytes)) p, err := profile.Parse(bytes.NewReader(valBytes))
if err != nil { if err != nil {
t.Fatal(err) t.Fatal(err)
...@@ -137,11 +176,12 @@ func parseProfile(t *testing.T, valBytes []byte, f func(uintptr, []*profile.Loca ...@@ -137,11 +176,12 @@ func parseProfile(t *testing.T, valBytes []byte, f func(uintptr, []*profile.Loca
count := uintptr(sample.Value[0]) count := uintptr(sample.Value[0])
f(count, sample.Location, sample.Label) f(count, sample.Location, sample.Label)
} }
return p
} }
// testCPUProfile runs f under the CPU profiler, checking for some conditions specified by need, // testCPUProfile runs f under the CPU profiler, checking for some conditions specified by need,
// as interpreted by matches. // as interpreted by matches, and returns the parsed profile.
func testCPUProfile(t *testing.T, matches matchFunc, need []string, avoid []string, f func(dur time.Duration)) { func testCPUProfile(t *testing.T, matches matchFunc, need []string, avoid []string, f func(dur time.Duration)) *profile.Profile {
switch runtime.GOOS { switch runtime.GOOS {
case "darwin": case "darwin":
switch runtime.GOARCH { switch runtime.GOARCH {
...@@ -195,8 +235,8 @@ func testCPUProfile(t *testing.T, matches matchFunc, need []string, avoid []stri ...@@ -195,8 +235,8 @@ func testCPUProfile(t *testing.T, matches matchFunc, need []string, avoid []stri
f(duration) f(duration)
StopCPUProfile() StopCPUProfile()
if profileOk(t, matches, need, avoid, prof, duration) { if p, ok := profileOk(t, matches, need, avoid, prof, duration); ok {
return return p
} }
duration *= 2 duration *= 2
...@@ -217,6 +257,7 @@ func testCPUProfile(t *testing.T, matches matchFunc, need []string, avoid []stri ...@@ -217,6 +257,7 @@ func testCPUProfile(t *testing.T, matches matchFunc, need []string, avoid []stri
t.Skip("ignore the failure in QEMU; see golang.org/issue/9605") t.Skip("ignore the failure in QEMU; see golang.org/issue/9605")
} }
t.FailNow() t.FailNow()
return nil
} }
func contains(slice []string, s string) bool { func contains(slice []string, s string) bool {
...@@ -242,7 +283,7 @@ func stackContains(spec string, count uintptr, stk []*profile.Location, labels m ...@@ -242,7 +283,7 @@ func stackContains(spec string, count uintptr, stk []*profile.Location, labels m
type matchFunc func(spec string, count uintptr, stk []*profile.Location, labels map[string][]string) bool type matchFunc func(spec string, count uintptr, stk []*profile.Location, labels map[string][]string) bool
func profileOk(t *testing.T, matches matchFunc, need []string, avoid []string, prof bytes.Buffer, duration time.Duration) (ok bool) { func profileOk(t *testing.T, matches matchFunc, need []string, avoid []string, prof bytes.Buffer, duration time.Duration) (_ *profile.Profile, ok bool) {
ok = true ok = true
// Check that profile is well formed, contains 'need', and does not contain // Check that profile is well formed, contains 'need', and does not contain
...@@ -251,7 +292,7 @@ func profileOk(t *testing.T, matches matchFunc, need []string, avoid []string, p ...@@ -251,7 +292,7 @@ func profileOk(t *testing.T, matches matchFunc, need []string, avoid []string, p
avoidSamples := make([]uintptr, len(avoid)) avoidSamples := make([]uintptr, len(avoid))
var samples uintptr var samples uintptr
var buf bytes.Buffer var buf bytes.Buffer
parseProfile(t, prof.Bytes(), func(count uintptr, stk []*profile.Location, labels map[string][]string) { p := parseProfile(t, prof.Bytes(), func(count uintptr, stk []*profile.Location, labels map[string][]string) {
fmt.Fprintf(&buf, "%d:", count) fmt.Fprintf(&buf, "%d:", count)
fprintStack(&buf, stk) fprintStack(&buf, stk)
samples += count samples += count
...@@ -278,7 +319,7 @@ func profileOk(t *testing.T, matches matchFunc, need []string, avoid []string, p ...@@ -278,7 +319,7 @@ func profileOk(t *testing.T, matches matchFunc, need []string, avoid []string, p
// not enough samples due to coarse timer // not enough samples due to coarse timer
// resolution. Let it go. // resolution. Let it go.
t.Log("too few samples on Windows (golang.org/issue/10842)") t.Log("too few samples on Windows (golang.org/issue/10842)")
return false return p, false
} }
// Check that we got a reasonable number of samples. // Check that we got a reasonable number of samples.
...@@ -300,7 +341,7 @@ func profileOk(t *testing.T, matches matchFunc, need []string, avoid []string, p ...@@ -300,7 +341,7 @@ func profileOk(t *testing.T, matches matchFunc, need []string, avoid []string, p
} }
if len(need) == 0 { if len(need) == 0 {
return ok return p, ok
} }
var total uintptr var total uintptr
...@@ -323,7 +364,7 @@ func profileOk(t *testing.T, matches matchFunc, need []string, avoid []string, p ...@@ -323,7 +364,7 @@ func profileOk(t *testing.T, matches matchFunc, need []string, avoid []string, p
ok = false ok = false
} }
} }
return ok return p, ok
} }
// Fork can hang if preempted with signals frequently enough (see issue 5517). // Fork can hang if preempted with signals frequently enough (see issue 5517).
......
This diff is collapsed.
...@@ -358,6 +358,17 @@ func TestMapping(t *testing.T) { ...@@ -358,6 +358,17 @@ func TestMapping(t *testing.T) {
continue continue
} }
} }
if traceback == "Go+C" {
// The test code was arranged to have PCs from C and
// they are not symbolized.
// Check no Location containing those unsymbolized PCs contains multiple lines.
for i, loc := range prof.Location {
if !symbolized(loc) && len(loc.Line) > 1 {
t.Errorf("Location[%d] contains unsymbolized PCs and multiple lines: %v", i, loc)
}
}
}
}) })
} }
} }
......
...@@ -17,8 +17,7 @@ package main ...@@ -17,8 +17,7 @@ package main
int cpuHogCSalt1 = 0; int cpuHogCSalt1 = 0;
int cpuHogCSalt2 = 0; int cpuHogCSalt2 = 0;
void CPUHogCFunction() { void CPUHogCFunction0(int foo) {
int foo = cpuHogCSalt1;
int i; int i;
for (i = 0; i < 100000; i++) { for (i = 0; i < 100000; i++) {
if (foo > 0) { if (foo > 0) {
...@@ -30,6 +29,10 @@ void CPUHogCFunction() { ...@@ -30,6 +29,10 @@ void CPUHogCFunction() {
} }
} }
void CPUHogCFunction() {
CPUHogCFunction0(cpuHogCSalt1);
}
struct CgoTracebackArg { struct CgoTracebackArg {
uintptr_t context; uintptr_t context;
uintptr_t sigContext; uintptr_t sigContext;
...@@ -39,8 +42,9 @@ struct CgoTracebackArg { ...@@ -39,8 +42,9 @@ struct CgoTracebackArg {
void CollectCgoTraceback(void* parg) { void CollectCgoTraceback(void* parg) {
struct CgoTracebackArg* arg = (struct CgoTracebackArg*)(parg); struct CgoTracebackArg* arg = (struct CgoTracebackArg*)(parg);
arg->buf[0] = (uintptr_t)(CPUHogCFunction); arg->buf[0] = (uintptr_t)(CPUHogCFunction0);
arg->buf[1] = 0; arg->buf[1] = (uintptr_t)(CPUHogCFunction);
arg->buf[2] = 0;
}; };
*/ */
import "C" import "C"
...@@ -81,7 +85,6 @@ var salt1 int ...@@ -81,7 +85,6 @@ var salt1 int
var salt2 int var salt2 int
func cpuHogGoFunction() { func cpuHogGoFunction() {
// Generates CPU profile samples including a Go call path.
for { for {
foo := salt1 foo := salt1
for i := 0; i < 1e5; i++ { for i := 0; i < 1e5; i++ {
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment