Commit 948b21a3 authored by Josh Bleecher Snyder's avatar Josh Bleecher Snyder

testing: only call ReadMemStats if necessary when benchmarking

When running benchmarks with -cpuprofile,
the entire process gets profiled,
and ReadMemStats is surprisingly expensive.
Running the sort benchmarks right now with
-cpuprofile shows almost half of all execution
time in ReadMemStats.

Since ReadMemStats is not required if the benchmark
does not need allocation stats, simply skip it.
This will make cpu profiles nicer to read
and significantly speed up the process of running benchmarks.
It might also make sense to toggle cpu profiling
on/off as we begin/end individual benchmarks,
but that wouldn't get us the time savings of
skipping ReadMemStats, so this CL is useful in itself.

Change-Id: I425197b1ee11be4bc91d22b929e2caf648ebd7c5
Reviewed-on: https://go-review.googlesource.com/36791
Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: default avatarIan Lance Taylor <iant@golang.org>
parent 5157039f
...@@ -73,9 +73,11 @@ type B struct { ...@@ -73,9 +73,11 @@ type B struct {
// a call to StopTimer. // a call to StopTimer.
func (b *B) StartTimer() { func (b *B) StartTimer() {
if !b.timerOn { if !b.timerOn {
runtime.ReadMemStats(&memStats) if *benchmarkMemory || b.showAllocResult {
b.startAllocs = memStats.Mallocs runtime.ReadMemStats(&memStats)
b.startBytes = memStats.TotalAlloc b.startAllocs = memStats.Mallocs
b.startBytes = memStats.TotalAlloc
}
b.start = time.Now() b.start = time.Now()
b.timerOn = true b.timerOn = true
} }
...@@ -87,9 +89,11 @@ func (b *B) StartTimer() { ...@@ -87,9 +89,11 @@ func (b *B) StartTimer() {
func (b *B) StopTimer() { func (b *B) StopTimer() {
if b.timerOn { if b.timerOn {
b.duration += time.Now().Sub(b.start) b.duration += time.Now().Sub(b.start)
runtime.ReadMemStats(&memStats) if *benchmarkMemory || b.showAllocResult {
b.netAllocs += memStats.Mallocs - b.startAllocs runtime.ReadMemStats(&memStats)
b.netBytes += memStats.TotalAlloc - b.startBytes b.netAllocs += memStats.Mallocs - b.startAllocs
b.netBytes += memStats.TotalAlloc - b.startBytes
}
b.timerOn = false b.timerOn = false
} }
} }
...@@ -98,9 +102,11 @@ func (b *B) StopTimer() { ...@@ -98,9 +102,11 @@ func (b *B) StopTimer() {
// It does not affect whether the timer is running. // It does not affect whether the timer is running.
func (b *B) ResetTimer() { func (b *B) ResetTimer() {
if b.timerOn { if b.timerOn {
runtime.ReadMemStats(&memStats) if *benchmarkMemory || b.showAllocResult {
b.startAllocs = memStats.Mallocs runtime.ReadMemStats(&memStats)
b.startBytes = memStats.TotalAlloc b.startAllocs = memStats.Mallocs
b.startBytes = memStats.TotalAlloc
}
b.start = time.Now() b.start = time.Now()
} }
b.duration = 0 b.duration = 0
...@@ -294,6 +300,8 @@ func (b *B) launch() { ...@@ -294,6 +300,8 @@ func (b *B) launch() {
} }
// The results of a benchmark run. // The results of a benchmark run.
// MemAllocs and MemBytes may be zero if memory benchmarking is not requested
// using B.ReportAllocs or the -benchmem command line flag.
type BenchmarkResult struct { type BenchmarkResult struct {
N int // The number of iterations. N int // The number of iterations.
T time.Duration // The total time taken. T time.Duration // The total time taken.
...@@ -316,6 +324,7 @@ func (r BenchmarkResult) mbPerSec() float64 { ...@@ -316,6 +324,7 @@ func (r BenchmarkResult) mbPerSec() float64 {
return (float64(r.Bytes) * float64(r.N) / 1e6) / r.T.Seconds() return (float64(r.Bytes) * float64(r.N) / 1e6) / r.T.Seconds()
} }
// AllocsPerOp returns r.MemAllocs / r.N.
func (r BenchmarkResult) AllocsPerOp() int64 { func (r BenchmarkResult) AllocsPerOp() int64 {
if r.N <= 0 { if r.N <= 0 {
return 0 return 0
...@@ -323,6 +332,7 @@ func (r BenchmarkResult) AllocsPerOp() int64 { ...@@ -323,6 +332,7 @@ func (r BenchmarkResult) AllocsPerOp() int64 {
return int64(r.MemAllocs) / int64(r.N) return int64(r.MemAllocs) / int64(r.N)
} }
// AllocsPerOp returns r.MemBytes / r.N.
func (r BenchmarkResult) AllocedBytesPerOp() int64 { func (r BenchmarkResult) AllocedBytesPerOp() int64 {
if r.N <= 0 { if r.N <= 0 {
return 0 return 0
...@@ -350,6 +360,7 @@ func (r BenchmarkResult) String() string { ...@@ -350,6 +360,7 @@ func (r BenchmarkResult) String() string {
return fmt.Sprintf("%8d\t%s%s", r.N, ns, mb) return fmt.Sprintf("%8d\t%s%s", r.N, ns, mb)
} }
// MemString returns r.AllocedBytesPerOp and r.AllocsPerOp in the same format as 'go test'.
func (r BenchmarkResult) MemString() string { func (r BenchmarkResult) MemString() string {
return fmt.Sprintf("%8d B/op\t%8d allocs/op", return fmt.Sprintf("%8d B/op\t%8d allocs/op",
r.AllocedBytesPerOp(), r.AllocsPerOp()) r.AllocedBytesPerOp(), r.AllocsPerOp())
......
...@@ -457,8 +457,14 @@ func TestBRun(t *T) { ...@@ -457,8 +457,14 @@ func TestBRun(t *T) {
_ = append([]byte(nil), buf[:]...) _ = append([]byte(nil), buf[:]...)
} }
} }
b.Run("", func(b *B) { alloc(b) }) b.Run("", func(b *B) {
b.Run("", func(b *B) { alloc(b) }) alloc(b)
b.ReportAllocs()
})
b.Run("", func(b *B) {
alloc(b)
b.ReportAllocs()
})
// runtime.MemStats sometimes reports more allocations than the // runtime.MemStats sometimes reports more allocations than the
// benchmark is responsible for. Luckily the point of this test is // benchmark is responsible for. Luckily the point of this test is
// to ensure that the results are not underreported, so we can // to ensure that the results are not underreported, so we can
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment