Commit a6d8b483 authored by Russ Cox

runtime: make garbage collector faster by deleting code

Suggested by Sanjay Ghemawat.  5-20% faster depending
on the benchmark.

Add tree2 garbage benchmark.
Update other garbage benchmarks to build again.

R=golang-dev, r, adg
CC=golang-dev
https://golang.org/cl/5530074
parent 2b628811
...@@ -123,10 +123,9 @@ enum ...@@ -123,10 +123,9 @@ enum
// Max number of threads to run garbage collection. // Max number of threads to run garbage collection.
// 2, 3, and 4 are all plausible maximums depending // 2, 3, and 4 are all plausible maximums depending
// on the hardware details of the machine. The second // on the hardware details of the machine. The garbage
// proc is the one that helps the most (after the first), // collector scales well to 4 cpus.
// so start with just 2 for now. MaxGcproc = 4,
MaxGcproc = 2,
}; };
// A generic linked list of blocks. (Typically the block is bigger than sizeof(MLink).) // A generic linked list of blocks. (Typically the block is bigger than sizeof(MLink).)
......
...@@ -53,9 +53,6 @@ enum { ...@@ -53,9 +53,6 @@ enum {
#define bitMask (bitBlockBoundary | bitAllocated | bitMarked | bitSpecial) #define bitMask (bitBlockBoundary | bitAllocated | bitMarked | bitSpecial)
// TODO: Make these per-M. // TODO: Make these per-M.
static uint64 nlookup;
static uint64 nsizelookup;
static uint64 naddrlookup;
static uint64 nhandoff; static uint64 nhandoff;
static int32 gctrace; static int32 gctrace;
...@@ -212,8 +209,6 @@ scanblock(byte *b, int64 n) ...@@ -212,8 +209,6 @@ scanblock(byte *b, int64 n)
// Otherwise consult span table to find beginning. // Otherwise consult span table to find beginning.
// (Manually inlined copy of MHeap_LookupMaybe.) // (Manually inlined copy of MHeap_LookupMaybe.)
nlookup++;
naddrlookup++;
k = (uintptr)obj>>PageShift; k = (uintptr)obj>>PageShift;
x = k; x = k;
if(sizeof(void*) == 8) if(sizeof(void*) == 8)
...@@ -301,49 +296,8 @@ scanblock(byte *b, int64 n) ...@@ -301,49 +296,8 @@ scanblock(byte *b, int64 n)
b = *--wp; b = *--wp;
nobj--; nobj--;
// Figure out n = size of b. Start by loading bits for b. // Ask span about size class.
off = (uintptr*)b - (uintptr*)arena_start;
bitp = (uintptr*)arena_start - off/wordsPerBitmapWord - 1;
shift = off % wordsPerBitmapWord;
xbits = *bitp;
bits = xbits >> shift;
// Might be small; look for nearby block boundary.
// A block boundary is marked by either bitBlockBoundary
// or bitAllocated being set (see notes near their definition).
enum {
boundary = bitBlockBoundary|bitAllocated
};
// Look for a block boundary both after and before b
// in the same bitmap word.
//
// A block boundary j words after b is indicated by
// bits>>j & boundary
// assuming shift+j < bitShift. (If shift+j >= bitShift then
// we'll be bleeding other bit types like bitMarked into our test.)
// Instead of inserting the conditional shift+j < bitShift into the loop,
// we can let j range from 1 to bitShift as long as we first
// apply a mask to keep only the bits corresponding
// to shift+j < bitShift aka j < bitShift-shift.
bits &= (boundary<<(bitShift-shift)) - boundary;
// A block boundary j words before b is indicated by
// xbits>>(shift-j) & boundary
// (assuming shift >= j). There is no cleverness here
// avoid the test, because when j gets too large the shift
// turns negative, which is undefined in C.
for(j=1; j<bitShift; j++) {
if(((bits>>j)&boundary) != 0 || shift>=j && ((xbits>>(shift-j))&boundary) != 0) {
n = j*PtrSize;
goto scan;
}
}
// Fall back to asking span about size class.
// (Manually inlined copy of MHeap_Lookup.) // (Manually inlined copy of MHeap_Lookup.)
nlookup++;
nsizelookup++;
x = (uintptr)b>>PageShift; x = (uintptr)b>>PageShift;
if(sizeof(void*) == 8) if(sizeof(void*) == 8)
x -= (uintptr)arena_start>>PageShift; x -= (uintptr)arena_start>>PageShift;
...@@ -352,7 +306,6 @@ scanblock(byte *b, int64 n) ...@@ -352,7 +306,6 @@ scanblock(byte *b, int64 n)
n = s->npages<<PageShift; n = s->npages<<PageShift;
else else
n = runtime·class_to_size[s->sizeclass]; n = runtime·class_to_size[s->sizeclass];
scan:;
} }
} }
...@@ -953,9 +906,6 @@ runtime·gc(int32 force) ...@@ -953,9 +906,6 @@ runtime·gc(int32 force)
} }
t0 = runtime·nanotime(); t0 = runtime·nanotime();
nlookup = 0;
nsizelookup = 0;
naddrlookup = 0;
nhandoff = 0; nhandoff = 0;
m->gcing = 1; m->gcing = 1;
...@@ -1020,11 +970,11 @@ runtime·gc(int32 force) ...@@ -1020,11 +970,11 @@ runtime·gc(int32 force)
runtime·printf("pause %D\n", t3-t0); runtime·printf("pause %D\n", t3-t0);
if(gctrace) { if(gctrace) {
runtime·printf("gc%d(%d): %D+%D+%D ms %D -> %D MB %D -> %D (%D-%D) objects %D pointer lookups (%D size, %D addr) %D handoff\n", runtime·printf("gc%d(%d): %D+%D+%D ms %D -> %D MB %D -> %D (%D-%D) objects %D handoff\n",
mstats.numgc, work.nproc, (t1-t0)/1000000, (t2-t1)/1000000, (t3-t2)/1000000, mstats.numgc, work.nproc, (t1-t0)/1000000, (t2-t1)/1000000, (t3-t2)/1000000,
heap0>>20, heap1>>20, obj0, obj1, heap0>>20, heap1>>20, obj0, obj1,
mstats.nmalloc, mstats.nfree, mstats.nmalloc, mstats.nfree,
nlookup, nsizelookup, naddrlookup, nhandoff); nhandoff);
} }
runtime·semrelease(&gcsema); runtime·semrelease(&gcsema);
......
...@@ -8,6 +8,7 @@ ALL=\ ...@@ -8,6 +8,7 @@ ALL=\
parser\ parser\
peano\ peano\
tree\ tree\
tree2\
all: $(addsuffix .out, $(ALL)) all: $(addsuffix .out, $(ALL))
......
...@@ -24,15 +24,15 @@ import ( ...@@ -24,15 +24,15 @@ import (
var serve = flag.String("serve", "", "serve http on this address at end") var serve = flag.String("serve", "", "serve http on this address at end")
func isGoFile(dir *os.FileInfo) bool { func isGoFile(dir os.FileInfo) bool {
return dir.IsRegular() && return !dir.IsDir() &&
!strings.HasPrefix(dir.Name, ".") && // ignore .files !strings.HasPrefix(dir.Name(), ".") && // ignore .files
path.Ext(dir.Name) == ".go" path.Ext(dir.Name()) == ".go"
} }
func isPkgFile(dir *os.FileInfo) bool { func isPkgFile(dir os.FileInfo) bool {
return isGoFile(dir) && return isGoFile(dir) &&
!strings.HasSuffix(dir.Name, "_test.go") // ignore test files !strings.HasSuffix(dir.Name(), "_test.go") // ignore test files
} }
func pkgName(filename string) string { func pkgName(filename string) string {
...@@ -49,7 +49,7 @@ func parseDir(dirpath string) map[string]*ast.Package { ...@@ -49,7 +49,7 @@ func parseDir(dirpath string) map[string]*ast.Package {
_, pkgname := path.Split(dirpath) _, pkgname := path.Split(dirpath)
// filter function to select the desired .go files // filter function to select the desired .go files
filter := func(d *os.FileInfo) bool { filter := func(d os.FileInfo) bool {
if isPkgFile(d) { if isPkgFile(d) {
// Some directories contain main packages: Only accept // Some directories contain main packages: Only accept
// files that belong to the expected package so that // files that belong to the expected package so that
...@@ -57,7 +57,7 @@ func parseDir(dirpath string) map[string]*ast.Package { ...@@ -57,7 +57,7 @@ func parseDir(dirpath string) map[string]*ast.Package {
// found" errors. // found" errors.
// Additionally, accept the special package name // Additionally, accept the special package name
// fakePkgName if we are looking at cmd documentation. // fakePkgName if we are looking at cmd documentation.
name := pkgName(dirpath + "/" + d.Name) name := pkgName(dirpath + "/" + d.Name())
return name == pkgname return name == pkgname
} }
return false return false
...@@ -82,7 +82,7 @@ func main() { ...@@ -82,7 +82,7 @@ func main() {
flag.Parse() flag.Parse()
var lastParsed []map[string]*ast.Package var lastParsed []map[string]*ast.Package
var t0 int64 var t0 time.Time
pkgroot := runtime.GOROOT() + "/src/pkg/" pkgroot := runtime.GOROOT() + "/src/pkg/"
for pass := 0; pass < 2; pass++ { for pass := 0; pass < 2; pass++ {
// Once the heap is grown to full size, reset counters. // Once the heap is grown to full size, reset counters.
...@@ -91,7 +91,7 @@ func main() { ...@@ -91,7 +91,7 @@ func main() {
// the average look much better than it actually is. // the average look much better than it actually is.
st.NumGC = 0 st.NumGC = 0
st.PauseTotalNs = 0 st.PauseTotalNs = 0
t0 = time.Nanoseconds() t0 = time.Now()
for i := 0; i < *n; i++ { for i := 0; i < *n; i++ {
parsed := make([]map[string]*ast.Package, *p) parsed := make([]map[string]*ast.Package, *p)
...@@ -105,7 +105,7 @@ func main() { ...@@ -105,7 +105,7 @@ func main() {
runtime.GC() runtime.GC()
runtime.GC() runtime.GC()
} }
t1 := time.Nanoseconds() t1 := time.Now()
fmt.Printf("Alloc=%d/%d Heap=%d Mallocs=%d PauseTime=%.3f/%d = %.3f\n", fmt.Printf("Alloc=%d/%d Heap=%d Mallocs=%d PauseTime=%.3f/%d = %.3f\n",
st.Alloc, st.TotalAlloc, st.Alloc, st.TotalAlloc,
...@@ -120,7 +120,7 @@ func main() { ...@@ -120,7 +120,7 @@ func main() {
} }
*/ */
// Standard gotest benchmark output, collected by build dashboard. // Standard gotest benchmark output, collected by build dashboard.
gcstats("BenchmarkParser", *n, t1-t0) gcstats("BenchmarkParser", *n, t1.Sub(t0))
if *serve != "" { if *serve != "" {
log.Fatal(http.ListenAndServe(*serve, nil)) log.Fatal(http.ListenAndServe(*serve, nil))
...@@ -130,18 +130,17 @@ func main() { ...@@ -130,18 +130,17 @@ func main() {
var packages = []string{ var packages = []string{
"archive/tar", "archive/tar",
"asn1", "encoding/asn1",
"big", "math/big",
"bufio", "bufio",
"bytes", "bytes",
"cmath", "math/cmplx",
"compress/flate", "compress/flate",
"compress/gzip", "compress/gzip",
"compress/zlib", "compress/zlib",
"container/heap", "container/heap",
"container/list", "container/list",
"container/ring", "container/ring",
"container/vector",
"crypto/aes", "crypto/aes",
"crypto/blowfish", "crypto/blowfish",
"crypto/hmac", "crypto/hmac",
...@@ -161,16 +160,14 @@ var packages = []string{ ...@@ -161,16 +160,14 @@ var packages = []string{
"debug/macho", "debug/macho",
"debug/elf", "debug/elf",
"debug/gosym", "debug/gosym",
"ebnf", "exp/ebnf",
"encoding/ascii85", "encoding/ascii85",
"encoding/base64", "encoding/base64",
"encoding/binary", "encoding/binary",
"encoding/git85", "encoding/git85",
"encoding/hex", "encoding/hex",
"encoding/pem", "encoding/pem",
"exec", "os/exec",
"exp/datafmt",
"expvar",
"flag", "flag",
"fmt", "fmt",
"go/ast", "go/ast",
...@@ -179,18 +176,18 @@ var packages = []string{ ...@@ -179,18 +176,18 @@ var packages = []string{
"go/printer", "go/printer",
"go/scanner", "go/scanner",
"go/token", "go/token",
"gob", "encoding/gob",
"hash", "hash",
"hash/adler32", "hash/adler32",
"hash/crc32", "hash/crc32",
"hash/crc64", "hash/crc64",
"http", "net/http",
"image", "image",
"image/jpeg", "image/jpeg",
"image/png", "image/png",
"io", "io",
"io/ioutil", "io/ioutil",
"json", "encoding/json",
"log", "log",
"math", "math",
"mime", "mime",
...@@ -199,29 +196,29 @@ var packages = []string{ ...@@ -199,29 +196,29 @@ var packages = []string{
"os/signal", "os/signal",
"patch", "patch",
"path", "path",
"rand", "math/rand",
"reflect", "reflect",
"regexp", "regexp",
"rpc", "net/rpc",
"runtime", "runtime",
"scanner", "text/scanner",
"sort", "sort",
"smtp", "net/smtp",
"strconv", "strconv",
"strings", "strings",
"sync", "sync",
"syscall", "syscall",
"syslog", "log/syslog",
"tabwriter", "text/tabwriter",
"template", "text/template",
"testing", "testing",
"testing/iotest", "testing/iotest",
"testing/quick", "testing/quick",
"testing/script", "testing/script",
"time", "time",
"unicode", "unicode",
"utf8", "unicode/utf8",
"utf16", "unicode/utf16",
"websocket", "websocket",
"xml", "encoding/xml",
} }
...@@ -108,15 +108,14 @@ func verify() { ...@@ -108,15 +108,14 @@ func verify() {
// ------------------------------------- // -------------------------------------
// Factorial // Factorial
func main() { func main() {
t0 := time.Nanoseconds() t0 := time.Now()
verify() verify()
for i := 0; i <= 9; i++ { for i := 0; i <= 9; i++ {
print(i, "! = ", count(fact(gen(i))), "\n") print(i, "! = ", count(fact(gen(i))), "\n")
} }
runtime.GC() runtime.GC()
t1 := time.Nanoseconds() t1 := time.Now()
gcstats("BenchmarkPeano", 1, t1-t0) gcstats("BenchmarkPeano", 1, t1.Sub(t0))
} }
...@@ -8,12 +8,13 @@ import ( ...@@ -8,12 +8,13 @@ import (
"fmt" "fmt"
"runtime" "runtime"
"sort" "sort"
"time"
) )
func gcstats(name string, n int, t int64) { func gcstats(name string, n int, t time.Duration) {
st := &runtime.MemStats st := &runtime.MemStats
fmt.Printf("garbage.%sMem Alloc=%d/%d Heap=%d NextGC=%d Mallocs=%d\n", name, st.Alloc, st.TotalAlloc, st.Sys, st.NextGC, st.Mallocs) fmt.Printf("garbage.%sMem Alloc=%d/%d Heap=%d NextGC=%d Mallocs=%d\n", name, st.Alloc, st.TotalAlloc, st.Sys, st.NextGC, st.Mallocs)
fmt.Printf("garbage.%s %d %d ns/op\n", name, n, t/int64(n)) fmt.Printf("garbage.%s %d %d ns/op\n", name, n, t.Nanoseconds()/int64(n))
fmt.Printf("garbage.%sLastPause 1 %d ns/op\n", name, st.PauseNs[(st.NumGC-1)%uint32(len(st.PauseNs))]) fmt.Printf("garbage.%sLastPause 1 %d ns/op\n", name, st.PauseNs[(st.NumGC-1)%uint32(len(st.PauseNs))])
fmt.Printf("garbage.%sPause %d %d ns/op\n", name, st.NumGC, int64(st.PauseTotalNs)/int64(st.NumGC)) fmt.Printf("garbage.%sPause %d %d ns/op\n", name, st.NumGC, int64(st.PauseTotalNs)/int64(st.NumGC))
nn := int(st.NumGC) nn := int(st.NumGC)
......
...@@ -68,7 +68,7 @@ const minDepth = 4 ...@@ -68,7 +68,7 @@ const minDepth = 4
func main() { func main() {
flag.Parse() flag.Parse()
t0 := time.Nanoseconds() t0 := time.Now()
maxDepth := *n maxDepth := *n
if minDepth+2 > *n { if minDepth+2 > *n {
...@@ -93,8 +93,8 @@ func main() { ...@@ -93,8 +93,8 @@ func main() {
} }
fmt.Printf("long lived tree of depth %d\t check: %d\n", maxDepth, longLivedTree.itemCheck()) fmt.Printf("long lived tree of depth %d\t check: %d\n", maxDepth, longLivedTree.itemCheck())
t1 := time.Nanoseconds() t1 := time.Now()
// Standard gotest benchmark output, collected by build dashboard. // Standard gotest benchmark output, collected by build dashboard.
gcstats("BenchmarkTree", *n, t1-t0) gcstats("BenchmarkTree", *n, t1.Sub(t0))
} }
// Copyright 2012 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package main
import (
"flag"
"fmt"
"log"
"os"
"runtime"
"runtime/pprof"
"unsafe"
)
// BranchingFactor is the number of child pointers stored in every Object,
// i.e. the maximum fan-out of the tree built by buildTree.
const BranchingFactor = 4
// Object is one node of the benchmark tree. It holds nothing but pointers to
// its children; slots that buildTree does not fill stay nil.
type Object struct {
child [BranchingFactor]*Object
}
// Command-line flags and package-level benchmark state.
var (
// cpus is the value main passes to runtime.GOMAXPROCS.
cpus = flag.Int("cpus", 1, "number of cpus to use")
// heapsize is the target tree size in bytes that buildHeap hands to buildTree.
heapsize = flag.Int64("heapsize", 100*1024*1024, "size of the heap in bytes")
// cpuprofile, when non-empty, names the file main writes a CPU profile to.
cpuprofile = flag.String("cpuprofile", "", "write cpu profile to file")
// lastPauseNs is the PauseTotalNs value seen at the previous report; gc
// subtracts it to print the pause of the latest collection only.
lastPauseNs uint64 = 0
// lastFree is TotalAlloc-Alloc as seen at the previous report; gc subtracts
// it to print how much was collected since then.
lastFree uint64 = 0
// heap is the root of the tree created by buildHeap.
// NOTE(review): presumably held here so the tree stays reachable across collections.
heap *Object
// calls counts buildTree invocations per recursion depth; depths >= 20 would
// index out of range. It is not read anywhere in the visible code.
calls [20]int
// numobjects is the number of Objects allocated by buildTree so far.
numobjects int64
)
// buildHeap allocates the benchmark tree and stores its root in heap.
//
// The tree is sized from the -heapsize flag using the in-memory size of one
// Object as the per-node cost. The summary line reports the requested heap
// size (the byte count returned by buildTree is discarded) together with the
// number of objects allocated and the size of each.
func buildHeap() {
	nodeBytes := int64(unsafe.Sizeof(Object{}))

	root, _ := buildTree(float64(nodeBytes), float64(*heapsize), 0)
	heap = root

	requestedMB := float64(*heapsize) / 1048576
	fmt.Printf("*** built heap: %.0f MB; (%d objects * %d bytes)\n", requestedMB, numobjects, nodeBytes)
}
// buildTree allocates a subtree whose accounted size approaches size bytes,
// charging objsize bytes for every node, and returns the subtree root along
// with the number of bytes accounted for.
//
// The budget left after the root is split evenly between BranchingFactor
// children. Children are added one slot at a time until the running total
// reaches size, so trailing child slots may remain nil and the returned total
// may exceed size. Each call bumps calls[depth] and numobjects.
func buildTree(objsize, size float64, depth int) (*Object, float64) {
	calls[depth]++
	numobjects++

	node := new(Object)
	used := objsize
	perChild := (size - objsize) / BranchingFactor

	for slot := range node.child {
		// Written as a negated "<" so the stop condition is exactly the
		// complement of the original loop guard, even for NaN inputs.
		if !(used < size) {
			break
		}
		sub, subBytes := buildTree(objsize, perChild, depth+1)
		node.child[slot] = sub
		used += subBytes
	}
	return node, used
}
// gc forces one garbage collection and prints a one-line report: the pause
// added since the previous report (ms), the bytes collected since the
// previous report (MB), and the bytes currently in use (MB).
//
// The deltas are computed against lastPauseNs and lastFree, which are updated
// to the current readings before returning.
func gc() {
	runtime.GC()
	// Pre-Go 1 API; presumably refreshes runtime.MemStats before it is read.
	runtime.UpdateMemStats()

	totalPause := runtime.MemStats.PauseTotalNs
	live := runtime.MemStats.Alloc
	freed := runtime.MemStats.TotalAlloc - live

	pauseMs := float64(totalPause-lastPauseNs) / 1e6
	collectedMB := float64(freed-lastFree) / 1048576
	liveMB := float64(live) / 1048576
	fmt.Printf("gc pause: %8.3f ms; collect: %8.0f MB; heapsize: %8.0f MB\n", pauseMs, collectedMB, liveMB)

	lastPauseNs, lastFree = totalPause, freed
}
// main builds the benchmark heap and then forces ten garbage collections,
// printing a report after each one.
//
// The heap is built before GOMAXPROCS is set from -cpus. When -cpuprofile is
// non-empty, a CPU profile covering the collection loop is written to that
// file; failure to create the file or to start profiling is fatal.
func main() {
	flag.Parse()
	buildHeap()
	runtime.GOMAXPROCS(*cpus)

	// Take the baseline after the heap is built so gc reports deltas that
	// exclude whatever happened during construction.
	runtime.UpdateMemStats()
	lastPauseNs = runtime.MemStats.PauseTotalNs
	lastFree = runtime.MemStats.TotalAlloc - runtime.MemStats.Alloc

	if *cpuprofile != "" {
		f, err := os.Create(*cpuprofile)
		if err != nil {
			log.Fatal(err)
		}
		// Deferred first so it runs last, after StopCPUProfile has finished
		// writing the profile.
		defer f.Close()
		// Do not run the benchmark silently unprofiled if profiling cannot start.
		if err := pprof.StartCPUProfile(f); err != nil {
			log.Fatal(err)
		}
		defer pprof.StopCPUProfile()
	}

	for i := 0; i < 10; i++ {
		gc()
	}
}
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment