Commit ca2f85fd authored by Matthew Dempsky

cmd/compile: add indexed export format

This CL introduces a new indexed data format for package export
data. This improves on the previous (sequential) binary format by
allowing the compiler to selectively (and lazily) load only the data
that's actually needed for compilation.

In large Go projects, the package export data can become very large
due to transitive type declaration dependencies and inline
function/method bodies. By lazily loading these declarations and
bodies as needed, we avoid wasting time and memory processing
unnecessary and/or redundant data.

In the benchmarks below, "old" is -iexport=false and "new" is
-iexport=true. The suffixes indicate the compiler concurrency (-c) and
inlining (-l) settings used for the build (using -gcflags=all=-foo).
Benchmarks were run on an HP Z620.

Juju is "go build -a github.com/juju/juju/cmd/...":

name          old real-time/op  new real-time/op  delta
Juju/c=1/l=0        44.0s ± 1%        38.7s ± 9%  -11.97%  (p=0.001 n=7+7)
Juju/c=1/l=4        53.7s ± 3%        45.3s ± 4%  -15.53%  (p=0.001 n=7+7)
Juju/c=4/l=0        39.7s ± 8%        32.0s ± 4%  -19.38%  (p=0.001 n=7+7)
Juju/c=4/l=4        46.3s ± 4%        38.0s ± 4%  -18.06%  (p=0.001 n=7+7)

name          old user-time/op  new user-time/op  delta
Juju/c=1/l=0         371s ± 1%         300s ± 0%  -19.07%  (p=0.001 n=7+6)
Juju/c=1/l=4         482s ± 0%         374s ± 1%  -22.37%  (p=0.001 n=7+7)
Juju/c=4/l=0         410s ± 1%         340s ± 1%  -17.19%  (p=0.001 n=7+7)
Juju/c=4/l=4         532s ± 1%         424s ± 1%  -20.26%  (p=0.001 n=7+7)

name          old sys-time/op   new sys-time/op   delta
Juju/c=1/l=0        33.4s ± 1%        28.4s ± 2%  -15.02%  (p=0.001 n=7+7)
Juju/c=1/l=4        40.7s ± 2%        32.8s ± 3%  -19.51%  (p=0.001 n=7+7)
Juju/c=4/l=0        39.8s ± 2%        34.4s ± 2%  -13.74%  (p=0.001 n=7+7)
Juju/c=4/l=4        48.4s ± 2%        40.4s ± 2%  -16.50%  (p=0.001 n=7+7)

Kubelet is "go build -a k8s.io/kubernetes/cmd/kubelet":

name             old real-time/op  new real-time/op  delta
Kubelet/c=1/l=0        42.0s ± 1%        34.8s ± 1%  -17.27%  (p=0.008 n=5+5)
Kubelet/c=1/l=4        55.4s ± 3%        45.4s ± 3%  -18.06%  (p=0.002 n=6+6)
Kubelet/c=4/l=0        37.4s ± 3%        29.9s ± 1%  -20.25%  (p=0.004 n=6+5)
Kubelet/c=4/l=4        48.1s ± 2%        39.0s ± 5%  -18.93%  (p=0.002 n=6+6)

name             old user-time/op  new user-time/op  delta
Kubelet/c=1/l=0         291s ± 1%         233s ± 1%  -19.96%  (p=0.002 n=6+6)
Kubelet/c=1/l=4         385s ± 1%         298s ± 1%  -22.51%  (p=0.002 n=6+6)
Kubelet/c=4/l=0         325s ± 0%         268s ± 1%  -17.48%  (p=0.004 n=5+6)
Kubelet/c=4/l=4         429s ± 1%         343s ± 1%  -20.08%  (p=0.002 n=6+6)

name             old sys-time/op   new sys-time/op   delta
Kubelet/c=1/l=0        25.1s ± 2%        20.9s ± 4%  -16.69%  (p=0.002 n=6+6)
Kubelet/c=1/l=4        31.2s ± 3%        24.4s ± 0%  -21.67%  (p=0.010 n=6+4)
Kubelet/c=4/l=0        30.2s ± 2%        25.6s ± 1%  -15.34%  (p=0.002 n=6+6)
Kubelet/c=4/l=4        37.3s ± 1%        30.9s ± 2%  -17.11%  (p=0.002 n=6+6)

Change-Id: Ie43eb3bbe1392cbb61c86792a17a57b33b9561f0
Reviewed-on: https://go-review.googlesource.com/106796
Run-TryBot: Matthew Dempsky <mdempsky@google.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Robert Griesemer <gri@golang.org>
parent 03f546eb
...@@ -598,6 +598,7 @@ var knownFormats = map[string]string{ ...@@ -598,6 +598,7 @@ var knownFormats = map[string]string{
"*cmd/internal/obj.LSym %v": "", "*cmd/internal/obj.LSym %v": "",
"*math/big.Int %#x": "", "*math/big.Int %#x": "",
"*math/big.Int %s": "", "*math/big.Int %s": "",
"*math/big.Int %v": "",
"[16]byte %x": "", "[16]byte %x": "",
"[]*cmd/compile/internal/gc.Node %v": "", "[]*cmd/compile/internal/gc.Node %v": "",
"[]*cmd/compile/internal/ssa.Block %v": "", "[]*cmd/compile/internal/ssa.Block %v": "",
...@@ -612,6 +613,7 @@ var knownFormats = map[string]string{ ...@@ -612,6 +613,7 @@ var knownFormats = map[string]string{
"bool %v": "", "bool %v": "",
"byte %08b": "", "byte %08b": "",
"byte %c": "", "byte %c": "",
"byte %v": "",
"cmd/compile/internal/arm.shift %d": "", "cmd/compile/internal/arm.shift %d": "",
"cmd/compile/internal/gc.Class %d": "", "cmd/compile/internal/gc.Class %d": "",
"cmd/compile/internal/gc.Class %s": "", "cmd/compile/internal/gc.Class %s": "",
...@@ -631,6 +633,7 @@ var knownFormats = map[string]string{ ...@@ -631,6 +633,7 @@ var knownFormats = map[string]string{
"cmd/compile/internal/gc.Val %v": "", "cmd/compile/internal/gc.Val %v": "",
"cmd/compile/internal/gc.fmtMode %d": "", "cmd/compile/internal/gc.fmtMode %d": "",
"cmd/compile/internal/gc.initKind %d": "", "cmd/compile/internal/gc.initKind %d": "",
"cmd/compile/internal/gc.itag %v": "",
"cmd/compile/internal/ssa.BranchPrediction %d": "", "cmd/compile/internal/ssa.BranchPrediction %d": "",
"cmd/compile/internal/ssa.Edge %v": "", "cmd/compile/internal/ssa.Edge %v": "",
"cmd/compile/internal/ssa.GCNode %v": "", "cmd/compile/internal/ssa.GCNode %v": "",
......
...@@ -442,6 +442,8 @@ func makepartialcall(fn *Node, t0 *types.Type, meth *types.Sym) *Node { ...@@ -442,6 +442,8 @@ func makepartialcall(fn *Node, t0 *types.Type, meth *types.Sym) *Node {
xfunc.Func.SetDupok(true) xfunc.Func.SetDupok(true)
xfunc.Func.SetNeedctxt(true) xfunc.Func.SetNeedctxt(true)
tfn.Type.SetPkg(t0.Pkg())
// Declare and initialize variable holding receiver. // Declare and initialize variable holding receiver.
cv := nod(OCLOSUREVAR, nil, nil) cv := nod(OCLOSUREVAR, nil, nil)
......
...@@ -5,8 +5,6 @@ ...@@ -5,8 +5,6 @@
package gc package gc
import ( import (
"bufio"
"bytes"
"cmd/compile/internal/types" "cmd/compile/internal/types"
"cmd/internal/bio" "cmd/internal/bio"
"cmd/internal/src" "cmd/internal/src"
...@@ -14,6 +12,8 @@ import ( ...@@ -14,6 +12,8 @@ import (
) )
var ( var (
flagiexport bool // if set, use indexed export data format
Debug_export int // if set, print debugging information about export data Debug_export int // if set, print debugging information about export data
) )
...@@ -72,32 +72,15 @@ func (x methodbyname) Swap(i, j int) { x[i], x[j] = x[j], x[i] } ...@@ -72,32 +72,15 @@ func (x methodbyname) Swap(i, j int) { x[i], x[j] = x[j], x[i] }
func (x methodbyname) Less(i, j int) bool { return x[i].Sym.Name < x[j].Sym.Name } func (x methodbyname) Less(i, j int) bool { return x[i].Sym.Name < x[j].Sym.Name }
func dumpexport(bout *bio.Writer) { func dumpexport(bout *bio.Writer) {
size := 0 // size of export section without enclosing markers
// The linker also looks for the $$ marker - use char after $$ to distinguish format. // The linker also looks for the $$ marker - use char after $$ to distinguish format.
exportf(bout, "\n$$B\n") // indicate binary export format exportf(bout, "\n$$B\n") // indicate binary export format
if debugFormat { off := bout.Offset()
// save a copy of the export data if flagiexport {
var copy bytes.Buffer iexport(bout.Writer)
bcopy := bufio.NewWriter(&copy)
size = export(bcopy, Debug_export != 0)
bcopy.Flush() // flushing to bytes.Buffer cannot fail
if n, err := bout.Write(copy.Bytes()); n != size || err != nil {
Fatalf("error writing export data: got %d bytes, want %d bytes, err = %v", n, size, err)
}
// export data must contain no '$' so that we can find the end by searching for "$$"
// TODO(gri) is this still needed?
if bytes.IndexByte(copy.Bytes(), '$') >= 0 {
Fatalf("export data contains $")
}
// verify that we can read the copied export data back in
// (use empty package map to avoid collisions)
types.CleanroomDo(func() {
Import(types.NewPkg("", ""), bufio.NewReader(&copy)) // must not die
})
} else { } else {
size = export(bout.Writer, Debug_export != 0) export(bout.Writer, Debug_export != 0)
} }
size := bout.Offset() - off
exportf(bout, "\n$$\n") exportf(bout, "\n$$\n")
if Debug_export != 0 { if Debug_export != 0 {
...@@ -108,6 +91,14 @@ func dumpexport(bout *bio.Writer) { ...@@ -108,6 +91,14 @@ func dumpexport(bout *bio.Writer) {
func importsym(ipkg *types.Pkg, pos src.XPos, s *types.Sym, op Op) *Node { func importsym(ipkg *types.Pkg, pos src.XPos, s *types.Sym, op Op) *Node {
n := asNode(s.Def) n := asNode(s.Def)
if n == nil { if n == nil {
// iimport should have created a stub ONONAME
// declaration for all imported symbols. The exception
// is declarations for Runtimepkg, which are populated
// by loadsys instead.
if flagiexport && s.Pkg != Runtimepkg {
Fatalf("missing ONONAME for %v\n", s)
}
n = dclname(s) n = dclname(s)
s.Def = asTypesNode(n) s.Def = asTypesNode(n)
s.Importdef = ipkg s.Importdef = ipkg
......
This diff is collapsed.
This diff is collapsed.
...@@ -59,6 +59,10 @@ func fnpkg(fn *Node) *types.Pkg { ...@@ -59,6 +59,10 @@ func fnpkg(fn *Node) *types.Pkg {
func typecheckinl(fn *Node) { func typecheckinl(fn *Node) {
lno := setlineno(fn) lno := setlineno(fn)
if flagiexport {
expandInline(fn)
}
// typecheckinl is only for imported functions; // typecheckinl is only for imported functions;
// their bodies may refer to unsafe as long as the package // their bodies may refer to unsafe as long as the package
// was marked safe during import (which was checked then). // was marked safe during import (which was checked then).
......
...@@ -244,6 +244,7 @@ func Main(archInit func(*Arch)) { ...@@ -244,6 +244,7 @@ func Main(archInit func(*Arch)) {
flag.StringVar(&blockprofile, "blockprofile", "", "write block profile to `file`") flag.StringVar(&blockprofile, "blockprofile", "", "write block profile to `file`")
flag.StringVar(&mutexprofile, "mutexprofile", "", "write mutex profile to `file`") flag.StringVar(&mutexprofile, "mutexprofile", "", "write mutex profile to `file`")
flag.StringVar(&benchfile, "bench", "", "append benchmark times to `file`") flag.StringVar(&benchfile, "bench", "", "append benchmark times to `file`")
flag.BoolVar(&flagiexport, "iexport", false, "export indexed package data")
objabi.Flagparse(usage) objabi.Flagparse(usage)
// Record flags that affect the build result. (And don't // Record flags that affect the build result. (And don't
...@@ -1107,7 +1108,20 @@ func importfile(f *Val) *types.Pkg { ...@@ -1107,7 +1108,20 @@ func importfile(f *Val) *types.Pkg {
fmt.Printf("importing %s (%s)\n", path_, file) fmt.Printf("importing %s (%s)\n", path_, file)
} }
imp.ReadByte() // skip \n after $$B imp.ReadByte() // skip \n after $$B
c, err = imp.ReadByte()
if err != nil {
yyerror("import %s: reading input: %v", file, err)
errorexit()
}
if c == 'i' {
iimport(importpkg, imp)
} else {
// Old export format always starts with 'c', 'd', or 'v'.
imp.UnreadByte()
Import(importpkg, imp.Reader) Import(importpkg, imp.Reader)
}
default: default:
yyerror("no import in %q", path_) yyerror("no import in %q", path_)
......
// Copyright 2018 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// +build darwin dragonfly freebsd linux netbsd openbsd
package gc
import (
"os"
"reflect"
"syscall"
"unsafe"
)
// TODO(mdempsky): Is there a higher-level abstraction that still
// works well for iimport?
// mapFile returns length bytes from the file starting at the
// specified offset as a string.
//
// The bytes are not copied: the returned string aliases a read-only,
// shared memory mapping of f. This function never unmaps that
// mapping. A zero-length request returns "" without creating a
// mapping.
func mapFile(f *os.File, offset, length int64) (string, error) {
	// syscall.Mmap rejects an empty mapping with EINVAL. Without
	// this check a zero-length request would fail when offset is
	// page-aligned yet succeed when it is not (because the
	// alignment padding added below makes the mapping non-empty).
	if length == 0 {
		return "", nil
	}

	// POSIX mmap: "The implementation may require that off is a
	// multiple of the page size."
	x := offset & int64(os.Getpagesize()-1)
	offset -= x
	length += x

	buf, err := syscall.Mmap(int(f.Fd()), offset, int(length), syscall.PROT_READ, syscall.MAP_SHARED)
	// f must stay reachable until Mmap has finished using its
	// file descriptor; see keepAlive.
	keepAlive(f)
	if err != nil {
		return "", err
	}

	// Drop the alignment padding so buf starts at the requested
	// offset.
	buf = buf[x:]

	// Build the result by pointing a string header at the mapped
	// bytes instead of copying them with string(buf).
	pSlice := (*reflect.SliceHeader)(unsafe.Pointer(&buf))

	var res string
	pString := (*reflect.StringHeader)(unsafe.Pointer(&res))

	pString.Data = pSlice.Data
	pString.Len = pSlice.Len

	return res, nil
}

// keepAlive is a reimplementation of runtime.KeepAlive, which wasn't
// added until Go 1.7, whereas we need to compile with Go 1.4.
var keepAlive = func(interface{}) {}
// Copyright 2018 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// +build !darwin,!dragonfly,!freebsd,!linux,!netbsd,!openbsd
package gc
import (
"io"
"os"
)
// mapFile reads length bytes from f, beginning at offset, and returns
// them as a string. The data is copied into a newly allocated buffer.
// If the full range cannot be read, it returns the empty string
// together with the read error.
func mapFile(f *os.File, offset, length int64) (string, error) {
	section := io.NewSectionReader(f, offset, length)
	data := make([]byte, length)
	if _, err := io.ReadFull(section, data); err != nil {
		return "", err
	}
	return string(data), nil
}
...@@ -67,6 +67,17 @@ func parseFiles(filenames []string) uint { ...@@ -67,6 +67,17 @@ func parseFiles(filenames []string) uint {
localpkg.Height = myheight localpkg.Height = myheight
if flagiexport {
// init.go requires all imported init functions to be
// fully resolved.
// TODO(mdempsky): Can this be done elsewhere more cleanly?
for _, s := range types.InitSyms {
if n := asNode(s.Def); n != nil && s.Pkg != localpkg {
resolve(n)
}
}
}
return lines return lines
} }
......
...@@ -1588,6 +1588,7 @@ func structargs(tl *types.Type, mustname bool) []*Node { ...@@ -1588,6 +1588,7 @@ func structargs(tl *types.Type, mustname bool) []*Node {
gen++ gen++
} }
a := symfield(s, t.Type) a := symfield(s, t.Type)
a.Pos = t.Pos
a.SetIsddd(t.Isddd()) a.SetIsddd(t.Isddd())
args = append(args, a) args = append(args, a)
} }
...@@ -1705,7 +1706,13 @@ func genwrapper(rcvr *types.Type, method *types.Field, newnam *types.Sym) { ...@@ -1705,7 +1706,13 @@ func genwrapper(rcvr *types.Type, method *types.Field, newnam *types.Sym) {
Curfn = fn Curfn = fn
typecheckslice(fn.Nbody.Slice(), Etop) typecheckslice(fn.Nbody.Slice(), Etop)
// TODO(mdempsky): Investigate why this doesn't work with
// indexed export. For now, we disable even in non-indexed
// mode to ensure fair benchmark comparisons and to track down
// unintended compilation differences.
if false {
inlcalls(fn) inlcalls(fn)
}
escAnalyze([]*Node{fn}, false) escAnalyze([]*Node{fn}, false)
Curfn = nil Curfn = nil
......
...@@ -32,21 +32,30 @@ var typecheckdefstack []*Node ...@@ -32,21 +32,30 @@ var typecheckdefstack []*Node
// resolve ONONAME to definition, if any. // resolve ONONAME to definition, if any.
func resolve(n *Node) *Node { func resolve(n *Node) *Node {
if n != nil && n.Op == ONONAME && n.Sym != nil { if n == nil || n.Op != ONONAME {
return n
}
if n.Sym.Pkg != localpkg {
expandDecl(n)
return n
}
r := asNode(n.Sym.Def) r := asNode(n.Sym.Def)
if r != nil { if r == nil {
if r.Op != OIOTA { return n
n = r
} else if len(typecheckdefstack) > 0 {
x := typecheckdefstack[len(typecheckdefstack)-1]
if x.Op == OLITERAL {
n = nodintconst(x.Iota())
} }
if r.Op == OIOTA {
if i := len(typecheckdefstack); i > 0 {
if x := typecheckdefstack[i-1]; x.Op == OLITERAL {
return nodintconst(x.Iota())
} }
} }
return n
} }
return n return r
} }
func typecheckslice(l []*Node, top int) { func typecheckslice(l []*Node, top int) {
......
...@@ -77,3 +77,18 @@ func IsDclstackValid() bool { ...@@ -77,3 +77,18 @@ func IsDclstackValid() bool {
} }
return true return true
} }
// PkgDef returns the definition associated with s at package scope.
func (s *Sym) PkgDef() *Node {
	// dclstack is scanned from its first entry, so the first match
	// is the outermost saved declaration of s, which must be the
	// package scope definition.
	for i := range dclstack {
		if dclstack[i].sym == s {
			return dclstack[i].def
		}
	}

	// No saved entry: the declaration hasn't been shadowed within a
	// function scope, so the current definition is the package
	// scope one.
	return s.Def
}
...@@ -97,3 +97,11 @@ func (w *Writer) Close() error { ...@@ -97,3 +97,11 @@ func (w *Writer) Close() error {
} }
return err return err
} }
// File returns the *os.File stored in r's f field.
// NOTE(review): presumably this is the file r reads from, so the
// caller shares it with r — confirm before closing or seeking it.
func (r *Reader) File() *os.File {
return r.f
}
// File returns the *os.File stored in w's f field.
// NOTE(review): presumably this is the file w writes to, so the
// caller shares it with w — confirm before closing or seeking it.
func (w *Writer) File() *os.File {
return w.f
}
...@@ -59,6 +59,10 @@ func BImportData(fset *token.FileSet, imports map[string]*types.Package, data [] ...@@ -59,6 +59,10 @@ func BImportData(fset *token.FileSet, imports map[string]*types.Package, data []
} }
}() }()
if len(data) > 0 && data[0] == 'i' {
return iImportData(fset, imports, data[1:], path)
}
p := importer{ p := importer{
imports: imports, imports: imports,
data: data, data: data,
......
This diff is collapsed.
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment