Commit 4dab6d01 authored by Russ Cox's avatar Russ Cox

cmd/go: make -a apply to the standard library, like in Go 1.3

This CL adds a very long comment explaining how isStale and
the new build IDs work. As part of writing the comment I realized:

// When the go command makes the wrong build decision and does not
// rebuild something it should, users fall back to adding the -a flag.
// Any common use of the -a flag should be considered prima facie evidence
// that isStale is returning an incorrect false result in some important case.
// Bugs reported in the behavior of -a itself should prompt the question
// ``Why is -a being used at all? What bug does that indicate?''

The two uses of -a that are most commonly mentioned in bugs filed
against the go command are:

	go install -a ./...
	go build -tags netgo -a myprog

Both of these commands now do the right thing without needing -a.

The -a exception we introduced in Go 1.4 was for the first form, and
it broke the second form. Again, neither needs -a anymore, so restore
the old, simpler, easier to explain, less surprising meaning used in Go 1.3:
if -a is given, rebuild EVERYTHING.

See the comment for more justification and history.

Summary of recent CLs (to link bugs to this one):

Fixes #3036. Now 'go install ./...' works.
Fixes #6534. Now 'go install ./...' works.
Fixes #8290. Now 'go install ./...' works.
Fixes #9369. Now 'go build -tags netgo myprog' works.
Fixes #10702. Now using one GOPATH with Go 1.5 and Go 1.6 works.
  (Each time you switch, everything needed gets rebuilt.
  Switching from Go 1.4 to Go 1.5 will rebuild properly.
  Switching from Go 1.5 back to Go 1.4 still needs -a when
  invoking the Go 1.4 go command.)

Change-Id: I19f9eb5286efaa50de7c8326602e94604ab572eb
Reviewed-on: https://go-review.googlesource.com/10761Reviewed-by: default avatarIan Lance Taylor <iant@golang.org>
parent 3ab9ff11
...@@ -585,7 +585,7 @@ func TestGoBuilDashAInReleaseBranch(t *testing.T) { ...@@ -585,7 +585,7 @@ func TestGoBuilDashAInReleaseBranch(t *testing.T) {
tg.run("install", "math") // should be up to date already but just in case tg.run("install", "math") // should be up to date already but just in case
tg.setenv("TESTGO_IS_GO_RELEASE", "1") tg.setenv("TESTGO_IS_GO_RELEASE", "1")
tg.run("build", "-v", "-a", "math") tg.run("build", "-v", "-a", "math")
tg.grepStderrNot("runtime", "testgo build -a math in dev branch DID build runtime, but should NOT have") tg.grepStderr("runtime", "testgo build -a math in dev branch did not build runtime, but should have")
} }
func TestGoInstallCleansUpAfterGoBuild(t *testing.T) { func TestGoInstallCleansUpAfterGoBuild(t *testing.T) {
......
...@@ -21,7 +21,6 @@ import ( ...@@ -21,7 +21,6 @@ import (
"sort" "sort"
"strconv" "strconv"
"strings" "strings"
"time"
"unicode" "unicode"
) )
...@@ -696,57 +695,7 @@ func (p *Package) load(stk *importStack, bp *build.Package, err error) *Package ...@@ -696,57 +695,7 @@ func (p *Package) load(stk *importStack, bp *build.Package, err error) *Package
} }
} }
// Compute build ID for this package. computeBuildID(p)
// Build ID is hash of information we want to detect changes in.
// The mtime-based checks in computeStale take care of most
// of that information, but they cannot detect the removal of a
// source file from a directory (with no changes to files that remain
// and no new files in that directory). We hash the list of source
// files (without full path, to allow moving the entire tree)
// so that if one is removed, we detect it via the build IDs.
// In the future we might include other relevant information,
// like build tags or whether we're using the race detector or
// (if it becomes cheap enough) file contents.
h := sha1.New()
inputFiles := stringList(
p.GoFiles,
p.CgoFiles,
p.CFiles,
p.CXXFiles,
p.MFiles,
p.HFiles,
p.SFiles,
p.SysoFiles,
p.SwigFiles,
p.SwigCXXFiles,
)
fmt.Fprintf(h, "%d files\n", len(inputFiles))
for _, file := range inputFiles {
fmt.Fprintf(h, "%s\n", file)
}
// Include the content of runtime/zversion.go in the hash
// for package runtime. This will give package runtime a
// different build ID in each Go release.
if p.Standard && p.ImportPath == "runtime" {
data, _ := ioutil.ReadFile(filepath.Join(p.Dir, "zversion.go"))
fmt.Fprintf(h, "zversion %q\n", string(data))
}
// Include the build IDs of any dependencies in the hash.
// This, combined with the runtime/zversion content,
// will cause packages to have different build IDs when
// compiled with different Go releases.
// This helps the go command know to recompile when
// people use the same GOPATH but switch between
// different Go releases. See golang.org/issue/10702.
for _, dep := range p.Deps {
p1 := deps[dep]
fmt.Fprintf(h, "dep %s %s\n", p1.ImportPath, p1.buildID)
}
p.buildID = fmt.Sprintf("%x", h.Sum(nil))
return p return p
} }
...@@ -796,6 +745,267 @@ func computeStale(pkgs ...*Package) { ...@@ -796,6 +745,267 @@ func computeStale(pkgs ...*Package) {
// inspecting the version. // inspecting the version.
var isGoRelease = strings.HasPrefix(runtime.Version(), "go1") var isGoRelease = strings.HasPrefix(runtime.Version(), "go1")
// isStale and computeBuildID
//
// Theory of Operation
//
// There is an installed copy of the package (or binary).
// Can we reuse the installed copy, or do we need to build a new one?
//
// We can use the installed copy if it matches what we'd get
// by building a new one. The hard part is predicting that without
// actually running a build.
//
// To start, we must know the set of inputs to the build process that can
// affect the generated output. At a minimum, that includes the source
// files for the package and also any compiled packages imported by those
// source files. The *Package has these, and we use them. One might also
// argue for including in the input set: the build tags, whether the race
// detector is in use, the target operating system and architecture, the
// compiler and linker binaries being used, the additional flags being
// passed to those, the cgo binary being used, the additional flags cgo
// passes to the host C compiler, the host C compiler being used, the set
// of host C include files and installed C libraries, and so on.
// We include some but not all of this information.
//
// Once we have decided on a set of inputs, we must next decide how to
// tell whether the content of that set has changed since the last build
// of p. If there have been no changes, then we assume a new build would
// produce the same result and reuse the installed package or binary.
// But if there have been changes, then we assume a new build might not
// produce the same result, so we rebuild.
//
// There are two common ways to decide whether the content of the set has
// changed: modification times and content hashes. We use a mixture of both.
//
// The use of modification times (mtimes) was pioneered by make:
// assuming that a file's mtime is an accurate record of when that file was last written,
// and assuming that the modification time of an installed package or
// binary is the time that it was built, if the mtimes of the inputs
// predate the mtime of the installed object, then the build of that
// object saw those versions of the files, and therefore a rebuild using
// those same versions would produce the same object. In contrast, if any
// mtime of an input is newer than the mtime of the installed object, a
// change has occurred since the build, and the build should be redone.
//
// Modification times are attractive because the logic is easy to
// understand and the file system maintains the mtimes automatically
// (less work for us). Unfortunately, there are a variety of ways in
// which the mtime approach fails to detect a change and reuses a stale
// object file incorrectly. (Making the opposite mistake, rebuilding
// unnecessarily, is only a performance problem and not a correctness
// problem, so we ignore that one.)
//
// As a warmup, one problem is that to be perfectly precise, we need to
// compare the input mtimes against the time at the beginning of the
// build, but the object file time is the time at the end of the build.
// If an input file changes after being read but before the object is
// written, the next build will see an object newer than the input and
// will incorrectly decide that the object is up to date. We make no
// attempt to detect or solve this problem.
//
// Another problem is that due to file system imprecision, an input and
// output that are actually ordered in time have the same mtime.
// This typically happens on file systems with 1-second (or, worse,
// 2-second) mtime granularity and with automated scripts that write an
// input and then immediately run a build, or vice versa. If an input and
// an output have the same mtime, the conservative behavior is to treat
// the output as out-of-date and rebuild. This can cause one or more
// spurious rebuilds, but only for 1 second, until the object finally has
// an mtime later than the input.
//
// Another problem is that binary distributions often set the mtime on
// all files to the same time. If the distribution includes both inputs
// and cached build outputs, tthe conservative solution to the previous
// problem will cause unnecessary rebuilds. Worse, in such a binary
// distribution, those rebuilds might not even have permission to update
// the cached build output. To avoid these write errors, if an input and
// output have the same mtime, we assume the output is up-to-date.
// This is the opposite of what the previous problem would have us do,
// but binary distributions are more common than instances of the
// previous problem.
//
// A variant of the last problem is that some binary distributions do not
// set the mtime on all files to the same time. Instead they let the file
// system record mtimes as the distribution is unpacked. If the outputs
// are unpacked before the inputs, they'll be older and a build will try
// to rebuild them. That rebuild might hit the same write errors as in
// the last scenario. We don't make any attempt to solve this, and we
// haven't had many reports of it. Perhaps the only time this happens is
// when people manually unpack the distribution, and most of the time
// that's done as the same user who will be using it, so an initial
// rebuild on first use succeeds quietly.
//
// More generally, people and programs change mtimes on files. The last
// few problems were specific examples of this, but it's a general problem.
// For example, instead of a binary distribution, copying a home
// directory from one directory or machine to another might copy files
// but not preserve mtimes. If the inputs are new than the outputs on the
// first machine but copied first, they end up older than the outputs on
// the second machine.
//
// Because many other build systems have the same sensitivity to mtimes,
// most programs manipulating source code take pains not to break the
// mtime assumptions. For example, Git does not set the mtime of files
// during a checkout operation, even when checking out an old version of
// the code. This decision was made specifically to work well with
// mtime-based build systems.
//
// The killer problem, though, for mtime-based build systems is that the
// build only has access to the mtimes of the inputs that still exist.
// If it is possible to remove an input without changing any other inputs,
// a later build will think the object is up-to-date when it is not.
// This happens for Go because a package is made up of all source
// files in a directory. If a source file is removed, there is no newer
// mtime available recording that fact. The mtime on the directory could
// be used, but it also changes when unrelated files are added to or
// removed from the directory, so including the directory mtime would
// cause unnecessary rebuilds, possibly many. It would also exacerbate
// the problems mentioned earlier, since even programs that are careful
// to maintain mtimes on files rarely maintain mtimes on directories.
//
// A variant of the last problem is when the inputs change for other
// reasons. For example, Go 1.4 and Go 1.5 both install $GOPATH/src/mypkg
// into the same target, $GOPATH/pkg/$GOOS_$GOARCH/mypkg.a.
// If Go 1.4 has built mypkg into mypkg.a, a build using Go 1.5 must
// rebuild mypkg.a, but from mtimes alone mypkg.a looks up-to-date.
// If Go 1.5 has just been installed, perhaps the compiler will have a
// newer mtime; since the compiler is considered an input, that would
// trigger a rebuild. But only once, and only the last Go 1.4 build of
// mypkg.a happened before Go 1.5 was installed. If a user has the two
// versions installed in different locations and flips back and forth,
// mtimes alone cannot tell what to do. Changing the toolchain is
// changing the set of inputs, without affecting any mtimes.
//
// To detect the set of inputs changing, we turn away from mtimes and to
// an explicit data comparison. Specifically, we build a list of the
// inputs to the build, compute its SHA1 hash, and record that as the
// ``build ID'' in the generated object. At the next build, we can
// recompute the buid ID and compare it to the one in the generated
// object. If they differ, the list of inputs has changed, so the object
// is out of date and must be rebuilt.
//
// Because this build ID is computed before the build begins, the
// comparison does not have the race that mtime comparison does.
//
// Making the build sensitive to changes in other state is
// straightforward: include the state in the build ID hash, and if it
// changes, so does the build ID, triggering a rebuild.
//
// To detect changes in toolchain, we include the toolchain version in
// the build ID hash for package runtime, and then we include the build
// IDs of all imported packages in the build ID for p.
//
// It is natural to think about including build tags in the build ID, but
// the naive approach of just dumping the tags into the hash would cause
// spurious rebuilds. For example, 'go install' and 'go install -tags neverusedtag'
// produce the same binaries (assuming neverusedtag is never used).
// A more precise approach would be to include only tags that have an
// effect on the build. But the effect of a tag on the build is to
// include or exclude a file from the compilation, and that file list is
// already in the build ID hash. So the build ID is already tag-sensitive
// in a perfectly precise way. So we do NOT explicitly add build tags to
// the build ID hash.
//
// We do not include as part of the build ID the operating system,
// architecture, or whether the race detector is enabled, even though all
// three have an effect on the output, because that information is used
// to decide the install location. Binaries for linux and binaries for
// darwin are written to different directory trees; including that
// information in the build ID is unnecessary (although it would be
// harmless).
//
// TODO(rsc): Investigate the cost of putting source file content into
// the build ID hash as a replacement for the use of mtimes. Using the
// file content would avoid all the mtime problems, but it does require
// reading all the source files, something we avoid today (we read the
// beginning to find the build tags and the imports, but we stop as soon
// as we see the import block is over). If the package is stale, the compiler
// is going to read the files anyway. But if the package is up-to-date, the
// read is overhead.
//
// TODO(rsc): Investigate the complexity of making the build more
// precise about when individual results are needed. To be fully precise,
// there are two results of a compilation: the entire .a file used by the link
// and the subpiece used by later compilations (__.PKGDEF only).
// If a rebuild is needed but produces the previous __.PKGDEF, then
// no more recompilation due to the rebuilt package is needed, only
// relinking. To date, there is nothing in the Go command to express this.
//
// Special Cases
//
// When the go command makes the wrong build decision and does not
// rebuild something it should, users fall back to adding the -a flag.
// Any common use of the -a flag should be considered prima facie evidence
// that isStale is returning an incorrect false result in some important case.
// Bugs reported in the behavior of -a itself should prompt the question
// ``Why is -a being used at all? What bug does that indicate?''
//
// There is a long history of changes to isStale to try to make -a into a
// suitable workaround for bugs in the mtime-based decisions.
// It is worth recording that history to inform (and, as much as possible, deter) future changes.
//
// (1) Before the build IDs were introduced, building with alternate tags
// would happily reuse installed objects built without those tags.
// For example, "go build -tags netgo myprog.go" would use the installed
// copy of package net, even if that copy had been built without netgo.
// (The netgo tag controls whether package net uses cgo or pure Go for
// functionality such as name resolution.)
// Using the installed non-netgo package defeats the purpose.
//
// Users worked around this with "go build -tags netgo -a myprog.go".
//
// Build IDs have made that workaround unnecessary:
// "go build -tags netgo myprog.go"
// cannot use a non-netgo copy of package net.
//
// (2) Before the build IDs were introduced, building with different toolchains,
// especially changing between toolchains, tried to reuse objects stored in
// $GOPATH/pkg, resulting in link-time errors about object file mismatches.
//
// Users worked around this with "go install -a ./...".
//
// Build IDs have made that workaround unnecessary:
// "go install ./..." will rebuild any objects it finds that were built against
// a different toolchain.
//
// (3) The common use of "go install -a ./..." led to reports of problems
// when the -a forced the rebuild of the standard library, which for some
// users was not writable. Because we didn't understand that the real
// problem was the bug -a was working around, we changed -a not to
// apply to the standard library.
//
// (4) The common use of "go build -tags netgo -a myprog.go" broke
// when we changed -a not to apply to the standard library, because
// if go build doesn't rebuild package net, it uses the non-netgo version.
//
// Users worked around this with "go build -tags netgo -installsuffix barf myprog.go".
// The -installsuffix here is making the go command look for packages
// in pkg/$GOOS_$GOARCH_barf instead of pkg/$GOOS_$GOARCH.
// Since the former presumably doesn't exist, go build decides to rebuild
// everything, including the standard library. Since go build doesn't
// install anything it builds, nothing is ever written to pkg/$GOOS_$GOARCH_barf,
// so repeated invocations continue to work.
//
// If the use of -a wasn't a red flag, the use of -installsuffix to point to
// a non-existent directory in a command that installs nothing should
// have been.
//
// (5) Now that (1) and (2) no longer need -a, we have removed the kludge
// introduced in (3): once again, -a means ``rebuild everything,'' not
// ``rebuild everything except the standard library.'' Only Go 1.4 had
// the restricted meaning.
//
// In addition to these cases trying to trigger rebuilds, there are
// special cases trying NOT to trigger rebuilds. The main one is that for
// a variety of reasons (see above), the install process for a Go release
// cannot be relied upon to set the mtimes such that the go command will
// think the standard library is up to date. So the mtime evidence is
// ignored for the standard library if we find ourselves in a release
// version of Go. Build ID-based staleness checks still apply to the
// standard library, even in release versions. This makes
// 'go build -tags netgo' work, among other things.
// isStale reports whether package p needs to be rebuilt. // isStale reports whether package p needs to be rebuilt.
func isStale(p *Package) bool { func isStale(p *Package) bool {
if p.Standard && (p.ImportPath == "unsafe" || buildContext.Compiler == "gccgo") { if p.Standard && (p.ImportPath == "unsafe" || buildContext.Compiler == "gccgo") {
...@@ -816,24 +1026,57 @@ func isStale(p *Package) bool { ...@@ -816,24 +1026,57 @@ func isStale(p *Package) bool {
return false return false
} }
// If the -a flag is given, rebuild everything.
if buildA {
return true
}
// If there's no install target or it's already marked stale, we have to rebuild. // If there's no install target or it's already marked stale, we have to rebuild.
if p.target == "" || p.Stale { if p.target == "" || p.Stale {
return true return true
} }
// Package is stale if completely unbuilt. // Package is stale if completely unbuilt.
var built time.Time fi, err := os.Stat(p.target)
if fi, err := os.Stat(p.target); err == nil { if err != nil {
built = fi.ModTime() return true
}
// Package is stale if the expected build ID differs from the
// recorded build ID. This catches changes like a source file
// being removed from a package directory. See issue 3895.
// It also catches changes in build tags that affect the set of
// files being compiled. See issue 9369.
// It also catches changes in toolchain, like when flipping between
// two versions of Go compiling a single GOPATH.
// See issue 8290 and issue 10702.
targetBuildID, err := readBuildID(p)
if err == nil && targetBuildID != p.buildID {
return true
} }
if built.IsZero() {
// Package is stale if a dependency is.
for _, p1 := range p.deps {
if p1.Stale {
return true return true
} }
}
// If we are running a release copy of Go, do not rebuild the standard packages. // The checks above are content-based staleness.
// We assume they are always accurate.
//
// The checks below are mtime-based staleness.
// We hope they are accurate, but we know that they fail in the case of
// prebuilt Go installations that don't preserve the build mtimes
// (for example, if the pkg/ mtimes are before the src/ mtimes).
// See the large comment above isStale for details.
// If we are running a release copy of Go and didn't find a content-based
// reason to rebuild the standard packages, do not rebuild them.
// They may not be writable anyway, but they are certainly not changing. // They may not be writable anyway, but they are certainly not changing.
// This makes 'go build' and 'go build -a' skip the standard packages when // This makes 'go build' skip the standard packages when
// using an official release. See issue 3036, issue 3149, issue 4106, issue 8290. // using an official release, even when the mtimes have been changed.
// See issue 3036, issue 3149, issue 4106, issue 8290.
// (If a change to a release tree must be made by hand, the way to force the // (If a change to a release tree must be made by hand, the way to force the
// install is to run make.bash, which will remove the old package archives // install is to run make.bash, which will remove the old package archives
// before rebuilding.) // before rebuilding.)
...@@ -841,10 +1084,9 @@ func isStale(p *Package) bool { ...@@ -841,10 +1084,9 @@ func isStale(p *Package) bool {
return false return false
} }
// If the -a flag is given, rebuild everything (except standard packages; see above). // Time-based staleness.
if buildA {
return true built := fi.ModTime()
}
olderThan := func(file string) bool { olderThan := func(file string) bool {
fi, err := os.Stat(file) fi, err := os.Stat(file)
...@@ -853,19 +1095,11 @@ func isStale(p *Package) bool { ...@@ -853,19 +1095,11 @@ func isStale(p *Package) bool {
// Package is stale if a dependency is, or if a dependency is newer. // Package is stale if a dependency is, or if a dependency is newer.
for _, p1 := range p.deps { for _, p1 := range p.deps {
if p1.Stale || p1.target != "" && olderThan(p1.target) { if p1.target != "" && olderThan(p1.target) {
return true return true
} }
} }
// Package is stale if the expected build ID differs from the
// recorded build ID. This catches changes like a source file
// being removed from a package directory. See issue 3895.
targetBuildID, err := readBuildID(p)
if err == nil && targetBuildID != p.buildID {
return true
}
// As a courtesy to developers installing new versions of the compiler // As a courtesy to developers installing new versions of the compiler
// frequently, define that packages are stale if they are // frequently, define that packages are stale if they are
// older than the compiler, and commands if they are older than // older than the compiler, and commands if they are older than
...@@ -936,6 +1170,53 @@ func isStale(p *Package) bool { ...@@ -936,6 +1170,53 @@ func isStale(p *Package) bool {
return false return false
} }
// computeBuildID computes the build ID for p, leaving it in p.buildID.
// Build ID is a hash of the information we want to detect changes in.
// See the long comment in isStale for details.
func computeBuildID(p *Package) {
h := sha1.New()
// Include the list of files compiled as part of the package.
// This lets us detect removed files. See issue 3895.
inputFiles := stringList(
p.GoFiles,
p.CgoFiles,
p.CFiles,
p.CXXFiles,
p.MFiles,
p.HFiles,
p.SFiles,
p.SysoFiles,
p.SwigFiles,
p.SwigCXXFiles,
)
for _, file := range inputFiles {
fmt.Fprintf(h, "file %s\n", file)
}
// Include the content of runtime/zversion.go in the hash
// for package runtime. This will give package runtime a
// different build ID in each Go release.
if p.Standard && p.ImportPath == "runtime" {
data, _ := ioutil.ReadFile(filepath.Join(p.Dir, "zversion.go"))
fmt.Fprintf(h, "zversion %q\n", string(data))
}
// Include the build IDs of any dependencies in the hash.
// This, combined with the runtime/zversion content,
// will cause packages to have different build IDs when
// compiled with different Go releases.
// This helps the go command know to recompile when
// people use the same GOPATH but switch between
// different Go releases. See issue 10702.
// This is also a better fix for issue 8290.
for _, p1 := range p.deps {
fmt.Fprintf(h, "dep %s %s\n", p1.ImportPath, p1.buildID)
}
p.buildID = fmt.Sprintf("%x", h.Sum(nil))
}
var cwd, _ = os.Getwd() var cwd, _ = os.Getwd()
var cmdCache = map[string]*Package{} var cmdCache = map[string]*Package{}
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment