Commit 06ad3b2d authored by Russ Cox's avatar Russ Cox

cmd/cgo: stop using compiler error message text to analyze C names

The old approach to determining whether "name" was a type, constant,
or expression was to compile the C program

        name;

and scan the errors and warnings generated by the compiler.
This requires looking for specific substrings in the errors and warnings,
which ties the implementation to specific compiler versions.
As compilers change their errors or drop warnings, cgo breaks.
This happens slowly but it does happen.
Clang in particular (now required on OS X) has a significant churn rate.

The new approach compiles a slightly more complex program
that is either valid C or not valid C depending on what kind of
thing "name" is. It uses only the presence or absence of an error
message on a particular line, not the error text itself. The program is:

        // error if and only if name is undeclared
        void f1(void) { typeof(name) *x; }

        // error if and only if name is not a type
        void f2(void) { name *x; }

        // error if and only if name is not an integer constant
        void f3(void) { enum { x = (name)*1 }; }

I had not been planning to do this until Go 1.3, because it is a
non-trivial change, but it fixes a real Xcode 5 problem in Go 1.2,
and the new code is easier to understand than the old code.
It should be significantly more robust.

Fixes #6596.
Fixes #6612.

R=golang-dev, r, james, iant
CC=golang-dev
https://golang.org/cl/15070043
parent 20f99ffa
......@@ -49,5 +49,6 @@ func TestFpVar(t *testing.T) { testFpVar(t) }
func Test4339(t *testing.T) { test4339(t) }
func Test6390(t *testing.T) { test6390(t) }
func Test5986(t *testing.T) { test5986(t) }
func TestNaming(t *testing.T) { testNaming(t) }
func BenchmarkCgoCall(b *testing.B) { benchCgoCall(b) }
// Copyright 2013 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// golang.org/issue/6612
// Test new scheme for deciding whether C.name is an expression, type, constant.
// Clang silences some warnings when the name is a #defined macro, so test those too
// (even though we now use errors exclusively, not warnings).
package cgotest
/*
void myfunc(void) {}
int myvar = 5;
const char *mytext = "abcdef";
typedef int mytype;
enum {
myenum = 1234,
};
#define myfunc_def myfunc
#define myvar_def myvar
#define mytext_def mytext
#define mytype_def mytype
#define myenum_def myenum
#define myint_def 12345
#define myfloat_def 1.5
#define mystring_def "hello"
*/
import "C"
import "testing"
func testNaming(t *testing.T) {
C.myfunc()
C.myfunc_def()
if v := C.myvar; v != 5 {
t.Errorf("C.myvar = %d, want 5", v)
}
if v := C.myvar_def; v != 5 {
t.Errorf("C.myvar_def = %d, want 5", v)
}
if s := C.GoString(C.mytext); s != "abcdef" {
t.Errorf("C.mytext = %q, want %q", s, "abcdef")
}
if s := C.GoString(C.mytext_def); s != "abcdef" {
t.Errorf("C.mytext_def = %q, want %q", s, "abcdef")
}
if c := C.myenum; c != 1234 {
t.Errorf("C.myenum = %v, want 1234", c)
}
if c := C.myenum_def; c != 1234 {
t.Errorf("C.myenum_def = %v, want 1234", c)
}
{
const c = C.myenum
if c != 1234 {
t.Errorf("C.myenum as const = %v, want 1234", c)
}
}
{
const c = C.myenum_def
if c != 1234 {
t.Errorf("C.myenum as const = %v, want 1234", c)
}
}
if c := C.myint_def; c != 12345 {
t.Errorf("C.myint_def = %v, want 12345", c)
}
{
const c = C.myint_def
if c != 12345 {
t.Errorf("C.myint as const = %v, want 12345", c)
}
}
// This would be nice, but it has never worked.
/*
if c := C.myfloat_def; c != 1.5 {
t.Errorf("C.myint_def = %v, want 1.5", c)
}
{
const c = C.myfloat_def
if c != 1.5 {
t.Errorf("C.myint as const = %v, want 1.5", c)
}
}
*/
if s := C.mystring_def; s != "hello" {
t.Errorf("C.mystring_def = %q, want %q", s, "hello")
}
}
......@@ -269,29 +269,30 @@ Next, cgo needs to identify the kinds for each identifier. For the
identifiers C.foo and C.bar, cgo generates this C program:
<preamble>
void __cgo__f__(void) {
#line 1 "cgo-test"
foo;
enum { _cgo_enum_0 = foo };
bar;
enum { _cgo_enum_1 = bar };
}
This program will not compile, but cgo can look at the error messages
to infer the kind of each identifier. The line number given in the
error tells cgo which identifier is involved.
An error like "unexpected type name" or "useless type name in empty
declaration" or "declaration does not declare anything" tells cgo that
the identifier is a type.
An error like "statement with no effect" or "expression result unused"
tells cgo that the identifier is not a type, but not whether it is a
constant, function, or global variable.
An error like "not an integer constant" tells cgo that the identifier
is not a constant. If it is also not a type, it must be a function or
global variable. For now, those can be treated the same.
#line 1 "not-declared"
void __cgo_f_xxx_1(void) { typeof(foo) *__cgo_undefined__; }
#line 1 "not-type"
void __cgo_f_xxx_2(void) { foo *__cgo_undefined__; }
#line 1 "not-const"
void __cgo_f_xxx_3(void) { enum { __cgo_undefined__ = (foo)*1 }; }
#line 2 "not-declared"
void __cgo_f_xxx_1(void) { typeof(bar) *__cgo_undefined__; }
#line 2 "not-type"
void __cgo_f_xxx_2(void) { bar *__cgo_undefined__; }
#line 2 "not-const"
void __cgo_f_xxx_3(void) { enum { __cgo_undefined__ = (bar)*1 }; }
This program will not compile, but cgo can use the presence or absence
of an error message on a given line to deduce the information it
needs. The program is syntactically valid regardless of whether each
name is a type or an ordinary identifier, so there will be no syntax
errors that might stop parsing early.
An error on not-declared:1 indicates that foo is undeclared.
An error on not-type:1 indicates that foo is not a type (if declared at all, it is an identifier).
An error on not-const:1 indicates that foo is not an integer constant.
The line number specifies the name involved. In the example, 1 is foo and 2 is bar.
Next, cgo must learn the details of each type, variable, function, or
constant. It can do this by reading object files. If cgo has decided
......
......@@ -226,42 +226,22 @@ func (p *Package) loadDefines(f *File) {
// name xxx for the references C.xxx in the Go input.
// The kind is either a constant, type, or variable.
func (p *Package) guessKinds(f *File) []*Name {
// Coerce gcc into telling us whether each name is
// a type, a value, or undeclared. We compile a function
// containing the line:
// name;
// If name is a type, gcc will print:
// cgo-test:2: warning: useless type name in empty declaration
// If name is a value, gcc will print
// cgo-test:2: warning: statement with no effect
// If name is undeclared, gcc will print
// cgo-test:2: error: 'name' undeclared (first use in this function)
// A line number directive causes the line number to
// correspond to the index in the names array.
//
// The line also has an enum declaration:
// name; enum { _cgo_enum_1 = name };
// If name is not a constant, gcc will print:
// cgo-test:4: error: enumerator value for '_cgo_enum_4' is not an integer constant
// we assume lines without that error are constants.
// Make list of names that need sniffing, type lookup.
toSniff := make([]*Name, 0, len(f.Name))
needType := make([]*Name, 0, len(f.Name))
// Determine kinds for names we already know about,
// like #defines or 'struct foo', before bothering with gcc.
var names, needType []*Name
for _, n := range f.Name {
// If we've already found this name as a #define
// and we can translate it as a constant value, do so.
if n.Define != "" {
ok := false
isConst := false
if _, err := strconv.Atoi(n.Define); err == nil {
ok = true
isConst = true
} else if n.Define[0] == '"' || n.Define[0] == '\'' {
if _, err := parser.ParseExpr(n.Define); err == nil {
ok = true
isConst = true
}
}
if ok {
if isConst {
n.Kind = "const"
// Turn decimal into hex, just for consistency
// with enum-derived constants. Otherwise
......@@ -281,126 +261,139 @@ func (p *Package) guessKinds(f *File) []*Name {
}
}
// If this is a struct, union, or enum type name,
// record the kind but also that we need type information.
needType = append(needType, n)
// If this is a struct, union, or enum type name, no need to guess the kind.
if strings.HasPrefix(n.C, "struct ") || strings.HasPrefix(n.C, "union ") || strings.HasPrefix(n.C, "enum ") {
n.Kind = "type"
i := len(needType)
needType = needType[0 : i+1]
needType[i] = n
continue
}
i := len(toSniff)
toSniff = toSniff[0 : i+1]
toSniff[i] = n
// Otherwise, we'll need to find out from gcc.
names = append(names, n)
}
if len(toSniff) == 0 {
// Bypass gcc if there's nothing left to find out.
if len(names) == 0 {
return needType
}
// Coerce gcc into telling us whether each name is a type, a value, or undeclared.
// For names, find out whether they are integer constants.
// We used to look at specific warning or error messages here, but that tied the
// behavior too closely to specific versions of the compilers.
// Instead, arrange that we can infer what we need from only the presence or absence
// of an error on a specific line.
//
// For each name, we generate these lines, where xxx is the index in toSniff plus one.
//
// #line xxx "not-declared"
// void __cgo_f_xxx_1(void) { typeof(name) *__cgo_undefined__; }
// #line xxx "not-type"
// void __cgo_f_xxx_2(void) { name *__cgo_undefined__; }
// #line xxx "not-const"
// void __cgo_f_xxx_3(void) { enum { __cgo_undefined__ = (name)*1 }; }
//
// If we see an error at not-declared:xxx, the corresponding name is not declared.
// If we see an error at not-type:xxx, the corresponding name is a type.
// If we see an error at not-const:xxx, the corresponding name is not an integer constant.
// If we see no errors, we assume the name is an expression but not a constant
// (so a variable or a function).
//
// The specific input forms are chosen so that they are valid C syntax regardless of
// whether name denotes a type or an expression.
var b bytes.Buffer
b.WriteString(f.Preamble)
b.WriteString(builtinProlog)
b.WriteString("void __cgo__f__(void) {\n")
// For a #defined expression, clang silences the warning about "unused expression".
// http://llvm.org/viewvc/llvm-project?view=revision&revision=172696
// Silencing the warning is not a big deal, because our default assumption is that
// (in the absence of other evidence) names are expressions.
// However, if all the C names we are investigating are #defined expressions,
// clang will print no warnings at all and then exit successfully.
// We want clang to print warnings, so seed the code with a function
// that is guaranteed to provoke a warning (that we will ignore).
// This way, if clang becomes even more broken, we'll find out.
// See golang.org/issue/6128.
fmt.Fprintf(&b, "1;\n")
b.WriteString("#line 1 \"cgo-test\"\n")
for i, n := range toSniff {
fmt.Fprintf(&b, "%s; /* #%d */\nenum { _cgo_enum_%d = %s }; /* #%d */\n", n.C, i, i, n.C, i)
}
b.WriteString("}\n")
for i, n := range names {
fmt.Fprintf(&b, "#line %d \"not-declared\"\n"+
"void __cgo_f_%d_1(void) { typeof(%s) *__cgo_undefined__; }\n"+
"#line %d \"not-type\"\n"+
"void __cgo_f_%d_2(void) { %s *__cgo_undefined__; }\n"+
"#line %d \"not-const\"\n"+
"void __cgo_f_%d_3(void) { enum { __cgo__undefined__ = (%s)*1 }; }\n",
i+1, i+1, n.C,
i+1, i+1, n.C,
i+1, i+1, n.C)
}
fmt.Fprintf(&b, "#line 1 \"completed\"\n"+
"int __cgo__1 = __cgo__2;\n")
stderr := p.gccErrors(b.Bytes())
if stderr == "" {
fatalf("%s produced no output\non input:\n%s", p.gccBaseCmd()[0], b.Bytes())
}
names := make([]*Name, len(toSniff))
copy(names, toSniff)
isConst := make([]bool, len(toSniff))
for i := range isConst {
isConst[i] = true // until proven otherwise
}
completed := false
sniff := make([]int, len(names))
const (
notType = 1 << iota
notConst
)
for _, line := range strings.Split(stderr, "\n") {
if len(line) < 9 || line[0:9] != "cgo-test:" {
// the user will see any compiler errors when the code is compiled later.
continue
}
line = line[9:]
colon := strings.Index(line, ":")
if colon < 0 {
if !strings.Contains(line, ": error:") {
// we only care about errors.
// we tried to turn off warnings on the command line, but one never knows.
continue
}
i, err := strconv.Atoi(line[0:colon])
if err != nil {
c1 := strings.Index(line, ":")
if c1 < 0 {
continue
}
i = (i - 1) / 2
what := ""
switch {
default:
continue
case strings.Contains(line, ": useless type name in empty declaration"),
strings.Contains(line, ": declaration does not declare anything"),
strings.Contains(line, ": unexpected type name"):
what = "type"
isConst[i] = false
case strings.Contains(line, ": statement with no effect"),
strings.Contains(line, ": expression result unused"):
what = "not-type" // const or func or var
case strings.Contains(line, "undeclared"):
error_(token.NoPos, "%s", strings.TrimSpace(line[colon+1:]))
case strings.Contains(line, "is not an integer constant"):
isConst[i] = false
c2 := strings.Index(line[c1+1:], ":")
if c2 < 0 {
continue
}
n := toSniff[i]
if n == nil {
c2 += c1 + 1
filename := line[:c1]
i, _ := strconv.Atoi(line[c1+1 : c2])
i--
if i < 0 || i >= len(names) {
continue
}
toSniff[i] = nil
n.Kind = what
j := len(needType)
needType = needType[0 : j+1]
needType[j] = n
}
for i, b := range isConst {
if b {
names[i].Kind = "const"
if toSniff[i] != nil && names[i].Const == "" {
j := len(needType)
needType = needType[0 : j+1]
needType[j] = names[i]
}
switch filename {
case "completed":
// Strictly speaking, there is no guarantee that seeing the error at completed:1
// (at the end of the file) means we've seen all the errors from earlier in the file,
// but usually it does. Certainly if we don't see the completed:1 error, we did
// not get all the errors we expected.
completed = true
case "not-declared":
error_(token.NoPos, "%s", strings.TrimSpace(line[c2+1:]))
case "not-type":
sniff[i] |= notType
case "not-const":
sniff[i] |= notConst
}
}
for _, n := range toSniff {
if n == nil {
continue
}
if n.Kind != "" {
continue
if !completed {
fatalf("%s did not produce error at completed:1\non input:\n%s", p.gccBaseCmd()[0], b.Bytes())
}
for i, n := range names {
switch sniff[i] {
case 0:
error_(token.NoPos, "could not determine kind of name for C.%s", fixGo(n.Go))
case notType:
n.Kind = "const"
case notConst:
n.Kind = "type"
case notConst | notType:
n.Kind = "not-type"
}
error_(token.NoPos, "could not determine kind of name for C.%s", fixGo(n.Go))
}
if nerrors > 0 {
fatalf("unresolved names")
}
needType = append(needType, names...)
return needType
}
......@@ -739,8 +732,8 @@ func gccTmp() string {
// the input.
func (p *Package) gccCmd() []string {
c := append(p.gccBaseCmd(),
"-Wall", // many warnings
"-Werror", // warnings are errors
"-Wno-all", // no warnings
"-Wno-error", // warnings are not errors
"-o"+gccTmp(), // write object to tmp
"-gdwarf-2", // generate DWARF v2 debugging symbols
"-fno-eliminate-unused-debug-types", // gets rid of e.g. untyped enum otherwise
......@@ -876,14 +869,6 @@ func (p *Package) gccErrors(stdin []byte) string {
// TODO(rsc): require failure
args := p.gccCmd()
// GCC 4.8.0 has a bug: it sometimes does not apply
// -Wunused-value to values that are macros defined in system
// headers. See issue 5118. Adding -Wsystem-headers avoids
// that problem. This will produce additional errors, but it
// doesn't matter because we will ignore all errors that are
// not marked for the cgo-test file.
args = append(args, "-Wsystem-headers")
if *debugGcc {
fmt.Fprintf(os.Stderr, "$ %s <<EOF\n", strings.Join(args, " "))
os.Stderr.Write(stdin)
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment