go/printer, gofmt: don't print incorrect programs

Be careful when printing line comments with incorrect position information. Maintain additional state impliedSemi: when set, a comment containing a newline would imply a semicolon and thus placement must be delayed. Precompute state information pertaining to the next comment for faster checks (the printer is marginally faster now despite additional checks for each comment). No effect on existing src, misc sources. Fixes #1505. R=rsc CC=golang-dev https://golang.org/cl/5598054

go/printer, gofmt: don't print incorrect programs
Be careful when printing line comments with incorrect position information. Maintain additional state impliedSemi: when set, a comment containing a newline would imply a semicolon and thus placement must be delayed. Precompute state information pertaining to the next comment for faster checks (the printer is marginally faster now despite additional checks for each comment). No effect on existing src, misc sources. Fixes #1505. R=rsc CC=golang-dev https://golang.org/cl/5598054
3d6b3685 · Robert Griesemer · e3755434 · 3d6b3685 · 3d6b3685 · 3d6b3685
Commit 3d6b3685 authored Feb 07, 2012 by Robert Griesemer
4 changed files
--- a/src/cmd/fix/timefileinfo_test.go
+++ b/src/cmd/fix/timefileinfo_test.go
@@ -156,6 +156,32 @@ func main() {
 	t2 := time.Now()
 	dt := t2.Sub(t1)
 }
+`,
+	},
+	{
+		Name: "timefileinfo.5", // test for issues 1505, 2636
+		In: `package main
+import (
+	"fmt"
+	"time"
+)
+func main() {
+	fmt.Println(time.SecondsToUTC(now)) // this comment must not introduce an illegal linebreak
+}
+`,
+		Out: `package main
+import (
+	"fmt"
+	"time"
+)
+func main() {
+	fmt.Println(time.Unix(now, 0).UTC( // this comment must not introduce an illegal linebreak
+	))
+}
 `,
 	},
 }
--- a/src/pkg/go/printer/nodes.go
+++ b/src/pkg/go/printer/nodes.go
@@ -76,6 +76,7 @@ func (p *printer) setComment(g *ast.CommentGroup) {
 	}
 	p.comments[0] = g
 	p.cindex = 0
+	p.nextComment() // get comment ready for use
 }
 type exprListMode uint

--- a/src/pkg/go/printer/printer.go
+++ b/src/pkg/go/printer/printer.go
@@ -12,7 +12,6 @@ import (
 	"go/token"
 	"io"
 	"os"
-	"path/filepath"
 	"strconv"
 	"strings"
 	"text/tabwriter"
@@ -52,11 +51,12 @@ type printer struct {
 	fset *token.FileSet
 	// Current state
-	output  bytes.Buffer // raw printer result
+	output      bytes.Buffer // raw printer result
-	indent  int          // current indentation
+	indent      int          // current indentation
-	mode    pmode        // current printer mode
+	mode        pmode        // current printer mode
-	lastTok token.Token  // the last token printed (token.ILLEGAL if it's whitespace)
+	impliedSemi bool         // if set, a linebreak implies a semicolon
-	wsbuf   []whiteSpace // delayed white space
+	lastTok     token.Token  // the last token printed (token.ILLEGAL if it's whitespace)
+	wsbuf       []whiteSpace // delayed white space
 	// The (possibly estimated) position in the generated output;
 	// in AST space (i.e., pos is set whenever a token position is
@@ -73,6 +73,11 @@ type printer struct {
 	cindex          int                 // current comment index
 	useNodeComments bool                // if not set, ignore lead and line comments of nodes
+	// Information about p.comments[p.cindex]; set up by nextComment.
+	comment        *ast.CommentGroup // = p.comments[p.cindex]; or nil
+	commentOffset  int               // = p.posFor(p.comments[p.cindex].List[0].Pos()).Offset; or infinity
+	commentNewline bool              // true if the comment group contains newlines
 	// Cache of already computed node sizes.
 	nodeSizes map[ast.Node]int
@@ -89,6 +94,42 @@ func (p *printer) init(cfg *Config, fset *token.FileSet, nodeSizes map[ast.Node]
 	p.cachedPos = -1
 }
+// commentsHaveNewline reports whether a list of comments belonging to
+// an *ast.CommentGroup contains newlines. Because the position information
+// may only be partially correct, we also have to read the comment text.
+func (p *printer) commentsHaveNewline(list []*ast.Comment) bool {
+	// len(list) > 0
+	line := p.lineFor(list[0].Pos())
+	for i, c := range list {
+		if i > 0 && p.lineFor(list[i].Pos()) != line {
+			// not all comments on the same line
+			return true
+		}
+		if t := c.Text; len(t) >= 2 && (t[1] == '/' || strings.Contains(t, "\n")) {
+			return true
+		}
+	}
+	_ = line
+	return false
+}
+func (p *printer) nextComment() {
+	for p.cindex < len(p.comments) {
+		c := p.comments[p.cindex]
+		p.cindex++
+		if list := c.List; len(list) > 0 {
+			p.comment = c
+			p.commentOffset = p.posFor(list[0].Pos()).Offset
+			p.commentNewline = p.commentsHaveNewline(list)
+			return
+		}
+		// we should not reach here (correct ASTs don't have empty
+		// ast.CommentGroup nodes), but be conservative and try again
+	}
+	// no more comments
+	p.commentOffset = infinity
+}
 func (p *printer) internalError(msg ...interface{}) {
 	if debug {
 		fmt.Print(p.pos.String() + ": ")
@@ -204,8 +245,7 @@ func (p *printer) writeItem(pos token.Position, data string, isLit bool) {
 	}
 	if debug {
 		// do not update p.pos - use write0
-		_, filename := filepath.Split(pos.Filename)
+		fmt.Fprintf(&p.output, "/*%s*/", pos)
-		fmt.Fprintf(&p.output, "[%s:%d:%d]", filename, pos.Line, pos.Column)
 	}
 	p.writeString(data, isLit)
 	p.last = p.pos
@@ -618,12 +658,13 @@ func (p *printer) writeCommentSuffix(needsLinebreak bool) (wroteNewline, dropped
 //
 func (p *printer) intersperseComments(next token.Position, tok token.Token) (wroteNewline, droppedFF bool) {
 	var last *ast.Comment
-	for ; p.commentBefore(next); p.cindex++ {
+	for p.commentBefore(next) {
-		for _, c := range p.comments[p.cindex].List {
+		for _, c := range p.comment.List {
 			p.writeCommentPrefix(p.posFor(c.Pos()), next, last, c, tok.IsKeyword())
 			p.writeComment(c)
 			last = c
 		}
+		p.nextComment()
 	}
 	if last != nil {
@@ -735,22 +776,24 @@ func mayCombine(prev token.Token, next byte) (b bool) {
 // printed, followed by the actual token.
 //
 func (p *printer) print(args ...interface{}) {
-	for _, f := range args {
+	for _, arg := range args {
-		next := p.pos // estimated position of next item
+		// information about the current arg
-		data := ""
+		var data string
-		isLit := false
+		var isLit bool
-		var tok token.Token
+		var impliedSemi bool // value for p.impliedSemi after this arg
-		switch x := f.(type) {
+		switch x := arg.(type) {
 		case pmode:
 			// toggle printer mode
 			p.mode ^= x
+			continue
 		case whiteSpace:
 			if x == ignore {
 				// don't add ignore's to the buffer; they
 				// may screw up "correcting" unindents (see
 				// LabeledStmt)
-				break
+				continue
 			}
 			i := len(p.wsbuf)
 			if i == cap(p.wsbuf) {
@@ -762,13 +805,27 @@ func (p *printer) print(args ...interface{}) {
 			}
 			p.wsbuf = p.wsbuf[0 : i+1]
 			p.wsbuf[i] = x
+			if x == newline || x == formfeed {
+				// newlines affect the current state (p.impliedSemi)
+				// and not the state after printing arg (impliedSemi)
+				// because comments can be interspersed before the arg
+				// in this case
+				p.impliedSemi = false
+			}
+			p.lastTok = token.ILLEGAL
+			continue
 		case *ast.Ident:
 			data = x.Name
-			tok = token.IDENT
+			impliedSemi = true
+			p.lastTok = token.IDENT
 		case *ast.BasicLit:
 			data = x.Value
 			isLit = true
-			tok = x.Kind
+			impliedSemi = true
+			p.lastTok = x.Kind
 		case token.Token:
 			s := x.String()
 			if mayCombine(p.lastTok, s[0]) {
@@ -785,30 +842,40 @@ func (p *printer) print(args ...interface{}) {
 				p.wsbuf[0] = ' '
 			}
 			data = s
-			tok = x
+			// some keywords followed by a newline imply a semicolon
+			switch x {
+			case token.BREAK, token.CONTINUE, token.FALLTHROUGH, token.RETURN,
+				token.INC, token.DEC, token.RPAREN, token.RBRACK, token.RBRACE:
+				impliedSemi = true
+			}
+			p.lastTok = x
 		case token.Pos:
 			if x.IsValid() {
-				next = p.posFor(x) // accurate position of next item
+				p.pos = p.posFor(x) // accurate position of next item
 			}
-			tok = p.lastTok
+			continue
 		case string:
 			// incorrect AST - print error message
 			data = x
 			isLit = true
-			tok = token.STRING
+			impliedSemi = true
+			p.lastTok = token.STRING
 		default:
-			fmt.Fprintf(os.Stderr, "print: unsupported argument %v (%T)\n", f, f)
+			fmt.Fprintf(os.Stderr, "print: unsupported argument %v (%T)\n", arg, arg)
 			panic("go/printer type")
 		}
-		p.lastTok = tok
+		// data != ""
-		p.pos = next
-		if data != "" {
+		next := p.pos // estimated/accurate position of next item
-			wroteNewline, droppedFF := p.flush(next, tok)
+		wroteNewline, droppedFF := p.flush(next, p.lastTok)
-			// intersperse extra newlines if present in the source
+		// intersperse extra newlines if present in the source and
-			// (don't do this in flush as it will cause extra newlines
+		// if they don't cause extra semicolons (don't do this in
-			// at the end of a file)
+		// flush as it will cause extra newlines at the end of a file)
+		if !p.impliedSemi {
 			n := nlimit(next.Line - p.pos.Line)
 			// don't exceed maxNewlines if we already wrote one
 			if wroteNewline && n == maxNewlines {
@@ -820,22 +887,25 @@ func (p *printer) print(args ...interface{}) {
 					ch = '\f' // use formfeed since we dropped one before
 				}
 				p.writeByteN(ch, n)
+				impliedSemi = false
 			}
-			p.writeItem(next, data, isLit)
 		}
+		p.writeItem(next, data, isLit)
+		p.impliedSemi = impliedSemi
 	}
 }
-// commentBefore returns true iff the current comment occurs
+// commentBefore returns true iff the current comment group occurs
-// before the next position in the source code.
+// before the next position in the source code and printing it does
+// not introduce implicit semicolons.
 //
-func (p *printer) commentBefore(next token.Position) bool {
+func (p *printer) commentBefore(next token.Position) (result bool) {
-	return p.cindex < len(p.comments) && p.posFor(p.comments[p.cindex].List[0].Pos()).Offset < next.Offset
+	return p.commentOffset < next.Offset && (!p.impliedSemi || !p.commentNewline)
 }
-// Flush prints any pending comments and whitespace occurring textually
+// flush prints any pending comments and whitespace occurring textually
-// before the position of the next token tok. The Flush result indicates
+// before the position of the next token tok. The flush result indicates
 // if a newline was written or if a formfeed was dropped from the whitespace
 // buffer.
 //
@@ -915,6 +985,9 @@ func (p *printer) printNode(node interface{}) error {
 	// if there are no comments, use node comments
 	p.useNodeComments = p.comments == nil
+	// get comments ready for use
+	p.nextComment()
 	// format node
 	switch n := node.(type) {
 	case ast.Expr:
@@ -1068,6 +1141,8 @@ func (cfg *Config) fprint(output io.Writer, fset *token.FileSet, node interface{
 	if err = p.printNode(node); err != nil {
 		return
 	}
+	// print outstanding comments
+	p.impliedSemi = false // EOF acts like a newline
 	p.flush(token.Position{Offset: infinity, Line: infinity}, token.EOF)
 	// redirect output through a trimmer to eliminate trailing whitespace

--- a/src/pkg/go/printer/printer_test.go
+++ b/src/pkg/go/printer/printer_test.go
@@ -171,14 +171,14 @@ func TestLineComments(t *testing.T) {
 	`
 	fset := token.NewFileSet()
-	ast1, err1 := parser.ParseFile(fset, "", src, parser.ParseComments)
+	f, err := parser.ParseFile(fset, "", src, parser.ParseComments)
-	if err1 != nil {
+	if err != nil {
-		panic(err1)
+		panic(err) // error in test
 	}
 	var buf bytes.Buffer
 	fset = token.NewFileSet() // use the wrong file set
-	Fprint(&buf, fset, ast1)
+	Fprint(&buf, fset, f)
 	nlines := 0
 	for _, ch := range buf.Bytes() {
@@ -190,6 +190,7 @@ func TestLineComments(t *testing.T) {
 	const expected = 3
 	if nlines < expected {
 		t.Errorf("got %d, expected %d\n", nlines, expected)
+		t.Errorf("result:\n%s", buf.Bytes())
 	}
 }
@@ -198,9 +199,11 @@ func init() {
 	const name = "foobar"
 	var buf bytes.Buffer
 	if err := Fprint(&buf, fset, &ast.Ident{Name: name}); err != nil {
-		panic(err)
+		panic(err) // error in test
 	}
-	if s := buf.String(); s != name {
+	// in debug mode, the result contains additional information;
+	// ignore it
+	if s := buf.String(); !debug && s != name {
 		panic("got " + s + ", want " + name)
 	}
 }
@@ -211,7 +214,7 @@ func TestBadNodes(t *testing.T) {
 	const res = "package p\nBadDecl\n"
 	f, err := parser.ParseFile(fset, "", src, parser.ParseComments)
 	if err == nil {
-		t.Errorf("expected illegal program")
+		t.Error("expected illegal program") // error in test
 	}
 	var buf bytes.Buffer
 	Fprint(&buf, fset, f)
@@ -219,3 +222,61 @@ func TestBadNodes(t *testing.T) {
 		t.Errorf("got %q, expected %q", buf.String(), res)
 	}
 }
+// Print and parse f with 
+func testComment(t *testing.T, f *ast.File, srclen int, comment *ast.Comment) {
+	f.Comments[0].List[0] = comment
+	var buf bytes.Buffer
+	for offs := 0; offs <= srclen; offs++ {
+		buf.Reset()
+		// Printing f should result in a correct program no
+		// matter what the (incorrect) comment position is.
+		if err := Fprint(&buf, fset, f); err != nil {
+			t.Error(err)
+		}
+		if _, err := parser.ParseFile(fset, "", buf.Bytes(), 0); err != nil {
+			t.Fatalf("incorrect program for pos = %d:\n%s", comment.Slash, buf.String())
+		}
+		// Position information is just an offset.
+		// Move comment one byte down in the source.
+		comment.Slash++
+	}
+}
+// Verify that the printer produces always produces a correct program
+// even if the position information of comments introducing newlines
+// is incorrect.
+func TestBadComments(t *testing.T) {
+	const src = `
+// first comment - text and position changed by test
+package p
+import "fmt"
+const pi = 3.14 // rough circle
+var (
+	x, y, z int = 1, 2, 3
+	u, v float64
+)
+func fibo(n int) {
+	if n < 2 {
+		return n /* seed values */
+	}
+	return fibo(n-1) + fibo(n-2)
+}
+`
+	f, err := parser.ParseFile(fset, "", src, parser.ParseComments)
+	if err != nil {
+		t.Error(err) // error in test
+	}
+	comment := f.Comments[0].List[0]
+	pos := comment.Pos()
+	if fset.Position(pos).Offset != 1 {
+		t.Error("expected offset 1") // error in test
+	}
+	testComment(t, f, len(src), &ast.Comment{pos, "//-style comment"})
+	testComment(t, f, len(src), &ast.Comment{pos, "/*-style comment */"})
+	testComment(t, f, len(src), &ast.Comment{pos, "/*-style \n comment */"})
+	testComment(t, f, len(src), &ast.Comment{pos, "/*-style comment \n\n\n */"})
+}