Commit 82e1651a authored by Matthew Dempsky

cmd/internal/gc, cmd/yacc: merge yaccerrors.go into cmd/yacc

This extends cmd/yacc with support for

	%error { tokens } : message

syntax to specify custom error messages to use instead of the default
generic ones.  This allows merging go.errors into go.y and removing
the yaccerrors.go tool.

Updates #9968.

Change-Id: I781219c568b86472755f877f48401eaeab00ead5
Reviewed-on: https://go-review.googlesource.com/8563
Reviewed-by: Russ Cox <rsc@golang.org>
parent d4ed3061
// Copyright 2010 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// Example-based syntax error messages.
// See yaccerrors.go.
package gc
var yymsg = []struct {
yystate int
yychar int
msg string
}{
// Each line of the form % token list
// is converted by yaccerrors.go into the yystate and yychar caused
// by that token list.
% loadsys package LIMPORT '(' LLITERAL import_package import_there ','
"unexpected comma during import block"},
% loadsys package LIMPORT LNAME ';'
"missing import path; require quoted string"},
% loadsys package imports LFUNC LNAME '(' ')' '{' LIF if_header ';'
"missing { after if clause"},
% loadsys package imports LFUNC LNAME '(' ')' '{' LSWITCH if_header ';'
"missing { after switch clause"},
% loadsys package imports LFUNC LNAME '(' ')' '{' LFOR for_header ';'
"missing { after for clause"},
% loadsys package imports LFUNC LNAME '(' ')' '{' LFOR ';' LBODY
"missing { after for clause"},
% loadsys package imports LFUNC LNAME '(' ')' ';' '{'
"unexpected semicolon or newline before {"},
% loadsys package imports LTYPE LNAME ';'
"unexpected semicolon or newline in type declaration"},
% loadsys package imports LCHAN '}'
"unexpected } in channel type"},
% loadsys package imports LCHAN ')'
"unexpected ) in channel type"},
% loadsys package imports LCHAN ','
"unexpected comma in channel type"},
% loadsys package imports LFUNC LNAME '(' ')' '{' if_stmt ';' LELSE
"unexpected semicolon or newline before else"},
% loadsys package imports LTYPE LNAME LINTERFACE '{' LNAME ',' LNAME
"name list not allowed in interface type"},
% loadsys package imports LFUNC LNAME '(' ')' '{' LFOR LVAR LNAME '=' LNAME
"var declaration not allowed in for initializer"},
% loadsys package imports LVAR LNAME '[' ']' LNAME '{'
"unexpected { at end of statement"},
% loadsys package imports LFUNC LNAME '(' ')' '{' LVAR LNAME '[' ']' LNAME '{'
"unexpected { at end of statement"},
% loadsys package imports LFUNC LNAME '(' ')' '{' LDEFER LNAME ';'
"argument to go/defer must be function call"},
% loadsys package imports LVAR LNAME '=' LNAME '{' LNAME ';'
"need trailing comma before newline in composite literal"},
% loadsys package imports LVAR LNAME '=' comptype '{' LNAME ';'
"need trailing comma before newline in composite literal"},
% loadsys package imports LFUNC LNAME '(' ')' '{' LFUNC LNAME
"nested func not allowed"},
% loadsys package imports LFUNC LNAME '(' ')' '{' LIF if_header loop_body LELSE ';'
"else must be followed by if or statement block"},
}
......@@ -117,7 +117,68 @@ import (
%left ')'
%left PreferToRightParen
// TODO(rsc): Add %error-verbose
%error loadsys package LIMPORT '(' LLITERAL import_package import_there ',':
"unexpected comma during import block"
%error loadsys package LIMPORT LNAME ';':
"missing import path; require quoted string"
%error loadsys package imports LFUNC LNAME '(' ')' '{' LIF if_header ';':
"missing { after if clause"
%error loadsys package imports LFUNC LNAME '(' ')' '{' LSWITCH if_header ';':
"missing { after switch clause"
%error loadsys package imports LFUNC LNAME '(' ')' '{' LFOR for_header ';':
"missing { after for clause"
%error loadsys package imports LFUNC LNAME '(' ')' '{' LFOR ';' LBODY:
"missing { after for clause"
%error loadsys package imports LFUNC LNAME '(' ')' ';' '{':
"unexpected semicolon or newline before {"
%error loadsys package imports LTYPE LNAME ';':
"unexpected semicolon or newline in type declaration"
%error loadsys package imports LCHAN '}':
"unexpected } in channel type"
%error loadsys package imports LCHAN ')':
"unexpected ) in channel type"
%error loadsys package imports LCHAN ',':
"unexpected comma in channel type"
%error loadsys package imports LFUNC LNAME '(' ')' '{' if_stmt ';' LELSE:
"unexpected semicolon or newline before else"
%error loadsys package imports LTYPE LNAME LINTERFACE '{' LNAME ',' LNAME:
"name list not allowed in interface type"
%error loadsys package imports LFUNC LNAME '(' ')' '{' LFOR LVAR LNAME '=' LNAME:
"var declaration not allowed in for initializer"
%error loadsys package imports LVAR LNAME '[' ']' LNAME '{':
"unexpected { at end of statement"
%error loadsys package imports LFUNC LNAME '(' ')' '{' LVAR LNAME '[' ']' LNAME '{':
"unexpected { at end of statement"
%error loadsys package imports LFUNC LNAME '(' ')' '{' LDEFER LNAME ';':
"argument to go/defer must be function call"
%error loadsys package imports LVAR LNAME '=' LNAME '{' LNAME ';':
"need trailing comma before newline in composite literal"
%error loadsys package imports LVAR LNAME '=' comptype '{' LNAME ';':
"need trailing comma before newline in composite literal"
%error loadsys package imports LFUNC LNAME '(' ')' '{' LFUNC LNAME:
"nested func not allowed"
%error loadsys package imports LFUNC LNAME '(' ')' '{' LIF if_header loop_body LELSE ';':
"else must be followed by if or statement block"
%%
file:
......
......@@ -3,7 +3,6 @@
// license that can be found in the LICENSE file.
//go:generate go tool yacc go.y
//go:generate go run yaccerrors.go
//go:generate go run mkbuiltin.go runtime unsafe
package gc
......
......@@ -125,13 +125,6 @@ func Yyerror(format string, args ...interface{}) {
if strings.HasPrefix(msg, "syntax error") {
nsyntaxerrors++
yystate := theparser.(*yyParserImpl).state()
yychar := theparser.Lookahead()
if Debug['x'] != 0 {
fmt.Printf("yyerror: yystate=%d yychar=%d\n", yystate, yychar)
}
// An unexpected EOF caused a syntax error. Use the previous
// line number since getc generated a fake newline character.
if curio.eofnl != 0 {
......@@ -144,14 +137,6 @@ func Yyerror(format string, args ...interface{}) {
}
yyerror_lastsyntax = int(lexlineno)
// look for parse state-specific errors in list (see go.errors).
for i := range yymsg {
if yymsg[i].yystate == yystate && yymsg[i].yychar == yychar {
yyerrorl(int(lexlineno), "syntax error: %s", yymsg[i].msg)
return
}
}
// plain "syntax error" gets "near foo" added
if msg == "syntax error" {
yyerrorl(int(lexlineno), "syntax error near %s", lexbuf.String())
......
This diff is collapsed.
This diff is collapsed.
// Copyright 2010 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// +build ignore
// This program implements the core idea from
//
// Clinton L. Jeffery, Generating LR syntax error messages from examples,
// ACM TOPLAS 25(5) (September 2003). http://doi.acm.org/10.1145/937563.937566
//
// It reads Bison's summary of a grammar followed by a file
// like go.errors, replacing lines beginning with % by the
// yystate and yychar that will be active when an error happens
// while parsing that line.
//
// Unlike the system described in the paper, the lines in go.errors
// give grammar symbol name lists, not actual program fragments.
// This is a little less programmer-friendly but doesn't require being
// able to run the text through lex.c.
package main
import (
"bufio"
"fmt"
"io"
"log"
"os"
"strconv"
"strings"
)
// xatoi converts the decimal string s to an int. A string that
// strconv.Atoi rejects terminates the program through log.Fatal.
func xatoi(s string) int {
	value, convErr := strconv.Atoi(s)
	if convErr == nil {
		return value
	}
	log.Fatal(convErr)
	return value
}
// trimParen removes at most one leading "(" and at most one trailing ")"
// from s. Either side is left alone when the parenthesis is absent.
func trimParen(s string) string {
	return strings.TrimSuffix(strings.TrimPrefix(s, "("), ")")
}
// action is one parser-table entry read from y.output: a grammar symbol
// together with the number that follows the action keyword on that line.
type action struct {
// token is the symbol name taken from the first field of the line.
token string
// n is the target state for shift/goto entries and the rule number
// for reduce entries.
n int
}
// shift maps a state number to the shift and goto entries listed for it.
var shift = map[int][]action{}
// reduce maps a state number to the reduce entries listed for it.
var reduce = map[int][]action{}
// rule is one grammar production as recorded by readYaccOutput.
type rule struct {
// lhs is the left-hand-side symbol with its trailing ":" removed.
lhs string
// size is the count of fields between the left-hand side and the
// trailing "(N)" on the rule line, or 0 when that single field is ".".
// runMachine drops this many stack entries when reducing by the rule.
size int
}
// rules maps a rule number to its production.
var rules = map[int]rule{}
// readYaccOutput parses the parser description in y.output and fills the
// package-level shift, reduce and rules tables.
//
// Lines are split into whitespace-separated fields and recognized as:
//
//	state N                  subsequent entries belong to state N
//	SYM shift N, SYM goto N  appended to shift[state]
//	SYM reduce N             appended to reduce[state]
//	lhs: ... (N)             stored as rules[N]
//
// Reading stops at the "... terminals, ... nonterminals" summary line.
// Failure to open or read the file, or a malformed number, is fatal.
func readYaccOutput() {
	r, err := os.Open("y.output")
	if err != nil {
		log.Fatal(err)
	}
	defer r.Close()

	var state int

	scanner := bufio.NewScanner(r)
	for scanner.Scan() {
		f := strings.Fields(scanner.Text())
		nf := len(f)

		if nf >= 4 && f[1] == "terminals," && f[3] == "nonterminals" {
			// We're done.
			break
		}

		if nf >= 2 && f[0] == "state" {
			state = xatoi(f[1])
			continue
		}

		if nf >= 3 && (f[1] == "shift" || f[1] == "goto") {
			shift[state] = append(shift[state], action{f[0], xatoi(f[2])})
			continue
		}

		if nf >= 3 && f[1] == "reduce" {
			reduce[state] = append(reduce[state], action{f[0], xatoi(f[2])})
			continue
		}

		if nf >= 3 && strings.HasSuffix(f[0], ":") && strings.HasPrefix(f[nf-1], "(") && strings.HasSuffix(f[nf-1], ")") {
			n := xatoi(trimParen(f[nf-1]))

			// Everything between the left-hand side and the "(N)"
			// marker counts toward the size; a lone "." means the
			// right-hand side is empty.
			size := nf - 2
			if size == 1 && f[1] == "." {
				size = 0
			}

			rules[n] = rule{strings.TrimSuffix(f[0], ":"), size}
			continue
		}
	}

	// Scan returns false on read errors and over-long lines as well as at
	// EOF; without this check the tables would be silently incomplete.
	if err := scanner.Err(); err != nil {
		log.Fatal(err)
	}
}
// runMachine feeds the whitespace-separated symbols of s, skipping the
// first field, through the tables built by readYaccOutput. When neither a
// shift nor a reduce applies it writes the opening of a table entry,
// "\t{state, token,\n", for the current state and pending symbol to w.
func runMachine(w io.Writer, s string) {
	// trace guards the debugging output; it is permanently off.
	const trace = false

	fields := strings.Fields(s)

	var (
		stack []int
		state = 0
		next  = 1 // index of the next unread symbol in fields
		tok   = ""
	)

step:
	for {
		// Fetch a new symbol only when the previous one was consumed.
		if tok == "" && next < len(fields) {
			tok = fields[next]
			next++
		}

		for _, act := range shift[state] {
			if act.token != tok {
				continue
			}
			if trace {
				fmt.Println("SHIFT ", tok, " ", state, " -> ", act)
			}
			stack = append(stack, state)
			state = act.n
			tok = ""
			continue step
		}

		for _, act := range reduce[state] {
			if act.token != tok && act.token != "." {
				continue
			}
			stack = append(stack, state)
			prod, found := rules[act.n]
			if !found {
				log.Fatal("missing rule")
			}
			// Drop the states covering the right-hand side, then resume
			// from the state that is uncovered.
			stack = stack[:len(stack)-prod.size]
			state = stack[len(stack)-1]
			stack = stack[:len(stack)-1]
			// A pending symbol is pushed back so it is read again after
			// the left-hand side has been processed.
			if tok != "" {
				next--
			}
			tok = prod.lhs
			if trace {
				fmt.Println("REDUCE ", stack, " ", state, " ", tok, " rule ", prod)
			}
			continue step
		}

		// No shift or reduce applied - found the error.
		break
	}

	fmt.Fprintf(w, "\t{%d, %s,\n", state, tok)
}
// processGoErrors copies go.errors to yymsg.go, preceded by a
// "DO NOT EDIT" header. Each line whose first non-space character is "%"
// is replaced by the output of runMachine for that line; all other lines
// are copied unchanged. Failure to open, create or read a file is fatal.
func processGoErrors() {
	r, err := os.Open("go.errors")
	if err != nil {
		log.Fatal(err)
	}
	defer r.Close()

	w, err := os.Create("yymsg.go")
	if err != nil {
		log.Fatal(err)
	}
	defer w.Close()

	fmt.Fprintf(w, "// DO NOT EDIT - generated with go generate\n\n")

	scanner := bufio.NewScanner(r)
	for scanner.Scan() {
		s := scanner.Text()

		// Treat % as first field on line as introducing a pattern (token sequence).
		if strings.HasPrefix(strings.TrimSpace(s), "%") {
			runMachine(w, s)
			continue
		}

		fmt.Fprintln(w, s)
	}

	// Scan also stops on read errors and over-long lines; report them
	// instead of leaving a truncated yymsg.go behind.
	if err := scanner.Err(); err != nil {
		log.Fatal(err)
	}
}
// main loads the parser tables from y.output and then rewrites go.errors
// into yymsg.go using those tables.
func main() {
readYaccOutput()
processGoErrors()
}
// DO NOT EDIT - generated with go generate
// Copyright 2010 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// Example-based syntax error messages.
// See yaccerrors.go.
package gc
var yymsg = []struct {
yystate int
yychar int
msg string
}{
// Each line of the form % token list
// is converted by yaccerrors.go into the yystate and yychar caused
// by that token list.
{332, ',',
"unexpected comma during import block"},
{89, ';',
"missing import path; require quoted string"},
{390, ';',
"missing { after if clause"},
{387, ';',
"missing { after switch clause"},
{279, ';',
"missing { after for clause"},
{498, LBODY,
"missing { after for clause"},
{17, '{',
"unexpected semicolon or newline before {"},
{111, ';',
"unexpected semicolon or newline in type declaration"},
{78, '}',
"unexpected } in channel type"},
{78, ')',
"unexpected ) in channel type"},
{78, ',',
"unexpected comma in channel type"},
{416, LELSE,
"unexpected semicolon or newline before else"},
{329, ',',
"name list not allowed in interface type"},
{279, LVAR,
"var declaration not allowed in for initializer"},
{25, '{',
"unexpected { at end of statement"},
{371, '{',
"unexpected { at end of statement"},
{122, ';',
"argument to go/defer must be function call"},
{398, ';',
"need trailing comma before newline in composite literal"},
{414, ';',
"need trailing comma before newline in composite literal"},
{124, LNAME,
"nested func not allowed"},
{650, ';',
"else must be followed by if or statement block"},
}
......@@ -128,6 +128,7 @@ const (
TYPEDEF
TYPENAME
UNION
ERROR
)
const ENDFILE = 0
......@@ -325,8 +326,24 @@ var resrv = []Resrv{
{"type", TYPEDEF},
{"union", UNION},
{"struct", UNION},
{"error", ERROR},
}
// Error records one %error directive read from the grammar file.
type Error struct {
// lineno is the input line number captured when the directive was
// encountered; it is restored before the directive is processed for
// output.
lineno int
// tokens holds the symbol names listed before the ':'.
tokens []string
// msg is the token text that follows the ':'; it is written verbatim
// into the generated error-message table.
msg string
}
// errors collects every %error directive in the order it was read.
var errors []Error
// Row is the action information saved for one parser state so that
// runMachine can simulate the parser.
type Row struct {
// actions is a copy of temp1 taken when the state is written out;
// runMachine indexes it by symbol and treats 0 as "no entry".
actions []int
// defaultAction is used when actions has no entry: ERRCODE, or
// -lastred when lastred is non-zero.
defaultAction int
}
// stateTable holds one Row per state; it is allocated only when the
// grammar contains %error directives.
var stateTable []Row
var zznewstate = 0
const EOF = -1
......@@ -402,6 +419,27 @@ outer:
}
start = chfind(1, tokname)
case ERROR:
lno := lineno
var tokens []string
for {
t := gettok()
if t == ':' {
break
}
if t != IDENTIFIER && t != IDENTCOLON {
errorf("bad syntax in %%error")
}
tokens = append(tokens, tokname)
if t == IDENTCOLON {
break
}
}
if gettok() != IDENTIFIER {
errorf("bad syntax in %%error")
}
errors = append(errors, Error{lno, tokens, tokname})
case TYPEDEF:
t = gettok()
if t != TYPENAME {
......@@ -2155,6 +2193,10 @@ func output() {
}
fmt.Fprintf(ftable, "\nvar %sExca = [...]int{\n", prefix)
if len(errors) > 0 {
stateTable = make([]Row, nstate)
}
noset := mkset()
// output the stuff for state i
......@@ -2368,6 +2410,15 @@ func wrstate(i int) {
var j0, j1, u int
var pp, qq int
if len(errors) > 0 {
actions := append([]int(nil), temp1...)
defaultAction := ERRCODE
if lastred != 0 {
defaultAction = -lastred
}
stateTable[i] = Row{actions, defaultAction}
}
if foutput == nil {
return
}
......@@ -2914,6 +2965,20 @@ func others() {
}
fmt.Fprintf(ftable, "%d,\n}\n", 0)
// Custom error messages.
fmt.Fprintf(ftable, "\n")
fmt.Fprintf(ftable, "var %sErrorMessages = [...]struct {\n", prefix)
fmt.Fprintf(ftable, "\tstate int\n")
fmt.Fprintf(ftable, "\ttoken int\n")
fmt.Fprintf(ftable, "\tmsg string\n")
fmt.Fprintf(ftable, "}{\n")
for _, error := range errors {
lineno = error.lineno
state, token := runMachine(error.tokens)
fmt.Fprintf(ftable, "\t{%v, %v, %s},\n", state, token, error.msg)
}
fmt.Fprintf(ftable, "}\n")
// copy parser text
ch := getrune(finput)
for ch != EOF {
......@@ -2932,6 +2997,59 @@ func others() {
fmt.Fprintf(ftable, "%v", parts[1])
}
func runMachine(tokens []string) (state, token int) {
var stack []int
i := 0
token = -1
Loop:
if token < 0 {
token = chfind(2, tokens[i])
i++
}
row := stateTable[state]
c := token
if token >= NTBASE {
c = token - NTBASE + ntokens
}
action := row.actions[c]
if action == 0 {
action = row.defaultAction
}
switch {
case action == ACCEPTCODE:
errorf("tokens are accepted")
return
case action == ERRCODE:
if token >= NTBASE {
errorf("error at non-terminal token %s", symnam(token))
}
return
case action > 0:
// Shift to state action.
stack = append(stack, state)
state = action
token = -1
goto Loop
default:
// Reduce by production -action.
prod := prdptr[-action]
if rhsLen := len(prod) - 2; rhsLen > 0 {
n := len(stack) - rhsLen
state = stack[n]
stack = stack[:n]
}
if token >= 0 {
i--
}
token = prod[0]
goto Loop
}
}
func arout(s string, v []int, n int) {
s = prefix + s
fmt.Fprintf(ftable, "var %v = [...]int{\n", s)
......@@ -3212,7 +3330,6 @@ type $$Parser interface {
type $$ParserImpl struct {
lookahead func() int
state func() int
}
func (p *$$ParserImpl) Lookahead() int {
......@@ -3222,7 +3339,6 @@ func (p *$$ParserImpl) Lookahead() int {
func $$NewParser() $$Parser {
p := &$$ParserImpl{
lookahead: func() int { return -1 },
state: func() int { return -1 },
}
return p
}
......@@ -3253,6 +3369,13 @@ func $$ErrorMessage(state, lookAhead int) string {
if !$$ErrorVerbose {
return "syntax error"
}
for _, e := range $$ErrorMessages {
if e.state == state && e.token == lookAhead {
return "syntax error: " + e.msg
}
}
res := "syntax error: unexpected " + $$Tokname(lookAhead)
// To match Bison, suggest at most four expected tokens.
......@@ -3355,7 +3478,6 @@ func ($$rcvr *$$ParserImpl) Parse($$lex $$Lexer) int {
$$state := 0
$$char := -1
$$token := -1 // $$char translated into internal numbering
$$rcvr.state = func() int { return $$state }
$$rcvr.lookahead = func() int { return $$char }
defer func() {
// Make sure we report no lookahead when not parsing.
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment