Commit c7fa3c62 authored by Russ Cox

[dev.cc] cmd/yacc: introduce yyParser to expose parser state

Historically, yacc has supported various kinds of inspections
and manipulations of the parser state, exposed as global variables.
The Go implementation of yacc puts that state (properly) in local
stack variables, so it can only be exposed explicitly.

There is now an explicit parser type, yyParser, returned by a
constructor, yyNewParser.

	type yyParser interface {
		Parse(yyLexer) int
		Lookahead() int
	}

Parse runs a parse. A call to the top-level func yyParse
is equivalent to calling yyNewParser().Parse, but constructing
the parser explicitly makes it possible to access additional
parser methods, such as Lookahead.

Lookahead can be called during grammar actions to read
(but not consume) the value of the current lookahead token,
as returned by yylex.Lex. If there is no current lookahead token,
Lookahead returns -1. Invoking Lookahead corresponds to
reading the global variable yychar in a traditional Unix yacc grammar.

To support Lookahead, the internal parsing code now separates
the return value from Lex (yychar) from the reencoding used
by the parsing tables (yytoken). This has the effect that grammars
that read yychar directly in the action (possible since the actions
are in the same function that declares yychar) now correctly see values
from the Lex return value space, not the internal reencoding space.
This can fix bugs in ported grammars not even using yyNewParser and Lookahead.
(The reencoding was added on Plan 9 for large character sets.
No Plan 9 programs using yacc looked at yychar.)

Other methods may be added to yyParser later as needed.
Obvious candidates include equivalents for the traditional
yyclearin and yyerrok macros.

Change-Id: Iaf7649efcf97e09f44d1f5bc74bb563a11f225de
Reviewed-on: https://go-review.googlesource.com/4850
Reviewed-by: Rob Pike <r@golang.org>
parent 2633f2aa
...@@ -24,8 +24,8 @@ The directory $GOROOT/cmd/yacc/testdata/expr is a yacc program ...@@ -24,8 +24,8 @@ The directory $GOROOT/cmd/yacc/testdata/expr is a yacc program
for a very simple expression parser. See expr.y and main.go in that for a very simple expression parser. See expr.y and main.go in that
directory for examples of how to write and build yacc programs. directory for examples of how to write and build yacc programs.
The generated parser is reentrant. Parse expects to be given an The generated parser is reentrant. The parsing function yyParse expects
argument that conforms to the following interface: to be given an argument that conforms to the following interface:
type yyLexer interface { type yyLexer interface {
Lex(lval *yySymType) int Lex(lval *yySymType) int
...@@ -36,8 +36,27 @@ Lex should return the token identifier, and place other token ...@@ -36,8 +36,27 @@ Lex should return the token identifier, and place other token
information in lval (which replaces the usual yylval). information in lval (which replaces the usual yylval).
Error is equivalent to yyerror in the original yacc. Error is equivalent to yyerror in the original yacc.
Code inside the parser may refer to the variable yylex, Code inside the grammar actions may refer to the variable yylex,
which holds the yyLexer passed to Parse. which holds the yyLexer passed to yyParse.
Clients that need to understand more about the parser state can
create the parser separately from invoking it. The function yyNewParser
returns a yyParser conforming to the following interface:
type yyParser interface {
Parse(yyLexer) int
Lookahead() int
}
Parse runs the parser; the top-level call yyParse(yylex) is equivalent
to yyNewParser().Parse(yylex).
Lookahead can be called during grammar actions to read (but not consume)
the value of the current lookahead token, as returned by yylex.Lex.
If there is no current lookahead token (because the parser has not called Lex
or has consumed the token returned by the most recent call to Lex),
Lookahead returns -1. Calling Lookahead is equivalent to reading
yychar from within a grammar action.
Multiple grammars compiled into a single program should be placed in Multiple grammars compiled into a single program should be placed in
distinct packages. If that is impossible, the "-p prefix" flag to distinct packages. If that is impossible, the "-p prefix" flag to
......
...@@ -3205,6 +3205,26 @@ type $$Lexer interface { ...@@ -3205,6 +3205,26 @@ type $$Lexer interface {
Error(s string) Error(s string)
} }
type $$Parser interface {
Parse($$Lexer) int
Lookahead() int
}
type $$ParserImpl struct {
lookahead func() int
}
func (p *$$ParserImpl) Lookahead() int {
return p.lookahead()
}
func $$NewParser() $$Parser {
p := &$$ParserImpl{
lookahead: func() int { return -1 },
}
return p
}
const $$Flag = -1000 const $$Flag = -1000
func $$Tokname(c int) string { func $$Tokname(c int) string {
...@@ -3226,42 +3246,46 @@ func $$Statname(s int) string { ...@@ -3226,42 +3246,46 @@ func $$Statname(s int) string {
return __yyfmt__.Sprintf("state-%v", s) return __yyfmt__.Sprintf("state-%v", s)
} }
func $$lex1(lex $$Lexer, lval *$$SymType) int { func $$lex1(lex $$Lexer, lval *$$SymType) (char, token int) {
c := 0 token = 0
char := lex.Lex(lval) char = lex.Lex(lval)
if char <= 0 { if char <= 0 {
c = $$Tok1[0] token = $$Tok1[0]
goto out goto out
} }
if char < len($$Tok1) { if char < len($$Tok1) {
c = $$Tok1[char] token = $$Tok1[char]
goto out goto out
} }
if char >= $$Private { if char >= $$Private {
if char < $$Private+len($$Tok2) { if char < $$Private+len($$Tok2) {
c = $$Tok2[char-$$Private] token = $$Tok2[char-$$Private]
goto out goto out
} }
} }
for i := 0; i < len($$Tok3); i += 2 { for i := 0; i < len($$Tok3); i += 2 {
c = $$Tok3[i+0] token = $$Tok3[i+0]
if c == char { if token == char {
c = $$Tok3[i+1] token = $$Tok3[i+1]
goto out goto out
} }
} }
out: out:
if c == 0 { if token == 0 {
c = $$Tok2[1] /* unknown char */ token = $$Tok2[1] /* unknown char */
} }
if $$Debug >= 3 { if $$Debug >= 3 {
__yyfmt__.Printf("lex %s(%d)\n", $$Tokname(c), uint(char)) __yyfmt__.Printf("lex %s(%d)\n", $$Tokname(token), uint(char))
} }
return c return char, token
} }
func $$Parse($$lex $$Lexer) int { func $$Parse($$lex $$Lexer) int {
return $$NewParser().Parse($$lex)
}
func ($$rcvr *$$ParserImpl) Parse($$lex $$Lexer) int {
var $$n int var $$n int
var $$lval $$SymType var $$lval $$SymType
var $$VAL $$SymType var $$VAL $$SymType
...@@ -3272,6 +3296,13 @@ func $$Parse($$lex $$Lexer) int { ...@@ -3272,6 +3296,13 @@ func $$Parse($$lex $$Lexer) int {
Errflag := 0 /* error recovery flag */ Errflag := 0 /* error recovery flag */
$$state := 0 $$state := 0
$$char := -1 $$char := -1
$$token := -1 // $$char translated into internal numbering
$$rcvr.lookahead = func() int { return $$char }
defer func() {
// Make sure we report no lookahead when not parsing.
$$char = -1
$$token = -1
}()
$$p := -1 $$p := -1
goto $$stack goto $$stack
...@@ -3284,7 +3315,7 @@ ret1: ...@@ -3284,7 +3315,7 @@ ret1:
$$stack: $$stack:
/* put a state and value onto the stack */ /* put a state and value onto the stack */
if $$Debug >= 4 { if $$Debug >= 4 {
__yyfmt__.Printf("char %v in %v\n", $$Tokname($$char), $$Statname($$state)) __yyfmt__.Printf("char %v in %v\n", $$Tokname($$token), $$Statname($$state))
} }
$$p++ $$p++
...@@ -3302,15 +3333,16 @@ $$newstate: ...@@ -3302,15 +3333,16 @@ $$newstate:
goto $$default /* simple state */ goto $$default /* simple state */
} }
if $$char < 0 { if $$char < 0 {
$$char = $$lex1($$lex, &$$lval) $$char, $$token = $$lex1($$lex, &$$lval)
} }
$$n += $$char $$n += $$token
if $$n < 0 || $$n >= $$Last { if $$n < 0 || $$n >= $$Last {
goto $$default goto $$default
} }
$$n = $$Act[$$n] $$n = $$Act[$$n]
if $$Chk[$$n] == $$char { /* valid shift */ if $$Chk[$$n] == $$token { /* valid shift */
$$char = -1 $$char = -1
$$token = -1
$$VAL = $$lval $$VAL = $$lval
$$state = $$n $$state = $$n
if Errflag > 0 { if Errflag > 0 {
...@@ -3324,7 +3356,7 @@ $$default: ...@@ -3324,7 +3356,7 @@ $$default:
$$n = $$Def[$$state] $$n = $$Def[$$state]
if $$n == -2 { if $$n == -2 {
if $$char < 0 { if $$char < 0 {
$$char = $$lex1($$lex, &$$lval) $$char, $$token = $$lex1($$lex, &$$lval)
} }
/* look through exception table */ /* look through exception table */
...@@ -3337,7 +3369,7 @@ $$default: ...@@ -3337,7 +3369,7 @@ $$default:
} }
for xi += 2; ; xi += 2 { for xi += 2; ; xi += 2 {
$$n = $$Exca[xi+0] $$n = $$Exca[xi+0]
if $$n < 0 || $$n == $$char { if $$n < 0 || $$n == $$token {
break break
} }
} }
...@@ -3354,7 +3386,7 @@ $$default: ...@@ -3354,7 +3386,7 @@ $$default:
Nerrs++ Nerrs++
if $$Debug >= 1 { if $$Debug >= 1 {
__yyfmt__.Printf("%s", $$Statname($$state)) __yyfmt__.Printf("%s", $$Statname($$state))
__yyfmt__.Printf(" saw %s\n", $$Tokname($$char)) __yyfmt__.Printf(" saw %s\n", $$Tokname($$token))
} }
fallthrough fallthrough
...@@ -3382,12 +3414,13 @@ $$default: ...@@ -3382,12 +3414,13 @@ $$default:
case 3: /* no shift yet; clobber input char */ case 3: /* no shift yet; clobber input char */
if $$Debug >= 2 { if $$Debug >= 2 {
__yyfmt__.Printf("error recovery discards %s\n", $$Tokname($$char)) __yyfmt__.Printf("error recovery discards %s\n", $$Tokname($$token))
} }
if $$char == $$EofCode { if $$token == $$EofCode {
goto ret1 goto ret1
} }
$$char = -1 $$char = -1
$$token = -1
goto $$newstate /* try again in the same state */ goto $$newstate /* try again in the same state */
} }
} }
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment